| author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
| commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
| tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf | /contrib/llvm-project/llvm/lib/Target/RISCV |
| parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
| parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) | |
| download | src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.tar.gz | src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.zip |
Merge llvm-project main llvmorg-18-init-15088-gd14ee76181fb
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project main llvmorg-18-init-15088-gd14ee76181fb.
PR: 276104
MFC after: 1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV')
99 files changed, 16003 insertions, 5640 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 046a208921ae..f3ea0f597eec 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -16,7 +16,6 @@ #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -108,10 +107,9 @@ class RISCVAsmParser : public MCTargetAsmParser { uint64_t &ErrorInfo, bool MatchingInlineAsm) override; - bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) override; - OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) override; + bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; + ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, + SMLoc &EndLoc) override; bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; @@ -204,6 +202,7 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseFRMArg(OperandVector &Operands); ParseStatus parseFenceArg(OperandVector &Operands); ParseStatus parseReglist(OperandVector &Operands); + ParseStatus parseRegReg(OperandVector &Operands); ParseStatus parseRetval(OperandVector &Operands); ParseStatus parseZcmpSpimm(OperandVector &Operands); @@ -260,6 +259,7 @@ class RISCVAsmParser : public MCTargetAsmParser { std::unique_ptr<RISCVOperand> defaultMaskRegOp() const; std::unique_ptr<RISCVOperand> defaultFRMArgOp() const; + std::unique_ptr<RISCVOperand> defaultFRMArgLegacyOp() const; public: enum RISCVMatchResultTy { @@ -286,11 +286,11 @@ public: setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); auto ABIName = StringRef(Options.ABIName); - if (ABIName.endswith("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) { + if (ABIName.ends_with("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) { errs() << "Hard-float 'f' ABI can't be used for a target that " "doesn't support the F instruction set extension (ignoring " "target-abi)\n"; - } else if (ABIName.endswith("d") && + } else if (ABIName.ends_with("d") && !getSTI().hasFeature(RISCV::FeatureStdExtD)) { errs() << "Hard-float 'd' ABI can't be used for a target that " "doesn't support the D instruction set extension (ignoring " @@ -325,6 +325,7 @@ struct RISCVOperand final : public MCParsedAsmOperand { Fence, Rlist, Spimm, + RegReg, } Kind; struct RegOp { @@ -369,6 +370,11 @@ struct RISCVOperand final : public MCParsedAsmOperand { unsigned Val; }; + struct RegRegOp { + MCRegister Reg1; + MCRegister Reg2; + }; + SMLoc StartLoc, EndLoc; union { StringRef Tok; @@ -381,6 +387,7 @@ struct RISCVOperand final : public MCParsedAsmOperand { struct FenceOp Fence; struct RlistOp Rlist; struct SpimmOp Spimm; + struct RegRegOp RegReg; }; RISCVOperand(KindTy K) : Kind(K) {} @@ -421,6 +428,9 @@ public: case KindTy::Spimm: Spimm = o.Spimm; break; + case KindTy::RegReg: + RegReg = o.RegReg; + break; } } @@ -445,6 +455,7 @@ public: bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } bool isSystemRegister() const { return Kind == KindTy::SystemRegister; } + bool isRegReg() const { return Kind == KindTy::RegReg; } bool isRlist() const { return Kind == 
KindTy::Rlist; } bool isSpimm() const { return Kind == KindTy::Spimm; } @@ -564,6 +575,7 @@ public: /// Return true if the operand is a valid floating point rounding mode. bool isFRMArg() const { return Kind == KindTy::FRM; } + bool isFRMArgLegacy() const { return Kind == KindTy::FRM; } bool isRTZArg() const { return isFRMArg() && FRM.FRM == RISCVFPRndMode::RTZ; } /// Return true if the operand is a valid fli.s floating-point immediate. @@ -660,6 +672,7 @@ public: bool isUImm6() const { return IsUImm<6>(); } bool isUImm7() const { return IsUImm<7>(); } bool isUImm8() const { return IsUImm<8>(); } + bool isUImm20() const { return IsUImm<20>(); } bool isUImm8GE32() const { int64_t Imm; @@ -1024,6 +1037,10 @@ public: RISCVZC::printSpimm(Spimm.Val, OS); OS << '>'; break; + case KindTy::RegReg: + OS << "<RegReg: Reg1 " << RegName(RegReg.Reg1); + OS << " Reg2 " << RegName(RegReg.Reg2); + break; } } @@ -1107,6 +1124,16 @@ public: return Op; } + static std::unique_ptr<RISCVOperand> createRegReg(unsigned Reg1No, + unsigned Reg2No, SMLoc S) { + auto Op = std::make_unique<RISCVOperand>(KindTy::RegReg); + Op->RegReg.Reg1 = Reg1No; + Op->RegReg.Reg2 = Reg2No; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static std::unique_ptr<RISCVOperand> createSpimm(unsigned Spimm, SMLoc S) { auto Op = std::make_unique<RISCVOperand>(KindTy::Spimm); Op->Spimm.Val = Spimm; @@ -1182,6 +1209,12 @@ public: Inst.addOperand(MCOperand::createImm(Rlist.Val)); } + void addRegRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(RegReg.Reg1)); + Inst.addOperand(MCOperand::createReg(RegReg.Reg2)); + } + void addSpimmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(Spimm.Val)); @@ -1478,6 +1511,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "operand must be a symbol with " "%hi/%tprel_hi modifier or an integer in " "the range"); + case Match_InvalidUImm20: + return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1); case Match_InvalidUImm20AUIPC: return generateImmOutOfRangeError( Operands, ErrorInfo, 0, (1 << 20) - 1, @@ -1546,6 +1581,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidRnumArg: { return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10); } + case Match_InvalidRegReg: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "operands must be register and register"); + } } llvm_unreachable("Unknown match type detected!"); @@ -1571,27 +1610,26 @@ static MCRegister matchRegisterNameHelper(bool IsRVE, StringRef Name) { return Reg; } -bool RISCVAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, +bool RISCVAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) { - if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success) + if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess()) return Error(StartLoc, "invalid register name"); return false; } -OperandMatchResultTy RISCVAsmParser::tryParseRegister(MCRegister &RegNo, - SMLoc &StartLoc, - SMLoc &EndLoc) { +ParseStatus RISCVAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, + SMLoc &EndLoc) { const AsmToken &Tok = getParser().getTok(); StartLoc = Tok.getLoc(); EndLoc = Tok.getEndLoc(); StringRef Name = getLexer().getTok().getIdentifier(); - RegNo = matchRegisterNameHelper(isRVE(), Name); - if (!RegNo) - 
return MatchOperand_NoMatch; + Reg = matchRegisterNameHelper(isRVE(), Name); + if (!Reg) + return ParseStatus::NoMatch; getParser().Lex(); // Eat identifier token. - return MatchOperand_Success; + return ParseStatus::Success; } ParseStatus RISCVAsmParser::parseRegister(OperandVector &Operands, @@ -2379,6 +2417,37 @@ ParseStatus RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) { return ParseStatus::Success; } +ParseStatus RISCVAsmParser::parseRegReg(OperandVector &Operands) { + // RR : a2(a1) + if (getLexer().getKind() != AsmToken::Identifier) + return ParseStatus::NoMatch; + + StringRef RegName = getLexer().getTok().getIdentifier(); + MCRegister Reg = matchRegisterNameHelper(isRVE(), RegName); + if (!Reg) + return Error(getLoc(), "invalid register"); + getLexer().Lex(); + + if (parseToken(AsmToken::LParen, "expected '(' or invalid operand")) + return ParseStatus::Failure; + + if (getLexer().getKind() != AsmToken::Identifier) + return Error(getLoc(), "expected register"); + + StringRef Reg2Name = getLexer().getTok().getIdentifier(); + MCRegister Reg2 = matchRegisterNameHelper(isRVE(), Reg2Name); + if (!Reg2) + return Error(getLoc(), "invalid register"); + getLexer().Lex(); + + if (parseToken(AsmToken::RParen, "expected ')'")) + return ParseStatus::Failure; + + Operands.push_back(RISCVOperand::createRegReg(Reg, Reg2, getLoc())); + + return ParseStatus::Success; +} + ParseStatus RISCVAsmParser::parseReglist(OperandVector &Operands) { // Rlist: {ra [, s0[-sN]]} // XRlist: {x1 [, x8[-x9][, x18[-xN]]]} @@ -2974,8 +3043,7 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) { void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value, MCStreamer &Out) { - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits()); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, getSTI()); MCRegister SrcReg = RISCV::X0; for (const RISCVMatInt::Inst &Inst : Seq) { @@ -3200,7 +3268,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(1)) .addOperand(Inst.getOperand(2)) .addOperand(Inst.getOperand(3)) - .addOperand(Inst.getOperand(4))); + .addReg(RISCV::NoRegister)); emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) @@ -3209,8 +3277,8 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, // masked va >= x, any vd // // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt - // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd, - // vd, v0; vmor.mm vd, vt, vd + // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; + // vmandn.mm vd, vd, v0; vmor.mm vd, vt, vd assert(Inst.getOperand(1).getReg() != RISCV::V0 && "The temporary vector register should not be V0."); emitToStreamer(Out, MCInstBuilder(Opcode) @@ -3256,6 +3324,11 @@ std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgOp() const { llvm::SMLoc()); } +std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgLegacyOp() const { + return RISCVOperand::createFRMArg(RISCVFPRndMode::RoundingMode::RNE, + llvm::SMLoc()); +} + bool RISCVAsmParser::validateInstruction(MCInst &Inst, OperandVector &Operands) { unsigned Opcode = Inst.getOpcode(); @@ -3352,16 +3425,21 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst, } unsigned DestReg = Inst.getOperand(0).getReg(); + unsigned Offset = 0; + int TiedOp = MCID.getOperandConstraint(1, MCOI::TIED_TO); + if (TiedOp == 0) + Offset = 1; + // Operands[1] will be 
the first operand, DestReg. SMLoc Loc = Operands[1]->getStartLoc(); if (MCID.TSFlags & RISCVII::VS2Constraint) { - unsigned CheckReg = Inst.getOperand(1).getReg(); + unsigned CheckReg = Inst.getOperand(Offset + 1).getReg(); if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" " the source vector register group."); } - if ((MCID.TSFlags & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) { - unsigned CheckReg = Inst.getOperand(2).getReg(); + if ((MCID.TSFlags & RISCVII::VS1Constraint) && Inst.getOperand(Offset + 2).isReg()) { + unsigned CheckReg = Inst.getOperand(Offset + 2).getReg(); if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" " the source vector register group."); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index e6ea6baa72ff..53e2b6b4d94e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -196,10 +196,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint32_t RegNo, static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - if (RegNo >= 32) - return MCDisassembler::Fail; - - if (RegNo % 2) + if (RegNo >= 32 || RegNo % 2) return MCDisassembler::Fail; const RISCVDisassembler *Dis = @@ -216,10 +213,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo, static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - if (RegNo >= 32) - return MCDisassembler::Fail; - - if (RegNo % 4) + if (RegNo >= 32 || RegNo % 4) return MCDisassembler::Fail; const RISCVDisassembler *Dis = @@ -236,10 +230,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo, static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - if (RegNo >= 32) - return MCDisassembler::Fail; - - if (RegNo % 8) + if (RegNo >= 32 || RegNo % 8) return MCDisassembler::Fail; const RISCVDisassembler *Dis = @@ -256,16 +247,11 @@ static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo, static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - MCRegister Reg = RISCV::NoRegister; - switch (RegNo) { - default: + if (RegNo > 2) { return MCDisassembler::Fail; - case 0: - Reg = RISCV::V0; - break; - case 1: - break; } + MCRegister Reg = (RegNo == 0) ? 
RISCV::V0 : RISCV::NoRegister; + Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -367,6 +353,9 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder); +static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address, + const MCDisassembler *Decoder); + static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder); @@ -464,6 +453,15 @@ static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm, return MCDisassembler::Success; } +static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address, + const MCDisassembler *Decoder) { + uint32_t Rs1 = fieldFromInstruction(Insn, 0, 5); + uint32_t Rs2 = fieldFromInstruction(Insn, 5, 5); + DecodeGPRRegisterClass(Inst, Rs1, Address, Decoder); + DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder); + return MCDisassembler::Success; +} + // spimm is based on rlist now. static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { @@ -528,43 +526,65 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32, "RVZfinx table (Float in Integer)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps, - DecoderTableVentana32, "Ventana custom opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableTHeadBa32, + DecoderTableXVentana32, "Ventana custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableXTHeadBa32, "XTHeadBa custom opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableTHeadBb32, + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableXTHeadBb32, "XTHeadBb custom opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableTHeadBs32, + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableXTHeadBs32, "XTHeadBs custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCondMov, - DecoderTableTHeadCondMov32, + DecoderTableXTHeadCondMov32, "XTHeadCondMov custom opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableTHeadCmo32, + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableXTHeadCmo32, "XTHeadCmo custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadFMemIdx, - DecoderTableTHeadFMemIdx32, + DecoderTableXTHeadFMemIdx32, "XTHeadFMemIdx custom opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableTHeadMac32, + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableXTHeadMac32, "XTHeadMac custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemIdx, - DecoderTableTHeadMemIdx32, + DecoderTableXTHeadMemIdx32, "XTHeadMemIdx custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemPair, - DecoderTableTHeadMemPair32, + DecoderTableXTHeadMemPair32, "XTHeadMemPair custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadSync, - DecoderTableTHeadSync32, + DecoderTableXTHeadSync32, "XTHeadSync custom opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableTHeadV32, + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableXTHeadVdot32, "XTHeadVdot custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfvcp, DecoderTableXSfvcp32, "SiFive VCIX custom opcode table"); + 
TRY_TO_DECODE_FEATURE( + RISCV::FeatureVendorXSfvqmaccdod, DecoderTableXSfvqmaccdod32, + "SiFive Matrix Multiplication (2x8 and 8x2) Instruction opcode table"); + TRY_TO_DECODE_FEATURE( + RISCV::FeatureVendorXSfvqmaccqoq, DecoderTableXSfvqmaccqoq32, + "SiFive Matrix Multiplication (4x8 and 8x4) Instruction opcode table"); + TRY_TO_DECODE_FEATURE( + RISCV::FeatureVendorXSfvfwmaccqqq, DecoderTableXSfvfwmaccqqq32, + "SiFive Matrix Multiplication Instruction opcode table"); + TRY_TO_DECODE_FEATURE( + RISCV::FeatureVendorXSfvfnrclipxfqf, DecoderTableXSfvfnrclipxfqf32, + "SiFive FP32-to-int8 Ranged Clip Instructions opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32, "Sifive CIE custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip, DecoderTableXCVbitmanip32, "CORE-V Bit Manipulation custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVelw, DecoderTableXCVelw32, + "CORE-V Event load custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmac, DecoderTableXCVmac32, "CORE-V MAC custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmem, DecoderTableXCVmem32, + "CORE-V MEM custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCValu, DecoderTableXCValu32, + "CORE-V ALU custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVsimd, DecoderTableXCVsimd32, + "CORE-V SIMD extensions custom opcode table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbi, DecoderTableXCVbi32, + "CORE-V Immediate Branching custom opcode table"); TRY_TO_DECODE(true, DecoderTable32, "RISCV32 table"); return MCDisassembler::Fail; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index 5505f89a32f2..50ed85acdec0 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -14,9 +14,11 @@ #include "RISCVCallLowering.h" #include "RISCVISelLowering.h" +#include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" using namespace llvm; @@ -46,37 +48,118 @@ public: const DataLayout &DL = MF.getDataLayout(); const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); - return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, - LocInfo, Flags, State, /*IsFixed=*/true, IsRet, - Info.Ty, *Subtarget.getTargetLowering(), - /*FirstMaskArgument=*/std::nullopt); + if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, + LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty, + *Subtarget.getTargetLowering(), + /*FirstMaskArgument=*/std::nullopt)) + return true; + + StackSize = State.getStackSize(); + return false; } }; struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler { RISCVOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, MachineInstrBuilder MIB) - : OutgoingValueHandler(B, MRI), MIB(MIB) {} - - MachineInstrBuilder MIB; - + : OutgoingValueHandler(B, MRI), MIB(MIB), + Subtarget(MIRBuilder.getMF().getSubtarget<RISCVSubtarget>()) {} Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags) override { - llvm_unreachable("not implemented"); + MachineFunction &MF = MIRBuilder.getMF(); + LLT p0 = LLT::pointer(0, Subtarget.getXLen()); + LLT sXLen = 
LLT::scalar(Subtarget.getXLen()); + + if (!SPReg) + SPReg = MIRBuilder.buildCopy(p0, Register(RISCV::X2)).getReg(0); + + auto OffsetReg = MIRBuilder.buildConstant(sXLen, Offset); + + auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg); + + MPO = MachinePointerInfo::getStack(MF, Offset); + return AddrReg.getReg(0); } void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, - MachinePointerInfo &MPO, CCValAssign &VA) override { - llvm_unreachable("not implemented"); + const MachinePointerInfo &MPO, + const CCValAssign &VA) override { + MachineFunction &MF = MIRBuilder.getMF(); + uint64_t LocMemOffset = VA.getLocMemOffset(); + + // TODO: Move StackAlignment to subtarget and share with FrameLowering. + auto MMO = + MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy, + commonAlignment(Align(16), LocMemOffset)); + + Register ExtReg = extendRegister(ValVReg, VA); + MIRBuilder.buildStore(ExtReg, Addr, *MMO); } void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign VA) override { + const CCValAssign &VA) override { + // If we're passing an f32 value into an i64, anyextend before copying. + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) + ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0); + Register ExtReg = extendRegister(ValVReg, VA); MIRBuilder.buildCopy(PhysReg, ExtReg); MIB.addUse(PhysReg, RegState::Implicit); } + + unsigned assignCustomValue(CallLowering::ArgInfo &Arg, + ArrayRef<CCValAssign> VAs, + std::function<void()> *Thunk) override { + assert(VAs.size() >= 2 && "Expected at least 2 VAs."); + const CCValAssign &VALo = VAs[0]; + const CCValAssign &VAHi = VAs[1]; + + assert(VAHi.needsCustom() && "Value doesn't need custom handling"); + assert(VALo.getValNo() == VAHi.getValNo() && + "Values belong to different arguments"); + + assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 && + VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 && + "unexpected custom value"); + + Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), + MRI.createGenericVirtualRegister(LLT::scalar(32))}; + MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]); + + if (VAHi.isMemLoc()) { + LLT MemTy(VAHi.getLocVT()); + + MachinePointerInfo MPO; + Register StackAddr = getStackAddress( + MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]); + + assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO, + const_cast<CCValAssign &>(VAHi)); + } + + auto assignFunc = [=]() { + assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo); + if (VAHi.isRegLoc()) + assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi); + }; + + if (Thunk) { + *Thunk = assignFunc; + return 1; + } + + assignFunc(); + return 1; + } + +private: + MachineInstrBuilder MIB; + + // Cache the SP register vreg if we need it more than once in this call site. 
+ Register SPReg; + + const RISCVSubtarget &Subtarget; }; struct RISCVIncomingValueAssigner : public CallLowering::IncomingValueAssigner { @@ -103,33 +186,105 @@ public: const DataLayout &DL = MF.getDataLayout(); const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); - return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, - LocInfo, Flags, State, /*IsFixed=*/true, IsRet, - Info.Ty, *Subtarget.getTargetLowering(), - /*FirstMaskArgument=*/std::nullopt); + if (LocVT.isScalableVector()) + MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); + + if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, + LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty, + *Subtarget.getTargetLowering(), + /*FirstMaskArgument=*/std::nullopt)) + return true; + + StackSize = State.getStackSize(); + return false; } }; struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler { RISCVIncomingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI) - : IncomingValueHandler(B, MRI) {} + : IncomingValueHandler(B, MRI), + Subtarget(MIRBuilder.getMF().getSubtarget<RISCVSubtarget>()) {} Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags) override { - llvm_unreachable("not implemented"); + MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo(); + + int FI = MFI.CreateFixedObject(MemSize, Offset, /*Immutable=*/true); + MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); + return MIRBuilder.buildFrameIndex(LLT::pointer(0, Subtarget.getXLen()), FI) + .getReg(0); } void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, - MachinePointerInfo &MPO, CCValAssign &VA) override { - llvm_unreachable("not implemented"); + const MachinePointerInfo &MPO, + const CCValAssign &VA) override { + MachineFunction &MF = MIRBuilder.getMF(); + auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy, + inferAlignFromPtrInfo(MF, MPO)); + MIRBuilder.buildLoad(ValVReg, Addr, *MMO); } void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign VA) override { - // Copy argument received in physical register to desired VReg. 
+ const CCValAssign &VA) override { + markPhysRegUsed(PhysReg); + IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); + } + + unsigned assignCustomValue(CallLowering::ArgInfo &Arg, + ArrayRef<CCValAssign> VAs, + std::function<void()> *Thunk) override { + assert(VAs.size() >= 2 && "Expected at least 2 VAs."); + const CCValAssign &VALo = VAs[0]; + const CCValAssign &VAHi = VAs[1]; + + assert(VAHi.needsCustom() && "Value doesn't need custom handling"); + assert(VALo.getValNo() == VAHi.getValNo() && + "Values belong to different arguments"); + + assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 && + VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 && + "unexpected custom value"); + + Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), + MRI.createGenericVirtualRegister(LLT::scalar(32))}; + + if (VAHi.isMemLoc()) { + LLT MemTy(VAHi.getLocVT()); + + MachinePointerInfo MPO; + Register StackAddr = getStackAddress( + MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]); + + assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO, + const_cast<CCValAssign &>(VAHi)); + } + + assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo); + if (VAHi.isRegLoc()) + assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi); + + MIRBuilder.buildMergeLikeInstr(Arg.Regs[0], NewRegs); + + return 1; + } + + /// How the physical register gets marked varies between formal + /// parameters (it's a basic-block live-in), and a call instruction + /// (it's an implicit-def of the BL). + virtual void markPhysRegUsed(MCRegister PhysReg) = 0; + +private: + const RISCVSubtarget &Subtarget; +}; + +struct RISCVFormalArgHandler : public RISCVIncomingValueHandler { + RISCVFormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI) + : RISCVIncomingValueHandler(B, MRI) {} + + void markPhysRegUsed(MCRegister PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); - MIRBuilder.buildCopy(ValVReg, PhysReg); } }; @@ -138,14 +293,11 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler { MachineInstrBuilder &MIB) : RISCVIncomingValueHandler(B, MRI), MIB(MIB) {} - MachineInstrBuilder MIB; - - void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign VA) override { - // Copy argument received in physical register to desired VReg. + void markPhysRegUsed(MCRegister PhysReg) override { MIB.addDef(PhysReg, RegState::Implicit); - MIRBuilder.buildCopy(ValVReg, PhysReg); } + + MachineInstrBuilder MIB; }; } // namespace @@ -153,6 +305,80 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler { RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI) : CallLowering(&TLI) {} +/// Return true if scalable vector with ScalarTy is legal for lowering. +static bool isLegalElementTypeForRVV(Type *EltTy, + const RISCVSubtarget &Subtarget) { + if (EltTy->isPointerTy()) + return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; + if (EltTy->isIntegerTy(1) || EltTy->isIntegerTy(8) || + EltTy->isIntegerTy(16) || EltTy->isIntegerTy(32)) + return true; + if (EltTy->isIntegerTy(64)) + return Subtarget.hasVInstructionsI64(); + if (EltTy->isHalfTy()) + return Subtarget.hasVInstructionsF16(); + if (EltTy->isBFloatTy()) + return Subtarget.hasVInstructionsBF16(); + if (EltTy->isFloatTy()) + return Subtarget.hasVInstructionsF32(); + if (EltTy->isDoubleTy()) + return Subtarget.hasVInstructionsF64(); + return false; +} + +// TODO: Support all argument types. 
+// TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall. +static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget, + bool IsLowerArgs = false) { + // TODO: Integers larger than 2*XLen are passed indirectly which is not + // supported yet. + if (T->isIntegerTy()) + return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; + if (T->isFloatTy() || T->isDoubleTy()) + return true; + if (T->isPointerTy()) + return true; + // TODO: Support fixed vector types. + if (IsLowerArgs && T->isVectorTy() && Subtarget.hasVInstructions() && + T->isScalableTy() && + isLegalElementTypeForRVV(T->getScalarType(), Subtarget)) + return true; + return false; +} + +// TODO: Only integer, pointer and aggregate types are supported now. +// TODO: Remove IsLowerRetVal argument by adding support for vectors in +// lowerCall. +static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget, + bool IsLowerRetVal = false) { + // TODO: Integers larger than 2*XLen are passed indirectly which is not + // supported yet. + if (T->isIntegerTy()) + return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; + if (T->isFloatTy() || T->isDoubleTy()) + return true; + if (T->isPointerTy()) + return true; + + if (T->isArrayTy()) + return isSupportedReturnType(T->getArrayElementType(), Subtarget); + + if (T->isStructTy()) { + auto StructT = cast<StructType>(T); + for (unsigned i = 0, e = StructT->getNumElements(); i != e; ++i) + if (!isSupportedReturnType(StructT->getElementType(i), Subtarget)) + return false; + return true; + } + + if (IsLowerRetVal && T->isVectorTy() && Subtarget.hasVInstructions() && + T->isScalableTy() && + isLegalElementTypeForRVV(T->getScalarType(), Subtarget)) + return true; + + return false; +} + bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, @@ -160,8 +386,9 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, if (!Val) return true; - // TODO: Only integer, pointer and aggregate types are supported now. - if (!Val->getType()->isIntOrPtrTy() && !Val->getType()->isAggregateType()) + const RISCVSubtarget &Subtarget = + MIRBuilder.getMF().getSubtarget<RISCVSubtarget>(); + if (!isSupportedReturnType(Val->getType(), Subtarget, /*IsLowerRetVal=*/true)) return false; MachineFunction &MF = MIRBuilder.getMF(); @@ -196,25 +423,89 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return true; } +/// If there are varargs that were passed in a0-a7, the data in those registers +/// must be copied to the varargs save area on the stack. +void RISCVCallLowering::saveVarArgRegisters( + MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler, + IncomingValueAssigner &Assigner, CCState &CCInfo) const { + MachineFunction &MF = MIRBuilder.getMF(); + const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); + unsigned XLenInBytes = Subtarget.getXLen() / 8; + ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + MachineFrameInfo &MFI = MF.getFrameInfo(); + RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); + + // Size of the vararg save area. For now, the varargs save area is either + // zero or large enough to hold a0-a7. + int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); + int FI; + + // If all registers are allocated, then all varargs must be passed on the + // stack and we don't need to save any argregs. 
+ if (VarArgsSaveSize == 0) { + int VaArgOffset = Assigner.StackSize; + FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); + } else { + int VaArgOffset = -VarArgsSaveSize; + FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true); + + // If saving an odd number of registers then create an extra stack slot to + // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures + // offsets to even-numbered registered remain 2*XLEN-aligned. + if (Idx % 2) { + MFI.CreateFixedObject(XLenInBytes, + VaArgOffset - static_cast<int>(XLenInBytes), true); + VarArgsSaveSize += XLenInBytes; + } + + const LLT p0 = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(), + Subtarget.getXLen()); + const LLT sXLen = LLT::scalar(Subtarget.getXLen()); + + auto FIN = MIRBuilder.buildFrameIndex(p0, FI); + auto Offset = MIRBuilder.buildConstant( + MRI.createGenericVirtualRegister(sXLen), XLenInBytes); + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + const MVT XLenVT = Subtarget.getXLenVT(); + for (unsigned I = Idx; I < ArgRegs.size(); ++I) { + const Register VReg = MRI.createGenericVirtualRegister(sXLen); + Handler.assignValueToReg( + VReg, ArgRegs[I], + CCValAssign::getReg(I + MF.getFunction().getNumOperands(), XLenVT, + ArgRegs[I], XLenVT, CCValAssign::Full)); + auto MPO = + MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes); + MIRBuilder.buildStore(VReg, FIN, MPO, inferAlignFromPtrInfo(MF, MPO)); + FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0), + FIN.getReg(0), Offset); + } + } + + // Record the frame index of the first variable argument which is a value + // necessary to G_VASTART. + RVFI->setVarArgsFrameIndex(FI); + RVFI->setVarArgsSaveSize(VarArgsSaveSize); +} + bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { - // Early exit if there are no arguments. - if (F.arg_empty()) + // Early exit if there are no arguments. varargs are not part of F.args() but + // must be lowered. + if (F.arg_empty() && !F.isVarArg()) return true; - // TODO: Support vararg functions. - if (F.isVarArg()) - return false; - - // TODO: Support all argument types. + const RISCVSubtarget &Subtarget = + MIRBuilder.getMF().getSubtarget<RISCVSubtarget>(); for (auto &Arg : F.args()) { - if (Arg.getType()->isIntegerTy()) - continue; - if (Arg.getType()->isPointerTy()) - continue; - return false; + if (!isSupportedArgumentType(Arg.getType(), Subtarget, + /*IsLowerArgs=*/true)) + return false; } MachineFunction &MF = MIRBuilder.getMF(); @@ -239,10 +530,18 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, RISCVIncomingValueAssigner Assigner( CC == CallingConv::Fast ? 
RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, /*IsRet=*/false); - RISCVIncomingValueHandler Handler(MIRBuilder, MF.getRegInfo()); + RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo()); - return determineAndHandleAssignments(Handler, Assigner, SplitArgInfos, - MIRBuilder, CC, F.isVarArg()); + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, F.isVarArg(), MIRBuilder.getMF(), ArgLocs, F.getContext()); + if (!determineAssignments(Assigner, SplitArgInfos, CCInfo) || + !handleAssignments(Handler, SplitArgInfos, CCInfo, ArgLocs, MIRBuilder)) + return false; + + if (F.isVarArg()) + saveVarArgRegisters(MIRBuilder, Handler, Assigner, CCInfo); + + return true; } bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, @@ -252,21 +551,20 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const Function &F = MF.getFunction(); CallingConv::ID CC = F.getCallingConv(); - // TODO: Support vararg functions. - if (Info.IsVarArg) - return false; - - // TODO: Support all argument types. + const RISCVSubtarget &Subtarget = + MIRBuilder.getMF().getSubtarget<RISCVSubtarget>(); for (auto &AInfo : Info.OrigArgs) { - if (AInfo.Ty->isIntegerTy()) - continue; - if (AInfo.Ty->isPointerTy()) - continue; - if (AInfo.Ty->isFloatingPointTy()) - continue; - return false; + if (!isSupportedArgumentType(AInfo.Ty, Subtarget)) + return false; } + if (!Info.OrigRet.Ty->isVoidTy() && + !isSupportedReturnType(Info.OrigRet.Ty, Subtarget)) + return false; + + MachineInstrBuilder CallSeqStart = + MIRBuilder.buildInstr(RISCV::ADJCALLSTACKDOWN); + SmallVector<ArgInfo, 32> SplitArgInfos; SmallVector<ISD::OutputArg, 8> Outs; for (auto &AInfo : Info.OrigArgs) { @@ -279,14 +577,17 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // TODO: Support tail calls. Info.IsTailCall = false; + // Select the recommended relocation type R_RISCV_CALL_PLT. if (!Info.Callee.isReg()) - Info.Callee.setTargetFlags(RISCVII::MO_CALL); + Info.Callee.setTargetFlags(RISCVII::MO_PLT); MachineInstrBuilder Call = MIRBuilder .buildInstrNoInsert(Info.Callee.isReg() ? RISCV::PseudoCALLIndirect : RISCV::PseudoCALL) .add(Info.Callee); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); RISCVOutgoingValueAssigner ArgAssigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, @@ -298,22 +599,26 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MIRBuilder.insertInstr(Call); + CallSeqStart.addImm(ArgAssigner.StackSize).addImm(0); + MIRBuilder.buildInstr(RISCV::ADJCALLSTACKUP) + .addImm(ArgAssigner.StackSize) + .addImm(0); + + // If Callee is a reg, since it is used by a target specific + // instruction, it must have a register class matching the + // constraint of that instruction. + if (Call->getOperand(0).isReg()) + constrainOperandRegClass(MF, *TRI, MF.getRegInfo(), + *Subtarget.getInstrInfo(), + *Subtarget.getRegBankInfo(), *Call, + Call->getDesc(), Call->getOperand(0), 0); + if (Info.OrigRet.Ty->isVoidTy()) return true; - // TODO: Only integer, pointer and aggregate types are supported now. - if (!Info.OrigRet.Ty->isIntOrPtrTy() && !Info.OrigRet.Ty->isAggregateType()) - return false; - SmallVector<ArgInfo, 4> SplitRetInfos; splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC); - // Assignments should be handled *before* the merging of values takes place. - // To ensure this, the insert point is temporarily adjusted to just after the - // call instruction. 
- MachineBasicBlock::iterator CallInsertPt = Call; - MIRBuilder.setInsertPt(MIRBuilder.getMBB(), std::next(CallInsertPt)); - RISCVIncomingValueAssigner RetAssigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, /*IsRet=*/true); @@ -322,8 +627,5 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MIRBuilder, CC, Info.IsVarArg)) return false; - // Readjust insert point to end of basic block. - MIRBuilder.setMBB(MIRBuilder.getMBB()); - return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h index d80a666f3489..abe704b4a645 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h @@ -42,6 +42,11 @@ public: private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const; + + void saveVarArgRegisters(MachineIRBuilder &MIRBuilder, + CallLowering::IncomingValueHandler &Handler, + IncomingValueAssigner &Assigner, + CCState &CCInfo) const; }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 691439b3a18b..61bdbfc47d94 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -11,17 +11,23 @@ /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// +#include "MCTargetDesc/RISCVMatInt.h" #include "RISCVRegisterBankInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "riscv-isel" using namespace llvm; +using namespace MIPatternMatch; #define GET_GLOBALISEL_PREDICATE_BITSET #include "RISCVGenGlobalISel.inc" @@ -35,16 +41,86 @@ public: const RISCVSubtarget &STI, const RISCVRegisterBankInfo &RBI); - bool select(MachineInstr &I) override; + bool select(MachineInstr &MI) override; static const char *getName() { return DEBUG_TYPE; } private: + const TargetRegisterClass * + getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) const; + + bool isRegInGprb(Register Reg, MachineRegisterInfo &MRI) const; + bool isRegInFprb(Register Reg, MachineRegisterInfo &MRI) const; + + // tblgen-erated 'select' implementation, used as the initial selector for + // the patterns that don't require complex C++. bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + // A lowering phase that runs before any selection attempts. + // Returns true if the instruction was modified. 
+ void preISelLower(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI); + + bool replacePtrWithInt(MachineOperand &Op, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI); + + // Custom selection methods + bool selectCopy(MachineInstr &MI, MachineRegisterInfo &MRI) const; + bool selectImplicitDef(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; + bool materializeImm(Register Reg, int64_t Imm, MachineIRBuilder &MIB) const; + bool selectAddr(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI, bool IsLocal = true, + bool IsExternWeak = false) const; + bool selectSExtInreg(MachineInstr &MI, MachineIRBuilder &MIB) const; + bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; + bool selectFPCompare(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; + bool selectIntrinsicWithSideEffects(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; + void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID, + MachineIRBuilder &MIB) const; + bool selectMergeValues(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; + bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const; + + ComplexRendererFns selectShiftMask(MachineOperand &Root) const; + ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const; + + ComplexRendererFns selectSHXADDOp(MachineOperand &Root, unsigned ShAmt) const; + template <unsigned ShAmt> + ComplexRendererFns selectSHXADDOp(MachineOperand &Root) const { + return selectSHXADDOp(Root, ShAmt); + } + + ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root, + unsigned ShAmt) const; + template <unsigned ShAmt> + ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root) const { + return selectSHXADD_UWOp(Root, ShAmt); + } + + // Custom renderers for tablegen + void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderImmSubFromXLen(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderImmSubFrom32(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderImmPlus1(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + + void renderTrailingZeros(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + const RISCVSubtarget &STI; const RISCVInstrInfo &TII; const RISCVRegisterInfo &TRI; const RISCVRegisterBankInfo &RBI; + const RISCVTargetMachine &TM; // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel // uses "STI." in the code generated by TableGen. We need to unify the name of @@ -70,6 +146,7 @@ RISCVInstructionSelector::RISCVInstructionSelector( const RISCVTargetMachine &TM, const RISCVSubtarget &STI, const RISCVRegisterBankInfo &RBI) : STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), + TM(TM), #define GET_GLOBALISEL_PREDICATES_INIT #include "RISCVGenGlobalISel.inc" @@ -80,19 +157,1111 @@ RISCVInstructionSelector::RISCVInstructionSelector( { } -bool RISCVInstructionSelector::select(MachineInstr &I) { +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const { + // TODO: Also check if we are seeing the result of an AND operation which + // could be bypassed since we only check the lower log2(xlen) bits. 
+ return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; +} + +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root, + unsigned ShAmt) const { + using namespace llvm::MIPatternMatch; + MachineFunction &MF = *Root.getParent()->getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (!Root.isReg()) + return std::nullopt; + Register RootReg = Root.getReg(); + + const unsigned XLen = STI.getXLen(); + APInt Mask, C2; + Register RegY; + std::optional<bool> LeftShift; + // (and (shl y, c2), mask) + if (mi_match(RootReg, MRI, + m_GAnd(m_GShl(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask)))) + LeftShift = true; + // (and (lshr y, c2), mask) + else if (mi_match(RootReg, MRI, + m_GAnd(m_GLShr(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask)))) + LeftShift = false; + + if (LeftShift.has_value()) { + if (*LeftShift) + Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue()); + else + Mask &= maskTrailingOnes<uint64_t>(XLen - C2.getLimitedValue()); + + if (Mask.isShiftedMask()) { + unsigned Leading = XLen - Mask.getActiveBits(); + unsigned Trailing = Mask.countr_zero(); + // Given (and (shl y, c2), mask) in which mask has no leading zeros and + // c3 trailing zeros. We can use an SRLI by c3 - c2 followed by a SHXADD. + if (*LeftShift && Leading == 0 && C2.ult(Trailing) && Trailing == ShAmt) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLI, {DstReg}, {RegY}) + .addImm(Trailing - C2.getLimitedValue()); + MIB.addReg(DstReg); + }}}; + } + + // Given (and (lshr y, c2), mask) in which mask has c2 leading zeros and + // c3 trailing zeros. We can use an SRLI by c2 + c3 followed by a SHXADD. + if (!*LeftShift && Leading == C2 && Trailing == ShAmt) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLI, {DstReg}, {RegY}) + .addImm(Leading + Trailing); + MIB.addReg(DstReg); + }}}; + } + } + } + + LeftShift.reset(); + + // (shl (and y, mask), c2) + if (mi_match(RootReg, MRI, + m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))), + m_ICst(C2)))) + LeftShift = true; + // (lshr (and y, mask), c2) + else if (mi_match(RootReg, MRI, + m_GLShr(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))), + m_ICst(C2)))) + LeftShift = false; + + if (LeftShift.has_value() && Mask.isShiftedMask()) { + unsigned Leading = XLen - Mask.getActiveBits(); + unsigned Trailing = Mask.countr_zero(); + + // Given (shl (and y, mask), c2) in which mask has 32 leading zeros and + // c3 trailing zeros. If c1 + c3 == ShAmt, we can emit SRLIW + SHXADD. + bool Cond = *LeftShift && Leading == 32 && Trailing > 0 && + (Trailing + C2.getLimitedValue()) == ShAmt; + if (!Cond) + // Given (lshr (and y, mask), c2) in which mask has 32 leading zeros and + // c3 trailing zeros. If c3 - c1 == ShAmt, we can emit SRLIW + SHXADD. 
+ Cond = !*LeftShift && Leading == 32 && C2.ult(Trailing) && + (Trailing - C2.getLimitedValue()) == ShAmt; + + if (Cond) { + Register DstReg = MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLIW, {DstReg}, {RegY}) + .addImm(Trailing); + MIB.addReg(DstReg); + }}}; + } + } + + return std::nullopt; +} + +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectSHXADD_UWOp(MachineOperand &Root, + unsigned ShAmt) const { + using namespace llvm::MIPatternMatch; + MachineFunction &MF = *Root.getParent()->getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (!Root.isReg()) + return std::nullopt; + Register RootReg = Root.getReg(); + + // Given (and (shl x, c2), mask) in which mask is a shifted mask with + // 32 - ShAmt leading zeros and c2 trailing zeros. We can use SLLI by + // c2 - ShAmt followed by SHXADD_UW with ShAmt for x amount. + APInt Mask, C2; + Register RegX; + if (mi_match( + RootReg, MRI, + m_OneNonDBGUse(m_GAnd(m_OneNonDBGUse(m_GShl(m_Reg(RegX), m_ICst(C2))), + m_ICst(Mask))))) { + Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue()); + + if (Mask.isShiftedMask()) { + unsigned Leading = Mask.countl_zero(); + unsigned Trailing = Mask.countr_zero(); + if (Leading == 32 - ShAmt && C2 == Trailing && Trailing > ShAmt) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SLLI, {DstReg}, {RegX}) + .addImm(C2.getLimitedValue() - ShAmt); + MIB.addReg(DstReg); + }}}; + } + } + } + + return std::nullopt; +} + +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectAddrRegImm(MachineOperand &Root) const { + MachineFunction &MF = *Root.getParent()->getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (!Root.isReg()) + return std::nullopt; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; + } + + if (isBaseWithConstantOffset(Root, MRI)) { + MachineOperand &LHS = RootDef->getOperand(1); + MachineOperand &RHS = RootDef->getOperand(2); + MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + + int64_t RHSC = RHSDef->getOperand(1).getCImm()->getSExtValue(); + if (isInt<12>(RHSC)) { + if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, + }}; + + return {{[=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }}}; + } + } + + // TODO: Need to get the immediate from a G_PTR_ADD. Should this be done in + // the combiner? + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }}}; +} + +/// Returns the RISCVCC::CondCode that corresponds to the CmpInst::Predicate CC. +/// CC Must be an ICMP Predicate. 
+static RISCVCC::CondCode getRISCVCCFromICmp(CmpInst::Predicate CC) { + switch (CC) { + default: + llvm_unreachable("Expected ICMP CmpInst::Predicate."); + case CmpInst::Predicate::ICMP_EQ: + return RISCVCC::COND_EQ; + case CmpInst::Predicate::ICMP_NE: + return RISCVCC::COND_NE; + case CmpInst::Predicate::ICMP_ULT: + return RISCVCC::COND_LTU; + case CmpInst::Predicate::ICMP_SLT: + return RISCVCC::COND_LT; + case CmpInst::Predicate::ICMP_UGE: + return RISCVCC::COND_GEU; + case CmpInst::Predicate::ICMP_SGE: + return RISCVCC::COND_GE; + } +} + +static void getOperandsForBranch(Register CondReg, MachineRegisterInfo &MRI, + RISCVCC::CondCode &CC, Register &LHS, + Register &RHS) { + // Try to fold an ICmp. If that fails, use a NE compare with X0. + CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; + if (!mi_match(CondReg, MRI, m_GICmp(m_Pred(Pred), m_Reg(LHS), m_Reg(RHS)))) { + LHS = CondReg; + RHS = RISCV::X0; + CC = RISCVCC::COND_NE; + return; + } + + // We found an ICmp, do some canonicalizations. + + // Adjust comparisons to use comparison with 0 if possible. + if (auto Constant = getIConstantVRegSExtVal(RHS, MRI)) { + switch (Pred) { + case CmpInst::Predicate::ICMP_SGT: + // Convert X > -1 to X >= 0 + if (*Constant == -1) { + CC = RISCVCC::COND_GE; + RHS = RISCV::X0; + return; + } + break; + case CmpInst::Predicate::ICMP_SLT: + // Convert X < 1 to 0 >= X + if (*Constant == 1) { + CC = RISCVCC::COND_GE; + RHS = LHS; + LHS = RISCV::X0; + return; + } + break; + default: + break; + } + } + + switch (Pred) { + default: + llvm_unreachable("Expected ICMP CmpInst::Predicate."); + case CmpInst::Predicate::ICMP_EQ: + case CmpInst::Predicate::ICMP_NE: + case CmpInst::Predicate::ICMP_ULT: + case CmpInst::Predicate::ICMP_SLT: + case CmpInst::Predicate::ICMP_UGE: + case CmpInst::Predicate::ICMP_SGE: + // These CCs are supported directly by RISC-V branches. + break; + case CmpInst::Predicate::ICMP_SGT: + case CmpInst::Predicate::ICMP_SLE: + case CmpInst::Predicate::ICMP_UGT: + case CmpInst::Predicate::ICMP_ULE: + // These CCs are not supported directly by RISC-V branches, but changing the + // direction of the CC and swapping LHS and RHS are. 
+ Pred = CmpInst::getSwappedPredicate(Pred); + std::swap(LHS, RHS); + break; + } + + CC = getRISCVCCFromICmp(Pred); + return; +} + +bool RISCVInstructionSelector::select(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineIRBuilder MIB(MI); + + preISelLower(MI, MIB, MRI); + const unsigned Opc = MI.getOpcode(); + + if (!MI.isPreISelOpcode() || Opc == TargetOpcode::G_PHI) { + if (Opc == TargetOpcode::PHI || Opc == TargetOpcode::G_PHI) { + const Register DefReg = MI.getOperand(0).getReg(); + const LLT DefTy = MRI.getType(DefReg); + + const RegClassOrRegBank &RegClassOrBank = + MRI.getRegClassOrRegBank(DefReg); + + const TargetRegisterClass *DefRC = + RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); + if (!DefRC) { + if (!DefTy.isValid()) { + LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); + return false; + } + + const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); + DefRC = getRegClassForTypeOnBank(DefTy, RB); + if (!DefRC) { + LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); + return false; + } + } + + MI.setDesc(TII.get(TargetOpcode::PHI)); + return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); + } - if (!isPreISelGenericOpcode(I.getOpcode())) { // Certain non-generic instructions also need some special handling. + if (MI.isCopy()) + return selectCopy(MI, MRI); + + return true; + } + + if (selectImpl(MI, *CoverageInfo)) + return true; + + switch (Opc) { + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_PTRTOINT: + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_TRUNC: + return selectCopy(MI, MRI); + case TargetOpcode::G_CONSTANT: { + Register DstReg = MI.getOperand(0).getReg(); + int64_t Imm = MI.getOperand(1).getCImm()->getSExtValue(); + + if (!materializeImm(DstReg, Imm, MIB)) + return false; + + MI.eraseFromParent(); + return true; + } + case TargetOpcode::G_FCONSTANT: { + // TODO: Use constant pool for complext constants. + // TODO: Optimize +0.0 to use fcvt.d.w for s64 on rv32. + Register DstReg = MI.getOperand(0).getReg(); + const APFloat &FPimm = MI.getOperand(1).getFPImm()->getValueAPF(); + APInt Imm = FPimm.bitcastToAPInt(); + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + if (Size == 32 || (Size == 64 && Subtarget->is64Bit())) { + Register GPRReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + if (!materializeImm(GPRReg, Imm.getSExtValue(), MIB)) + return false; + + unsigned Opcode = Size == 64 ? RISCV::FMV_D_X : RISCV::FMV_W_X; + auto FMV = MIB.buildInstr(Opcode, {DstReg}, {GPRReg}); + if (!FMV.constrainAllUses(TII, TRI, RBI)) + return false; + } else { + assert(Size == 64 && !Subtarget->is64Bit() && + "Unexpected size or subtarget"); + // Split into two pieces and build through the stack. + Register GPRRegHigh = MRI.createVirtualRegister(&RISCV::GPRRegClass); + Register GPRRegLow = MRI.createVirtualRegister(&RISCV::GPRRegClass); + if (!materializeImm(GPRRegHigh, Imm.extractBits(32, 32).getSExtValue(), + MIB)) + return false; + if (!materializeImm(GPRRegLow, Imm.trunc(32).getSExtValue(), MIB)) + return false; + MachineInstrBuilder PairF64 = MIB.buildInstr( + RISCV::BuildPairF64Pseudo, {DstReg}, {GPRRegLow, GPRRegHigh}); + if (!PairF64.constrainAllUses(TII, TRI, RBI)) + return false; + } + + MI.eraseFromParent(); + return true; + } + case TargetOpcode::G_GLOBAL_VALUE: { + auto *GV = MI.getOperand(1).getGlobal(); + if (GV->isThreadLocal()) { + // TODO: implement this case. 
+ return false; + } + + return selectAddr(MI, MIB, MRI, GV->isDSOLocal(), + GV->hasExternalWeakLinkage()); + } + case TargetOpcode::G_JUMP_TABLE: + case TargetOpcode::G_CONSTANT_POOL: + return selectAddr(MI, MIB, MRI); + case TargetOpcode::G_BRCOND: { + Register LHS, RHS; + RISCVCC::CondCode CC; + getOperandsForBranch(MI.getOperand(0).getReg(), MRI, CC, LHS, RHS); + + auto Bcc = MIB.buildInstr(RISCVCC::getBrCond(CC), {}, {LHS, RHS}) + .addMBB(MI.getOperand(1).getMBB()); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); + } + case TargetOpcode::G_BRJT: { + // FIXME: Move to legalization? + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout()); + assert((EntrySize == 4 || (Subtarget->is64Bit() && EntrySize == 8)) && + "Unsupported jump-table entry size"); + assert( + (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || + MJTI->getEntryKind() == MachineJumpTableInfo::EK_Custom32 || + MJTI->getEntryKind() == MachineJumpTableInfo::EK_BlockAddress) && + "Unexpected jump-table entry kind"); + + auto SLL = + MIB.buildInstr(RISCV::SLLI, {&RISCV::GPRRegClass}, {MI.getOperand(2)}) + .addImm(Log2_32(EntrySize)); + if (!SLL.constrainAllUses(TII, TRI, RBI)) + return false; + + // TODO: Use SHXADD. Moving to legalization would fix this automatically. + auto ADD = MIB.buildInstr(RISCV::ADD, {&RISCV::GPRRegClass}, + {MI.getOperand(0), SLL.getReg(0)}); + if (!ADD.constrainAllUses(TII, TRI, RBI)) + return false; + + unsigned LdOpc = EntrySize == 8 ? RISCV::LD : RISCV::LW; + auto Dest = + MIB.buildInstr(LdOpc, {&RISCV::GPRRegClass}, {ADD.getReg(0)}) + .addImm(0) + .addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad, + EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())))); + if (!Dest.constrainAllUses(TII, TRI, RBI)) + return false; + + // If the Kind is EK_LabelDifference32, the table stores an offset from + // the location of the table. Add the table address to get an absolute + // address. 
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) { + Dest = MIB.buildInstr(RISCV::ADD, {&RISCV::GPRRegClass}, + {Dest.getReg(0), MI.getOperand(0)}); + if (!Dest.constrainAllUses(TII, TRI, RBI)) + return false; + } + + auto Branch = + MIB.buildInstr(RISCV::PseudoBRIND, {}, {Dest.getReg(0)}).addImm(0); + if (!Branch.constrainAllUses(TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + case TargetOpcode::G_BRINDIRECT: + MI.setDesc(TII.get(RISCV::PseudoBRIND)); + MI.addOperand(MachineOperand::CreateImm(0)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + case TargetOpcode::G_SEXT_INREG: + return selectSExtInreg(MI, MIB); + case TargetOpcode::G_FRAME_INDEX: { + // TODO: We may want to replace this code with the SelectionDAG patterns, + // which fail to get imported because it uses FrameAddrRegImm, which is a + // ComplexPattern + MI.setDesc(TII.get(RISCV::ADDI)); + MI.addOperand(MachineOperand::CreateImm(0)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + case TargetOpcode::G_SELECT: + return selectSelect(MI, MIB, MRI); + case TargetOpcode::G_FCMP: + return selectFPCompare(MI, MIB, MRI); + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + return selectIntrinsicWithSideEffects(MI, MIB, MRI); + case TargetOpcode::G_FENCE: { + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(MI.getOperand(0).getImm()); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(MI.getOperand(1).getImm()); + emitFence(FenceOrdering, FenceSSID, MIB); + MI.eraseFromParent(); + return true; + } + case TargetOpcode::G_IMPLICIT_DEF: + return selectImplicitDef(MI, MIB, MRI); + case TargetOpcode::G_MERGE_VALUES: + return selectMergeValues(MI, MIB, MRI); + case TargetOpcode::G_UNMERGE_VALUES: + return selectUnmergeValues(MI, MIB, MRI); + default: + return false; + } +} + +bool RISCVInstructionSelector::selectMergeValues( + MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const { + assert(MI.getOpcode() == TargetOpcode::G_MERGE_VALUES); + + // Build a F64 Pair from operands + if (MI.getNumOperands() != 3) + return false; + Register Dst = MI.getOperand(0).getReg(); + Register Lo = MI.getOperand(1).getReg(); + Register Hi = MI.getOperand(2).getReg(); + if (!isRegInFprb(Dst, MRI) || !isRegInGprb(Lo, MRI) || !isRegInGprb(Hi, MRI)) + return false; + MI.setDesc(TII.get(RISCV::BuildPairF64Pseudo)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); +} + +bool RISCVInstructionSelector::selectUnmergeValues( + MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); + + // Split F64 Src into two s32 parts + if (MI.getNumOperands() != 3) + return false; + Register Src = MI.getOperand(2).getReg(); + Register Lo = MI.getOperand(0).getReg(); + Register Hi = MI.getOperand(1).getReg(); + if (!isRegInFprb(Src, MRI) || !isRegInGprb(Lo, MRI) || !isRegInGprb(Hi, MRI)) + return false; + MI.setDesc(TII.get(RISCV::SplitF64Pseudo)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); +} + +bool RISCVInstructionSelector::replacePtrWithInt(MachineOperand &Op, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) { + Register PtrReg = Op.getReg(); + assert(MRI.getType(PtrReg).isPointer() && "Operand is not a pointer!"); + + const LLT sXLen = LLT::scalar(STI.getXLen()); + auto PtrToInt = MIB.buildPtrToInt(sXLen, PtrReg); + MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(RISCV::GPRBRegBankID)); + Op.setReg(PtrToInt.getReg(0)); + return 
select(*PtrToInt); +} + +void RISCVInstructionSelector::preISelLower(MachineInstr &MI, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) { + switch (MI.getOpcode()) { + case TargetOpcode::G_PTR_ADD: { + Register DstReg = MI.getOperand(0).getReg(); + const LLT sXLen = LLT::scalar(STI.getXLen()); + + replacePtrWithInt(MI.getOperand(1), MIB, MRI); + MI.setDesc(TII.get(TargetOpcode::G_ADD)); + MRI.setType(DstReg, sXLen); + break; + } + case TargetOpcode::G_PTRMASK: { + Register DstReg = MI.getOperand(0).getReg(); + const LLT sXLen = LLT::scalar(STI.getXLen()); + replacePtrWithInt(MI.getOperand(1), MIB, MRI); + MI.setDesc(TII.get(TargetOpcode::G_AND)); + MRI.setType(DstReg, sXLen); + } + } +} + +void RISCVInstructionSelector::renderNegImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue(); + MIB.addImm(-CstVal); +} + +void RISCVInstructionSelector::renderImmSubFromXLen(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + uint64_t CstVal = MI.getOperand(1).getCImm()->getZExtValue(); + MIB.addImm(STI.getXLen() - CstVal); +} + +void RISCVInstructionSelector::renderImmSubFrom32(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + uint64_t CstVal = MI.getOperand(1).getCImm()->getZExtValue(); + MIB.addImm(32 - CstVal); +} + +void RISCVInstructionSelector::renderImmPlus1(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue(); + MIB.addImm(CstVal + 1); +} + +void RISCVInstructionSelector::renderImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue(); + MIB.addImm(CstVal); +} + +void RISCVInstructionSelector::renderTrailingZeros(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && + "Expected G_CONSTANT"); + uint64_t C = MI.getOperand(1).getCImm()->getZExtValue(); + MIB.addImm(llvm::countr_zero(C)); +} + +const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank( + LLT Ty, const RegisterBank &RB) const { + if (RB.getID() == RISCV::GPRBRegBankID) { + if (Ty.getSizeInBits() <= 32 || (STI.is64Bit() && Ty.getSizeInBits() == 64)) + return &RISCV::GPRRegClass; + } + + if (RB.getID() == RISCV::FPRBRegBankID) { + if (Ty.getSizeInBits() == 32) + return &RISCV::FPR32RegClass; + if (Ty.getSizeInBits() == 64) + return &RISCV::FPR64RegClass; + } + + // TODO: Non-GPR register classes. 
+ return nullptr; +} + +bool RISCVInstructionSelector::isRegInGprb(Register Reg, + MachineRegisterInfo &MRI) const { + return RBI.getRegBank(Reg, MRI, TRI)->getID() == RISCV::GPRBRegBankID; +} + +bool RISCVInstructionSelector::isRegInFprb(Register Reg, + MachineRegisterInfo &MRI) const { + return RBI.getRegBank(Reg, MRI, TRI)->getID() == RISCV::FPRBRegBankID; +} + +bool RISCVInstructionSelector::selectCopy(MachineInstr &MI, + MachineRegisterInfo &MRI) const { + Register DstReg = MI.getOperand(0).getReg(); + + if (DstReg.isPhysical()) + return true; + + const TargetRegisterClass *DstRC = getRegClassForTypeOnBank( + MRI.getType(DstReg), *RBI.getRegBank(DstReg, MRI, TRI)); + assert(DstRC && + "Register class not available for LLT, register bank combination"); + + // No need to constrain SrcReg. It will get constrained when + // we hit another of its uses or its defs. + // Copies do not have constraints. + if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(MI.getOpcode()) + << " operand\n"); + return false; + } + + MI.setDesc(TII.get(RISCV::COPY)); + return true; +} + +bool RISCVInstructionSelector::selectImplicitDef( + MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const { + assert(MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF); + + const Register DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *DstRC = getRegClassForTypeOnBank( + MRI.getType(DstReg), *RBI.getRegBank(DstReg, MRI, TRI)); + + assert(DstRC && + "Register class not available for LLT, register bank combination"); + + if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(MI.getOpcode()) + << " operand\n"); + } + MI.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); + return true; +} + +bool RISCVInstructionSelector::materializeImm(Register DstReg, int64_t Imm, + MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + + if (Imm == 0) { + MIB.buildCopy(DstReg, Register(RISCV::X0)); + RBI.constrainGenericRegister(DstReg, RISCV::GPRRegClass, MRI); + return true; + } + + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, *Subtarget); + unsigned NumInsts = Seq.size(); + Register SrcReg = RISCV::X0; + + for (unsigned i = 0; i < NumInsts; i++) { + Register TmpReg = i < NumInsts - 1 + ? 
MRI.createVirtualRegister(&RISCV::GPRRegClass) + : DstReg; + const RISCVMatInt::Inst &I = Seq[i]; + MachineInstr *Result; + + switch (I.getOpndKind()) { + case RISCVMatInt::Imm: + // clang-format off + Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, {}) + .addImm(I.getImm()); + // clang-format on + break; + case RISCVMatInt::RegX0: + Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, + {SrcReg, Register(RISCV::X0)}); + break; + case RISCVMatInt::RegReg: + Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, {SrcReg, SrcReg}); + break; + case RISCVMatInt::RegImm: + Result = + MIB.buildInstr(I.getOpcode(), {TmpReg}, {SrcReg}).addImm(I.getImm()); + break; + } + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + SrcReg = TmpReg; + } + + return true; +} + +bool RISCVInstructionSelector::selectAddr(MachineInstr &MI, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI, + bool IsLocal, + bool IsExternWeak) const { + assert((MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE || + MI.getOpcode() == TargetOpcode::G_JUMP_TABLE || + MI.getOpcode() == TargetOpcode::G_CONSTANT_POOL) && + "Unexpected opcode"); + + const MachineOperand &DispMO = MI.getOperand(1); + + Register DefReg = MI.getOperand(0).getReg(); + const LLT DefTy = MRI.getType(DefReg); + + // When HWASAN is used and tagging of global variables is enabled + // they should be accessed via the GOT, since the tagged address of a global + // is incompatible with existing code models. This also applies to non-pic + // mode. + if (TM.isPositionIndependent() || Subtarget->allowTaggedGlobals()) { + if (IsLocal && !Subtarget->allowTaggedGlobals()) { + // Use PC-relative addressing to access the symbol. This generates the + // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) + // %pcrel_lo(auipc)). + MI.setDesc(TII.get(RISCV::PseudoLLA)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + // Use PC-relative addressing to access the GOT for this symbol, then + // load the address from the GOT. This generates the pattern (PseudoLGA + // sym), which expands to (ld (addi (auipc %got_pcrel_hi(sym)) + // %pcrel_lo(auipc))). + MachineFunction &MF = *MI.getParent()->getParent(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + DefTy, Align(DefTy.getSizeInBits() / 8)); + + auto Result = MIB.buildInstr(RISCV::PseudoLGA, {DefReg}, {}) + .addDisp(DispMO, 0) + .addMemOperand(MemOp); + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + + switch (TM.getCodeModel()) { + default: { + reportGISelFailure(const_cast<MachineFunction &>(*MF), *TPC, *MORE, + getName(), "Unsupported code model for lowering", MI); + return false; + } + case CodeModel::Small: { + // Must lie within a single 2 GiB address range and must lie between + // absolute addresses -2 GiB and +2 GiB. This generates the pattern (addi + // (lui %hi(sym)) %lo(sym)). 
+ Register AddrHiDest = MRI.createVirtualRegister(&RISCV::GPRRegClass); + MachineInstr *AddrHi = MIB.buildInstr(RISCV::LUI, {AddrHiDest}, {}) + .addDisp(DispMO, 0, RISCVII::MO_HI); + + if (!constrainSelectedInstRegOperands(*AddrHi, TII, TRI, RBI)) + return false; + + auto Result = MIB.buildInstr(RISCV::ADDI, {DefReg}, {AddrHiDest}) + .addDisp(DispMO, 0, RISCVII::MO_LO); + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); return true; } + case CodeModel::Medium: + // Emit LGA/LLA instead of the sequence it expands to because the pcrel_lo + // relocation needs to reference a label that points to the auipc + // instruction itself, not the global. This cannot be done inside the + // instruction selector. + if (IsExternWeak) { + // An extern weak symbol may be undefined, i.e. have value 0, which may + // not be within 2GiB of PC, so use GOT-indirect addressing to access the + // symbol. This generates the pattern (PseudoLGA sym), which expands to + // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). + MachineFunction &MF = *MI.getParent()->getParent(); + MachineMemOperand *MemOp = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + DefTy, Align(DefTy.getSizeInBits() / 8)); + + auto Result = MIB.buildInstr(RISCV::PseudoLGA, {DefReg}, {}) + .addDisp(DispMO, 0) + .addMemOperand(MemOp); + + if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + + // Generate a sequence for accessing addresses within any 2GiB range + // within the address space. This generates the pattern (PseudoLLA sym), + // which expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). + MI.setDesc(TII.get(RISCV::PseudoLLA)); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + return false; +} + +bool RISCVInstructionSelector::selectSExtInreg(MachineInstr &MI, + MachineIRBuilder &MIB) const { + if (!STI.isRV64()) + return false; + + const MachineOperand &Size = MI.getOperand(2); + // Only Size == 32 (i.e. shift by 32 bits) is acceptable at this point. + if (!Size.isImm() || Size.getImm() != 32) + return false; + + const MachineOperand &Src = MI.getOperand(1); + const MachineOperand &Dst = MI.getOperand(0); + // addiw rd, rs, 0 (i.e. sext.w rd, rs) + MachineInstr *NewMI = + MIB.buildInstr(RISCV::ADDIW, {Dst.getReg()}, {Src.getReg()}).addImm(0U); + + if (!constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; +} + +bool RISCVInstructionSelector::selectSelect(MachineInstr &MI, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const { + auto &SelectMI = cast<GSelect>(MI); + + Register LHS, RHS; + RISCVCC::CondCode CC; + getOperandsForBranch(SelectMI.getCondReg(), MRI, CC, LHS, RHS); + + Register DstReg = SelectMI.getReg(0); + + unsigned Opc = RISCV::Select_GPR_Using_CC_GPR; + if (RBI.getRegBank(DstReg, MRI, TRI)->getID() == RISCV::FPRBRegBankID) { + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + Opc = Size == 32 ? 
RISCV::Select_FPR32_Using_CC_GPR + : RISCV::Select_FPR64_Using_CC_GPR; + } + + MachineInstr *Result = MIB.buildInstr(Opc) + .addDef(DstReg) + .addReg(LHS) + .addReg(RHS) + .addImm(CC) + .addReg(SelectMI.getTrueReg()) + .addReg(SelectMI.getFalseReg()); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*Result, TII, TRI, RBI); +} + +// Convert an FCMP predicate to one of the supported F or D instructions. +static unsigned getFCmpOpcode(CmpInst::Predicate Pred, unsigned Size) { + assert((Size == 32 || Size == 64) && "Unsupported size"); + switch (Pred) { + default: + llvm_unreachable("Unsupported predicate"); + case CmpInst::FCMP_OLT: + return Size == 32 ? RISCV::FLT_S : RISCV::FLT_D; + case CmpInst::FCMP_OLE: + return Size == 32 ? RISCV::FLE_S : RISCV::FLE_D; + case CmpInst::FCMP_OEQ: + return Size == 32 ? RISCV::FEQ_S : RISCV::FEQ_D; + } +} + +// Try legalizing an FCMP by swapping or inverting the predicate to one that +// is supported. +static bool legalizeFCmpPredicate(Register &LHS, Register &RHS, + CmpInst::Predicate &Pred, bool &NeedInvert) { + auto isLegalFCmpPredicate = [](CmpInst::Predicate Pred) { + return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE || + Pred == CmpInst::FCMP_OEQ; + }; + + assert(!isLegalFCmpPredicate(Pred) && "Predicate already legal?"); - if (selectImpl(I, *CoverageInfo)) + CmpInst::Predicate InvPred = CmpInst::getSwappedPredicate(Pred); + if (isLegalFCmpPredicate(InvPred)) { + Pred = InvPred; + std::swap(LHS, RHS); return true; + } + + InvPred = CmpInst::getInversePredicate(Pred); + NeedInvert = true; + if (isLegalFCmpPredicate(InvPred)) { + Pred = InvPred; + return true; + } + InvPred = CmpInst::getSwappedPredicate(InvPred); + if (isLegalFCmpPredicate(InvPred)) { + Pred = InvPred; + std::swap(LHS, RHS); + return true; + } return false; } +// Emit a sequence of instructions to compare LHS and RHS using Pred. Return +// the result in DstReg. +// FIXME: Maybe we should expand this earlier. +bool RISCVInstructionSelector::selectFPCompare(MachineInstr &MI, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) const { + auto &CmpMI = cast<GFCmp>(MI); + CmpInst::Predicate Pred = CmpMI.getCond(); + + Register DstReg = CmpMI.getReg(0); + Register LHS = CmpMI.getLHSReg(); + Register RHS = CmpMI.getRHSReg(); + + unsigned Size = MRI.getType(LHS).getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unexpected size"); + + Register TmpReg = DstReg; + + bool NeedInvert = false; + // First try swapping operands or inverting. 
+ if (legalizeFCmpPredicate(LHS, RHS, Pred, NeedInvert)) { + if (NeedInvert) + TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + auto Cmp = MIB.buildInstr(getFCmpOpcode(Pred, Size), {TmpReg}, {LHS, RHS}); + if (!Cmp.constrainAllUses(TII, TRI, RBI)) + return false; + } else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UEQ) { + // fcmp one LHS, RHS => (OR (FLT LHS, RHS), (FLT RHS, LHS)) + NeedInvert = Pred == CmpInst::FCMP_UEQ; + auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size), + {&RISCV::GPRRegClass}, {LHS, RHS}); + if (!Cmp1.constrainAllUses(TII, TRI, RBI)) + return false; + auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size), + {&RISCV::GPRRegClass}, {RHS, LHS}); + if (!Cmp2.constrainAllUses(TII, TRI, RBI)) + return false; + if (NeedInvert) + TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + auto Or = + MIB.buildInstr(RISCV::OR, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)}); + if (!Or.constrainAllUses(TII, TRI, RBI)) + return false; + } else if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) { + // fcmp ord LHS, RHS => (AND (FEQ LHS, LHS), (FEQ RHS, RHS)) + // FIXME: If LHS and RHS are the same we can use a single FEQ. + NeedInvert = Pred == CmpInst::FCMP_UNO; + auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size), + {&RISCV::GPRRegClass}, {LHS, LHS}); + if (!Cmp1.constrainAllUses(TII, TRI, RBI)) + return false; + auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size), + {&RISCV::GPRRegClass}, {RHS, RHS}); + if (!Cmp2.constrainAllUses(TII, TRI, RBI)) + return false; + if (NeedInvert) + TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + auto And = + MIB.buildInstr(RISCV::AND, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)}); + if (!And.constrainAllUses(TII, TRI, RBI)) + return false; + } else + llvm_unreachable("Unhandled predicate"); + + // Emit an XORI to invert the result if needed. + if (NeedInvert) { + auto Xor = MIB.buildInstr(RISCV::XORI, {DstReg}, {TmpReg}).addImm(1); + if (!Xor.constrainAllUses(TII, TRI, RBI)) + return false; + } + + MI.eraseFromParent(); + return true; +} + +bool RISCVInstructionSelector::selectIntrinsicWithSideEffects( + MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && + "Unexpected opcode"); + // Find the intrinsic ID. + unsigned IntrinID = cast<GIntrinsic>(MI).getIntrinsicID(); + + // Select the instruction. + switch (IntrinID) { + default: + return false; + case Intrinsic::trap: + MIB.buildInstr(RISCV::UNIMP, {}, {}); + break; + case Intrinsic::debugtrap: + MIB.buildInstr(RISCV::EBREAK, {}, {}); + break; + } + + MI.eraseFromParent(); + return true; +} + +void RISCVInstructionSelector::emitFence(AtomicOrdering FenceOrdering, + SyncScope::ID FenceSSID, + MachineIRBuilder &MIB) const { + if (STI.hasStdExtZtso()) { + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && + FenceSSID == SyncScope::System) { + // fence rw, rw + MIB.buildInstr(RISCV::FENCE, {}, {}) + .addImm(RISCVFenceField::R | RISCVFenceField::W) + .addImm(RISCVFenceField::R | RISCVFenceField::W); + return; + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
+ MIB.buildInstr(TargetOpcode::MEMBARRIER, {}, {}); + return; + } + + // singlethread fences only synchronize with signal handlers on the same + // thread and thus only need to preserve instruction order, not actually + // enforce memory ordering. + if (FenceSSID == SyncScope::SingleThread) { + MIB.buildInstr(TargetOpcode::MEMBARRIER, {}, {}); + return; + } + + // Refer to Table A.6 in the version 2.3 draft of the RISC-V Instruction Set + // Manual: Volume I. + unsigned Pred, Succ; + switch (FenceOrdering) { + default: + llvm_unreachable("Unexpected ordering"); + case AtomicOrdering::AcquireRelease: + // fence acq_rel -> fence.tso + MIB.buildInstr(RISCV::FENCE_TSO, {}, {}); + return; + case AtomicOrdering::Acquire: + // fence acquire -> fence r, rw + Pred = RISCVFenceField::R; + Succ = RISCVFenceField::R | RISCVFenceField::W; + break; + case AtomicOrdering::Release: + // fence release -> fence rw, w + Pred = RISCVFenceField::R | RISCVFenceField::W; + Succ = RISCVFenceField::W; + break; + case AtomicOrdering::SequentiallyConsistent: + // fence seq_cst -> fence rw, rw + Pred = RISCVFenceField::R | RISCVFenceField::W; + Succ = RISCVFenceField::R | RISCVFenceField::W; + break; + } + MIB.buildInstr(RISCV::FENCE, {}, {}).addImm(Pred).addImm(Succ); +} + namespace llvm { InstructionSelector * createRISCVInstructionSelector(const RISCVTargetMachine &TM, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 3f829cc2e677..8f03a7ac41d3 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -11,23 +11,452 @@ //===----------------------------------------------------------------------===// #include "RISCVLegalizerInfo.h" +#include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" using namespace llvm; +using namespace LegalityPredicates; +using namespace LegalizeMutations; -RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { - const unsigned XLen = ST.getXLen(); - const LLT XLenLLT = LLT::scalar(XLen); +// Is this type supported by scalar FP arithmetic operations given the current +// subtarget. 
+static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx, + const RISCVSubtarget &ST) { + return [=, &ST](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isScalar() && + ((ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) || + (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64)); + }; +} + +RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) + : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) { + const LLT sDoubleXLen = LLT::scalar(2 * XLen); + const LLT p0 = LLT::pointer(0, XLen); + const LLT s1 = LLT::scalar(1); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); using namespace TargetOpcode; getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) - .legalFor({XLenLLT}) - .clampScalar(0, XLenLLT, XLenLLT); + .legalFor({s32, sXLen}) + .widenScalarToNextPow2(0) + .clampScalar(0, s32, sXLen); + + getActionDefinitionsBuilder( + {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower(); + + getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower(); + + auto &ShiftActions = getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}); + if (ST.is64Bit()) + ShiftActions.customFor({{s32, s32}}); + ShiftActions.legalFor({{s32, s32}, {s32, sXLen}, {sXLen, sXLen}}) + .widenScalarToNextPow2(0) + .clampScalar(1, s32, sXLen) + .clampScalar(0, s32, sXLen) + .minScalarSameAs(1, 0); + + if (ST.is64Bit()) { + getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) + .legalFor({{sXLen, s32}}) + .maxScalar(0, sXLen); + + getActionDefinitionsBuilder(G_SEXT_INREG) + .customFor({sXLen}) + .maxScalar(0, sXLen) + .lower(); + } else { + getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}).maxScalar(0, sXLen); + + getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower(); + } + + // Merge/Unmerge + for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { + unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; + unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; + auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op); + if (XLen == 32 && ST.hasStdExtD()) { + LLT IdxZeroTy = G_MERGE_VALUES ? s64 : s32; + LLT IdxOneTy = G_MERGE_VALUES ? s32 : s64; + MergeUnmergeActions.legalFor({IdxZeroTy, IdxOneTy}); + } + MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen) + .widenScalarToNextPow2(BigTyIdx, XLen) + .clampScalar(LitTyIdx, sXLen, sXLen) + .clampScalar(BigTyIdx, sXLen, sXLen); + } + + getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); + + auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR}); + if (ST.hasStdExtZbb()) { + RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}}); + // Widen s32 rotate amount to s64 so SDAG patterns will match. 
+ if (ST.is64Bit()) + RotateActions.widenScalarIf(all(typeIs(0, s32), typeIs(1, s32)), + changeTo(1, sXLen)); + } + RotateActions.lower(); + + getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower(); + + auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP); + if (ST.hasStdExtZbb()) + BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen); + else + BSWAPActions.maxScalar(0, sXLen).lower(); + + auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ}); + auto &CountZerosUndefActions = + getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF}); + if (ST.hasStdExtZbb()) { + CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}}) + .clampScalar(0, s32, sXLen) + .widenScalarToNextPow2(0) + .scalarSameSizeAs(1, 0); + } else { + CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); + CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0); + } + CountZerosUndefActions.lower(); + + auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP); + if (ST.hasStdExtZbb()) { + CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}}) + .clampScalar(0, s32, sXLen) + .widenScalarToNextPow2(0) + .scalarSameSizeAs(1, 0); + } else { + CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); + } + + getActionDefinitionsBuilder({G_CONSTANT, G_IMPLICIT_DEF}) + .legalFor({s32, sXLen, p0}) + .widenScalarToNextPow2(0) + .clampScalar(0, s32, sXLen); + + getActionDefinitionsBuilder(G_ICMP) + .legalFor({{sXLen, sXLen}, {sXLen, p0}}) + .widenScalarToNextPow2(1) + .clampScalar(1, sXLen, sXLen) + .clampScalar(0, sXLen, sXLen); + + auto &SelectActions = getActionDefinitionsBuilder(G_SELECT).legalFor( + {{s32, sXLen}, {p0, sXLen}}); + if (XLen == 64 || ST.hasStdExtD()) + SelectActions.legalFor({{s64, sXLen}}); + SelectActions.widenScalarToNextPow2(0) + .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? 
s64 : s32) + .clampScalar(1, sXLen, sXLen); + + auto &LoadStoreActions = + getActionDefinitionsBuilder({G_LOAD, G_STORE}) + .legalForTypesWithMemDesc({{s32, p0, s8, 8}, + {s32, p0, s16, 16}, + {s32, p0, s32, 32}, + {p0, p0, sXLen, XLen}}); + auto &ExtLoadActions = + getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) + .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}}); + if (XLen == 64) { + LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8}, + {s64, p0, s16, 16}, + {s64, p0, s32, 32}, + {s64, p0, s64, 64}}); + ExtLoadActions.legalForTypesWithMemDesc( + {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}}); + } else if (ST.hasStdExtD()) { + LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}}); + } + LoadStoreActions.clampScalar(0, s32, sXLen).lower(); + ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower(); + + getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}}); + + getActionDefinitionsBuilder(G_PTRTOINT) + .legalFor({{sXLen, p0}}) + .clampScalar(0, sXLen, sXLen); + + getActionDefinitionsBuilder(G_INTTOPTR) + .legalFor({{p0, sXLen}}) + .clampScalar(1, sXLen, sXLen); + + getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen); + + getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, sXLen}}); + + getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); + + getActionDefinitionsBuilder(G_PHI) + .legalFor({p0, sXLen}) + .widenScalarToNextPow2(0) + .clampScalar(0, sXLen, sXLen); + + getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL}) + .legalFor({p0}); + + if (ST.hasStdExtM() || ST.hasStdExtZmmul()) { + getActionDefinitionsBuilder(G_MUL) + .legalFor({s32, sXLen}) + .widenScalarToNextPow2(0) + .clampScalar(0, s32, sXLen); + + // clang-format off + getActionDefinitionsBuilder({G_SMULH, G_UMULH}) + .legalFor({sXLen}) + .lower(); + // clang-format on + + getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower(); + } else { + getActionDefinitionsBuilder(G_MUL) + .libcallFor({sXLen, sDoubleXLen}) + .widenScalarToNextPow2(0) + .clampScalar(0, sXLen, sDoubleXLen); + + getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen}); + + getActionDefinitionsBuilder({G_SMULO, G_UMULO}) + .minScalar(0, sXLen) + // Widen sXLen to sDoubleXLen so we can use a single libcall to get + // the low bits for the mul result and high bits to do the overflow + // check. 
+ .widenScalarIf(typeIs(0, sXLen), + LegalizeMutations::changeTo(0, sDoubleXLen)) + .lower(); + } + + if (ST.hasStdExtM()) { + getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM}) + .legalFor({s32, sXLen}) + .libcallFor({sDoubleXLen}) + .clampScalar(0, s32, sDoubleXLen) + .widenScalarToNextPow2(0); + } else { + getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM}) + .libcallFor({sXLen, sDoubleXLen}) + .clampScalar(0, sXLen, sDoubleXLen) + .widenScalarToNextPow2(0); + } + + auto &AbsActions = getActionDefinitionsBuilder(G_ABS); + if (ST.hasStdExtZbb()) + AbsActions.customFor({s32, sXLen}).minScalar(0, sXLen); + AbsActions.lower(); + + auto &MinMaxActions = + getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN}); + if (ST.hasStdExtZbb()) + MinMaxActions.legalFor({sXLen}).minScalar(0, sXLen); + MinMaxActions.lower(); + + getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); + + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); + + getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); + + // FP Operations + + getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG, + G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM}) + .legalIf(typeIsScalarFPArith(0, ST)); + + getActionDefinitionsBuilder(G_FCOPYSIGN) + .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST))); + + getActionDefinitionsBuilder(G_FPTRUNC).legalIf( + [=, &ST](const LegalityQuery &Query) -> bool { + return (ST.hasStdExtD() && typeIs(0, s32)(Query) && + typeIs(1, s64)(Query)); + }); + getActionDefinitionsBuilder(G_FPEXT).legalIf( + [=, &ST](const LegalityQuery &Query) -> bool { + return (ST.hasStdExtD() && typeIs(0, s64)(Query) && + typeIs(1, s32)(Query)); + }); + + getActionDefinitionsBuilder(G_FCMP) + .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST))) + .clampScalar(0, sXLen, sXLen); + + // TODO: Support vector version of G_IS_FPCLASS. + getActionDefinitionsBuilder(G_IS_FPCLASS) + .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST))); + + getActionDefinitionsBuilder(G_FCONSTANT) + .legalIf(typeIsScalarFPArith(0, ST)) + .lowerFor({s32, s64}); + + getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) + .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST))) + .widenScalarToNextPow2(0) + .clampScalar(0, s32, sXLen); + + getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) + .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen}))) + .widenScalarToNextPow2(1) + .clampScalar(1, s32, sXLen); + + // FIXME: We can do custom inline expansion like SelectionDAG. + // FIXME: Legal with Zfa. + getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR}) + .libcallFor({s32, s64}); + + getActionDefinitionsBuilder(G_VASTART).customFor({p0}); + + // va_list must be a pointer, but most sized types are pretty easy to handle + // as the destination. + getActionDefinitionsBuilder(G_VAARG) + // TODO: Implement narrowScalar and widenScalar for G_VAARG for types + // outside the [s32, sXLen] range. 
+ .clampScalar(0, s32, sXLen) + .lowerForCartesianProduct({s32, sXLen, p0}, {p0}); getLegacyLegalizerInfo().computeTables(); } + +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { + if (Ty.isVector()) + return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); + return IntegerType::get(C, Ty.getSizeInBits()); +} + +bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI) const { + Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID(); + switch (IntrinsicID) { + default: + return false; + case Intrinsic::vacopy: { + // vacopy arguments must be legal because of the intrinsic signature. + // No need to check here. + + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + MachineFunction &MF = *MI.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + LLVMContext &Ctx = MF.getFunction().getContext(); + + Register DstLst = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(DstLst); + + // Load the source va_list + Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx)); + MachineMemOperand *LoadMMO = MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment); + auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO); + + // Store the result in the destination va_list + MachineMemOperand *StoreMMO = MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment); + MIRBuilder.buildStore(DstLst, Tmp, *StoreMMO); + + MI.eraseFromParent(); + return true; + } + } +} + +bool RISCVLegalizerInfo::legalizeShlAshrLshr( + MachineInstr &MI, MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const { + assert(MI.getOpcode() == TargetOpcode::G_ASHR || + MI.getOpcode() == TargetOpcode::G_LSHR || + MI.getOpcode() == TargetOpcode::G_SHL); + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the + // imported patterns can select it later. Either way, it will be legal. + Register AmtReg = MI.getOperand(2).getReg(); + auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI); + if (!VRegAndVal) + return true; + // Check the shift amount is in range for an immediate form. + uint64_t Amount = VRegAndVal->Value.getZExtValue(); + if (Amount > 31) + return true; // This will have to remain a register variant. 
+ auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); + Observer.changingInstr(MI); + MI.getOperand(2).setReg(ExtCst.getReg(0)); + Observer.changedInstr(MI); + return true; +} + +bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI, + MachineIRBuilder &MIRBuilder) const { + // Stores the address of the VarArgsFrameIndex slot into the memory location + assert(MI.getOpcode() == TargetOpcode::G_VASTART); + MachineFunction *MF = MI.getParent()->getParent(); + RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>(); + int FI = FuncInfo->getVarArgsFrameIndex(); + LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg()); + auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI); + assert(MI.hasOneMemOperand()); + MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(), + *MI.memoperands()[0]); + MI.eraseFromParent(); + return true; +} + +bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, + MachineInstr &MI) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + GISelChangeObserver &Observer = Helper.Observer; + switch (MI.getOpcode()) { + default: + // No idea what to do. + return false; + case TargetOpcode::G_ABS: + return Helper.lowerAbsToMaxNeg(MI); + case TargetOpcode::G_SHL: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + return legalizeShlAshrLshr(MI, MIRBuilder, Observer); + case TargetOpcode::G_SEXT_INREG: { + // Source size of 32 is sext.w. + int64_t SizeInBits = MI.getOperand(2).getImm(); + if (SizeInBits == 32) + return true; + + return Helper.lower(MI, 0, /* Unused hint type */ LLT()) == + LegalizerHelper::Legalized; + } + case TargetOpcode::G_IS_FPCLASS: { + Register GISFPCLASS = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const MachineOperand &ImmOp = MI.getOperand(2); + MachineIRBuilder MIB(MI); + + // Turn LLVM IR's floating point classes to that in RISC-V, + // by simply rotating the 10-bit immediate right by two bits. + APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm())); + auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen)); + auto ConstZero = MIB.buildConstant(sXLen, 0); + + auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src}); + auto And = MIB.buildAnd(sXLen, GFClass, FClassMask); + MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero); + + MI.eraseFromParent(); + return true; + } + case TargetOpcode::G_VASTART: + return legalizeVAStart(MI, MIRBuilder); + } + + llvm_unreachable("expected switch to return"); +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index 960410ead62c..48c36976501f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -17,12 +17,29 @@ namespace llvm { +class GISelChangeObserver; +class MachineIRBuilder; class RISCVSubtarget; /// This class provides the information for the target register banks. 
class RISCVLegalizerInfo : public LegalizerInfo { + const RISCVSubtarget &STI; + const unsigned XLen; + const LLT sXLen; + public: RISCVLegalizerInfo(const RISCVSubtarget &ST); + + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + + bool legalizeIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI) const override; + +private: + bool legalizeShlAshrLshr(MachineInstr &MI, MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const; + + bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const; }; } // end namespace llvm #endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp new file mode 100644 index 000000000000..be77979512e0 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp @@ -0,0 +1,155 @@ +//=== RISCVO0PreLegalizerCombiner.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. +// +//===----------------------------------------------------------------------===// + +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" + +#define GET_GICOMBINER_DEPS +#include "RISCVGenO0PreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_DEPS + +#define DEBUG_TYPE "riscv-O0-prelegalizer-combiner" + +using namespace llvm; + +namespace { +#define GET_GICOMBINER_TYPES +#include "RISCVGenO0PreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TYPES + +class RISCVO0PreLegalizerCombinerImpl : public Combiner { +protected: + // TODO: Make CombinerHelper methods const. 
+ mutable CombinerHelper Helper; + const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig; + const RISCVSubtarget &STI; + +public: + RISCVO0PreLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig, + const RISCVSubtarget &STI); + + static const char *getName() { return "RISCVO0PreLegalizerCombiner"; } + + bool tryCombineAll(MachineInstr &I) const override; + +private: +#define GET_GICOMBINER_CLASS_MEMBERS +#include "RISCVGenO0PreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CLASS_MEMBERS +}; + +#define GET_GICOMBINER_IMPL +#include "RISCVGenO0PreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_IMPL + +RISCVO0PreLegalizerCombinerImpl::RISCVO0PreLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig, + const RISCVSubtarget &STI) + : Combiner(MF, CInfo, TPC, &KB, CSEInfo), + Helper(Observer, B, /*IsPreLegalize*/ true, &KB), RuleConfig(RuleConfig), + STI(STI), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "RISCVGenO0PreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +// Pass boilerplate +// ================ + +class RISCVO0PreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + RISCVO0PreLegalizerCombiner(); + + StringRef getPassName() const override { + return "RISCVO0PreLegalizerCombiner"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + RISCVO0PreLegalizerCombinerImplRuleConfig RuleConfig; +}; +} // end anonymous namespace + +void RISCVO0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +RISCVO0PreLegalizerCombiner::RISCVO0PreLegalizerCombiner() + : MachineFunctionPass(ID) { + initializeRISCVO0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); + + if (!RuleConfig.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); +} + +bool RISCVO0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto &TPC = getAnalysis<TargetPassConfig>(); + + const Function &F = MF.getFunction(); + GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); + + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + + CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false, + F.hasOptSize(), F.hasMinSize()); + RISCVO0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, + /*CSEInfo*/ nullptr, RuleConfig, ST); + return Impl.combineMachineInstrs(); +} + +char RISCVO0PreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(RISCVO0PreLegalizerCombiner, DEBUG_TYPE, + "Combine RISC-V machine instrs before legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) +INITIALIZE_PASS_END(RISCVO0PreLegalizerCombiner, DEBUG_TYPE, + "Combine RISC-V machine instrs before 
legalization", false, + false) + +namespace llvm { +FunctionPass *createRISCVO0PreLegalizerCombiner() { + return new RISCVO0PreLegalizerCombiner(); +} +} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp new file mode 100644 index 000000000000..9c28944abc76 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp @@ -0,0 +1,173 @@ +//=== RISCVPostLegalizerCombiner.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Post-legalization combines on generic MachineInstrs. +/// +/// The combines here must preserve instruction legality. +/// +/// Combines which don't rely on instruction legality should go in the +/// RISCVPreLegalizerCombiner. +/// +//===----------------------------------------------------------------------===// + +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" + +#define GET_GICOMBINER_DEPS +#include "RISCVGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_DEPS + +#define DEBUG_TYPE "riscv-postlegalizer-combiner" + +using namespace llvm; + +namespace { + +#define GET_GICOMBINER_TYPES +#include "RISCVGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TYPES + +class RISCVPostLegalizerCombinerImpl : public Combiner { +protected: + // TODO: Make CombinerHelper methods const. 
+ mutable CombinerHelper Helper; + const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig; + const RISCVSubtarget &STI; + +public: + RISCVPostLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig, + const RISCVSubtarget &STI, MachineDominatorTree *MDT, + const LegalizerInfo *LI); + + static const char *getName() { return "RISCVPostLegalizerCombiner"; } + + bool tryCombineAll(MachineInstr &I) const override; + +private: +#define GET_GICOMBINER_CLASS_MEMBERS +#include "RISCVGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CLASS_MEMBERS +}; + +#define GET_GICOMBINER_IMPL +#include "RISCVGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_IMPL + +RISCVPostLegalizerCombinerImpl::RISCVPostLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig, + const RISCVSubtarget &STI, MachineDominatorTree *MDT, + const LegalizerInfo *LI) + : Combiner(MF, CInfo, TPC, &KB, CSEInfo), + Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI), + RuleConfig(RuleConfig), STI(STI), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "RISCVGenPostLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +class RISCVPostLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + RISCVPostLegalizerCombiner(); + + StringRef getPassName() const override { + return "RISCVPostLegalizerCombiner"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + RISCVPostLegalizerCombinerImplRuleConfig RuleConfig; +}; +} // end anonymous namespace + +void RISCVPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<GISelCSEAnalysisWrapperPass>(); + AU.addPreserved<GISelCSEAnalysisWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner() + : MachineFunctionPass(ID) { + initializeRISCVPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); + + if (!RuleConfig.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); +} + +bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Legalized) && + "Expected a legalized function?"); + auto *TPC = &getAnalysis<TargetPassConfig>(); + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); + + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + const auto *LI = ST.getLegalizerInfo(); + + GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); + MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); + + CombinerInfo 
CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(), + F.hasMinSize()); + RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, + RuleConfig, ST, MDT, LI); + return Impl.combineMachineInstrs(); +} + +char RISCVPostLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(RISCVPostLegalizerCombiner, DEBUG_TYPE, + "Combine RISC-V MachineInstrs after legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_END(RISCVPostLegalizerCombiner, DEBUG_TYPE, + "Combine RISC-V MachineInstrs after legalization", false, + false) + +namespace llvm { +FunctionPass *createRISCVPostLegalizerCombiner() { + return new RISCVPostLegalizerCombiner(); +} +} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp new file mode 100644 index 000000000000..9a35fffae058 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp @@ -0,0 +1,169 @@ +//=== RISCVPreLegalizerCombiner.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. +// +//===----------------------------------------------------------------------===// + +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" + +#define GET_GICOMBINER_DEPS +#include "RISCVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_DEPS + +#define DEBUG_TYPE "riscv-prelegalizer-combiner" + +using namespace llvm; + +namespace { + +#define GET_GICOMBINER_TYPES +#include "RISCVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TYPES + +class RISCVPreLegalizerCombinerImpl : public Combiner { +protected: + // TODO: Make CombinerHelper methods const. 
+ mutable CombinerHelper Helper; + const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig; + const RISCVSubtarget &STI; + +public: + RISCVPreLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig, + const RISCVSubtarget &STI, MachineDominatorTree *MDT, + const LegalizerInfo *LI); + + static const char *getName() { return "RISCV00PreLegalizerCombiner"; } + + bool tryCombineAll(MachineInstr &I) const override; + +private: +#define GET_GICOMBINER_CLASS_MEMBERS +#include "RISCVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CLASS_MEMBERS +}; + +#define GET_GICOMBINER_IMPL +#include "RISCVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_IMPL + +RISCVPreLegalizerCombinerImpl::RISCVPreLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig, + const RISCVSubtarget &STI, MachineDominatorTree *MDT, + const LegalizerInfo *LI) + : Combiner(MF, CInfo, TPC, &KB, CSEInfo), + Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI), + RuleConfig(RuleConfig), STI(STI), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "RISCVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +// Pass boilerplate +// ================ + +class RISCVPreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + RISCVPreLegalizerCombiner(); + + StringRef getPassName() const override { return "RISCVPreLegalizerCombiner"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + RISCVPreLegalizerCombinerImplRuleConfig RuleConfig; +}; +} // end anonymous namespace + +void RISCVPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<GISelCSEAnalysisWrapperPass>(); + AU.addPreserved<GISelCSEAnalysisWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +RISCVPreLegalizerCombiner::RISCVPreLegalizerCombiner() + : MachineFunctionPass(ID) { + initializeRISCVPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); + + if (!RuleConfig.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); +} + +bool RISCVPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto &TPC = getAnalysis<TargetPassConfig>(); + + // Enable CSE. 
+ GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + const auto *LI = ST.getLegalizerInfo(); + + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); + GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); + MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); + CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(), + F.hasMinSize()); + RISCVPreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo, RuleConfig, + ST, MDT, LI); + return Impl.combineMachineInstrs(); +} + +char RISCVPreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(RISCVPreLegalizerCombiner, DEBUG_TYPE, + "Combine RISC-V machine instrs before legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) +INITIALIZE_PASS_END(RISCVPreLegalizerCombiner, DEBUG_TYPE, + "Combine RISC-V machine instrs before legalization", false, + false) + +namespace llvm { +FunctionPass *createRISCVPreLegalizerCombiner() { + return new RISCVPreLegalizerCombiner(); +} +} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 9b601902ad20..cf0ff63a5e51 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -12,6 +12,7 @@ #include "RISCVRegisterBankInfo.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "RISCVSubtarget.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterBank.h" #include "llvm/CodeGen/RegisterBankInfo.h" @@ -20,7 +21,448 @@ #define GET_TARGET_REGBANK_IMPL #include "RISCVGenRegisterBank.inc" +namespace llvm { +namespace RISCV { + +const RegisterBankInfo::PartialMapping PartMappings[] = { + {0, 32, GPRBRegBank}, + {0, 64, GPRBRegBank}, + {0, 32, FPRBRegBank}, + {0, 64, FPRBRegBank}, +}; + +enum PartialMappingIdx { + PMI_GPRB32 = 0, + PMI_GPRB64 = 1, + PMI_FPRB32 = 2, + PMI_FPRB64 = 3, +}; + +const RegisterBankInfo::ValueMapping ValueMappings[] = { + // Invalid value mapping. + {nullptr, 0}, + // Maximum 3 GPR operands; 32 bit. + {&PartMappings[PMI_GPRB32], 1}, + {&PartMappings[PMI_GPRB32], 1}, + {&PartMappings[PMI_GPRB32], 1}, + // Maximum 3 GPR operands; 64 bit. + {&PartMappings[PMI_GPRB64], 1}, + {&PartMappings[PMI_GPRB64], 1}, + {&PartMappings[PMI_GPRB64], 1}, + // Maximum 3 FPR operands; 32 bit. + {&PartMappings[PMI_FPRB32], 1}, + {&PartMappings[PMI_FPRB32], 1}, + {&PartMappings[PMI_FPRB32], 1}, + // Maximum 3 FPR operands; 64 bit. 
+ {&PartMappings[PMI_FPRB64], 1}, + {&PartMappings[PMI_FPRB64], 1}, + {&PartMappings[PMI_FPRB64], 1}, +}; + +enum ValueMappingIdx { + InvalidIdx = 0, + GPRB32Idx = 1, + GPRB64Idx = 4, + FPRB32Idx = 7, + FPRB64Idx = 10, +}; +} // namespace RISCV +} // namespace llvm + using namespace llvm; RISCVRegisterBankInfo::RISCVRegisterBankInfo(unsigned HwMode) : RISCVGenRegisterBankInfo(HwMode) {} + +const RegisterBank & +RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, + LLT Ty) const { + switch (RC.getID()) { + default: + llvm_unreachable("Register class not supported"); + case RISCV::GPRRegClassID: + case RISCV::GPRF16RegClassID: + case RISCV::GPRF32RegClassID: + case RISCV::GPRNoX0RegClassID: + case RISCV::GPRNoX0X2RegClassID: + case RISCV::GPRJALRRegClassID: + case RISCV::GPRTCRegClassID: + case RISCV::GPRC_and_GPRTCRegClassID: + case RISCV::GPRCRegClassID: + case RISCV::GPRC_and_SR07RegClassID: + case RISCV::SR07RegClassID: + case RISCV::SPRegClassID: + case RISCV::GPRX0RegClassID: + return getRegBank(RISCV::GPRBRegBankID); + case RISCV::FPR64RegClassID: + case RISCV::FPR16RegClassID: + case RISCV::FPR32RegClassID: + case RISCV::FPR64CRegClassID: + case RISCV::FPR32CRegClassID: + return getRegBank(RISCV::FPRBRegBankID); + case RISCV::VMRegClassID: + case RISCV::VRRegClassID: + case RISCV::VRNoV0RegClassID: + case RISCV::VRM2RegClassID: + case RISCV::VRM2NoV0RegClassID: + case RISCV::VRM4RegClassID: + case RISCV::VRM4NoV0RegClassID: + case RISCV::VMV0RegClassID: + case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID: + case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID: + case RISCV::VRM8RegClassID: + case RISCV::VRM8NoV0RegClassID: + case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID: + return getRegBank(RISCV::VRBRegBankID); + } +} + +static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) { + assert(Size == 32 || Size == 64); + unsigned Idx = Size == 64 ? RISCV::FPRB64Idx : RISCV::FPRB32Idx; + return &RISCV::ValueMappings[Idx]; +} + +/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, +/// having only floating-point operands. +/// FIXME: this is copied from target AArch64. Needs some code refactor here to +/// put this function in GlobalISel/Utils.cpp. +static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FCOPYSIGN: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FABS: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMINIMUM: + return true; + } + return false; +} + +// TODO: Make this more like AArch64? 
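The ValueMappings table in this hunk follows the usual RegisterBankInfo convention that a single ValueMapping pointer is indexed per operand, which is why each (bank, width) group repeats three identical entries ("Maximum 3 ... operands"). A minimal standalone sketch of that layout, with simplified stand-in types rather than the in-tree API:

#include <cassert>

namespace {
// Simplified stand-ins for the RegisterBankInfo mapping types.
struct PartialMapping { unsigned StartIdx, Length, BankId; };
struct ValueMapping { const PartialMapping *BreakDown; unsigned NumBreakDowns; };

// One invalid entry, then three identical entries per (bank, width) group,
// mirroring PartMappings/ValueMappings in the patch.
constexpr PartialMapping PartMappings[] = {
    {0, 32, /*GPRB*/ 0}, {0, 64, 0}, {0, 32, /*FPRB*/ 1}, {0, 64, 1}};

constexpr ValueMapping ValueMappings[] = {
    {nullptr, 0},
    {&PartMappings[0], 1}, {&PartMappings[0], 1}, {&PartMappings[0], 1},
    {&PartMappings[1], 1}, {&PartMappings[1], 1}, {&PartMappings[1], 1},
    {&PartMappings[2], 1}, {&PartMappings[2], 1}, {&PartMappings[2], 1},
    {&PartMappings[3], 1}, {&PartMappings[3], 1}, {&PartMappings[3], 1}};

// Same indexing idea as getFPValueMapping above: 7 = FPRB32 group, 10 = FPRB64.
const ValueMapping *getFPValueMapping(unsigned Size) {
  assert(Size == 32 || Size == 64);
  return &ValueMappings[Size == 64 ? 10 : 7];
}
} // namespace

int main() {
  // All three operands of a three-operand FP op (e.g. G_FADD on s64) can be
  // served from one pointer because the next two entries are identical.
  const ValueMapping *M = getFPValueMapping(64);
  for (unsigned Op = 0; Op < 3; ++Op)
    assert(M[Op].BreakDown->Length == 64);
  return 0;
}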
+bool RISCVRegisterBankInfo::hasFPConstraints( + const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + if (isPreISelGenericFloatingPointOpcode(MI.getOpcode())) + return true; + + // If we have a copy instruction, we could be feeding floating point + // instructions. + if (MI.getOpcode() != TargetOpcode::COPY) + return false; + + return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) == &RISCV::FPRBRegBank; +} + +bool RISCVRegisterBankInfo::onlyUsesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FCMP: + return true; + default: + break; + } + + return hasFPConstraints(MI, MRI, TRI); +} + +bool RISCVRegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return true; + default: + break; + } + + return hasFPConstraints(MI, MRI, TRI); +} + +bool RISCVRegisterBankInfo::anyUseOnlyUseFP( + Register Def, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + return any_of( + MRI.use_nodbg_instructions(Def), + [&](const MachineInstr &UseMI) { return onlyUsesFP(UseMI, MRI, TRI); }); +} + +const RegisterBankInfo::InstructionMapping & +RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { + const unsigned Opc = MI.getOpcode(); + + // Try the default logic for non-generic instructions that are either copies + // or already have some operands assigned to banks. + if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) { + const InstructionMapping &Mapping = getInstrMappingImpl(MI); + if (Mapping.isValid()) + return Mapping; + } + + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + + unsigned GPRSize = getMaximumSize(RISCV::GPRBRegBankID); + assert((GPRSize == 32 || GPRSize == 64) && "Unexpected GPR size"); + + unsigned NumOperands = MI.getNumOperands(); + const ValueMapping *GPRValueMapping = + &RISCV::ValueMappings[GPRSize == 64 ? 
RISCV::GPRB64Idx + : RISCV::GPRB32Idx]; + + switch (Opc) { + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_SHL: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + case TargetOpcode::G_MUL: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_SMULH: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_UMULH: + case TargetOpcode::G_UMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_PTRTOINT: + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXTLOAD: + case TargetOpcode::G_ZEXTLOAD: + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, + NumOperands); + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FABS: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, + getFPValueMapping(Ty.getSizeInBits()), + NumOperands); + } + case TargetOpcode::G_IMPLICIT_DEF: { + Register Dst = MI.getOperand(0).getReg(); + auto Mapping = GPRValueMapping; + // FIXME: May need to do a better job determining when to use FPRB. + // For example, the look through COPY case: + // %0:_(s32) = G_IMPLICIT_DEF + // %1:_(s32) = COPY %0 + // $f10_d = COPY %1(s32) + if (anyUseOnlyUseFP(Dst, MRI, TRI)) + Mapping = getFPValueMapping(MRI.getType(Dst).getSizeInBits()); + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, Mapping, + NumOperands); + } + } + + SmallVector<const ValueMapping *, 4> OpdsMapping(NumOperands); + + switch (Opc) { + case TargetOpcode::G_LOAD: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[1] = GPRValueMapping; + // Use FPR64 for s64 loads on rv32. + if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); + OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); + break; + } + + // Check if that load feeds fp instructions. + // In that case, we want the default mapping to be on FPR + // instead of blind map every scalar to GPR. + if (anyUseOnlyUseFP(MI.getOperand(0).getReg(), MRI, TRI)) + // If we have at least one direct use in a FP instruction, + // assume this was a floating point load in the IR. If it was + // not, we would have had a bitcast before reaching that + // instruction. + OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); + + break; + } + case TargetOpcode::G_STORE: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[1] = GPRValueMapping; + // Use FPR64 for s64 stores on rv32. 
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); + OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); + break; + } + + MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg()); + if (onlyDefinesFP(*DefMI, MRI, TRI)) + OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); + break; + } + case TargetOpcode::G_SELECT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + // Try to minimize the number of copies. If we have more floating point + // constrained values than not, then we'll put everything on FPR. Otherwise, + // everything has to be on GPR. + unsigned NumFP = 0; + + // Use FPR64 for s64 select on rv32. + if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + NumFP = 3; + } else { + // Check if the uses of the result always produce floating point values. + // + // For example: + // + // %z = G_SELECT %cond %x %y + // fpr = G_FOO %z ... + if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), + [&](const MachineInstr &UseMI) { + return onlyUsesFP(UseMI, MRI, TRI); + })) + ++NumFP; + + // Check if the defs of the source values always produce floating point + // values. + // + // For example: + // + // %x = G_SOMETHING_ALWAYS_FLOAT %a ... + // %z = G_SELECT %cond %x %y + // + // Also check whether or not the sources have already been decided to be + // FPR. Keep track of this. + // + // This doesn't check the condition, since the condition is always an + // integer. + for (unsigned Idx = 2; Idx < 4; ++Idx) { + Register VReg = MI.getOperand(Idx).getReg(); + MachineInstr *DefMI = MRI.getVRegDef(VReg); + if (getRegBank(VReg, MRI, TRI) == &RISCV::FPRBRegBank || + onlyDefinesFP(*DefMI, MRI, TRI)) + ++NumFP; + } + } + + // Condition operand is always GPR. + OpdsMapping[1] = GPRValueMapping; + + const ValueMapping *Mapping = GPRValueMapping; + if (NumFP >= 2) + Mapping = getFPValueMapping(Ty.getSizeInBits()); + + OpdsMapping[0] = OpdsMapping[2] = OpdsMapping[3] = Mapping; + break; + } + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case RISCV::G_FCLASS: { + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[1] = getFPValueMapping(Ty.getSizeInBits()); + break; + } + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); + OpdsMapping[1] = GPRValueMapping; + break; + } + case TargetOpcode::G_FCMP: { + LLT Ty = MRI.getType(MI.getOperand(2).getReg()); + + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP"); + + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[2] = OpdsMapping[3] = getFPValueMapping(Size); + break; + } + case TargetOpcode::G_MERGE_VALUES: { + // Use FPR64 for s64 merge on rv32. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); + OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits()); + OpdsMapping[1] = GPRValueMapping; + OpdsMapping[2] = GPRValueMapping; + } + break; + } + case TargetOpcode::G_UNMERGE_VALUES: { + // Use FPR64 for s64 unmerge on rv32. 
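The G_SELECT mapping in the hunk above reduces to a small majority vote: the condition operand stays on GPR, and if at least two of the result's FP-constrained uses, the true-value def and the false-value def look floating point, the three value operands go to FPR. A standalone sketch of just that decision, with a hypothetical helper name and booleans standing in for the onlyUsesFP/onlyDefinesFP queries:

#include <cassert>

namespace {
enum Bank { GPR, FPR };

// chooseSelectBank is an illustrative helper, not part of the patch.
Bank chooseSelectBank(bool ResultFeedsFP, bool TrueValIsFP, bool FalseValIsFP) {
  unsigned NumFP = 0;
  NumFP += ResultFeedsFP;  // result used by an FP-only instruction
  NumFP += TrueValIsFP;    // true value defined by an FP-only instruction
  NumFP += FalseValIsFP;   // false value defined by an FP-only instruction
  return NumFP >= 2 ? FPR : GPR;
}
} // namespace

int main() {
  assert(chooseSelectBank(true, true, false) == FPR);  // two FP hints win
  assert(chooseSelectBank(false, true, false) == GPR); // mostly integer
  return 0;
}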
+ LLT Ty = MRI.getType(MI.getOperand(2).getReg()); + if (GPRSize == 32 && Ty.getSizeInBits() == 64) { + assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); + OpdsMapping[0] = GPRValueMapping; + OpdsMapping[1] = GPRValueMapping; + OpdsMapping[2] = getFPValueMapping(Ty.getSizeInBits()); + } + break; + } + default: + // By default map all scalars to GPR. + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + auto &MO = MI.getOperand(Idx); + if (!MO.isReg() || !MO.getReg()) + continue; + LLT Ty = MRI.getType(MO.getReg()); + if (!Ty.isValid()) + continue; + + if (isPreISelGenericFloatingPointOpcode(Opc)) + OpdsMapping[Idx] = getFPValueMapping(Ty.getSizeInBits()); + else + OpdsMapping[Idx] = GPRValueMapping; + } + break; + } + + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, + getOperandsMapping(OpdsMapping), NumOperands); +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h index ee6d4db27880..abd0837395f6 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h @@ -32,6 +32,29 @@ protected: class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo { public: RISCVRegisterBankInfo(unsigned HwMode); + + const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC, + LLT Ty) const override; + + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; + +private: + /// \returns true if \p MI only uses and defines FPRs. + bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// \returns true if \p MI only uses FPRs. + bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// \returns true if any use of \p Def only user FPRs. + bool anyUseOnlyUseFP(Register Def, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// \returns true if \p MI only defines FPRs. + bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; }; } // end namespace llvm #endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td index b49f8259e382..b1ef815fe373 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td @@ -10,4 +10,11 @@ //===----------------------------------------------------------------------===// /// General Purpose Registers: X. -def GPRRegBank : RegisterBank<"GPRB", [GPR]>; +def GPRBRegBank : RegisterBank<"GPRB", [GPR]>; + +/// Floating Point Registers: F. +def FPRBRegBank : RegisterBank<"FPRB", [FPR64]>; + +/// Vector Registers : V. 
+def VRBRegBank : RegisterBank<"VRB", [VRM8]>; + diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index 8f8684e30b3a..aba2511959af 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -14,7 +14,6 @@ #include "RISCVCustomBehaviour.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "RISCV.h" -#include "RISCVInstrInfo.h" #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" @@ -64,9 +63,9 @@ uint8_t RISCVLMULInstrument::getLMUL() const { .Case("M2", 0b001) .Case("M4", 0b010) .Case("M8", 0b011) - .Case("MF2", 0b101) + .Case("MF2", 0b111) .Case("MF4", 0b110) - .Case("MF8", 0b111); + .Case("MF8", 0b101); } const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW"; @@ -186,13 +185,46 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) { return SmallVector<UniqueInstrument>(); } +static std::pair<uint8_t, uint8_t> +getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL, + uint8_t SEW) { + uint8_t EEW; + switch (Opcode) { + case RISCV::VLM_V: + case RISCV::VSM_V: + case RISCV::VLE8_V: + case RISCV::VSE8_V: + EEW = 8; + break; + case RISCV::VLE16_V: + case RISCV::VSE16_V: + EEW = 16; + break; + case RISCV::VLE32_V: + case RISCV::VSE32_V: + EEW = 32; + break; + case RISCV::VLE64_V: + case RISCV::VSE64_V: + EEW = 64; + break; + default: + llvm_unreachable("Opcode is not a vector unit stride load nor store"); + } + + auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW); + if (!EEW) + llvm_unreachable("Invalid SEW or LMUL for new ratio"); + return std::make_pair(EEW, *EMUL); +} + unsigned RISCVInstrumentManager::getSchedClassID( const MCInstrInfo &MCII, const MCInst &MCI, const llvm::SmallVector<Instrument *> &IVec) const { unsigned short Opcode = MCI.getOpcode(); unsigned SchedClassID = MCII.get(Opcode).getSchedClass(); - // Unpack all possible RISCV instruments from IVec. + // Unpack all possible RISC-V instruments from IVec. RISCVLMULInstrument *LI = nullptr; RISCVSEWInstrument *SI = nullptr; for (auto &I : IVec) { @@ -215,12 +247,23 @@ unsigned RISCVInstrumentManager::getSchedClassID( // or (Opcode, LMUL, SEW) if SEW instrument is active, and depends on LMUL // and SEW, or (Opcode, LMUL, 0) if does not depend on SEW. uint8_t SEW = SI ? 
SI->getSEW() : 0; - // Check if it depends on LMUL and SEW - const RISCVVInversePseudosTable::PseudoInfo *RVV = - RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW); - // Check if it depends only on LMUL - if (!RVV) - RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0); + + const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr; + if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V || + Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V || + Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V || + Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V || + Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) { + RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL); + auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW); + RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW); + } else { + // Check if it depends on LMUL and SEW + RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW); + // Check if it depends only on LMUL + if (!RVV) + RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0); + } // Not a RVV instr if (!RVV) { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 1b890fbe041a..716fb67c5824 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -1,4 +1,4 @@ -//===-- RISCVAsmBackend.cpp - RISCV Assembler Backend ---------------------===// +//===-- RISCVAsmBackend.cpp - RISC-V Assembler Backend --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -19,6 +19,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" @@ -27,6 +28,15 @@ using namespace llvm; +static cl::opt<bool> RelaxBranches("riscv-asm-relax-branches", cl::init(true), + cl::Hidden); +// Temporary workaround for old linkers that do not support ULEB128 relocations, +// which are abused by DWARF v5 DW_LLE_offset_pair/DW_RLE_offset_pair +// implemented in Clang/LLVM. 
+static cl::opt<bool> ULEB128Reloc( + "riscv-uleb128-reloc", cl::init(true), cl::Hidden, + cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate")); + std::optional<MCFixupKind> RISCVAsmBackend::getFixupKind(StringRef Name) const { if (STI.getTargetTriple().isOSBinFormatELF()) { unsigned Type; @@ -76,24 +86,6 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_riscv_relax", 0, 0, 0}, {"fixup_riscv_align", 0, 0, 0}, - - {"fixup_riscv_set_8", 0, 8, 0}, - {"fixup_riscv_add_8", 0, 8, 0}, - {"fixup_riscv_sub_8", 0, 8, 0}, - - {"fixup_riscv_set_16", 0, 16, 0}, - {"fixup_riscv_add_16", 0, 16, 0}, - {"fixup_riscv_sub_16", 0, 16, 0}, - - {"fixup_riscv_set_32", 0, 32, 0}, - {"fixup_riscv_add_32", 0, 32, 0}, - {"fixup_riscv_sub_32", 0, 32, 0}, - - {"fixup_riscv_add_64", 0, 64, 0}, - {"fixup_riscv_sub_64", 0, 64, 0}, - - {"fixup_riscv_set_6b", 2, 6, 0}, - {"fixup_riscv_sub_6b", 2, 6, 0}, }; static_assert((std::size(Infos)) == RISCV::NumTargetFixupKinds, "Not all fixup kinds added to Infos array"); @@ -116,7 +108,8 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { // necessary for correctness as offsets may change during relaxation. bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target) { + const MCValue &Target, + const MCSubtargetInfo *STI) { if (Fixup.getKind() >= FirstLiteralRelocationKind) return true; switch (Fixup.getTargetKind()) { @@ -126,6 +119,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_Data_leb128: if (Target.isAbsolute()) return false; break; @@ -135,7 +129,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, return true; } - return STI.hasFeature(RISCV::FeatureRelax) || ForceRelocs; + return STI->hasFeature(RISCV::FeatureRelax) || ForceRelocs; } bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, @@ -144,15 +138,12 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, const MCRelaxableFragment *DF, const MCAsmLayout &Layout, const bool WasForced) const { + if (!RelaxBranches) + return false; + int64_t Offset = int64_t(Value); unsigned Kind = Fixup.getTargetKind(); - // We only do conditional branch relaxation when the symbol is resolved. - // For conditional branch, the immediate must be in the range - // [-4096, 4094]. - if (Kind == RISCV::fixup_riscv_branch) - return Resolved && !isInt<13>(Offset); - // Return true if the symbol is actually unresolved. // Resolved could be always false when shouldForceRelocation return true. // We use !WasForced to indicate that the symbol is unresolved and not forced @@ -171,6 +162,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, // For compressed jump instructions the immediate must be // in the range [-2048, 2046]. return Offset > 2046 || Offset < -2048; + case RISCV::fixup_riscv_branch: + // For conditional branch instructions the immediate must be + // in the range [-4096, 4095]. 
+ return !isInt<13>(Offset); } } @@ -251,7 +246,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF, OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc); Offset = OS.tell(); Fixup = RISCV::getRelocPairForSize(2); - support::endian::write<uint16_t>(OS, 0, support::little); + support::endian::write<uint16_t>(OS, 0, llvm::endianness::little); } const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta); @@ -301,27 +296,31 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, auto AddFixups = [&Fixups, &AddrDelta](unsigned Offset, std::pair<unsigned, unsigned> Fixup) { const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta); - Fixups.push_back(MCFixup::create( - Offset, MBE.getLHS(), static_cast<MCFixupKind>(std::get<0>(Fixup)))); - Fixups.push_back(MCFixup::create( - Offset, MBE.getRHS(), static_cast<MCFixupKind>(std::get<1>(Fixup)))); + Fixups.push_back( + MCFixup::create(Offset, MBE.getLHS(), + static_cast<MCFixupKind>(FirstLiteralRelocationKind + + std::get<0>(Fixup)))); + Fixups.push_back( + MCFixup::create(Offset, MBE.getRHS(), + static_cast<MCFixupKind>(FirstLiteralRelocationKind + + std::get<1>(Fixup)))); }; if (isUIntN(6, Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc); - AddFixups(0, {RISCV::fixup_riscv_set_6b, RISCV::fixup_riscv_sub_6b}); + AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6}); } else if (isUInt<8>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); - support::endian::write<uint8_t>(OS, 0, support::little); - AddFixups(1, {RISCV::fixup_riscv_set_8, RISCV::fixup_riscv_sub_8}); + support::endian::write<uint8_t>(OS, 0, llvm::endianness::little); + AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8}); } else if (isUInt<16>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc2); - support::endian::write<uint16_t>(OS, 0, support::little); - AddFixups(1, {RISCV::fixup_riscv_set_16, RISCV::fixup_riscv_sub_16}); + support::endian::write<uint16_t>(OS, 0, llvm::endianness::little); + AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16}); } else if (isUInt<32>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc4); - support::endian::write<uint32_t>(OS, 0, support::little); - AddFixups(1, {RISCV::fixup_riscv_set_32, RISCV::fixup_riscv_sub_32}); + support::endian::write<uint32_t>(OS, 0, llvm::endianness::little); + AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32}); } else { llvm_unreachable("unsupported CFA encoding"); } @@ -330,6 +329,18 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, return true; } +bool RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, + int64_t &Value) const { + if (LF.isSigned()) + return false; + const MCExpr &Expr = LF.getValue(); + if (ULEB128Reloc) { + LF.getFixups().push_back( + MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); + } + return Expr.evaluateKnownAbsolute(Value, Layout); +} + // Given a compressed control flow instruction this function returns // the expanded instruction. 
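relaxLEB128 above is needed because a ULEB128-encoded label difference has no fixed size: the byte count can change as offsets move during relaxation, so the value is roughly either pinned by a SET_ULEB128/SUB_ULEB128 relocation pair or recomputed from the layout. A standalone sketch of the variable-length property only, a plain encoder rather than the MC implementation:

#include <cassert>
#include <cstdint>
#include <vector>

namespace {
// Minimal ULEB128 encoder: 7 value bits per byte, high bit set on all but the
// last byte.
std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Bytes;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit: more bytes follow
    Bytes.push_back(Byte);
  } while (Value != 0);
  return Bytes;
}
} // namespace

int main() {
  // A label difference of 127 fits in one byte; nudge it to 128 and the
  // encoding grows, which is why such fragments may need relaxing.
  assert(encodeULEB128(127).size() == 1);
  assert(encodeULEB128(128).size() == 2);
  return 0;
}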
unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const { @@ -400,25 +411,12 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case RISCV::fixup_riscv_tls_got_hi20: case RISCV::fixup_riscv_tls_gd_hi20: llvm_unreachable("Relocation should be unconditionally forced\n"); - case RISCV::fixup_riscv_set_8: - case RISCV::fixup_riscv_add_8: - case RISCV::fixup_riscv_sub_8: - case RISCV::fixup_riscv_set_16: - case RISCV::fixup_riscv_add_16: - case RISCV::fixup_riscv_sub_16: - case RISCV::fixup_riscv_set_32: - case RISCV::fixup_riscv_add_32: - case RISCV::fixup_riscv_sub_32: - case RISCV::fixup_riscv_add_64: - case RISCV::fixup_riscv_sub_64: case FK_Data_1: case FK_Data_2: case FK_Data_4: case FK_Data_8: - case FK_Data_6b: + case FK_Data_leb128: return Value; - case RISCV::fixup_riscv_set_6b: - return Value & 0x03; case RISCV::fixup_riscv_lo12_i: case RISCV::fixup_riscv_pcrel_lo12_i: case RISCV::fixup_riscv_tprel_lo12_i: @@ -483,6 +481,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return UpperImm | ((LowerImm << 20) << 32); } case RISCV::fixup_riscv_rvc_jump: { + if (!isInt<12>(Value)) + Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); // Need to produce offset[11|4|9:8|10|6|7|3:1|5] from the 11-bit Value. unsigned Bit11 = (Value >> 11) & 0x1; unsigned Bit4 = (Value >> 4) & 0x1; @@ -497,6 +497,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } case RISCV::fixup_riscv_rvc_branch: { + if (!isInt<9>(Value)) + Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); // Need to produce offset[8|4:3], [reg 3 bit], offset[7:6|2:1|5] unsigned Bit8 = (Value >> 8) & 0x1; unsigned Bit7_6 = (Value >> 6) & 0x3; @@ -513,8 +515,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool RISCVAsmBackend::evaluateTargetFixup( const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, - const MCFragment *DF, const MCValue &Target, uint64_t &Value, - bool &WasForced) { + const MCFragment *DF, const MCValue &Target, const MCSubtargetInfo *STI, + uint64_t &Value, bool &WasForced) { const MCFixup *AUIPCFixup; const MCFragment *AUIPCDF; MCValue AUIPCTarget; @@ -564,7 +566,7 @@ bool RISCVAsmBackend::evaluateTargetFixup( Value = Layout.getSymbolOffset(SA) + AUIPCTarget.getConstant(); Value -= Layout.getFragmentOffset(AUIPCDF) + AUIPCFixup->getOffset(); - if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget)) { + if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI)) { WasForced = true; return false; } @@ -596,6 +598,10 @@ bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, TA = ELF::R_RISCV_ADD64; TB = ELF::R_RISCV_SUB64; break; + case llvm::FK_Data_leb128: + TA = ELF::R_RISCV_SET_ULEB128; + TB = ELF::R_RISCV_SUB_ULEB128; + break; default: llvm_unreachable("unsupported fixup size"); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 0ea1f32e8296..2ad6534ac8bc 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -31,8 +31,8 @@ class RISCVAsmBackend : public MCAsmBackend { public: RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) - : MCAsmBackend(support::little, RISCV::fixup_riscv_relax), STI(STI), - OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) { + 
: MCAsmBackend(llvm::endianness::little, RISCV::fixup_riscv_relax), + STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) { RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits()); } ~RISCVAsmBackend() override = default; @@ -50,8 +50,8 @@ public: bool evaluateTargetFixup(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &WasForced) override; + const MCValue &Target, const MCSubtargetInfo *STI, + uint64_t &Value, bool &WasForced) override; bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F, const MCFixup &Fixup, const MCValue &Target, @@ -66,7 +66,8 @@ public: createObjectTargetWriter() const override; bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target) override; + const MCValue &Target, + const MCSubtargetInfo *STI) override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, @@ -99,6 +100,8 @@ public: bool &WasRelaxed) const override; bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout, bool &WasRelaxed) const override; + bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, + int64_t &Value) const override; bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 0a42c6faee29..66a46a485f53 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -47,11 +47,11 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, errs() << "'" << ABIName << "' is not a recognized ABI for this target (ignoring target-abi)\n"; - } else if (ABIName.startswith("ilp32") && IsRV64) { + } else if (ABIName.starts_with("ilp32") && IsRV64) { errs() << "32-bit ABIs are not supported for 64-bit targets (ignoring " "target-abi)\n"; TargetABI = ABI_Unknown; - } else if (ABIName.startswith("lp64") && !IsRV64) { + } else if (ABIName.starts_with("lp64") && !IsRV64) { errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring " "target-abi)\n"; TargetABI = ABI_Unknown; @@ -206,6 +206,17 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) { return (SEW * 8) / LMul; } +std::optional<RISCVII::VLMUL> +RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW) { + unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL); + unsigned EMULFixedPoint = (EEW * 8) / Ratio; + bool Fractional = EMULFixedPoint < 8; + unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8; + if (!isValidLMUL(EMUL, Fractional)) + return std::nullopt; + return RISCVVType::encodeLMUL(EMUL, Fractional); +} + // Include the auto-generated portion of the compress emitter. #define GEN_UNCOMPRESS_INSTR #define GEN_COMPRESS_INSTR @@ -242,7 +253,7 @@ int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) { "Unexpected semantics"); // Handle the minimum normalized value which is different for each type. - if (FPImm.isSmallestNormalized()) + if (FPImm.isSmallestNormalized() && !FPImm.isNegative()) return 1; // Convert to single precision to use its lookup table. 
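getSameRatioLMUL above picks the EMUL that keeps EEW/EMUL equal to SEW/LMUL, using the same 1/8-unit fixed-point representation as getSEWLMULRatio. A standalone numeric sketch with illustrative names; the in-tree code additionally validates the result with isValidLMUL and returns an encoded VLMUL:

#include <cassert>
#include <optional>
#include <utility>

namespace {
// LMUL as a fixed-point value in 1/8 units: m1 -> 8, m2 -> 16, mf2 -> 4, ...
unsigned lmulFixedPoint(unsigned LMul, bool Fractional) {
  return Fractional ? 8 / LMul : LMul * 8;
}

unsigned sewLMULRatio(unsigned SEW, unsigned LMul, bool Fractional) {
  return (SEW * 8) / lmulFixedPoint(LMul, Fractional);
}

// Returns {EMUL, IsFractional} keeping EEW/EMUL == SEW/LMUL, or nullopt when
// no plausible EMUL exists (simplified validity check).
std::optional<std::pair<unsigned, bool>>
sameRatioEMUL(unsigned SEW, unsigned LMul, bool Fractional, unsigned EEW) {
  unsigned Ratio = sewLMULRatio(SEW, LMul, Fractional);
  unsigned EMULFixedPoint = (EEW * 8) / Ratio;
  if (EMULFixedPoint == 0 || EMULFixedPoint > 64)
    return std::nullopt;
  bool FracEMUL = EMULFixedPoint < 8;
  unsigned EMUL = FracEMUL ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
  return std::make_pair(EMUL, FracEMUL);
}
} // namespace

int main() {
  // A vle8.v executed under vtype SEW=32, LMUL=1: the EEW=8 access must use
  // EMUL = 1/4 so that 8 / (1/4) == 32 / 1.
  auto R = sameRatioEMUL(/*SEW=*/32, /*LMul=*/1, /*Fractional=*/false, /*EEW=*/8);
  assert(R && R->first == 4 && R->second);
  return 0;
}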
@@ -273,7 +284,7 @@ int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) { if (Sign) { if (Entry == 16) return 0; - return false; + return -1; } return Entry; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index f86419319dd3..00b4751905f6 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -203,6 +203,35 @@ static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) { return Desc.getNumOperands() - 1; } +/// \returns the index to the rounding mode immediate value if any, otherwise +/// returns -1. +static inline int getFRMOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags)) + return -1; + + // The operand order + // -------------------------------------- + // | n-1 (if any) | n-2 | n-3 | n-4 | + // | policy | sew | vl | frm | + // -------------------------------------- + return getVLOpNum(Desc) - 1; +} + +/// \returns the index to the rounding mode immediate value if any, otherwise +/// returns -1. +static inline int getVXRMOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + if (!hasRoundModeOp(TSFlags) || !usesVXRM(TSFlags)) + return -1; + // The operand order + // -------------------------------------- + // | n-1 (if any) | n-2 | n-3 | n-4 | + // | policy | sew | vl | vxrm | + // -------------------------------------- + return getVLOpNum(Desc) - 1; +} + // Is the first def operand tied to the first use operand. This is true for // vector pseudo instructions that have a merge operand for tail/mask // undisturbed. It's also true for vector FMA instructions where one of the @@ -506,6 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS); unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul); +std::optional<RISCVII::VLMUL> +getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW); } // namespace RISCVVType namespace RISCVRVC { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index db7dc1aed7fc..0799267eaf7c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -27,7 +27,7 @@ public: // Return true if the given relocation must be with a symbol rather than // section plus offset. - bool needsRelocateWithSymbol(const MCSymbol &Sym, + bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym, unsigned Type) const override { // TODO: this is very conservative, update once RISC-V psABI requirements // are clarified. 
@@ -89,22 +89,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_CALL_PLT; case RISCV::fixup_riscv_call_plt: return ELF::R_RISCV_CALL_PLT; - case RISCV::fixup_riscv_add_8: - return ELF::R_RISCV_ADD8; - case RISCV::fixup_riscv_sub_8: - return ELF::R_RISCV_SUB8; - case RISCV::fixup_riscv_add_16: - return ELF::R_RISCV_ADD16; - case RISCV::fixup_riscv_sub_16: - return ELF::R_RISCV_SUB16; - case RISCV::fixup_riscv_add_32: - return ELF::R_RISCV_ADD32; - case RISCV::fixup_riscv_sub_32: - return ELF::R_RISCV_SUB32; - case RISCV::fixup_riscv_add_64: - return ELF::R_RISCV_ADD64; - case RISCV::fixup_riscv_sub_64: - return ELF::R_RISCV_SUB64; } } @@ -143,32 +127,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_RELAX; case RISCV::fixup_riscv_align: return ELF::R_RISCV_ALIGN; - case RISCV::fixup_riscv_set_6b: - return ELF::R_RISCV_SET6; - case RISCV::fixup_riscv_sub_6b: - return ELF::R_RISCV_SUB6; - case RISCV::fixup_riscv_add_8: - return ELF::R_RISCV_ADD8; - case RISCV::fixup_riscv_set_8: - return ELF::R_RISCV_SET8; - case RISCV::fixup_riscv_sub_8: - return ELF::R_RISCV_SUB8; - case RISCV::fixup_riscv_set_16: - return ELF::R_RISCV_SET16; - case RISCV::fixup_riscv_add_16: - return ELF::R_RISCV_ADD16; - case RISCV::fixup_riscv_sub_16: - return ELF::R_RISCV_SUB16; - case RISCV::fixup_riscv_set_32: - return ELF::R_RISCV_SET32; - case RISCV::fixup_riscv_add_32: - return ELF::R_RISCV_ADD32; - case RISCV::fixup_riscv_sub_32: - return ELF::R_RISCV_SUB32; - case RISCV::fixup_riscv_add_64: - return ELF::R_RISCV_ADD64; - case RISCV::fixup_riscv_sub_64: - return ELF::R_RISCV_SUB64; } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index e43cb8b40d83..9db5148208b3 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -125,6 +125,65 @@ void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) { void RISCVELFStreamer::reset() { static_cast<RISCVTargetStreamer *>(getTargetStreamer())->reset(); MCELFStreamer::reset(); + MappingSymbolCounter = 0; + LastMappingSymbols.clear(); + LastEMS = EMS_None; +} + +void RISCVELFStreamer::emitDataMappingSymbol() { + if (LastEMS == EMS_Data) + return; + emitMappingSymbol("$d"); + LastEMS = EMS_Data; +} + +void RISCVELFStreamer::emitInstructionsMappingSymbol() { + if (LastEMS == EMS_Instructions) + return; + emitMappingSymbol("$x"); + LastEMS = EMS_Instructions; +} + +void RISCVELFStreamer::emitMappingSymbol(StringRef Name) { + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." + Twine(MappingSymbolCounter++))); + emitLabel(Symbol); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); +} + +void RISCVELFStreamer::changeSection(MCSection *Section, + const MCExpr *Subsection) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. 
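The streamer changes above add ARM/AArch64-style mapping symbols for RISC-V: a "$x" symbol is emitted at the first instruction after data and a "$d" at the first data after instructions, and the last state is remembered per section so resuming a section does not re-emit a symbol. A standalone sketch of just that state machine, single section, with an illustrative Tracker type (the real streamer also appends a running counter to each symbol name):

#include <cassert>
#include <string>
#include <vector>

namespace {
enum ElfMappingSymbol { EMS_None, EMS_Instructions, EMS_Data };

// Illustrative stand-in for the streamer: records which mapping symbols would
// be emitted as the emission kind alternates.
struct Tracker {
  ElfMappingSymbol LastEMS = EMS_None;
  std::vector<std::string> Symbols;

  void emitInstruction() {
    if (LastEMS == EMS_Instructions)
      return;
    Symbols.push_back("$x");
    LastEMS = EMS_Instructions;
  }
  void emitData() {
    if (LastEMS == EMS_Data)
      return;
    Symbols.push_back("$d");
    LastEMS = EMS_Data;
  }
};
} // namespace

int main() {
  Tracker T;
  T.emitInstruction();
  T.emitInstruction(); // still in an instruction run: no new symbol
  T.emitData();        // first data byte: $d
  T.emitInstruction(); // back to code: $x again
  assert((T.Symbols == std::vector<std::string>{"$x", "$d", "$x"}));
  return 0;
}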
+ LastMappingSymbols[getPreviousSection().first] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::changeSection(Section, Subsection); +} + +void RISCVELFStreamer::emitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + emitInstructionsMappingSymbol(); + MCELFStreamer::emitInstruction(Inst, STI); +} + +void RISCVELFStreamer::emitBytes(StringRef Data) { + emitDataMappingSymbol(); + MCELFStreamer::emitBytes(Data); +} + +void RISCVELFStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, + SMLoc Loc) { + emitDataMappingSymbol(); + MCELFStreamer::emitFill(NumBytes, FillValue, Loc); +} + +void RISCVELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, + SMLoc Loc) { + emitDataMappingSymbol(); + MCELFStreamer::emitValueImpl(Value, Size, Loc); } namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index e68f70261146..a6f54bf67b5d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -16,12 +16,27 @@ using namespace llvm; class RISCVELFStreamer : public MCELFStreamer { void reset() override; + void emitDataMappingSymbol(); + void emitInstructionsMappingSymbol(); + void emitMappingSymbol(StringRef Name); + + enum ElfMappingSymbol { EMS_None, EMS_Instructions, EMS_Data }; + + int64_t MappingSymbolCounter = 0; + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS = EMS_None; public: RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> MOW, std::unique_ptr<MCCodeEmitter> MCE) : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} + + void changeSection(MCSection *Section, const MCExpr *Subsection) override; + void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void emitBytes(StringRef Data) override; + void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) override; + void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; }; namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h index 5727aab3cd4c..74bd9398a9ef 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h @@ -1,4 +1,4 @@ -//===-- RISCVFixupKinds.h - RISCV Specific Fixup Entries --------*- C++ -*-===// +//===-- RISCVFixupKinds.h - RISC-V Specific Fixup Entries -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,6 +9,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCFixup.h" #include <utility> @@ -70,42 +71,6 @@ enum Fixups { // Used to generate an R_RISCV_ALIGN relocation, which indicates the linker // should fixup the alignment after linker relaxation. fixup_riscv_align, - // 8-bit fixup corresponding to R_RISCV_SET8 for local label assignment. - fixup_riscv_set_8, - // 8-bit fixup corresponding to R_RISCV_ADD8 for 8-bit symbolic difference - // paired relocations. 
- fixup_riscv_add_8, - // 8-bit fixup corresponding to R_RISCV_SUB8 for 8-bit symbolic difference - // paired relocations. - fixup_riscv_sub_8, - // 16-bit fixup corresponding to R_RISCV_SET16 for local label assignment. - fixup_riscv_set_16, - // 16-bit fixup corresponding to R_RISCV_ADD16 for 16-bit symbolic difference - // paired reloctions. - fixup_riscv_add_16, - // 16-bit fixup corresponding to R_RISCV_SUB16 for 16-bit symbolic difference - // paired reloctions. - fixup_riscv_sub_16, - // 32-bit fixup corresponding to R_RISCV_SET32 for local label assignment. - fixup_riscv_set_32, - // 32-bit fixup corresponding to R_RISCV_ADD32 for 32-bit symbolic difference - // paired relocations. - fixup_riscv_add_32, - // 32-bit fixup corresponding to R_RISCV_SUB32 for 32-bit symbolic difference - // paired relocations. - fixup_riscv_sub_32, - // 64-bit fixup corresponding to R_RISCV_ADD64 for 64-bit symbolic difference - // paired relocations. - fixup_riscv_add_64, - // 64-bit fixup corresponding to R_RISCV_SUB64 for 64-bit symbolic difference - // paired relocations. - fixup_riscv_sub_64, - // 6-bit fixup corresponding to R_RISCV_SET6 for local label assignment in - // DWARF CFA. - fixup_riscv_set_6b, - // 6-bit fixup corresponding to R_RISCV_SUB6 for local label assignment in - // DWARF CFA. - fixup_riscv_sub_6b, // Used as a sentinel, must be the last fixup_riscv_invalid, @@ -118,17 +83,21 @@ getRelocPairForSize(unsigned Size) { default: llvm_unreachable("unsupported fixup size"); case 1: - return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_8), - MCFixupKind(RISCV::fixup_riscv_sub_8)); + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD8), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB8)); case 2: - return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_16), - MCFixupKind(RISCV::fixup_riscv_sub_16)); + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD16), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB16)); case 4: - return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_32), - MCFixupKind(RISCV::fixup_riscv_sub_32)); + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD32), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB32)); case 8: - return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_64), - MCFixupKind(RISCV::fixup_riscv_sub_64)); + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD64), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB64)); } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 8e98abd65aab..195dda0b8b14 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" @@ -75,7 +76,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address, } void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { - O << getRegisterName(Reg); + markup(O, Markup::Register) << getRegisterName(Reg); } void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -90,7 +91,7 @@ void 
RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } if (MO.isImm()) { - O << MO.getImm(); + markup(O, Markup::Immediate) << formatImm(MO.getImm()); return; } @@ -110,9 +111,9 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, uint64_t Target = Address + MO.getImm(); if (!STI.hasFeature(RISCV::Feature64Bit)) Target &= 0xffffffff; - O << formatHex(Target); + markup(O, Markup::Target) << formatHex(Target); } else { - O << MO.getImm(); + markup(O, Markup::Target) << formatImm(MO.getImm()); } } @@ -123,11 +124,11 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo, auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm); auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm); if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits())) - O << SiFiveReg->Name; + markup(O, Markup::Register) << SiFiveReg->Name; else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits())) - O << SysReg->Name; + markup(O, Markup::Register) << SysReg->Name; else - O << Imm; + markup(O, Markup::Register) << formatImm(Imm); } void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo, @@ -157,16 +158,29 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo, O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg); } +void RISCVInstPrinter::printFRMArgLegacy(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + auto FRMArg = + static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm()); + // Never print rounding mode if it's the default 'rne'. This ensures the + // output can still be parsed by older tools that erroneously failed to + // accept a rounding mode. + if (FRMArg == RISCVFPRndMode::RoundingMode::RNE) + return; + O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg); +} + void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); if (Imm == 1) { - O << "min"; + markup(O, Markup::Immediate) << "min"; } else if (Imm == 30) { - O << "inf"; + markup(O, Markup::Immediate) << "inf"; } else if (Imm == 31) { - O << "nan"; + markup(O, Markup::Immediate) << "nan"; } else { float FPVal = RISCVLoadFPImm::getFPImm(Imm); // If the value is an integer, print a .0 fraction. Otherwise, use %g to @@ -174,9 +188,9 @@ void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo, // if it is shorter than printing as a decimal. The smallest value requires // 12 digits of precision including the decimal. if (FPVal == (int)(FPVal)) - O << format("%.1f", FPVal); + markup(O, Markup::Immediate) << format("%.1f", FPVal); else - O << format("%.12g", FPVal); + markup(O, Markup::Immediate) << format("%.12g", FPVal); } } @@ -198,7 +212,7 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, // or non-zero in bits 8 and above. if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED || RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) { - O << Imm; + O << formatImm(Imm); return; } // Print the text form. @@ -211,16 +225,30 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo, O << "{"; switch (Imm) { case RISCVZC::RLISTENCODE::RA: - O << (ArchRegNames ? "x1" : "ra"); + markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra"); break; case RISCVZC::RLISTENCODE::RA_S0: - O << (ArchRegNames ? "x1, x8" : "ra, s0"); + markup(O, Markup::Register) << (ArchRegNames ? 
"x1" : "ra"); + O << ", "; + markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0"); break; case RISCVZC::RLISTENCODE::RA_S0_S1: - O << (ArchRegNames ? "x1, x8-x9" : "ra, s0-s1"); + markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra"); + O << ", "; + markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0"); + O << '-'; + markup(O, Markup::Register) << (ArchRegNames ? "x9" : "s1"); break; case RISCVZC::RLISTENCODE::RA_S0_S2: - O << (ArchRegNames ? "x1, x8-x9, x18" : "ra, s0-s2"); + markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra"); + O << ", "; + markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0"); + O << '-'; + markup(O, Markup::Register) << (ArchRegNames ? "x9" : "s2"); + if (ArchRegNames) { + O << ", "; + markup(O, Markup::Register) << "x18"; + } break; case RISCVZC::RLISTENCODE::RA_S0_S3: case RISCVZC::RLISTENCODE::RA_S0_S4: @@ -229,11 +257,21 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo, case RISCVZC::RLISTENCODE::RA_S0_S7: case RISCVZC::RLISTENCODE::RA_S0_S8: case RISCVZC::RLISTENCODE::RA_S0_S9: - O << (ArchRegNames ? "x1, x8-x9, x18-" : "ra, s0-") - << getRegisterName(RISCV::X19 + (Imm - RISCVZC::RLISTENCODE::RA_S0_S3)); - break; case RISCVZC::RLISTENCODE::RA_S0_S11: - O << (ArchRegNames ? "x1, x8-x9, x18-x27" : "ra, s0-s11"); + markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra"); + O << ", "; + markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0"); + O << '-'; + if (ArchRegNames) { + markup(O, Markup::Register) << "x9"; + O << ", "; + markup(O, Markup::Register) << "x18"; + O << '-'; + } + markup(O, Markup::Register) << getRegisterName( + RISCV::X19 + (Imm == RISCVZC::RLISTENCODE::RA_S0_S11 + ? 8 + : Imm - RISCVZC::RLISTENCODE::RA_S0_S3)); break; default: llvm_unreachable("invalid register list"); @@ -241,6 +279,22 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo, O << "}"; } +void RISCVInstPrinter::printRegReg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNo); + + assert(MO.isReg() && "printRegReg can only print register operands"); + if (MO.getReg() == RISCV::NoRegister) + return; + printRegName(O, MO.getReg()); + + O << "("; + const MCOperand &MO1 = MI->getOperand(OpNo + 1); + assert(MO1.isReg() && "printRegReg can only print register operands"); + printRegName(O, MO1.getReg()); + O << ")"; +} + void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { int64_t Imm = MI->getOperand(OpNo).getImm(); @@ -256,6 +310,8 @@ void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo, if (Opcode == RISCV::CM_PUSH) Spimm = -Spimm; + // RAII guard for ANSI color escape sequences + WithMarkup ScopedMarkup = markup(O, Markup::Immediate); RISCVZC::printSpimm(Spimm, O); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index 20f12af13008..4512bd5f4c4b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -40,6 +40,8 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printFRMArgLegacy(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printFPImmOperand(const MCInst *MI, unsigned OpNo, const 
MCSubtargetInfo &STI, raw_ostream &O); void printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo, @@ -52,7 +54,8 @@ public: raw_ostream &O); void printSpimm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - + void printRegReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); // Autogenerated by tblgen. std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; void printInstruction(const MCInst *MI, uint64_t Address, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index b63a5cea823e..82fed50bce75 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -92,6 +92,10 @@ public: unsigned getRlistOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + + unsigned getRegReg(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; }; } // end anonymous namespace @@ -137,7 +141,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type. TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); if (MI.getOpcode() == RISCV::PseudoTAIL || MI.getOpcode() == RISCV::PseudoJump) @@ -147,7 +151,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, // Emit JALR Ra, Ra, 0 TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); } // Expand PseudoAddTPRel to a simple ADD with the correct relocation. @@ -186,7 +190,7 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, .addOperand(SrcReg) .addOperand(TPReg); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); } static unsigned getInvertedBranchOp(unsigned BrOp) { @@ -240,14 +244,14 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI, Opcode == RISCV::PseudoLongBNE ? 
RISCV::C_BEQZ : RISCV::C_BNEZ; MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addImm(6); uint16_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write<uint16_t>(CB, Binary, support::little); + support::endian::write<uint16_t>(CB, Binary, llvm::endianness::little); Offset = 2; } else { unsigned InvOpc = getInvertedBranchOp(Opcode); MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addReg(SrcReg2).addImm(8); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); Offset = 4; } @@ -255,7 +259,7 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI, MCInst TmpInst = MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); - support::endian::write(CB, Binary, support::little); + support::endian::write(CB, Binary, llvm::endianness::little); Fixups.clear(); if (SrcSymbol.isExpr()) { @@ -306,12 +310,12 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, llvm_unreachable("Unhandled encodeInstruction length!"); case 2: { uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write<uint16_t>(CB, Bits, support::little); + support::endian::write<uint16_t>(CB, Bits, llvm::endianness::little); break; } case 4: { uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::write(CB, Bits, support::little); + support::endian::write(CB, Bits, llvm::endianness::little); break; } } @@ -442,8 +446,11 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, RelaxCandidate = true; break; } - } else if (Kind == MCExpr::SymbolRef && - cast<MCSymbolRefExpr>(Expr)->getKind() == MCSymbolRefExpr::VK_None) { + } else if ((Kind == MCExpr::SymbolRef && + cast<MCSymbolRefExpr>(Expr)->getKind() == + MCSymbolRefExpr::VK_None) || + Kind == MCExpr::Binary) { + // FIXME: Sub kind binary exprs have chance of underflow. 
if (MIFrm == RISCVII::InstFormatJ) { FixupKind = RISCV::fixup_riscv_jal; } else if (MIFrm == RISCVII::InstFormatB) { @@ -503,4 +510,17 @@ unsigned RISCVMCCodeEmitter::getRlistOpValue(const MCInst &MI, unsigned OpNo, return Imm; } +unsigned RISCVMCCodeEmitter::getRegReg(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + const MCOperand &MO1 = MI.getOperand(OpNo + 1); + assert(MO.isReg() && MO1.isReg() && "Expected registers."); + + unsigned Op = Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + unsigned Op1 = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg()); + + return Op | Op1 << 5; +} + #include "RISCVGenMCCodeEmitter.inc" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 75af5c2de094..79e56a7a6d03 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include <bitset> #define GET_INSTRINFO_MC_DESC #define ENABLE_INSTR_PREDICATE_VERIFIER @@ -114,10 +115,79 @@ static MCTargetStreamer *createRISCVNullTargetStreamer(MCStreamer &S) { namespace { class RISCVMCInstrAnalysis : public MCInstrAnalysis { + int64_t GPRState[31] = {}; + std::bitset<31> GPRValidMask; + + static bool isGPR(unsigned Reg) { + return Reg >= RISCV::X0 && Reg <= RISCV::X31; + } + + static unsigned getRegIndex(unsigned Reg) { + assert(isGPR(Reg) && Reg != RISCV::X0 && "Invalid GPR reg"); + return Reg - RISCV::X1; + } + + void setGPRState(unsigned Reg, std::optional<int64_t> Value) { + if (Reg == RISCV::X0) + return; + + auto Index = getRegIndex(Reg); + + if (Value) { + GPRState[Index] = *Value; + GPRValidMask.set(Index); + } else { + GPRValidMask.reset(Index); + } + } + + std::optional<int64_t> getGPRState(unsigned Reg) const { + if (Reg == RISCV::X0) + return 0; + + auto Index = getRegIndex(Reg); + + if (GPRValidMask.test(Index)) + return GPRState[Index]; + return std::nullopt; + } + public: explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + void resetState() override { GPRValidMask.reset(); } + + void updateState(const MCInst &Inst, uint64_t Addr) override { + // Terminators mark the end of a basic block which means the sequentially + // next instruction will be the first of another basic block and the current + // state will typically not be valid anymore. For calls, we assume all + // registers may be clobbered by the callee (TODO: should we take the + // calling convention into account?). + if (isTerminator(Inst) || isCall(Inst)) { + resetState(); + return; + } + + switch (Inst.getOpcode()) { + default: { + // Clear the state of all defined registers for instructions that we don't + // explicitly support. 
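For reference, the getRegReg hunk above packs two register operands into one 10-bit field, with the first operand in bits 4:0 and the second in bits 9:5. Below is a minimal standalone sketch of that packing, assuming both operands are GPRs whose MC encoding values equal their x-register numbers (true for x0-x31); the helper name packRegReg and the sample operand pair are illustrative only, not part of the patch.

    #include <cassert>

    // Mirror of "Op | Op1 << 5" in RISCVMCCodeEmitter::getRegReg: the first
    // register operand lands in bits 4:0, the second in bits 9:5.
    static unsigned packRegReg(unsigned FirstEnc, unsigned SecondEnc) {
      assert(FirstEnc < 32 && SecondEnc < 32 && "GPR encodings are 5 bits");
      return FirstEnc | (SecondEnc << 5);
    }

    int main() {
      // An operand printed by printRegReg as "a1(a2)" (x11 and x12) would
      // encode as 0b01100'01011 == 0x18B under this assumption.
      assert(packRegReg(11, 12) == 0x18B);
      return 0;
    }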
+ auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs(); + for (unsigned I = 0; I < NumDefs; ++I) { + auto DefReg = Inst.getOperand(I).getReg(); + if (isGPR(DefReg)) + setGPRState(DefReg, std::nullopt); + } + break; + } + case RISCV::AUIPC: + setGPRState(Inst.getOperand(0).getReg(), + Addr + (Inst.getOperand(1).getImm() << 12)); + break; + } + } + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const override { if (isConditionalBranch(Inst)) { @@ -140,6 +210,15 @@ public: return true; } + if (Inst.getOpcode() == RISCV::JALR) { + if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) { + Target = *TargetRegState + Inst.getOperand(2).getImm(); + return true; + } + + return false; + } + return false; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index f659779e9772..4358a5b878e6 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -45,13 +45,12 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) { } // Recursively generate a sequence for materializing an integer. -static void generateInstSeqImpl(int64_t Val, - const FeatureBitset &ActiveFeatures, +static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI, RISCVMatInt::InstSeq &Res) { - bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit]; + bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); // Use BSETI for a single bit that can't be expressed by a single LUI or ADDI. - if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val) && + if (STI.hasFeature(RISCV::FeatureStdExtZbs) && isPowerOf2_64(Val) && (!isInt<32>(Val) || Val == 0x800)) { Res.emplace_back(RISCV::BSETI, Log2_64(Val)); return; @@ -122,7 +121,7 @@ static void generateInstSeqImpl(int64_t Val, ShiftAmount -= 12; Val = (uint64_t)Val << 12; } else if (isUInt<32>((uint64_t)Val << 12) && - ActiveFeatures[RISCV::FeatureStdExtZba]) { + STI.hasFeature(RISCV::FeatureStdExtZba)) { // Reduce the shift amount and add zeros to the LSBs so it will match // LUI, then shift left with SLLI.UW to clear the upper 32 set bits. ShiftAmount -= 12; @@ -133,7 +132,7 @@ static void generateInstSeqImpl(int64_t Val, // Try to use SLLI_UW for Val when it is uint32 but not int32. if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) && - ActiveFeatures[RISCV::FeatureStdExtZba]) { + STI.hasFeature(RISCV::FeatureStdExtZba)) { // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with // SLLI_UW. Val = ((uint64_t)Val) | (0xffffffffull << 32); @@ -141,7 +140,7 @@ static void generateInstSeqImpl(int64_t Val, } } - generateInstSeqImpl(Val, ActiveFeatures, Res); + generateInstSeqImpl(Val, STI, Res); // Skip shift if we were able to use LUI directly. if (ShiftAmount) { @@ -171,10 +170,60 @@ static unsigned extractRotateInfo(int64_t Val) { return 0; } +static void generateInstSeqLeadingZeros(int64_t Val, const MCSubtargetInfo &STI, + RISCVMatInt::InstSeq &Res) { + assert(Val > 0 && "Expected postive val"); + + unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val); + uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros; + // Fill in the bits that will be shifted out with 1s. An example where this + // helps is trailing one masks with 32 or more ones. This will generate + // ADDI -1 and an SRLI. 
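To make the trailing-ones case mentioned in that comment concrete: for Val = 0x00000000FFFFFFFF the shifted value becomes -1, which a single ADDI materializes, and one SRLI restores the original. A small self-contained check of the arithmetic, assuming RV64; the constant is chosen purely for illustration.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Val = 0x00000000FFFFFFFFull;        // 32 trailing ones
      unsigned LeadingZeros = 32;                  // countl_zero(Val)
      uint64_t Shifted = Val << LeadingZeros;      // 0xFFFFFFFF00000000
      Shifted |= (1ull << LeadingZeros) - 1;       // fill shifted-out bits with ones
      assert((int64_t)Shifted == -1);              // one ADDI rd, x0, -1
      assert((Shifted >> LeadingZeros) == Val);    // one SRLI rd, rd, 32 restores Val
      return 0;
    }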
+ ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros); + + RISCVMatInt::InstSeq TmpSeq; + generateInstSeqImpl(ShiftedVal, STI, TmpSeq); + + // Keep the new sequence if it is an improvement or the original is empty. + if ((TmpSeq.size() + 1) < Res.size() || + (Res.empty() && TmpSeq.size() < 8)) { + TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros); + Res = TmpSeq; + } + + // Some cases can benefit from filling the lower bits with zeros instead. + ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros); + TmpSeq.clear(); + generateInstSeqImpl(ShiftedVal, STI, TmpSeq); + + // Keep the new sequence if it is an improvement or the original is empty. + if ((TmpSeq.size() + 1) < Res.size() || + (Res.empty() && TmpSeq.size() < 8)) { + TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros); + Res = TmpSeq; + } + + // If we have exactly 32 leading zeros and Zba, we can try using zext.w at + // the end of the sequence. + if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) { + // Try replacing upper bits with 1. + uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros); + TmpSeq.clear(); + generateInstSeqImpl(LeadingOnesVal, STI, TmpSeq); + + // Keep the new sequence if it is an improvement. + if ((TmpSeq.size() + 1) < Res.size() || + (Res.empty() && TmpSeq.size() < 8)) { + TmpSeq.emplace_back(RISCV::ADD_UW, 0); + Res = TmpSeq; + } + } +} + namespace llvm::RISCVMatInt { -InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { +InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) { RISCVMatInt::InstSeq Res; - generateInstSeqImpl(Val, ActiveFeatures, Res); + generateInstSeqImpl(Val, STI, Res); // If the low 12 bits are non-zero, the first expansion may end with an ADDI // or ADDIW. If there are trailing zeros, try generating a sign extended @@ -187,9 +236,9 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { // NOTE: We don't check for C extension to minimize differences in generated // code. bool IsShiftedCompressible = - isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion]; + isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion); RISCVMatInt::InstSeq TmpSeq; - generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq); + generateInstSeqImpl(ShiftedVal, STI, TmpSeq); // Keep the new sequence if it is an improvement. if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) { @@ -203,65 +252,56 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { if (Res.size() <= 2) return Res; - assert(ActiveFeatures[RISCV::Feature64Bit] && + assert(STI.hasFeature(RISCV::Feature64Bit) && "Expected RV32 to only need 2 instructions"); - // If the constant is positive we might be able to generate a shifted constant - // with no leading zeros and use a final SRLI to restore them. - if (Val > 0) { - assert(Res.size() > 2 && "Expected longer sequence"); - unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val); - uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros; - // Fill in the bits that will be shifted out with 1s. An example where this - // helps is trailing one masks with 32 or more ones. This will generate - // ADDI -1 and an SRLI. - ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros); - + // If the lower 13 bits are something like 0x17ff, try to add 1 to change the + // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end + // of the sequence. Call generateInstSeqImpl on the new constant which may + // subtract 0xfffffffffffff800 to create another ADDI. 
This will leave a + // constant with more than 12 trailing zeros for the next recursive step. + if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) { + int64_t Imm12 = -(0x800 - (Val & 0xfff)); + int64_t AdjustedVal = Val - Imm12; RISCVMatInt::InstSeq TmpSeq; - generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq); + generateInstSeqImpl(AdjustedVal, STI, TmpSeq); // Keep the new sequence if it is an improvement. if ((TmpSeq.size() + 1) < Res.size()) { - TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros); + TmpSeq.emplace_back(RISCV::ADDI, Imm12); Res = TmpSeq; } + } - // Some cases can benefit from filling the lower bits with zeros instead. - ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros); - TmpSeq.clear(); - generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq); - - // Keep the new sequence if it is an improvement. - if ((TmpSeq.size() + 1) < Res.size()) { - TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros); - Res = TmpSeq; - } + // If the constant is positive we might be able to generate a shifted constant + // with no leading zeros and use a final SRLI to restore them. + if (Val > 0 && Res.size() > 2) { + generateInstSeqLeadingZeros(Val, STI, Res); + } - // If we have exactly 32 leading zeros and Zba, we can try using zext.w at - // the end of the sequence. - if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) { - // Try replacing upper bits with 1. - uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros); - TmpSeq.clear(); - generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq); + // If the constant is negative, trying inverting and using our trailing zero + // optimizations. Use an xori to invert the final value. + if (Val < 0 && Res.size() > 3) { + uint64_t InvertedVal = ~(uint64_t)Val; + RISCVMatInt::InstSeq TmpSeq; + generateInstSeqLeadingZeros(InvertedVal, STI, TmpSeq); - // Keep the new sequence if it is an improvement. - if ((TmpSeq.size() + 1) < Res.size()) { - TmpSeq.emplace_back(RISCV::ADD_UW, 0); - Res = TmpSeq; - } + // Keep it if we found a sequence that is smaller after inverting. + if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) { + TmpSeq.emplace_back(RISCV::XORI, -1); + Res = TmpSeq; } } // If the Low and High halves are the same, use pack. The pack instruction // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the // lower half and rs2 in the upper half. - if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbkb]) { + if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbkb)) { int64_t LoVal = SignExtend64<32>(Val); int64_t HiVal = SignExtend64<32>(Val >> 32); if (LoVal == HiVal) { RISCVMatInt::InstSeq TmpSeq; - generateInstSeqImpl(LoVal, ActiveFeatures, TmpSeq); + generateInstSeqImpl(LoVal, STI, TmpSeq); if ((TmpSeq.size() + 1) < Res.size()) { TmpSeq.emplace_back(RISCV::PACK, 0); Res = TmpSeq; @@ -270,7 +310,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { } // Perform optimization with BCLRI/BSETI in the Zbs extension. - if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) { + if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) { // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000, // call generateInstSeqImpl with Val|0x80000000 (which is expected be // an int32), then emit (BCLRI r, 31). 
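A concrete instance of case 1, with an illustrative constant and assuming Zbs is available: Val = 0xFFFFFFFF00001234 has all-ones upper bits and bit 31 clear, so OR-ing in bit 31 yields a sign-extended 32-bit value that LUI+ADDIW can build, and BCLRI rd, rd, 31 then recovers Val.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Val = 0xFFFFFFFF00001234ull;        // bits 63:32 ones, bit 31 clear
      uint64_t NewVal = Val | 0x80000000ull;       // 0xFFFFFFFF80001234
      // NewVal is sign-extended from 32 bits, so a plain LUI+ADDIW pair builds it.
      assert((int64_t)NewVal == (int64_t)(int32_t)NewVal);
      // Clearing bit 31 again (BCLRI rd, rd, 31) recovers the original constant.
      assert((NewVal & ~(1ull << 31)) == Val);
      return 0;
    }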
@@ -288,7 +328,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { } if (isInt<32>(NewVal)) { RISCVMatInt::InstSeq TmpSeq; - generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq); + generateInstSeqImpl(NewVal, STI, TmpSeq); if ((TmpSeq.size() + 1) < Res.size()) { TmpSeq.emplace_back(Opc, 31); Res = TmpSeq; @@ -302,7 +342,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { uint32_t Hi = Hi_32(Val); Opc = 0; RISCVMatInt::InstSeq TmpSeq; - generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq); + generateInstSeqImpl(Lo, STI, TmpSeq); // Check if it is profitable to use BCLRI/BSETI. if (Lo > 0 && TmpSeq.size() + llvm::popcount(Hi) < Res.size()) { Opc = RISCV::BSETI; @@ -323,7 +363,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { } // Perform optimization with SH*ADD in the Zba extension. - if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) { + if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZba)) { int64_t Div = 0; unsigned Opc = 0; RISCVMatInt::InstSeq TmpSeq; @@ -340,7 +380,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { } // Build the new instruction sequence. if (Div > 0) { - generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq); + generateInstSeqImpl(Val / Div, STI, TmpSeq); if ((TmpSeq.size() + 1) < Res.size()) { TmpSeq.emplace_back(Opc, 0); Res = TmpSeq; @@ -367,7 +407,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { assert(Lo12 != 0 && "unexpected instruction sequence for immediate materialisation"); assert(TmpSeq.empty() && "Expected empty TmpSeq"); - generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq); + generateInstSeqImpl(Hi52 / Div, STI, TmpSeq); if ((TmpSeq.size() + 2) < Res.size()) { TmpSeq.emplace_back(Opc, 0); TmpSeq.emplace_back(RISCV::ADDI, Lo12); @@ -379,14 +419,14 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { // Perform optimization with rori in the Zbb and th.srri in the XTheadBb // extension. - if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] || - ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) { + if (Res.size() > 2 && (STI.hasFeature(RISCV::FeatureStdExtZbb) || + STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) { if (unsigned Rotate = extractRotateInfo(Val)) { RISCVMatInt::InstSeq TmpSeq; uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate); assert(isInt<12>(NegImm12)); TmpSeq.emplace_back(RISCV::ADDI, NegImm12); - TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb] + TmpSeq.emplace_back(STI.hasFeature(RISCV::FeatureStdExtZbb) ? RISCV::RORI : RISCV::TH_SRRI, Rotate); @@ -396,11 +436,44 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { return Res; } -int getIntMatCost(const APInt &Val, unsigned Size, - const FeatureBitset &ActiveFeatures, bool CompressionCost) { - bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit]; - bool HasRVC = CompressionCost && (ActiveFeatures[RISCV::FeatureStdExtC] || - ActiveFeatures[RISCV::FeatureStdExtZca]); +InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, + unsigned &ShiftAmt, unsigned &AddOpc) { + int64_t LoVal = SignExtend64<32>(Val); + if (LoVal == 0) + return RISCVMatInt::InstSeq(); + + // Subtract the LoVal to emulate the effect of the final ADD. + uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal; + assert(Tmp != 0); + + // Use trailing zero counts to figure how far we need to shift LoVal to line + // up with the remaining constant. 
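Worked through on a sample constant (illustrative only, uses C++20 std::countr_zero), the trailing-zero alignment behaves as follows; the caller would then emit an SLLI by ShiftAmt plus the returned AddOpc to rebuild the full value from a single materialized LoVal.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Val = 0x1234567812345678ull;
      int64_t LoVal = (int32_t)Val;                      // SignExtend64<32>(Val)
      uint64_t Tmp = Val - (uint64_t)LoVal;              // 0x1234567800000000
      unsigned TzLo = std::countr_zero((uint64_t)LoVal); // 3
      unsigned TzHi = std::countr_zero(Tmp);             // 35
      unsigned ShiftAmt = TzHi - TzLo;                   // 32
      // The upper part is exactly LoVal shifted up, so SLLI + ADD of the same
      // register reconstructs Val after materializing LoVal only once.
      assert(Tmp == ((uint64_t)LoVal << ShiftAmt));
      assert(((uint64_t)LoVal << ShiftAmt) + (uint64_t)LoVal == Val);
      return 0;
    }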
+ // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the + // final constant come from LoVal. + unsigned TzLo = llvm::countr_zero((uint64_t)LoVal); + unsigned TzHi = llvm::countr_zero(Tmp); + assert(TzLo < 32 && TzHi >= 32); + ShiftAmt = TzHi - TzLo; + AddOpc = RISCV::ADD; + + if (Tmp == ((uint64_t)LoVal << ShiftAmt)) + return RISCVMatInt::generateInstSeq(LoVal, STI); + + // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)). + if (STI.hasFeature(RISCV::FeatureStdExtZba) && Lo_32(Val) == Hi_32(Val)) { + ShiftAmt = 32; + AddOpc = RISCV::ADD_UW; + return RISCVMatInt::generateInstSeq(LoVal, STI); + } + + return RISCVMatInt::InstSeq(); +} + +int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, + bool CompressionCost) { + bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); + bool HasRVC = CompressionCost && (STI.hasFeature(RISCV::FeatureStdExtC) || + STI.hasFeature(RISCV::FeatureStdExtZca)); int PlatRegSize = IsRV64 ? 64 : 32; // Split the constant into platform register sized chunks, and calculate cost @@ -408,7 +481,7 @@ int getIntMatCost(const APInt &Val, unsigned Size, int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) { APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize); - InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures); + InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI); Cost += getInstSeqCost(MatSeq, HasRVC); } return std::max(1, Cost); @@ -429,6 +502,7 @@ OpndKind Inst::getOpndKind() const { return RISCVMatInt::RegReg; case RISCV::ADDI: case RISCV::ADDIW: + case RISCV::XORI: case RISCV::SLLI: case RISCV::SRLI: case RISCV::SLLI_UW: diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index ae7b8d402184..780f685463f3 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -10,7 +10,7 @@ #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H #include "llvm/ADT/SmallVector.h" -#include "llvm/TargetParser/SubtargetFeature.h" +#include "llvm/MC/MCSubtargetInfo.h" #include <cstdint> namespace llvm { @@ -46,7 +46,15 @@ using InstSeq = SmallVector<Inst, 8>; // simple struct is produced rather than directly emitting the instructions in // order to allow this helper to be used from both the MC layer and during // instruction selection. -InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures); +InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI); + +// Helper to generate an instruction sequence that can materialize the given +// immediate value into a register using an additional temporary register. This +// handles cases where the constant can be generated by (ADD (SLLI X, C), X) or +// (ADD_UW (SLLI X, C) X). The sequence to generate X is returned. ShiftAmt is +// provides the SLLI and AddOpc indicates ADD or ADD_UW. +InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, + unsigned &ShiftAmt, unsigned &AddOpc); // Helper to estimate the number of instructions required to materialise the // given immediate value into a register. This estimate does not account for @@ -58,8 +66,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures); // If CompressionCost is true it will use a different cost calculation if RVC is // enabled. 
This should be used to compare two different sequences to determine // which is more compressible. -int getIntMatCost(const APInt &Val, unsigned Size, - const FeatureBitset &ActiveFeatures, +int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost = false); } // namespace RISCVMatInt } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h index 107ca51520b7..9eb18099894b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h @@ -18,13 +18,8 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { -class AsmPrinter; class FunctionPass; class InstructionSelector; -class MCInst; -class MCOperand; -class MachineInstr; -class MachineOperand; class PassRegistry; class RISCVRegisterBankInfo; class RISCVSubtarget; @@ -33,8 +28,11 @@ class RISCVTargetMachine; FunctionPass *createRISCVCodeGenPreparePass(); void initializeRISCVCodeGenPreparePass(PassRegistry &); +FunctionPass *createRISCVDeadRegisterDefinitionsPass(); +void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &); + FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM, - CodeGenOpt::Level OptLevel); + CodeGenOptLevel OptLevel); FunctionPass *createRISCVMakeCompressibleOptPass(); void initializeRISCVMakeCompressibleOptPass(PassRegistry &); @@ -42,6 +40,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &); FunctionPass *createRISCVGatherScatterLoweringPass(); void initializeRISCVGatherScatterLoweringPass(PassRegistry &); +FunctionPass *createRISCVFoldMasksPass(); +void initializeRISCVFoldMasksPass(PassRegistry &); + FunctionPass *createRISCVOptWInstrsPass(); void initializeRISCVOptWInstrsPass(PassRegistry &); @@ -60,9 +61,14 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &); FunctionPass *createRISCVInsertVSETVLIPass(); void initializeRISCVInsertVSETVLIPass(PassRegistry &); +FunctionPass *createRISCVPostRAExpandPseudoPass(); +void initializeRISCVPostRAExpandPseudoPass(PassRegistry &); FunctionPass *createRISCVInsertReadWriteCSRPass(); void initializeRISCVInsertReadWriteCSRPass(PassRegistry &); +FunctionPass *createRISCVInsertWriteVXRMPass(); +void initializeRISCVInsertWriteVXRMPass(PassRegistry &); + FunctionPass *createRISCVRedundantCopyEliminationPass(); void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); @@ -80,6 +86,15 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); void initializeRISCVDAGToDAGISelPass(PassRegistry &); + +FunctionPass *createRISCVPostLegalizerCombiner(); +void initializeRISCVPostLegalizerCombinerPass(PassRegistry &); + +FunctionPass *createRISCVO0PreLegalizerCombiner(); +void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &); + +FunctionPass *createRISCVPreLegalizerCombiner(); +void initializeRISCVPreLegalizerCombinerPass(PassRegistry &); } // namespace llvm #endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index d2520d932ddf..0fd514fa87cd 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -36,6 +36,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/RISCVISAInfo.h" #include "llvm/Support/raw_ostream.h" #include 
"llvm/Transforms/Instrumentation/HWAddressSanitizer.h" @@ -46,6 +47,10 @@ using namespace llvm; STATISTIC(RISCVNumInstrsCompressed, "Number of RISC-V Compressed instructions emitted"); +namespace llvm { +extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures]; +} // namespace llvm + namespace { class RISCVAsmPrinter : public AsmPrinter { const RISCVSubtarget *STI; @@ -57,6 +62,15 @@ public: StringRef getPassName() const override { return "RISC-V Assembly Printer"; } + void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + + void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + + void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + bool runOnMachineFunction(MachineFunction &MF) override; void emitInstruction(const MachineInstr *MI) override; @@ -83,6 +97,7 @@ public: void emitEndOfAsmFile(Module &M) override; void emitFunctionEntryLabel() override; + bool emitDirectiveOptionArch(); private: void emitAttributes(); @@ -93,6 +108,78 @@ private: }; } +void RISCVAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4; + unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes(); + + auto &Ctx = OutStreamer.getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer.emitLabel(MILabel); + + SM.recordStackMap(*MILabel, MI); + assert(NumNOPBytes % NOPBytes == 0 && + "Invalid number of NOP bytes requested!"); + + // Scan ahead to trim the shadow. + const MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::const_iterator MII(MI); + ++MII; + while (NumNOPBytes > 0) { + if (MII == MBB.end() || MII->isCall() || + MII->getOpcode() == RISCV::DBG_VALUE || + MII->getOpcode() == TargetOpcode::PATCHPOINT || + MII->getOpcode() == TargetOpcode::STACKMAP) + break; + ++MII; + NumNOPBytes -= 4; + } + + // Emit nops. + emitNops(NumNOPBytes / NOPBytes); +} + +// Lower a patchpoint of the form: +// [<def>], <id>, <numBytes>, <target>, <numArgs> +void RISCVAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4; + + auto &Ctx = OutStreamer.getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer.emitLabel(MILabel); + SM.recordPatchPoint(*MILabel, MI); + + PatchPointOpers Opers(&MI); + + unsigned EncodedBytes = 0; + + // Emit padding. + unsigned NumBytes = Opers.getNumPatchBytes(); + assert(NumBytes >= EncodedBytes && + "Patchpoint can't request size less than the length of a call."); + assert((NumBytes - EncodedBytes) % NOPBytes == 0 && + "Invalid number of NOP bytes requested!"); + emitNops((NumBytes - EncodedBytes) / NOPBytes); +} + +void RISCVAsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 
2 : 4; + + StatepointOpers SOpers(&MI); + if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { + assert(PatchBytes % NOPBytes == 0 && + "Invalid number of NOP bytes requested!"); + emitNops(PatchBytes / NOPBytes); + } + + auto &Ctx = OutStreamer.getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer.emitLabel(MILabel); + SM.recordStatepoint(*MILabel, MI); +} + void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { MCInst CInst; bool Res = RISCVRVC::compress(CInst, Inst, *STI); @@ -160,6 +247,12 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { case RISCV::PseudoRVVInitUndefM4: case RISCV::PseudoRVVInitUndefM8: return; + case TargetOpcode::STACKMAP: + return LowerSTACKMAP(*OutStreamer, SM, *MI); + case TargetOpcode::PATCHPOINT: + return LowerPATCHPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::STATEPOINT: + return LowerSTATEPOINT(*OutStreamer, SM, *MI); } MCInst OutInst; @@ -230,7 +323,7 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand). if (!AddrReg.isReg()) return true; - if (!Offset.isImm() && !Offset.isGlobal()) + if (!Offset.isImm() && !Offset.isGlobal() && !Offset.isBlockAddress()) return true; MCOperand MCO; @@ -239,17 +332,49 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, if (Offset.isImm()) OS << MCO.getImm(); - else if (Offset.isGlobal()) + else if (Offset.isGlobal() || Offset.isBlockAddress()) OS << *MCO.getExpr(); OS << "(" << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")"; return false; } +bool RISCVAsmPrinter::emitDirectiveOptionArch() { + RISCVTargetStreamer &RTS = + static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer()); + SmallVector<RISCVOptionArchArg> NeedEmitStdOptionArgs; + const MCSubtargetInfo &MCSTI = *TM.getMCSubtargetInfo(); + for (const auto &Feature : RISCVFeatureKV) { + if (STI->hasFeature(Feature.Value) == MCSTI.hasFeature(Feature.Value)) + continue; + + if (!llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key)) + continue; + + auto Delta = STI->hasFeature(Feature.Value) ? RISCVOptionArchArgType::Plus + : RISCVOptionArchArgType::Minus; + NeedEmitStdOptionArgs.emplace_back(Delta, Feature.Key); + } + if (!NeedEmitStdOptionArgs.empty()) { + RTS.emitDirectiveOptionPush(); + RTS.emitDirectiveOptionArch(NeedEmitStdOptionArgs); + return true; + } + + return false; +} + bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) { STI = &MF.getSubtarget<RISCVSubtarget>(); + RISCVTargetStreamer &RTS = + static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer()); + + bool EmittedOptionArch = emitDirectiveOptionArch(); SetupMachineFunction(MF); emitFunctionBody(); + + if (EmittedOptionArch) + RTS.emitDirectiveOptionPop(); return false; } @@ -738,13 +863,14 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, uint64_t TSFlags = MCID.TSFlags; unsigned NumOps = MI->getNumExplicitOperands(); - // Skip policy, VL and SEW operands which are the last operands if present. + // Skip policy, SEW, VL, VXRM/FRM operands which are the last operands if + // present. 
if (RISCVII::hasVecPolicyOp(TSFlags)) --NumOps; - if (RISCVII::hasVLOp(TSFlags)) - --NumOps; if (RISCVII::hasSEWOp(TSFlags)) --NumOps; + if (RISCVII::hasVLOp(TSFlags)) + --NumOps; if (RISCVII::hasRoundModeOp(TSFlags)) --NumOps; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 2fcd9a40588a..f9d8401bab7b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -28,8 +28,6 @@ using namespace llvm; #define DEBUG_TYPE "riscv-codegenprepare" #define PASS_NAME "RISC-V CodeGenPrepare" -STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt"); - namespace { class RISCVCodeGenPrepare : public FunctionPass, @@ -52,59 +50,14 @@ public: } bool visitInstruction(Instruction &I) { return false; } - bool visitZExtInst(ZExtInst &I); bool visitAnd(BinaryOperator &BO); }; } // end anonymous namespace -bool RISCVCodeGenPrepare::visitZExtInst(ZExtInst &ZExt) { - if (!ST->is64Bit()) - return false; - - Value *Src = ZExt.getOperand(0); - - // We only care about ZExt from i32 to i64. - if (!ZExt.getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32)) - return false; - - // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we - // can determine that the sign bit of X is zero via a dominating condition. - // This often occurs with widened induction variables. - if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src, - Constant::getNullValue(Src->getType()), &ZExt, - *DL).value_or(false)) { - auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt); - SExt->takeName(&ZExt); - SExt->setDebugLoc(ZExt.getDebugLoc()); - - ZExt.replaceAllUsesWith(SExt); - ZExt.eraseFromParent(); - ++NumZExtToSExt; - return true; - } - - // Convert (zext (abs(i32 X, i1 1))) -> (sext (abs(i32 X, i1 1))). If abs of - // INT_MIN is poison, the sign bit is zero. - using namespace PatternMatch; - if (match(Src, m_Intrinsic<Intrinsic::abs>(m_Value(), m_One()))) { - auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt); - SExt->takeName(&ZExt); - SExt->setDebugLoc(ZExt.getDebugLoc()); - - ZExt.replaceAllUsesWith(SExt); - ZExt.eraseFromParent(); - ++NumZExtToSExt; - return true; - } - - return false; -} - // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, -// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill -// the upper 32 bits with ones. A separate transform will turn (zext X) into -// (sext X) for the same condition. +// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill +// the upper 32 bits with ones. bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { if (!ST->is64Bit()) return false; @@ -112,9 +65,17 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { if (!BO.getType()->isIntegerTy(64)) return false; - // Left hand side should be sext or zext. + auto canBeSignExtend = [](Instruction *I) { + if (isa<SExtInst>(I)) + return true; + if (isa<ZExtInst>(I)) + return I->hasNonNeg(); + return false; + }; + + // Left hand side should be a sext or zext nneg. 
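For a sense of why the mask rewrite below is sound, here is a small standalone check with an illustrative mask: C = 0xFFFFFF00 has bit 31 set and zero upper bits, and its sign extension, -256, fits an ANDI immediate; when the AND input is a sext (or zext nneg) of an i32, its upper 32 bits are zero, so widening the mask's upper bits to ones does not change the result.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t C = 0xFFFFFF00ull;              // bit 31 set, bits 63:32 zero
      int64_t CSext = (int32_t)C;              // SignExtend64<32>(C) == -256 (simm12)
      for (int32_t X : {0, 1, 0x1234, 0x7FFFFFFF}) {
        uint64_t Ext = (uint64_t)(int64_t)X;   // sext i32 -> i64; sign bit is clear
        assert((Ext & C) == (Ext & (uint64_t)CSext));
      }
      return 0;
    }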
Instruction *LHS = dyn_cast<Instruction>(BO.getOperand(0)); - if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS))) + if (!LHS || !canBeSignExtend(LHS)) return false; Value *LHSSrc = LHS->getOperand(0); @@ -135,13 +96,6 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C))) return false; - // If we can determine the sign bit of the input is 0, we can replace the - // And mask constant. - if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc, - Constant::getNullValue(LHSSrc->getType()), - LHS, *DL).value_or(false)) - return false; - // Sign extend the constant and replace the And operand. C = SignExtend64<32>(C); BO.setOperand(1, ConstantInt::get(LHS->getType(), C)); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td new file mode 100644 index 000000000000..3a5afb1b075c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td @@ -0,0 +1,27 @@ +//=- RISCVCombine.td - Define RISC-V Combine Rules -----------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/GlobalISel/Combine.td" + +def RISCVPreLegalizerCombiner: GICombiner< + "RISCVPreLegalizerCombinerImpl", [all_combines]> { +} + +def RISCVO0PreLegalizerCombiner: GICombiner< + "RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> { +} + +// Post-legalization combines which are primarily optimizations. +// TODO: Add more combines. +def RISCVPostLegalizerCombiner + : GICombiner<"RISCVPostLegalizerCombinerImpl", + [redundant_and, identity_combines]> { +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp new file mode 100644 index 000000000000..df607236f7d5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp @@ -0,0 +1,103 @@ +//===- RISCVDeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This pass rewrites Rd to x0 for instrs whose return values are unused. 
+// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVInstrInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; +#define DEBUG_TYPE "riscv-dead-defs" +#define RISCV_DEAD_REG_DEF_NAME "RISC-V Dead register definitions" + +STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); + +namespace { +class RISCVDeadRegisterDefinitions : public MachineFunctionPass { +public: + static char ID; + + RISCVDeadRegisterDefinitions() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return RISCV_DEAD_REG_DEF_NAME; } +}; +} // end anonymous namespace + +char RISCVDeadRegisterDefinitions::ID = 0; +INITIALIZE_PASS(RISCVDeadRegisterDefinitions, DEBUG_TYPE, + RISCV_DEAD_REG_DEF_NAME, false, false) + +FunctionPass *llvm::createRISCVDeadRegisterDefinitionsPass() { + return new RISCVDeadRegisterDefinitions(); +} + +bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + LLVM_DEBUG(dbgs() << "***** RISCVDeadRegisterDefinitions *****\n"); + + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // We only handle non-computational instructions since some NOP encodings + // are reserved for HINT instructions. + const MCInstrDesc &Desc = MI.getDesc(); + if (!Desc.mayLoad() && !Desc.mayStore() && + !Desc.hasUnmodeledSideEffects()) + continue; + // For PseudoVSETVLIX0, Rd = X0 has special meaning. + if (MI.getOpcode() == RISCV::PseudoVSETVLIX0) + continue; + for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef() || MO.isEarlyClobber()) + continue; + // Be careful not to change the register if it's a tied operand. + if (MI.isRegTiedToUseOperand(I)) { + LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); + continue; + } + // We should not have any relevant physreg defs that are replacable by + // zero before register allocation. So we just check for dead vreg defs. + Register Reg = MO.getReg(); + if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg))) + continue; + LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n "; + MI.print(dbgs())); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF); + if (!(RC && RC->contains(RISCV::X0))) { + LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); + continue; + } + MO.setReg(RISCV::X0); + MO.setIsDead(); + LLVM_DEBUG(dbgs() << " Replacing with zero register. 
New:\n "; + MI.print(dbgs())); + ++NumDeadDefsReplaced; + MadeChange = true; + } + } + } + + return MadeChange; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index d10bba26023f..bb772fc5da92 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -30,6 +30,7 @@ namespace { class RISCVExpandAtomicPseudo : public MachineFunctionPass { public: + const RISCVSubtarget *STI; const RISCVInstrInfo *TII; static char ID; @@ -72,7 +73,8 @@ private: char RISCVExpandAtomicPseudo::ID = 0; bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo(); + STI = &MF.getSubtarget<RISCVSubtarget>(); + TII = STI->getInstrInfo(); #ifndef NDEBUG const unsigned OldSize = getInstSizeInBytes(MF); @@ -105,8 +107,8 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) { bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { - // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the - // expanded instructions for each pseudo is correct in the Size field of the + // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the + // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. switch (MBBI->getOpcode()) { case RISCV::PseudoAtomicLoadNand32: @@ -148,24 +150,30 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, return false; } -static unsigned getLRForRMW32(AtomicOrdering Ordering) { +static unsigned getLRForRMW32(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); case AtomicOrdering::Monotonic: return RISCV::LR_W; case AtomicOrdering::Acquire: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_W; return RISCV::LR_W_AQ; case AtomicOrdering::Release: return RISCV::LR_W; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_W; return RISCV::LR_W_AQ; case AtomicOrdering::SequentiallyConsistent: return RISCV::LR_W_AQ_RL; } } -static unsigned getSCForRMW32(AtomicOrdering Ordering) { +static unsigned getSCForRMW32(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); @@ -174,32 +182,42 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) { case AtomicOrdering::Acquire: return RISCV::SC_W; case AtomicOrdering::Release: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_W; return RISCV::SC_W_RL; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_W; return RISCV::SC_W_RL; case AtomicOrdering::SequentiallyConsistent: return RISCV::SC_W_RL; } } -static unsigned getLRForRMW64(AtomicOrdering Ordering) { +static unsigned getLRForRMW64(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); case AtomicOrdering::Monotonic: return RISCV::LR_D; case AtomicOrdering::Acquire: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_D; return RISCV::LR_D_AQ; case AtomicOrdering::Release: return RISCV::LR_D; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::LR_D; return 
RISCV::LR_D_AQ; case AtomicOrdering::SequentiallyConsistent: return RISCV::LR_D_AQ_RL; } } -static unsigned getSCForRMW64(AtomicOrdering Ordering) { +static unsigned getSCForRMW64(AtomicOrdering Ordering, + const RISCVSubtarget *Subtarget) { switch (Ordering) { default: llvm_unreachable("Unexpected AtomicOrdering"); @@ -208,27 +226,33 @@ static unsigned getSCForRMW64(AtomicOrdering Ordering) { case AtomicOrdering::Acquire: return RISCV::SC_D; case AtomicOrdering::Release: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_D; return RISCV::SC_D_RL; case AtomicOrdering::AcquireRelease: + if (Subtarget->hasStdExtZtso()) + return RISCV::SC_D; return RISCV::SC_D_RL; case AtomicOrdering::SequentiallyConsistent: return RISCV::SC_D_RL; } } -static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) { +static unsigned getLRForRMW(AtomicOrdering Ordering, int Width, + const RISCVSubtarget *Subtarget) { if (Width == 32) - return getLRForRMW32(Ordering); + return getLRForRMW32(Ordering, Subtarget); if (Width == 64) - return getLRForRMW64(Ordering); + return getLRForRMW64(Ordering, Subtarget); llvm_unreachable("Unexpected LR width\n"); } -static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) { +static unsigned getSCForRMW(AtomicOrdering Ordering, int Width, + const RISCVSubtarget *Subtarget) { if (Width == 32) - return getSCForRMW32(Ordering); + return getSCForRMW32(Ordering, Subtarget); if (Width == 64) - return getSCForRMW64(Ordering); + return getSCForRMW64(Ordering, Subtarget); llvm_unreachable("Unexpected SC width\n"); } @@ -236,7 +260,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, MachineBasicBlock *DoneMBB, - AtomicRMWInst::BinOp BinOp, int Width) { + AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MI.getOperand(1).getReg(); Register AddrReg = MI.getOperand(2).getReg(); @@ -249,7 +274,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, // binop scratch, dest, val // sc.[w|d] scratch, scratch, (addr) // bnez scratch, loop - BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) + BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg) .addReg(AddrReg); switch (BinOp) { default: @@ -263,7 +288,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, .addImm(-1); break; } - BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg) + BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) .addReg(AddrReg) .addReg(ScratchReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) @@ -294,10 +319,13 @@ static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL, .addReg(ScratchReg); } -static void doMaskedAtomicBinOpExpansion( - const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, - MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, - MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { +static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII, + MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, + MachineBasicBlock *LoopMBB, + MachineBasicBlock *DoneMBB, + AtomicRMWInst::BinOp BinOp, int Width, + const RISCVSubtarget *STI) { assert(Width == 32 && "Should never need to expand masked 64-bit operations"); Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MI.getOperand(1).getReg(); @@ -315,7 +343,7 @@ 
static void doMaskedAtomicBinOpExpansion( // xor scratch, destreg, scratch // sc.w scratch, scratch, (alignedaddr) // bnez scratch, loop - BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg) .addReg(AddrReg); switch (BinOp) { default: @@ -348,7 +376,7 @@ static void doMaskedAtomicBinOpExpansion( insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg, ScratchReg); - BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg) + BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg) .addReg(AddrReg) .addReg(ScratchReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) @@ -380,10 +408,11 @@ bool RISCVExpandAtomicPseudo::expandAtomicBinOp( MBB.addSuccessor(LoopMBB); if (!IsMasked) - doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width); + doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width, + STI); else doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, - Width); + Width, STI); NextMBBI = MBB.end(); MI.eraseFromParent(); @@ -455,7 +484,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( // mv scratch1, destreg // [sext scratch2 if signed min/max] // ifnochangeneeded scratch2, incr, .looptail - BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg) .addReg(DestReg) @@ -507,7 +536,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( // .looptail: // sc.w scratch1, scratch1, (addr) // bnez scratch1, loop - BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg) + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg) .addReg(AddrReg) .addReg(Scratch1Reg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) @@ -635,7 +664,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // .loophead: // lr.[w|d] dest, (addr) // bne dest, cmpval, done - BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), + DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE)) .addReg(DestReg) @@ -644,7 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // .looptail: // sc.[w|d] scratch, newval, (addr) // bnez scratch, loophead - BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg) + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), + ScratchReg) .addReg(AddrReg) .addReg(NewValReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) @@ -657,7 +688,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // and scratch, dest, mask // bne scratch, cmpval, done Register MaskReg = MI.getOperand(5).getReg(); - BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), + DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg) .addReg(DestReg) @@ -675,7 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // bnez scratch, loophead insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg, MaskReg, ScratchReg); - BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg) + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), + ScratchReg) .addReg(AddrReg) .addReg(ScratchReg); BuildMI(LoopTailMBB, DL, 
TII->get(RISCV::BNE)) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 58896ee1b388..24a13f93af88 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -34,9 +34,7 @@ public: const RISCVInstrInfo *TII; static char ID; - RISCVExpandPseudo() : MachineFunctionPass(ID) { - initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry()); - } + RISCVExpandPseudo() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -119,6 +117,23 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoCCXOR: case RISCV::PseudoCCADDW: case RISCV::PseudoCCSUBW: + case RISCV::PseudoCCSLL: + case RISCV::PseudoCCSRL: + case RISCV::PseudoCCSRA: + case RISCV::PseudoCCADDI: + case RISCV::PseudoCCSLLI: + case RISCV::PseudoCCSRLI: + case RISCV::PseudoCCSRAI: + case RISCV::PseudoCCANDI: + case RISCV::PseudoCCORI: + case RISCV::PseudoCCXORI: + case RISCV::PseudoCCSLLW: + case RISCV::PseudoCCSRLW: + case RISCV::PseudoCCSRAW: + case RISCV::PseudoCCADDIW: + case RISCV::PseudoCCSLLIW: + case RISCV::PseudoCCSRLIW: + case RISCV::PseudoCCSRAIW: return expandCCOp(MBB, MBBI, NextMBBI); case RISCV::PseudoVSETVLI: case RISCV::PseudoVSETVLIX0: @@ -188,11 +203,28 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, llvm_unreachable("Unexpected opcode!"); case RISCV::PseudoCCADD: NewOpc = RISCV::ADD; break; case RISCV::PseudoCCSUB: NewOpc = RISCV::SUB; break; + case RISCV::PseudoCCSLL: NewOpc = RISCV::SLL; break; + case RISCV::PseudoCCSRL: NewOpc = RISCV::SRL; break; + case RISCV::PseudoCCSRA: NewOpc = RISCV::SRA; break; case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break; case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break; case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break; + case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break; + case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break; + case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break; + case RISCV::PseudoCCSRAI: NewOpc = RISCV::SRAI; break; + case RISCV::PseudoCCANDI: NewOpc = RISCV::ANDI; break; + case RISCV::PseudoCCORI: NewOpc = RISCV::ORI; break; + case RISCV::PseudoCCXORI: NewOpc = RISCV::XORI; break; case RISCV::PseudoCCADDW: NewOpc = RISCV::ADDW; break; case RISCV::PseudoCCSUBW: NewOpc = RISCV::SUBW; break; + case RISCV::PseudoCCSLLW: NewOpc = RISCV::SLLW; break; + case RISCV::PseudoCCSRLW: NewOpc = RISCV::SRLW; break; + case RISCV::PseudoCCSRAW: NewOpc = RISCV::SRAW; break; + case RISCV::PseudoCCADDIW: NewOpc = RISCV::ADDIW; break; + case RISCV::PseudoCCSLLIW: NewOpc = RISCV::SLLIW; break; + case RISCV::PseudoCCSRLIW: NewOpc = RISCV::SRLIW; break; + case RISCV::PseudoCCSRAIW: NewOpc = RISCV::SRAIW; break; } BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg) .add(MI.getOperand(5)) @@ -275,8 +307,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, .addReg(MBBI->getOperand(1).getReg()) .add(MBBI->getOperand(2)); if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { - // FIXME: Zdinx RV32 can not work on unaligned scalar memory. - assert(!STI->enableUnalignedScalarMem()); + // FIXME: Zdinx RV32 can not work on unaligned memory. 
+ assert(!STI->hasFastUnalignedAccess()); assert(MBBI->getOperand(2).getOffset() % 8 == 0); MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4); @@ -347,9 +379,7 @@ public: const RISCVInstrInfo *TII; static char ID; - RISCVPreRAExpandPseudo() : MachineFunctionPass(ID) { - initializeRISCVPreRAExpandPseudoPass(*PassRegistry::getPassRegistry()); - } + RISCVPreRAExpandPseudo() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td index 4ce9c41eaf5c..294927aecb94 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -73,7 +73,7 @@ def HasStdExtZihintpause : Predicate<"Subtarget->hasStdExtZihintpause()">, "'Zihintpause' (Pause Hint)">; def FeatureStdExtZihintntl - : SubtargetFeature<"experimental-zihintntl", "HasStdExtZihintntl", "true", + : SubtargetFeature<"zihintntl", "HasStdExtZihintntl", "true", "'Zihintntl' (Non-Temporal Locality Hints)">; def HasStdExtZihintntl : Predicate<"Subtarget->hasStdExtZihintntl()">, AssemblerPredicate<(all_of FeatureStdExtZihintntl), @@ -159,7 +159,7 @@ def HasStdExtZhinxOrZhinxmin "'Zhinxmin' (Half Float in Integer Minimal)">; def FeatureStdExtZfa - : SubtargetFeature<"experimental-zfa", "HasStdExtZfa", "true", + : SubtargetFeature<"zfa", "HasStdExtZfa", "true", "'Zfa' (Additional Floating-Point)", [FeatureStdExtF]>; def HasStdExtZfa : Predicate<"Subtarget->hasStdExtZfa()">, @@ -444,8 +444,8 @@ def FeatureStdExtV def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">, AssemblerPredicate< (any_of FeatureStdExtZve32x), - "'V' (Vector Extension for Application Processors), 'Zve32x' or " - "'Zve64x' (Vector Extensions for Embedded Processors)">; + "'V' (Vector Extension for Application Processors), 'Zve32x' " + "(Vector Extensions for Embedded Processors)">; def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">, AssemblerPredicate< (any_of FeatureStdExtZve64x), @@ -454,17 +454,25 @@ def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">, def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">, AssemblerPredicate< (any_of FeatureStdExtZve32f), - "'V' (Vector Extension for Application Processors), 'Zve32f', " - "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">; + "'V' (Vector Extension for Application Processors), 'Zve32f' " + "(Vector Extensions for Embedded Processors)">; def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">; def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">; +def FeatureStdExtZfbfmin + : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true", + "'Zfbfmin' (Scalar BF16 Converts)", + [FeatureStdExtF]>; +def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">, + AssemblerPredicate<(all_of FeatureStdExtZfbfmin), + "'Zfbfmin' (Scalar BF16 Converts)">; + def FeatureStdExtZvfbfmin : SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true", "'Zvbfmin' (Vector BF16 Converts)", - [FeatureStdExtZve32f]>; + [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>; def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, AssemblerPredicate<(all_of FeatureStdExtZvfbfmin), "'Zvfbfmin' (Vector BF16 Converts)">; @@ -472,18 +480,30 @@ def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, def FeatureStdExtZvfbfwma 
: SubtargetFeature<"experimental-zvfbfwma", "HasStdExtZvfbfwma", "true", "'Zvfbfwma' (Vector BF16 widening mul-add)", - [FeatureStdExtZve32f]>; + [FeatureStdExtZvfbfmin]>; def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">, AssemblerPredicate<(all_of FeatureStdExtZvfbfwma), "'Zvfbfwma' (Vector BF16 widening mul-add)">; +def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">; + def FeatureStdExtZvfh : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true", "'Zvfh' (Vector Half-Precision Floating-Point)", [FeatureStdExtZve32f, FeatureStdExtZfhmin]>; +def FeatureStdExtZvfhmin + : SubtargetFeature<"zvfhmin", "HasStdExtZvfhmin", "true", + "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)", + [FeatureStdExtZve32f]>; + def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">; +def HasVInstructionsF16Minimal : Predicate<"Subtarget->hasVInstructionsF16Minimal()">, + AssemblerPredicate<(any_of FeatureStdExtZvfhmin, FeatureStdExtZvfh), + "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal) or " + "'Zvfh' (Vector Half-Precision Floating-Point)">; + def HasStdExtZfhOrZvfh : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZvfh()">, AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZvfh), @@ -529,9 +549,10 @@ def HasStdExtSvinval : Predicate<"Subtarget->hasStdExtSvinval()">, def FeatureStdExtZtso : SubtargetFeature<"experimental-ztso", "HasStdExtZtso", "true", "'Ztso' (Memory Model - Total Store Order)">; -def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZTso()">, +def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZtso()">, AssemblerPredicate<(all_of FeatureStdExtZtso), "'Ztso' (Memory Model - Total Store Order)">; +def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">; def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true", "'Zawrs' (Wait on Reservation Set)">; @@ -539,12 +560,20 @@ def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">, AssemblerPredicate<(all_of FeatureStdExtZawrs), "'Zawrs' (Wait on Reservation Set)">; +def FeatureStdExtZvkb + : SubtargetFeature<"experimental-zvkb", "HasStdExtZvkb", "true", + "'Zvkb' (Vector Bit-manipulation used in Cryptography)">; +def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">, + AssemblerPredicate<(all_of FeatureStdExtZvkb), + "'Zvkb' (Vector Bit-manipulation used in Cryptography)">; + def FeatureStdExtZvbb : SubtargetFeature<"experimental-zvbb", "HasStdExtZvbb", "true", - "'Zvbb' (Vector Bit-manipulation used in Cryptography)">; + "'Zvbb' (Vector basic bit-manipulation instructions.)", + [FeatureStdExtZvkb]>; def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">, AssemblerPredicate<(all_of FeatureStdExtZvbb), - "'Zvbb' (Vector Bit-manipulation used in Cryptography)">; + "'Zvbb' (Vector basic bit-manipulation instructions.)">; def FeatureStdExtZvbc : SubtargetFeature<"experimental-zvbc", "HasStdExtZvbc", "true", @@ -560,16 +589,6 @@ def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">, AssemblerPredicate<(all_of FeatureStdExtZvkg), "'Zvkg' (Vector GCM instructions for Cryptography)">; -def FeatureStdExtZvkn - : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true", - "This extension is shorthand for the following set of " - "other extensions: Zvkned, Zvknhb, Zvbb, Zvbc, and Zvkt.">; - -def FeatureStdExtZvknc - : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true", - "This extension is shorthand for the following set of " - "other extensions: Zvkn and Zvbc.">; - def FeatureStdExtZvkned : 
SubtargetFeature<"experimental-zvkned", "HasStdExtZvkned", "true", "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">; @@ -577,32 +596,24 @@ def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">, AssemblerPredicate<(all_of FeatureStdExtZvkned), "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">; -def FeatureStdExtZvkng - : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true", - "This extension is shorthand for the following set of " - "other extensions: Zvkn and Zvkg.">; - def FeatureStdExtZvknha : SubtargetFeature<"experimental-zvknha", "HasStdExtZvknha", "true", "'Zvknha' (Vector SHA-2 (SHA-256 only))">; - -def FeatureStdExtZvknhb - : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true", - "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))", - [FeatureStdExtZvknha]>; def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">, AssemblerPredicate<(all_of FeatureStdExtZvknha), "'Zvknha' (Vector SHA-2 (SHA-256 only))">; -def FeatureStdExtZvks - : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true", - "This extension is shorthand for the following set of " - "other extensions: Zvksed, Zvksh, Zvbb, Zvbc, and Zvkt.">; +def FeatureStdExtZvknhb + : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true", + "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))", + [FeatureStdExtZve64x]>; +def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">, + AssemblerPredicate<(all_of FeatureStdExtZvknhb), + "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))">; -def FeatureStdExtZvksc - : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true", - "This extension is shorthand for the following set of " - "other extensions: Zvks and Zvbc.">; +def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarget->hasStdExtZvknhb()">, + AssemblerPredicate<(any_of FeatureStdExtZvknha, FeatureStdExtZvknhb), + "'Zvknha' or 'Zvknhb' (Vector SHA-2)">; def FeatureStdExtZvksed : SubtargetFeature<"experimental-zvksed", "HasStdExtZvksed", "true", @@ -611,11 +622,6 @@ def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">, AssemblerPredicate<(all_of FeatureStdExtZvksed), "'Zvksed' (SM4 Block Cipher Instructions)">; -def FeatureStdExtZvksg - : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true", - "This extension is shorthand for the following set of " - "other extensions: Zvks and Zvkg.">; - def FeatureStdExtZvksh : SubtargetFeature<"experimental-zvksh", "HasStdExtZvksh", "true", "'Zvksh' (SM3 Hash Function Instructions)">; @@ -627,6 +633,53 @@ def FeatureStdExtZvkt : SubtargetFeature<"experimental-zvkt", "HasStdExtZvkt", "true", "'Zvkt' (Vector Data-Independent Execution Latency)">; +// Zvk short-hand extensions + +def FeatureStdExtZvkn + : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true", + "This extension is shorthand for the following set of " + "other extensions: Zvkned, Zvknhb, Zvkb and Zvkt.", + [FeatureStdExtZvkned, FeatureStdExtZvknhb, + FeatureStdExtZvkb, FeatureStdExtZvkt]>; + +def FeatureStdExtZvknc + : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true", + "This extension is shorthand for the following set of " + "other extensions: Zvkn and Zvbc.", + [FeatureStdExtZvkn, FeatureStdExtZvbc]>; + +def FeatureStdExtZvkng + : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true", + "This extension is shorthand for the following set of " + "other extensions: Zvkn and Zvkg.", + [FeatureStdExtZvkn, FeatureStdExtZvkg]>; + +def FeatureStdExtZvks + : 
SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true", + "This extension is shorthand for the following set of " + "other extensions: Zvksed, Zvksh, Zvkb and Zvkt.", + [FeatureStdExtZvksed, FeatureStdExtZvksh, + FeatureStdExtZvkb, FeatureStdExtZvkt]>; + +def FeatureStdExtZvksc + : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true", + "This extension is shorthand for the following set of " + "other extensions: Zvks and Zvbc.", + [FeatureStdExtZvks, FeatureStdExtZvbc]>; + +def FeatureStdExtZvksg + : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true", + "This extension is shorthand for the following set of " + "other extensions: Zvks and Zvkg.", + [FeatureStdExtZvks, FeatureStdExtZvkg]>; + +def FeatureStdExtZicfilp + : SubtargetFeature<"experimental-zicfilp", "HasStdExtZicfilp", "true", + "'Zicfilp' (Landing pad)">; +def HasStdExtZicfilp : Predicate<"Subtarget->hasStdExtZicfilp()">, + AssemblerPredicate<(all_of FeatureStdExtZicfilp), + "'Zicfilp' (Landing pad)">; + def FeatureStdExtZicond : SubtargetFeature<"experimental-zicond", "HasStdExtZicond", "true", "'Zicond' (Integer Conditional Operations)">; @@ -635,34 +688,25 @@ def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">, "'Zicond' (Integer Conditional Operations)">; def FeatureStdExtSmaia - : SubtargetFeature<"experimental-smaia", "HasStdExtSmaia", "true", + : SubtargetFeature<"smaia", "HasStdExtSmaia", "true", "'Smaia' (Smaia encompasses all added CSRs and all " "modifications to interrupt response behavior that the " "AIA specifies for a hart, over all privilege levels.)", []>; def FeatureStdExtSsaia - : SubtargetFeature<"experimental-ssaia", "HasStdExtSsaia", "true", + : SubtargetFeature<"ssaia", "HasStdExtSsaia", "true", "'Ssaia' (Ssaia is essentially the same as Smaia except " "excluding the machine-level CSRs and behavior not " "directly visible to supervisor level.)", []>; -def FeatureStdExtZfbfmin - : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true", - "'Zfbfmin' (Scalar BF16 Converts)", - [FeatureStdExtF]>; -def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">, - AssemblerPredicate<(all_of FeatureStdExtZfbfmin), - "'Zfbfmin' (Scalar BF16 Converts)">; - def HasHalfFPLoadStoreMove : Predicate<"Subtarget->hasHalfFPLoadStoreMove()">, AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin, - FeatureStdExtZfbfmin, FeatureStdExtZvfbfwma), + FeatureStdExtZfbfmin), "'Zfh' (Half-Precision Floating-Point) or " "'Zfhmin' (Half-Precision Floating-Point Minimal) or " - "'Zfbfmin' (Scalar BF16 Converts) or " - "'Zvfbfwma' (Vector BF16 widening mul-add)">; + "'Zfbfmin' (Scalar BF16 Converts)">; def FeatureStdExtZacas : SubtargetFeature<"experimental-zacas", "HasStdExtZacas", "true", @@ -776,6 +820,45 @@ def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">, AssemblerPredicate<(all_of FeatureVendorXSfcie), "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">; +def FeatureVendorXSfvqmaccdod + : SubtargetFeature<"xsfvqmaccdod", "HasVendorXSfvqmaccdod", "true", + "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))", + [FeatureStdExtZve32x]>; +def HasVendorXSfvqmaccdod : Predicate<"Subtarget->hasVendorXSfvqmaccdod()">, + AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod), + "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))">; + +def FeatureVendorXSfvqmaccqoq + : SubtargetFeature<"xsfvqmaccqoq", "HasVendorXSfvqmaccqoq", "true", + "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication 
Instructions (4-by-8 and 8-by-4))", + [FeatureStdExtZve32x]>; +def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">, + AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq), + "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))">; + +def FeatureVendorXSfvfwmaccqqq + : SubtargetFeature<"xsfvfwmaccqqq", "HasVendorXSfvfwmaccqqq", "true", + "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))", + [FeatureStdExtZve32f, FeatureStdExtZvfbfmin]>; +def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">, + AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq), + "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))">; + +def FeatureVendorXSfvfnrclipxfqf + : SubtargetFeature<"xsfvfnrclipxfqf", "HasVendorXSfvfnrclipxfqf", "true", + "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)", + [FeatureStdExtZve32f]>; +def HasVendorXSfvfnrclipxfqf : Predicate<"Subtarget->hasVendorXSfvfnrclipxfqf()">, + AssemblerPredicate<(all_of FeatureVendorXSfvfnrclipxfqf), + "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)">; +def FeatureVendorXCVelw + : SubtargetFeature<"xcvelw", "HasVendorXCVelw", "true", + "'XCVelw' (CORE-V Event Load Word)">; +def HasVendorXCVelw + : Predicate<"Subtarget->hasVendorXCVelw()">, + AssemblerPredicate<(any_of FeatureVendorXCVelw), + "'XCVelw' (CORE-V Event Load Word)">; + def FeatureVendorXCVbitmanip : SubtargetFeature<"xcvbitmanip", "HasVendorXCVbitmanip", "true", "'XCVbitmanip' (CORE-V Bit Manipulation)">; @@ -790,6 +873,36 @@ def HasVendorXCVmac : Predicate<"Subtarget->hasVendorXCVmac()">, AssemblerPredicate<(all_of FeatureVendorXCVmac), "'XCVmac' (CORE-V Multiply-Accumulate)">; +def FeatureVendorXCVmem + : SubtargetFeature<"xcvmem", "HasVendorXCVmem", "true", + "'XCVmem' (CORE-V Post-incrementing Load & Store)">; +def HasVendorXCVmem + : Predicate<"Subtarget->hasVendorXCVmem()">, + AssemblerPredicate<(any_of FeatureVendorXCVmem), + "'XCVmem' (CORE-V Post-incrementing Load & Store)">; + +def FeatureVendorXCValu + : SubtargetFeature<"xcvalu", "HasVendorXCValu", "true", + "'XCValu' (CORE-V ALU Operations)">; +def HasVendorXCValu : Predicate<"Subtarget->hasVendorXCValu()">, + AssemblerPredicate<(all_of FeatureVendorXCValu), + "'XCValu' (CORE-V ALU Operations)">; + +def FeatureVendorXCVsimd + : SubtargetFeature<"xcvsimd", "HasVendorXCvsimd", "true", + "'XCVsimd' (CORE-V SIMD ALU)">; +def HasVendorXCVsimd + : Predicate<"Subtarget->hasVendorXCVsimd()">, + AssemblerPredicate<(any_of FeatureVendorXCVsimd), + "'XCVsimd' (CORE-V SIMD ALU)">; + +def FeatureVendorXCVbi + : SubtargetFeature<"xcvbi", "HasVendorXCVbi", "true", + "'XCVbi' (CORE-V Immediate Branching)">; +def HasVendorXCVbi : Predicate<"Subtarget->hasVendorXCVbi()">, + AssemblerPredicate<(all_of FeatureVendorXCVbi), + "'XCVbi' (CORE-V Immediate Branching)">; + //===----------------------------------------------------------------------===// // LLVM specific features and extensions //===----------------------------------------------------------------------===// @@ -833,15 +946,13 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence", "true", "Enable trailing fence for seq-cst store.">; -def FeatureUnalignedScalarMem - : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", - "true", "Has reasonably performant unaligned scalar " - "loads and stores">; +def FeatureFastUnalignedAccess + : SubtargetFeature<"fast-unaligned-access", 
"HasFastUnalignedAccess", + "true", "Has reasonably performant unaligned " + "loads and stores (both scalar and vector)">; -def FeatureUnalignedVectorMem - : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem", - "true", "Has reasonably performant unaligned vector " - "loads and stores">; +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; def TuneNoOptimizedZeroStrideLoad : SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad", @@ -859,6 +970,16 @@ def TuneLUIADDIFusion : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion", "true", "Enable LUI+ADDI macrofusion">; +def TuneAUIPCADDIFusion + : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion", + "true", "Enable AUIPC+ADDI macrofusion">; +def TuneShiftedZExtFusion + : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion", + "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">; +def TuneLDADDFusion + : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion", + "true", "Enable LD+ADD macrofusion.">; + def TuneNoDefaultUnroll : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false", "Disable default unroll preference.">; @@ -876,6 +997,13 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", [TuneNoDefaultUnroll, TuneShortForwardBranchOpt]>; +def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron", + "Ventana Veyron-Series processors", + [TuneLUIADDIFusion, + TuneAUIPCADDIFusion, + TuneShiftedZExtFusion, + TuneLDADDFusion]>; + // Assume that lock-free native-width atomics are available, even if the target // and operating system combination would not usually provide them. The user // is responsible for providing any necessary __sync implementations. Code diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp new file mode 100644 index 000000000000..6ee006525df5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp @@ -0,0 +1,216 @@ +//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This pass performs various peephole optimisations that fold masks into vector +// pseudo instructions after instruction selection. 
+// +// Currently it converts +// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew +// -> +// PseudoVMV_V_V %false, %true, %vl, %sew +// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVISelDAGToDAG.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-fold-masks" + +namespace { + +class RISCVFoldMasks : public MachineFunctionPass { +public: + static char ID; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + RISCVFoldMasks() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + StringRef getPassName() const override { return "RISC-V Fold Masks"; } + +private: + bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef); + bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef); + + bool isAllOnesMask(MachineInstr *MaskDef); +}; + +} // namespace + +char RISCVFoldMasks::ID = 0; + +INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false) + +bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) { + if (!MaskDef) + return false; + assert(MaskDef->isCopy() && MaskDef->getOperand(0).getReg() == RISCV::V0); + Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI); + if (!SrcReg.isVirtual()) + return false; + MaskDef = MRI->getVRegDef(SrcReg); + if (!MaskDef) + return false; + + // TODO: Check that the VMSET is the expected bitwidth? The pseudo has + // undefined behaviour if it's the wrong bitwidth, so we could choose to + // assume that it's all-ones? Same applies to its VL. + switch (MaskDef->getOpcode()) { + case RISCV::PseudoVMSET_M_B1: + case RISCV::PseudoVMSET_M_B2: + case RISCV::PseudoVMSET_M_B4: + case RISCV::PseudoVMSET_M_B8: + case RISCV::PseudoVMSET_M_B16: + case RISCV::PseudoVMSET_M_B32: + case RISCV::PseudoVMSET_M_B64: + return true; + default: + return false; + } +} + +// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to +// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET. 
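For illustration (editorial sketch, not part of the patch): with an all-ones mask built by a PseudoVMSET and copied into $v0, the rewrite performed below is roughly

    %mask:vr    = PseudoVMSET_M_B8 %vl, %sew
    $v0         = COPY %mask
    %out:vrnov0 = PseudoVMERGE_VVM_M1 %false, %false, %true, $v0, %vl, %sew
  -->
    %out:vr     = PseudoVMV_V_V_M1 %false, %true, %vl, %sew, 0 /* tu, mu */

keeping %false tied as the passthru, adding the tail-undisturbed/mask-undisturbed policy, and relaxing the register class from VRNoV0 to VR; the exact MIR operand spelling above is an assumption made for readability.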
+bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { +#define CASE_VMERGE_TO_VMV(lmul) \ + case RISCV::PseudoVMERGE_VVM_##lmul: \ + NewOpc = RISCV::PseudoVMV_V_V_##lmul; \ + break; + unsigned NewOpc; + switch (MI.getOpcode()) { + default: + return false; + CASE_VMERGE_TO_VMV(MF8) + CASE_VMERGE_TO_VMV(MF4) + CASE_VMERGE_TO_VMV(MF2) + CASE_VMERGE_TO_VMV(M1) + CASE_VMERGE_TO_VMV(M2) + CASE_VMERGE_TO_VMV(M4) + CASE_VMERGE_TO_VMV(M8) + } + + Register MergeReg = MI.getOperand(1).getReg(); + Register FalseReg = MI.getOperand(2).getReg(); + // Check merge == false (or merge == undef) + if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) != + TRI->lookThruCopyLike(FalseReg, MRI)) + return false; + + assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0); + if (!isAllOnesMask(V0Def)) + return false; + + MI.setDesc(TII->get(NewOpc)); + MI.removeOperand(1); // Merge operand + MI.tieOperands(0, 1); // Tie false to dest + MI.removeOperand(3); // Mask operand + MI.addOperand( + MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)); + + // vmv.v.v doesn't have a mask operand, so we may be able to inflate the + // register class for the destination and merge operands e.g. VRNoV0 -> VR + MRI->recomputeRegClass(MI.getOperand(0).getReg()); + MRI->recomputeRegClass(MI.getOperand(1).getReg()); + return true; +} + +bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI, + MachineInstr *MaskDef) { + const RISCV::RISCVMaskedPseudoInfo *I = + RISCV::getMaskedPseudoInfo(MI.getOpcode()); + if (!I) + return false; + + if (!isAllOnesMask(MaskDef)) + return false; + + // There are two classes of pseudos in the table - compares and + // everything else. See the comment on RISCVMaskedPseudo for details. + const unsigned Opc = I->UnmaskedPseudo; + const MCInstrDesc &MCID = TII->get(Opc); + const bool HasPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags); + const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID); +#ifndef NDEBUG + const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode()); + assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) == + RISCVII::hasVecPolicyOp(MCID.TSFlags) && + "Masked and unmasked pseudos are inconsistent"); + assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure"); +#endif + (void)HasPolicyOp; + + MI.setDesc(MCID); + + // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs? + unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs(); + MI.removeOperand(MaskOpIdx); + + // The unmasked pseudo will no longer be constrained to the vrnov0 reg class, + // so try and relax it to vr. + MRI->recomputeRegClass(MI.getOperand(0).getReg()); + unsigned PassthruOpIdx = MI.getNumExplicitDefs(); + if (HasPassthru) { + if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) + MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg()); + } else + MI.removeOperand(PassthruOpIdx); + + return true; +} + +bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // Skip if the vector extension is not enabled. 
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + if (!ST.hasVInstructions()) + return false; + + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + TRI = MRI->getTargetRegisterInfo(); + + bool Changed = false; + + // Masked pseudos coming out of isel will have their mask operand in the form: + // + // $v0:vr = COPY %mask:vr + // %x:vr = Pseudo_MASK %a:vr, %b:br, $v0:vr + // + // Because $v0 isn't in SSA, keep track of it so we can check the mask operand + // on each pseudo. + MachineInstr *CurrentV0Def; + for (MachineBasicBlock &MBB : MF) { + CurrentV0Def = nullptr; + for (MachineInstr &MI : MBB) { + Changed |= convertToUnmasked(MI, CurrentV0Def); + Changed |= convertVMergeToVMv(MI, CurrentV0Def); + + if (MI.definesRegister(RISCV::V0, TRI)) + CurrentV0Def = &MI; + } + } + + return Changed; +} + +FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f312cc8129dd..8dfea6d38620 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -226,37 +226,38 @@ getRestoreLibCallName(const MachineFunction &MF, return RestoreLibCalls[LibCallID]; } -// Return encoded value for PUSH/POP instruction, representing -// registers to store/load. -static unsigned getPushPopEncoding(const Register MaxReg) { +// Return encoded value and register count for PUSH/POP instruction, +// representing registers to store/load. +static std::pair<unsigned, unsigned> +getPushPopEncodingAndNum(const Register MaxReg) { switch (MaxReg) { default: llvm_unreachable("Unexpected Reg for Push/Pop Inst"); case RISCV::X27: /*s11*/ case RISCV::X26: /*s10*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S11; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S11, 13); case RISCV::X25: /*s9*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S9; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S9, 11); case RISCV::X24: /*s8*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S8; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S8, 10); case RISCV::X23: /*s7*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S7; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S7, 9); case RISCV::X22: /*s6*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S6; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S6, 8); case RISCV::X21: /*s5*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S5; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S5, 7); case RISCV::X20: /*s4*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S4; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S4, 6); case RISCV::X19: /*s3*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S3; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S3, 5); case RISCV::X18: /*s2*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S2; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S2, 4); case RISCV::X9: /*s1*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0_S1; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S1, 3); case RISCV::X8: /*s0*/ - return llvm::RISCVZC::RLISTENCODE::RA_S0; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0, 2); case RISCV::X1: /*ra*/ - return llvm::RISCVZC::RLISTENCODE::RA; + return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA, 1); } } @@ -265,9 +266,10 @@ static Register getMaxPushPopReg(const MachineFunction &MF, const std::vector<CalleeSavedInfo> 
&CSI) { Register MaxPushPopReg = RISCV::NoRegister; for (auto &CS : CSI) { - Register Reg = CS.getReg(); - if (RISCV::PGPRRegClass.contains(Reg)) - MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id()); + // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indices to + // registers which can be saved by Zcmp Push. + if (CS.getFrameIdx() < 0) + MaxPushPopReg = std::max(MaxPushPopReg.id(), CS.getReg().id()); } // if rlist is {rs, s0-s10}, then s11 will also be included if (MaxPushPopReg == RISCV::X26) @@ -275,16 +277,6 @@ static Register getMaxPushPopReg(const MachineFunction &MF, return MaxPushPopReg; } -static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI, - unsigned RequiredStack, unsigned FreePushStack, - bool IsPop) { - if (FreePushStack > RequiredStack) - RequiredStack = 0; - unsigned Spimm = std::min(RequiredStack, 48u); - MBBI->getOperand(1).setImm(Spimm); - return alignTo(RequiredStack - Spimm, 16); -} - // Return true if the specified function should have a dedicated frame // pointer register. This is true if frame pointer elimination is // disabled, if it needs dynamic stack realignment, if the function has @@ -514,8 +506,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // FIXME (note copied from Lanai): This appears to be overallocating. Needs // investigation. Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = getStackSizeWithRVVPadding(MF); - uint64_t RealStackSize = - StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize(); + uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); // Early exit if there is no need to allocate on the stack @@ -535,13 +526,13 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, RealStackSize = FirstSPAdjustAmount; } - if (RVFI->isPushable(MF) && FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) { + if (RVFI->isPushable(MF) && FirstFrameSetup != MBB.end() && + FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) { // Use available stack adjustment in push instruction to allocate additional // stack space. - unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8); - unsigned SpImmBase = RVFI->getRVPushStackSize(); - StackSize = adjSPInPushPop(FirstFrameSetup, StackSize, - (SpImmBase - PushStack), true); + uint64_t Spimm = std::min(StackSize, (uint64_t)48); + FirstFrameSetup->getOperand(1).setImm(Spimm); + StackSize -= Spimm; } if (StackSize != 0) { @@ -584,8 +575,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, Offset = FrameIdx * (int64_t)STI.getXLen() / 8; } } else { - Offset = MFI.getObjectOffset(Entry.getFrameIdx()) - - RVFI->getLibCallStackSize(); + Offset = MFI.getObjectOffset(FrameIdx) - RVFI->getReservedSpillsSize(); } Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( @@ -730,8 +720,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, LastFrameDestroy = std::prev(MBBI, CSI.size()); uint64_t StackSize = getStackSizeWithRVVPadding(MF); - uint64_t RealStackSize = - StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize(); + uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -776,9 +765,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, MBBI->getOpcode() == RISCV::CM_POP) { // Use available stack adjustment in pop instruction to deallocate stack // space. 
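A standalone sketch (editorial, not part of the patch) of how the prologue push above and the epilogue pop below now fold part of the frame adjustment into cm.push/cm.pop: the register save area fixes the base adjustment, the instruction's spimm field can absorb up to 48 further bytes, and only the remainder needs a separate addi. The register list and frame size are made-up values.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t XLenBytes = 8;    // rv64
      const uint64_t PushedRegNum = 4; // rlist {ra, s0-s2}
      // Save area rounded up to 16 bytes, as setRVPushStackSize does.
      const uint64_t PushStack = (XLenBytes * PushedRegNum + 15) / 16 * 16;
      const uint64_t FrameBytes = 96;  // rest of the frame
      const uint64_t Spimm = std::min<uint64_t>(FrameBytes, 48); // folded into cm.push
      std::printf("cm.push {ra, s0-s2}, -%llu\n",
                  (unsigned long long)(PushStack + Spimm));
      if (FrameBytes - Spimm)
        std::printf("addi sp, sp, -%llu\n", (unsigned long long)(FrameBytes - Spimm));
    }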
- unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8); - unsigned SpImmBase = RVFI->getRVPushStackSize(); - StackSize = adjSPInPushPop(MBBI, StackSize, (SpImmBase - PushStack), true); + uint64_t Spimm = std::min(StackSize, (uint64_t)48); + MBBI->getOperand(1).setImm(Spimm); + StackSize -= Spimm; } // Deallocate stack @@ -882,7 +871,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, if (FrameReg == getFPReg(STI)) { Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); if (FI >= 0) - Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize()); + Offset -= StackOffset::getFixed(RVFI->getReservedSpillsSize()); // When using FP to access scalable vector objects, we need to minus // the frame size. // @@ -950,8 +939,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, assert(!RI->hasStackRealignment(MF) && "Can't index across variable sized realign"); Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) + - RVFI->getLibCallStackSize() + - RVFI->getRVPushStackSize(), + RVFI->getReservedSpillsSize(), RVFI->getRVVStackSize()); } else { Offset += StackOffset::getFixed(MFI.getStackSize()); @@ -993,11 +981,11 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, RISCV::X5, RISCV::X6, RISCV::X7, /* t0-t2 */ RISCV::X10, RISCV::X11, /* a0-a1, a2-a7 */ RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17, - RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31, 0 /* t3-t6 */ + RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 /* t3-t6 */ }; - for (unsigned i = 0; CSRegs[i]; ++i) - SavedRegs.set(CSRegs[i]); + for (auto Reg : CSRegs) + SavedRegs.set(Reg); if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) { @@ -1277,7 +1265,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( // We would like to split the SP adjustment to reduce prologue/epilogue // as following instructions. In this way, the offset of the callee saved -// register could fit in a single store. +// register could fit in a single store. Supposed that the first sp adjust +// amount is 2032. // add sp,sp,-2032 // sw ra,2028(sp) // sw s0,2024(sp) @@ -1295,19 +1284,60 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { // Disable SplitSPAdjust if save-restore libcall is used. The callee-saved // registers will be pushed by the save-restore libcalls, so we don't have to // split the SP adjustment in this case. - if (RVFI->getLibCallStackSize() || RVFI->getRVPushStackSize()) + if (RVFI->getReservedSpillsSize()) return 0; // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed // 12-bit and there exists a callee-saved register needing to be pushed. if (!isInt<12>(StackSize) && (CSI.size() > 0)) { - // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will - // cause sp = sp + 2048 in the epilogue to be split into multiple + // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because + // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple // instructions. Offsets smaller than 2048 can fit in a single load/store // instruction, and we have to stick with the stack alignment. 2048 has // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment. 
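As a standalone sketch (editorial, assuming a 16-byte stack alignment and the constants used by the new code that follows: RVCompressLen = XLen * 8, ADDI16SPCompressLen = 496), the compression-aware choice of the first adjustment looks like this; it is only consulted when the full stack size does not fit a signed 12-bit immediate.

    #include <cstdint>
    #include <cstdio>

    static uint64_t firstSPAdjust(uint64_t StackSize, unsigned XLen, bool HasZca) {
      const uint64_t StackAlign = 16;
      if (!HasZca)
        return 2048 - StackAlign;
      const uint64_t RVCompressLen = XLen * 8;  // reach of c.lwsp/c.ldsp offsets
      auto CanCompress = [&](uint64_t CompressLen) {
        return StackSize <= 2047 + CompressLen ||
               (StackSize > 2048 * 2 - StackAlign &&
                StackSize <= 2047 * 2 + CompressLen) ||
               StackSize > 2048 * 3 - StackAlign;
      };
      const uint64_t ADDI16SPCompressLen = 496; // largest c.addi16sp-friendly value
      if (XLen == 64 && CanCompress(ADDI16SPCompressLen))
        return ADDI16SPCompressLen;
      if (CanCompress(RVCompressLen))
        return RVCompressLen;
      return 2048 - StackAlign;
    }

    int main() {
      for (uint64_t Size : {2100ull, 3000ull, 4500ull})
        std::printf("StackSize %llu -> first adjust %llu (rv64, Zca)\n",
                    (unsigned long long)Size,
                    (unsigned long long)firstSPAdjust(Size, 64, true));
    }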
- return 2048 - getStackAlign().value(); + const uint64_t StackAlign = getStackAlign().value(); + + // Amount of (2048 - StackAlign) will prevent callee saved and restored + // instructions be compressed, so try to adjust the amount to the largest + // offset that stack compression instructions accept when target supports + // compression instructions. + if (STI.hasStdExtCOrZca()) { + // The compression extensions may support the following instructions: + // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2) + // c.swsp rs2, offset[7:2] => 2^(6 + 2) + // c.flwsp rd, offset[7:2] => 2^(6 + 2) + // c.fswsp rs2, offset[7:2] => 2^(6 + 2) + // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3) + // c.sdsp rs2, offset[8:3] => 2^(6 + 3) + // c.fldsp rd, offset[8:3] => 2^(6 + 3) + // c.fsdsp rs2, offset[8:3] => 2^(6 + 3) + const uint64_t RVCompressLen = STI.getXLen() * 8; + // Compared with amount (2048 - StackAlign), StackSize needs to + // satisfy the following conditions to avoid using more instructions + // to adjust the sp after adjusting the amount, such as + // StackSize meets the condition (StackSize <= 2048 + RVCompressLen), + // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp. + // case2: Amount is RVCompressLen: use addi + addi to adjust sp. + auto CanCompress = [&](uint64_t CompressLen) -> bool { + if (StackSize <= 2047 + CompressLen || + (StackSize > 2048 * 2 - StackAlign && + StackSize <= 2047 * 2 + CompressLen) || + StackSize > 2048 * 3 - StackAlign) + return true; + + return false; + }; + // In the epilogue, addi sp, sp, 496 is used to recover the sp and it + // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but + // addi sp, sp, 512 can not be compressed. So try to use 496 first. + const uint64_t ADDI16SPCompressLen = 496; + if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen)) + return ADDI16SPCompressLen; + if (CanCompress(RVCompressLen)) + return RVCompressLen; + } + return 2048 - StackAlign; } return 0; } @@ -1328,14 +1358,12 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); if (RVFI->isPushable(*MF)) { Register MaxReg = getMaxPushPopReg(*MF, CSI); - unsigned PushedRegNum = - getPushPopEncoding(MaxReg) - llvm::RISCVZC::RLISTENCODE::RA + 1; - RVFI->setRVPushRegs(PushedRegNum); - RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16)); - if (MaxReg != RISCV::NoRegister) { + auto [RegEnc, PushedRegNum] = getPushPopEncodingAndNum(MaxReg); + RVFI->setRVPushRegs(PushedRegNum); + RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16)); + // Use encoded number to represent registers to spill. - unsigned RegEnc = getPushPopEncoding(MaxReg); RVFI->setRVPushRlist(RegEnc); MachineInstrBuilder PushBuilder = BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH)) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 79adc83e8d65..9bc100981f2f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -61,7 +61,7 @@ public: const TargetRegisterInfo *TRI) const override; // Get the first stack adjustment amount for SplitSPAdjust. - // Return 0 if we don't want to to split the SP adjustment in prologue and + // Return 0 if we don't want to split the SP adjustment in prologue and // epilogue. 
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td new file mode 100644 index 000000000000..5f16ffb0a024 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td @@ -0,0 +1,159 @@ +//===-- RISCVGIsel.td - RISC-V GlobalISel Patterns ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file contains patterns that are relevant to GlobalISel, including +/// GIComplexOperandMatcher definitions for equivalent SelectionDAG +/// ComplexPatterns. +// +//===----------------------------------------------------------------------===// + +include "RISCV.td" +include "RISCVCombine.td" + +def simm12Plus1 : ImmLeaf<XLenVT, [{ + return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>; +def simm12Plus1i32 : ImmLeaf<i32, [{ + return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>; + +// FIXME: This doesn't check that the G_CONSTANT we're deriving the immediate +// from is only used once +def simm12Minus1Nonzero : ImmLeaf<XLenVT, [{ + return (Imm >= -2049 && Imm < 0) || (Imm > 0 && Imm <= 2046);}]>; + +def simm12Minus1NonzeroNonNeg1 : ImmLeaf<XLenVT, [{ + return (Imm >= -2049 && Imm < -1) || (Imm > 0 && Imm <= 2046);}]>; + +// Return an immediate value plus 1. +def ImmPlus1 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue() + 1, SDLoc(N), + N->getValuePtrVTpe(0));}]>; + +def GINegImm : GICustomOperandRenderer<"renderNegImm">, + GISDNodeXFormEquiv<NegImm>; + +def GIImmSubFromXLen : GICustomOperandRenderer<"renderImmSubFromXLen">, + GISDNodeXFormEquiv<ImmSubFromXLen>; +def GIImmSubFrom32 : GICustomOperandRenderer<"renderImmSubFrom32">, + GISDNodeXFormEquiv<ImmSubFrom32>; + +def GIImmPlus1 : + GICustomOperandRenderer<"renderImmPlus1">, + GISDNodeXFormEquiv<ImmPlus1>; + +def GIAddrRegImm : + GIComplexOperandMatcher<s32, "selectAddrRegImm">, + GIComplexPatternEquiv<AddrRegImm>; + +def gi_as_i64imm : GICustomOperandRenderer<"renderImm">, + GISDNodeXFormEquiv<as_i64imm>; + +def gi_trailing_zero : GICustomOperandRenderer<"renderTrailingZeros">, + GISDNodeXFormEquiv<TrailingZeros>; + +// FIXME: This is labelled as handling 's32', however the ComplexPattern it +// refers to handles both i32 and i64 based on the HwMode. Currently this LLT +// parameter appears to be ignored so this pattern works for both, however we +// should add a LowLevelTypeByHwMode, and use that to define our XLenLLT instead +// here. 
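A small standalone sketch (editorial, not part of the patch) of the arithmetic behind the simm12Plus1 predicate above and the sub-to-addi patterns below: sub rd, rs, imm is rewritten as addi rd, rs, -imm, so imm may be any signed 12-bit value except -2048 (its negation does not fit) and may additionally be 2048 (whose negation is exactly -2048).

    #include <cstdint>
    #include <cstdio>

    static bool isInt12(int64_t V) { return V >= -2048 && V <= 2047; }
    // Mirrors the simm12Plus1 ImmLeaf predicate.
    static bool simm12Plus1(int64_t Imm) {
      return (isInt12(Imm) && Imm != -2048) || Imm == 2048;
    }

    int main() {
      for (int64_t Imm : {2048LL, 2047LL, -2047LL, -2048LL})
        std::printf("sub x, %lld -> addi x, %lld : %s\n", (long long)Imm,
                    (long long)-Imm,
                    simm12Plus1(Imm) ? "selectable" : "needs another lowering");
    }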
+def GIShiftMaskXLen : + GIComplexOperandMatcher<s32, "selectShiftMask">, + GIComplexPatternEquiv<shiftMaskXLen>; +def GIShiftMask32 : + GIComplexOperandMatcher<s32, "selectShiftMask">, + GIComplexPatternEquiv<shiftMask32>; + +def gi_sh1add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<1>">, + GIComplexPatternEquiv<sh1add_op>; +def gi_sh2add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<2>">, + GIComplexPatternEquiv<sh2add_op>; +def gi_sh3add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<3>">, + GIComplexPatternEquiv<sh3add_op>; + +def gi_sh1add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<1>">, + GIComplexPatternEquiv<sh1add_uw_op>; +def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">, + GIComplexPatternEquiv<sh2add_uw_op>; +def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">, + GIComplexPatternEquiv<sh3add_uw_op>; + +// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier. +def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)), + (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>; + +let Predicates = [IsRV64] in { +def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)), + (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>; + +def : Pat<(i32 (shl GPR:$rs1, (i32 GPR:$rs2))), (SLLW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (sra GPR:$rs1, (i32 GPR:$rs2))), (SRAW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (srl GPR:$rs1, (i32 GPR:$rs2))), (SRLW GPR:$rs1, GPR:$rs2)>; +} + +// Ptr type used in patterns with GlobalISelEmitter +def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>; + +// Define pattern expansions for pointer ult/slt conditional codes +def : Pat<(XLenVT (setult (PtrVT GPR:$rs1), simm12:$imm12)), + (SLTIU GPR:$rs1, simm12:$imm12)>; +def : Pat<(XLenVT (setult (PtrVT GPR:$rs1), (PtrVT GPR:$rs2))), + (SLTU GPR:$rs1, GPR:$rs2)>; +def : Pat<(XLenVT (setlt (PtrVT GPR:$rs1), simm12:$imm12)), + (SLTI GPR:$rs1, simm12:$imm12)>; +def : Pat<(XLenVT (setlt (PtrVT GPR:$rs1), (PtrVT GPR:$rs2))), + (SLT GPR:$rs1, GPR:$rs2)>; + +// Define pattern expansions for setcc operations that aren't directly +// handled by a RISC-V instruction. 
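The block that follows builds every remaining comparison from slt/sltu plus xori/addi; as a standalone sketch (editorial, not part of the patch), the underlying identities are:

    #include <cstdint>
    #include <cstdio>

    static uint64_t sltu(uint64_t a, uint64_t b) { return a < b; }
    static uint64_t slt(int64_t a, int64_t b) { return a < b; }

    int main() {
      int64_t x = 7, y = 9;
      // seteq x, y  -> sltiu (xor x, y), 1
      std::printf("eq : %llu\n", (unsigned long long)sltu((uint64_t)(x ^ y), 1));
      // setne x, y  -> sltu x0, (xor x, y)
      std::printf("ne : %llu\n", (unsigned long long)sltu(0, (uint64_t)(x ^ y)));
      // setge x, y  -> xori (slt x, y), 1
      std::printf("ge : %llu\n", (unsigned long long)(slt(x, y) ^ 1));
      // setule x, y -> xori (sltu y, x), 1
      std::printf("ule: %llu\n", (unsigned long long)(sltu((uint64_t)y, (uint64_t)x) ^ 1));
    }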
+foreach Ty = [PtrVT, XLenVT] in { +def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty 0))), (SLTIU GPR:$rs1, 1)>; +def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty simm12Plus1:$imm12))), + (SLTIU (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm12)), 1)>; +def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty GPR:$rs2))), + (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>; +def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty 0))), (SLTU (XLenVT X0), GPR:$rs1)>; +def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty simm12Plus1:$imm12))), + (SLTU (XLenVT X0), (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm12)))>; +def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty GPR:$rs2))), + (SLTU (XLenVT X0), (XOR GPR:$rs1, GPR:$rs2))>; +def : Pat<(XLenVT (setugt (Ty GPR:$rs1), (Ty simm12Minus1NonzeroNonNeg1:$imm))), + (XORI (SLTIU GPR:$rs1, + (ImmPlus1 simm12Minus1NonzeroNonNeg1:$imm)), 1)>; +def : Pat<(XLenVT (setugt (Ty GPR:$rs1), (Ty GPR:$rs2))), + (SLTU GPR:$rs2, GPR:$rs1)>; +def : Pat<(XLenVT (setgt (Ty GPR:$rs1), (Ty simm12Minus1Nonzero:$imm))), + (XORI (SLTI GPR:$rs1, (ImmPlus1 simm12Minus1Nonzero:$imm)), 1)>; +def : Pat<(XLenVT (setgt (Ty GPR:$rs1), (Ty GPR:$rs2))), + (SLT GPR:$rs2, GPR:$rs1)>; +def : Pat<(XLenVT (setuge (XLenVT GPR:$rs1), (Ty simm12:$imm))), + (XORI (SLTIU GPR:$rs1, simm12:$imm), 1)>; +def : Pat<(XLenVT (setuge (Ty GPR:$rs1), (Ty GPR:$rs2))), + (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>; +def : Pat<(XLenVT (setge (Ty GPR:$rs1), (Ty simm12:$imm))), + (XORI (SLTI GPR:$rs1, simm12:$imm), 1)>; +def : Pat<(XLenVT (setge (Ty GPR:$rs1), (Ty GPR:$rs2))), + (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>; +def : Pat<(XLenVT (setule (Ty GPR:$rs1), (Ty simm12Minus1NonzeroNonNeg1:$imm))), + (SLTIU GPR:$rs1, (ImmPlus1 simm12Minus1NonzeroNonNeg1:$imm))>; +def : Pat<(XLenVT (setule (Ty GPR:$rs1), (Ty GPR:$rs2))), + (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>; +def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty simm12Minus1Nonzero:$imm))), + (SLTI GPR:$rs1, (ImmPlus1 simm12Minus1Nonzero:$imm))>; +def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty GPR:$rs2))), + (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>; +} + +let Predicates = [IsRV32] in { +def : LdPat<load, LW, PtrVT>; +def : StPat<store, SW, GPR, PtrVT>; +} + +let Predicates = [IsRV64] in { +def : LdPat<load, LD, PtrVT>; +def : StPat<store, SD, GPR, PtrVT>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp index b9c69a966b4a..5ad1e082344e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp @@ -67,7 +67,7 @@ private: bool tryCreateStridedLoadStore(IntrinsicInst *II, Type *DataType, Value *Ptr, Value *AlignOp); - std::pair<Value *, Value *> determineBaseAndStride(GetElementPtrInst *GEP, + std::pair<Value *, Value *> determineBaseAndStride(Instruction *Ptr, IRBuilderBase &Builder); bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride, @@ -321,9 +321,19 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L, } std::pair<Value *, Value *> -RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP, +RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr, IRBuilderBase &Builder) { + // A gather/scatter of a splat is a zero strided load/store. 
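+  // Editorial illustration (not in the patch): a gather whose pointer operand
+  // is a splat of one scalar pointer, roughly
+  //   %ptr.splat = splat of ptr %p across all lanes
+  //   %v = call @llvm.masked.gather(%ptr.splat, %mask, %passthru)
+  // reads the same address in every lane, so it can be lowered as a strided
+  // load from %p with stride 0 (and a scatter becomes a zero-strided store).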
+ if (auto *BasePtr = getSplatValue(Ptr)) { + Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType()); + return std::make_pair(BasePtr, ConstantInt::get(IntPtrTy, 0)); + } + + auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP) + return std::make_pair(nullptr, nullptr); + auto I = StridedAddrs.find(GEP); if (I != StridedAddrs.end()) return I->second; @@ -331,8 +341,12 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP, SmallVector<Value *, 2> Ops(GEP->operands()); // Base pointer needs to be a scalar. - if (Ops[0]->getType()->isVectorTy()) - return std::make_pair(nullptr, nullptr); + Value *ScalarBase = Ops[0]; + if (ScalarBase->getType()->isVectorTy()) { + ScalarBase = getSplatValue(ScalarBase); + if (!ScalarBase) + return std::make_pair(nullptr, nullptr); + } std::optional<unsigned> VecOperand; unsigned TypeScale = 0; @@ -362,11 +376,19 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP, // We can't extract the stride if the arithmetic is done at a different size // than the pointer type. Adding the stride later may not wrap correctly. // Technically we could handle wider indices, but I don't expect that in - // practice. + // practice. Handle one special case here - constants. This simplifies + // writing test cases. Value *VecIndex = Ops[*VecOperand]; Type *VecIntPtrTy = DL->getIntPtrType(GEP->getType()); - if (VecIndex->getType() != VecIntPtrTy) - return std::make_pair(nullptr, nullptr); + if (VecIndex->getType() != VecIntPtrTy) { + auto *VecIndexC = dyn_cast<Constant>(VecIndex); + if (!VecIndexC) + return std::make_pair(nullptr, nullptr); + if (VecIndex->getType()->getScalarSizeInBits() > VecIntPtrTy->getScalarSizeInBits()) + VecIndex = ConstantFoldCastInstruction(Instruction::Trunc, VecIndexC, VecIntPtrTy); + else + VecIndex = ConstantFoldCastInstruction(Instruction::SExt, VecIndexC, VecIntPtrTy); + } // Handle the non-recursive case. This is what we see if the vectorizer // decides to use a scalar IV + vid on demand instead of a vector IV. @@ -379,7 +401,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP, Ops[*VecOperand] = Start; Type *SourceTy = GEP->getSourceElementType(); Value *BasePtr = - Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front()); + Builder.CreateGEP(SourceTy, ScalarBase, ArrayRef(Ops).drop_front()); // Convert stride to pointer size if needed. Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType()); @@ -415,7 +437,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP, Ops[*VecOperand] = BasePhi; Type *SourceTy = GEP->getSourceElementType(); Value *BasePtr = - Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front()); + Builder.CreateGEP(SourceTy, ScalarBase, ArrayRef(Ops).drop_front()); // Final adjustments to stride should go in the start block. Builder.SetInsertPoint( @@ -448,17 +470,17 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II, if (!TLI->isTypeLegal(DataTypeVT)) return false; - // Pointer should be a GEP. - auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP) + // Pointer should be an instruction. 
+ auto *PtrI = dyn_cast<Instruction>(Ptr); + if (!PtrI) return false; - LLVMContext &Ctx = GEP->getContext(); + LLVMContext &Ctx = PtrI->getContext(); IRBuilder<InstSimplifyFolder> Builder(Ctx, *DL); - Builder.SetInsertPoint(GEP); + Builder.SetInsertPoint(PtrI); Value *BasePtr, *Stride; - std::tie(BasePtr, Stride) = determineBaseAndStride(GEP, Builder); + std::tie(BasePtr, Stride) = determineBaseAndStride(PtrI, Builder); if (!BasePtr) return false; assert(Stride != nullptr); @@ -481,8 +503,8 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II, II->replaceAllUsesWith(Call); II->eraseFromParent(); - if (GEP->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(GEP); + if (PtrI->use_empty()) + RecursivelyDeleteTriviallyDeadInstructions(PtrI); return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 901204043b3c..09b3ab96974c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -22,13 +22,18 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <optional> using namespace llvm; #define DEBUG_TYPE "riscv-isel" #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" +static cl::opt<bool> UsePseudoMovImm( + "riscv-use-rematerializable-movimm", cl::Hidden, + cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " + "constant materialization"), + cl::init(false)); + namespace llvm::RISCV { #define GET_RISCVVSSEGTable_IMPL #define GET_RISCVVLSEGTable_IMPL @@ -61,8 +66,11 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; SDLoc DL(N); SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); - Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), - N->getOperand(0), VL); + SDValue Src = N->getOperand(0); + if (VT.isInteger()) + Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(), + N->getOperand(0)); + Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL); break; } case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { @@ -83,7 +91,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { // Create temporary stack for each expanding node. SDValue StackSlot = - CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4)); + CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8)); int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); @@ -91,7 +99,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); SDValue OffsetSlot = - CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); + CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL); Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), Align(8)); @@ -142,13 +150,25 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { continue; MadeChange |= doPeepholeSExtW(N); - MadeChange |= doPeepholeMaskedRVV(N); + + // FIXME: This is here only because the VMerge transform doesn't + // know how to handle masked true inputs. Once that has been moved + // to post-ISEL, this can be deleted as well. 
+ MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N)); } CurDAG->setRoot(Dummy.getValue()); MadeChange |= doPeepholeMergeVVMFold(); + // After we're done with everything else, convert IMPLICIT_DEF + // passthru operands to NoRegister. This is required to workaround + // an optimization deficiency in MachineCSE. This really should + // be merged back into each of the patterns (i.e. there's no good + // reason not to go directly to NoReg), but is being done this way + // to allow easy backporting. + MadeChange |= doPeepholeNoRegPassThru(); + if (MadeChange) CurDAG->RemoveDeadNodes(); } @@ -184,28 +204,32 @@ static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget) { - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); - // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at + // Use a rematerializable pseudo instruction for short sequences if enabled. + if (Seq.size() == 2 && UsePseudoMovImm) + return SDValue( + CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT, + CurDAG->getTargetConstant(Imm, DL, VT)), + 0); + + // See if we can create this constant as (ADD (SLLI X, C), X) where X is at // worst an LUI+ADDIW. This will require an extra register, but avoids a // constant pool. + // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where + // low and high 32 bits are the same and bit 31 and 63 are set. if (Seq.size() > 3) { - int64_t LoVal = SignExtend64<32>(Imm); - int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32); - if (LoVal == HiVal) { - RISCVMatInt::InstSeq SeqLo = - RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); - if ((SeqLo.size() + 2) < Seq.size()) { - SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); - - SDValue SLLI = SDValue( - CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, - CurDAG->getTargetConstant(32, DL, VT)), - 0); - return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI), - 0); - } + unsigned ShiftAmt, AddOpc; + RISCVMatInt::InstSeq SeqLo = + RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); + if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { + SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); + + SDValue SLLI = SDValue( + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, + CurDAG->getTargetConstant(ShiftAmt, DL, VT)), + 0); + return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0); } } @@ -552,6 +576,12 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { SDValue VLOperand; unsigned Opcode = RISCV::PseudoVSETVLI; + if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { + const unsigned VLEN = Subtarget->getRealMinVLen(); + if (VLEN == Subtarget->getRealMaxVLen()) + if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) + VLMax = true; + } if (VLMax || isAllOnesConstant(Node->getOperand(1))) { VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); Opcode = RISCV::PseudoVSETVLIX0; @@ -808,7 +838,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { case ISD::Constant: { - assert(VT == Subtarget->getXLenVT() && "Unexpected VT"); + assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT"); auto *ConstNode = cast<ConstantSDNode>(Node); if (ConstNode->isZero()) { SDValue New = @@ -832,26 +862,34 @@ void 
RISCVDAGToDAGISel::Select(SDNode *Node) { } case ISD::ConstantFP: { const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF(); - int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm( - APF, VT); + auto [FPImm, NeedsFNeg] = + static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF, + VT); if (FPImm >= 0) { unsigned Opc; + unsigned FNegOpc; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected size"); case MVT::f16: Opc = RISCV::FLI_H; + FNegOpc = RISCV::FSGNJN_H; break; case MVT::f32: Opc = RISCV::FLI_S; + FNegOpc = RISCV::FSGNJN_S; break; case MVT::f64: Opc = RISCV::FLI_D; + FNegOpc = RISCV::FSGNJN_D; break; } - SDNode *Res = CurDAG->getMachineNode( Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT)); + if (NeedsFNeg) + Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0), + SDValue(Res, 0)); + ReplaceNode(Node, Res); return; } @@ -866,10 +904,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(), *Subtarget); + bool HasZdinx = Subtarget->hasStdExtZdinx(); + bool Is64Bit = Subtarget->is64Bit(); unsigned Opc; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected size"); + case MVT::bf16: + assert(Subtarget->hasStdExtZfbfmin()); + Opc = RISCV::FMV_H_X; + break; case MVT::f16: Opc = Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; @@ -881,20 +925,29 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // For RV32, we can't move from a GPR, we need to convert instead. This // should only happen for +0.0 and -0.0. assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant"); - bool HasZdinx = Subtarget->hasStdExtZdinx(); - if (Subtarget->is64Bit()) + if (Is64Bit) Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X; else Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W; break; } - SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm); + SDNode *Res; + if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) + Res = CurDAG->getMachineNode( + Opc, DL, VT, Imm, + CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT)); + else + Res = CurDAG->getMachineNode(Opc, DL, VT, Imm); // For f64 -0.0, we need to insert a fneg.d idiom. - if (NegZeroF64) - Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0), - SDValue(Res, 0)); + if (NegZeroF64) { + Opc = RISCV::FSGNJN_D; + if (HasZdinx) + Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X; + Res = + CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0)); + } ReplaceNode(Node, Res); return; @@ -2082,8 +2135,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) break; - SmallVector<SDValue> Operands = - {CurDAG->getUNDEF(VT), Ld->getBasePtr()}; + SmallVector<SDValue> Operands = { + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0), + Ld->getBasePtr()}; if (IsStrided) Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; @@ -2141,12 +2195,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { + const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, + std::vector<SDValue> &OutOps) { // Always produce a register and immediate operand, as expected by // RISCVAsmPrinter::PrintAsmMemoryOperand. 
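For context, a minimal user-level example (editorial sketch; assumes clang targeting RISC-V with the A extension enabled) of the 'A' memory constraint handled by the switch below: it describes an operand addressed by a single register, the form the AMO instructions require.

    #include <cstdint>

    static inline int32_t fetch_add(int32_t *p, int32_t v) {
      int32_t old;
      // '+A' marks *p as a read-write, register-addressed memory operand.
      asm volatile("amoadd.w %0, %2, %1"
                   : "=r"(old), "+A"(*p)
                   : "r"(v)
                   : "memory");
      return old;
    }

    int main() {
      int32_t x = 40;
      return fetch_add(&x, 2) == 40 && x == 42 ? 0 : 1;
    }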
switch (ConstraintID) { - case InlineAsm::Constraint_o: - case InlineAsm::Constraint_m: { + case InlineAsm::ConstraintCode::o: + case InlineAsm::ConstraintCode::m: { SDValue Op0, Op1; bool Found = SelectAddrRegImm(Op, Op0, Op1); assert(Found && "SelectAddrRegImm should always succeed"); @@ -2155,7 +2210,7 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( OutOps.push_back(Op1); return false; } - case InlineAsm::Constraint_A: + case InlineAsm::ConstraintCode::A: OutOps.push_back(Op); OutOps.push_back( CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT())); @@ -2205,7 +2260,8 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, // Fold constant addresses. static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, - SDValue Addr, SDValue &Base, SDValue &Offset) { + SDValue Addr, SDValue &Base, SDValue &Offset, + bool IsPrefetch = false) { if (!isa<ConstantSDNode>(Addr)) return false; @@ -2217,6 +2273,9 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Lo12 = SignExtend64<12>(CVal); int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; if (!Subtarget->is64Bit() || isInt<32>(Hi)) { + if (IsPrefetch && (Lo12 & 0b11111) != 0) + return false; + if (Hi) { int64_t Hi20 = (Hi >> 12) & 0xfffff; Base = SDValue( @@ -2231,14 +2290,15 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, } // Ask how constant materialization would handle this constant. - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits()); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget); // If the last instruction would be an ADDI, we can fold its immediate and // emit the rest of the sequence as the base. if (Seq.back().getOpcode() != RISCV::ADDI) return false; Lo12 = Seq.back().getImm(); + if (IsPrefetch && (Lo12 & 0b11111) != 0) + return false; // Drop the last instruction. Seq.pop_back(); @@ -2419,14 +2479,85 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, return true; } +/// Similar to SelectAddrRegImm, except that the least significant 5 bits of +/// Offset shoule be all zeros. +bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (SelectAddrFrameIndex(Addr, Base, Offset)) + return true; + + SDLoc DL(Addr); + MVT VT = Addr.getSimpleValueType(); + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); + if (isInt<12>(CVal)) { + Base = Addr.getOperand(0); + + // Early-out if not a valid offset. + if ((CVal & 0b11111) != 0) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, DL, VT); + return true; + } + + if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); + Offset = CurDAG->getTargetConstant(CVal, DL, VT); + return true; + } + } + + // Handle ADD with large immediates. + if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { + int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); + assert(!(isInt<12>(CVal) && isInt<12>(CVal)) && + "simm12 not already handled?"); + + // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save + // one instruction by folding adjustment (-2048 or 2016) into the address. + if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) { + int64_t Adj = CVal < 0 ? 
-2048 : 2016; + int64_t AdjustedOffset = CVal - Adj; + Base = SDValue(CurDAG->getMachineNode( + RISCV::ADDI, DL, VT, Addr.getOperand(0), + CurDAG->getTargetConstant(AdjustedOffset, DL, VT)), + 0); + Offset = CurDAG->getTargetConstant(Adj, DL, VT); + return true; + } + + if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, + Offset, true)) { + // Insert an ADD instruction with the materialized Hi52 bits. + Base = SDValue( + CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), + 0); + return true; + } + } + + if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true)) + return true; + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, DL, VT); + return true; +} + bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { ShAmt = N; + // Peek through zext. + if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) + ShAmt = ShAmt.getOperand(0); + // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift // amount. If there is an AND on the shift amount, we can bypass it if it // doesn't affect any of those bits. - if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) { + if (ShAmt.getOpcode() == ISD::AND && + isa<ConstantSDNode>(ShAmt.getOperand(1))) { const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); // Since the max shift amount is a power of 2 we can subtract 1 to make a @@ -2729,6 +2860,36 @@ bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, return false; } +static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, + unsigned Bits, + const TargetInstrInfo *TII) { + unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode()); + + if (!MCOpcode) + return false; + + const MCInstrDesc &MCID = TII->get(User->getMachineOpcode()); + const uint64_t TSFlags = MCID.TSFlags; + if (!RISCVII::hasSEWOp(TSFlags)) + return false; + assert(RISCVII::hasVLOp(TSFlags)); + + bool HasGlueOp = User->getGluedNode() != nullptr; + unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; + bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; + bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); + unsigned VLIdx = + User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; + const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1); + + if (UserOpNo == VLIdx) + return false; + + auto NumDemandedBits = + RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW); + return NumDemandedBits && Bits >= *NumDemandedBits; +} + // Return true if all users of this SDNode* only consume the lower \p Bits. // This can be used to form W instructions for add/sub/mul/shl even when the // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if @@ -2751,6 +2912,11 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, if (Depth >= SelectionDAG::MaxRecursionDepth) return false; + // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked + // the VT. Ensure the type is scalar to avoid wasting time on vectors. + if (Depth == 0 && !Node->getValueType(0).isScalarInteger()) + return false; + for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { SDNode *User = *UI; // Users of this node should have already been instruction selected @@ -2760,6 +2926,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, // TODO: Add more opcodes? 
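// A short sketch of the intent, assuming Bits == 32 on RV64: if every user of
// an i64 ADD is, say, an ADDW or SLLIW, those users only read the low 32 bits
// of the value, so the ADD itself can be selected as ADDW without changing
// observable behavior. Any user opcode not matched below conservatively
// returns false.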
switch (User->getMachineOpcode()) { default: + if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII)) + break; return false; case RISCV::ADDW: case RISCV::ADDIW: @@ -2937,27 +3105,41 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { return true; } +static SDValue findVSplat(SDValue N) { + if (N.getOpcode() == ISD::INSERT_SUBVECTOR) { + if (!N.getOperand(0).isUndef()) + return SDValue(); + N = N.getOperand(1); + } + SDValue Splat = N; + if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL && + Splat.getOpcode() != RISCVISD::VMV_S_X_VL) || + !Splat.getOperand(0).isUndef()) + return SDValue(); + assert(Splat.getNumOperands() == 3 && "Unexpected number of operands"); + return Splat; +} + bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { - if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) + SDValue Splat = findVSplat(N); + if (!Splat) return false; - assert(N.getNumOperands() == 3 && "Unexpected number of operands"); - SplatVal = N.getOperand(1); + + SplatVal = Splat.getOperand(1); return true; } -using ValidateFn = bool (*)(int64_t); - -static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, - SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, - ValidateFn ValidateImm) { - if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || - !isa<ConstantSDNode>(N.getOperand(1))) +static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, + std::function<bool(int64_t)> ValidateImm) { + SDValue Splat = findVSplat(N); + if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1))) return false; - assert(N.getNumOperands() == 3 && "Unexpected number of operands"); - int64_t SplatImm = - cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); + const unsigned SplatEltSize = Splat.getScalarValueSizeInBits(); + assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() && + "Unexpected splat operand type"); // The semantics of RISCVISD::VMV_V_X_VL is that when the operand // type is wider than the resulting vector element type: an implicit @@ -2966,34 +3148,31 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, // any zero-extended immediate. // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first // sign-extending to (XLenVT -1). 
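// A worked example of that matching, assuming SEW == 8 on an RV64 target:
//   splat operand (XLenVT i64) = 255  (0x00000000000000FF)
//   sextOrTrunc(8)             = 0xFF as an 8-bit value
//   getSExtValue()             = -1
//   isInt<5>(-1)               = true  -> accepted as simm5 for a .vi form
// Without truncating to the element size first, 255 would be rejected even
// though it denotes the same SEW=8 vector value as -1.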
- MVT XLenVT = Subtarget.getXLenVT(); - assert(XLenVT == N.getOperand(1).getSimpleValueType() && - "Unexpected splat operand type"); - MVT EltVT = N.getSimpleValueType().getVectorElementType(); - if (EltVT.bitsLT(XLenVT)) - SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); + APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize); + + int64_t SplatImm = SplatConst.getSExtValue(); if (!ValidateImm(SplatImm)) return false; - SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); + SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT()); return true; } bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { - return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, - [](int64_t Imm) { return isInt<5>(Imm); }); + return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget, + [](int64_t Imm) { return isInt<5>(Imm); }); } bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { - return selectVSplatSimmHelper( + return selectVSplatImmHelper( N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); } bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal) { - return selectVSplatSimmHelper( + return selectVSplatImmHelper( N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); }); @@ -3001,29 +3180,34 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal) { - if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || - !isa<ConstantSDNode>(N.getOperand(1))) - return false; - - int64_t SplatImm = - cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); - - if (!isUIntN(Bits, SplatImm)) - return false; - - SplatVal = - CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); - - return true; + return selectVSplatImmHelper( + N, SplatVal, *CurDAG, *Subtarget, + [Bits](int64_t Imm) { return isUIntN(Bits, Imm); }); } -bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) { - if (N->getOpcode() == ISD::SIGN_EXTEND || - N->getOpcode() == ISD::ZERO_EXTEND) { - if (!N.hasOneUse()) +bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { + // Truncates are custom lowered during legalization. + auto IsTrunc = [this](SDValue N) { + if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) + return false; + SDValue VL; + selectVLOp(N->getOperand(2), VL); + // Any vmset_vl is ok, since any bits past VL are undefined and we can + // assume they are set. + return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL && + isa<ConstantSDNode>(VL) && + cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel; + }; + + // We can have multiple nested truncates, so unravel them all if needed. + while (N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) { + if (!N.hasOneUse() || + N.getValueType().getSizeInBits().getKnownMinValue() < 8) return false; N = N->getOperand(0); } + return selectVSplat(N, SplatVal); } @@ -3038,8 +3222,12 @@ bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) { MVT VT = CFP->getSimpleValueType(0); - if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF, - VT) >= 0) + // Even if this FPImm requires an additional FNEG (i.e. 
the second element of + // the returned pair is true) we still prefer FLI + FNEG over immediate + // materialization as the latter might generate a longer instruction sequence. + if (static_cast<const RISCVTargetLowering *>(TLI) + ->getLegalZfaFPImm(APF, VT) + .first >= 0) return false; MVT XLenVT = Subtarget->getXLenVT(); @@ -3125,6 +3313,9 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { case RISCV::TH_MULAH: case RISCV::TH_MULSW: case RISCV::TH_MULSH: + if (N0.getValueType() == MVT::i32) + break; + // Result is already sign extended just remove the sext.w. // NOTE: We only handle the nodes that are selected with hasAllWUsers. ReplaceUses(N, N0.getNode()); @@ -3154,6 +3345,12 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { // Check the instruction defining V0; it needs to be a VMSET pseudo. SDValue MaskSetter = Glued->getOperand(2); + // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came + // from an extract_subvector or insert_subvector. + if (MaskSetter->isMachineOpcode() && + MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS) + MaskSetter = MaskSetter->getOperand(0); + const auto IsVMSet = [](unsigned Opc) { return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || @@ -3183,7 +3380,7 @@ static bool isImplicitDef(SDValue V) { // corresponding "unmasked" pseudo versions. The mask we're interested in will // take the form of a V0 physical register operand, with a glued // register-setting instruction. -bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { +bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { const RISCV::RISCVMaskedPseudoInfo *I = RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); if (!I) @@ -3222,7 +3419,12 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { if (auto *TGlued = Glued->getGluedNode()) Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); - SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + MachineSDNode *Result = + CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + + if (!N->memoperands_empty()) + CurDAG->setNodeMemRefs(Result, N->memoperands()); + Result->setFlags(N->getFlags()); ReplaceUses(N, Result); @@ -3230,21 +3432,11 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { } static bool IsVMerge(SDNode *N) { - unsigned Opc = N->getMachineOpcode(); - return Opc == RISCV::PseudoVMERGE_VVM_MF8 || - Opc == RISCV::PseudoVMERGE_VVM_MF4 || - Opc == RISCV::PseudoVMERGE_VVM_MF2 || - Opc == RISCV::PseudoVMERGE_VVM_M1 || - Opc == RISCV::PseudoVMERGE_VVM_M2 || - Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8; + return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM; } static bool IsVMv(SDNode *N) { - unsigned Opc = N->getMachineOpcode(); - return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 || - Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 || - Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 || - Opc == RISCV::PseudoVMV_V_V_M8; + return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V; } static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) { @@ -3336,6 +3528,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { if (!Info) return false; + // When Mask is not a true mask, this transformation is illegal for some + // operations whose results are affected by mask, like viota.m. 
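// A small example of why viota.m is excluded, assuming its source operand is
// all ones and VL = 4: unmasked it produces {0,1,2,3}, but under the mask
// {1,0,1,1} the active results become {0,-,1,2}. The mask changes the values
// of still-active elements, not merely which destination elements are
// written, so folding a vmerge's mask into it would compute different results.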
+ if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue)) + return false; + if (HasTiedDest && !isImplicitDef(True->getOperand(0))) { // The vmerge instruction must be TU. // FIXME: This could be relaxed, but we need to handle the policy for the @@ -3503,10 +3700,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // Add the glue for the CopyToReg of mask->v0. Ops.push_back(Glue); - SDNode *Result = + MachineSDNode *Result = CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops); Result->setFlags(True->getFlags()); + if (!cast<MachineSDNode>(True)->memoperands_empty()) + CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands()); + // Replace vmerge.vvm node by Result. ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); @@ -3514,46 +3714,30 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx) ReplaceUses(True.getValue(Idx), SDValue(Result, Idx)); - // Try to transform Result to unmasked intrinsic. - doPeepholeMaskedRVV(Result); return true; } -// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to -// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET. -bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) { -#define CASE_VMERGE_TO_VMV(lmul) \ - case RISCV::PseudoVMERGE_VVM_##lmul: \ - NewOpc = RISCV::PseudoVMV_V_V_##lmul; \ - break; - unsigned NewOpc; - switch (N->getMachineOpcode()) { - default: - llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction."); - CASE_VMERGE_TO_VMV(MF8) - CASE_VMERGE_TO_VMV(MF4) - CASE_VMERGE_TO_VMV(MF2) - CASE_VMERGE_TO_VMV(M1) - CASE_VMERGE_TO_VMV(M2) - CASE_VMERGE_TO_VMV(M4) - CASE_VMERGE_TO_VMV(M8) - } +bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { + bool MadeChange = false; + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); - if (!usesAllOnesMask(N, /* MaskOpIdx */ 3)) - return false; + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + if (N->use_empty() || !N->isMachineOpcode()) + continue; - SDLoc DL(N); - SDValue PolicyOp = - CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT()); - SDNode *Result = CurDAG->getMachineNode( - NewOpc, DL, N->getValueType(0), - {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5), - PolicyOp}); - ReplaceUses(N, Result); - return true; + if (IsVMerge(N) || IsVMv(N)) + MadeChange |= performCombineVMergeAndVOps(N); + } + return MadeChange; } -bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { +/// If our passthru is an implicit_def, use noreg instead. This side +/// steps issues with MachineCSE not being able to CSE expressions with +/// IMPLICIT_DEF operands while preserving the semantic intent. See +/// pr64282 for context. Note that this transform is the last one +/// performed at ISEL DAG to DAG. 
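// A minimal sketch of the MachineCSE deficiency being worked around (operand
// layout abbreviated): two otherwise identical unmasked pseudos such as
//   %a = PseudoVADD_VV_M1 %pt0(IMPLICIT_DEF), %x, %y, %avl, %sew, %policy
//   %b = PseudoVADD_VV_M1 %pt1(IMPLICIT_DEF), %x, %y, %avl, %sew, %policy
// differ only in which IMPLICIT_DEF vreg they name as passthru, so CSE will
// not merge them; rewriting both passthru operands to $noreg makes the two
// instructions identical and lets them CSE.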
+bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { bool MadeChange = false; SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); @@ -3562,18 +3746,34 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { if (N->use_empty() || !N->isMachineOpcode()) continue; - if (IsVMerge(N) || IsVMv(N)) - MadeChange |= performCombineVMergeAndVOps(N); - if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1)) - MadeChange |= performVMergeToVMv(N); + const unsigned Opc = N->getMachineOpcode(); + if (!RISCVVPseudosTable::getPseudoInfo(Opc) || + !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) || + !isImplicitDef(N->getOperand(0))) + continue; + + SmallVector<SDValue> Ops; + Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0))); + for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { + SDValue Op = N->getOperand(I); + Ops.push_back(Op); + } + + MachineSDNode *Result = + CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + Result->setFlags(N->getFlags()); + CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands()); + ReplaceUses(N, Result); + MadeChange = true; } return MadeChange; } + // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, - CodeGenOpt::Level OptLevel) { + CodeGenOptLevel OptLevel) { return new RISCVDAGToDAGISel(TM, OptLevel); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 281719c12e70..77e174135a59 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -30,7 +30,7 @@ public: RISCVDAGToDAGISel() = delete; explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine, - CodeGenOpt::Level OptLevel) + CodeGenOptLevel OptLevel) : SelectionDAGISel(ID, TargetMachine, OptLevel) {} bool runOnMachineFunction(MachineFunction &MF) override { @@ -43,7 +43,8 @@ public: void Select(SDNode *Node) override; - bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + InlineAsm::ConstraintCode ConstraintID, std::vector<SDValue> &OutOps) override; bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -53,6 +54,7 @@ public: bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) { return SelectAddrRegImm(Addr, Base, Offset, true); } + bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale); @@ -134,7 +136,9 @@ public: } bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal); bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal); - bool selectExtOneUseVSplat(SDValue N, SDValue &SplatVal); + // Matches the splat of a value which can be extended or truncated, such that + // only the bottom 8 bits are preserved. 
+ bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal); bool selectFPImm(SDValue N, SDValue &Imm); bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm); @@ -183,9 +187,9 @@ public: private: bool doPeepholeSExtW(SDNode *Node); - bool doPeepholeMaskedRVV(SDNode *Node); + bool doPeepholeMaskedRVV(MachineSDNode *Node); bool doPeepholeMergeVVMFold(); - bool performVMergeToVMv(SDNode *N); + bool doPeepholeNoRegPassThru(); bool performCombineVMergeAndVOps(SDNode *N); }; @@ -259,6 +263,7 @@ struct RISCVMaskedPseudoInfo { uint16_t MaskedPseudo; uint16_t UnmaskedPseudo; uint8_t MaskOpIdx; + uint8_t MaskAffectsResult : 1; }; #define GET_RISCVVSSEGTable_DECL diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f030982cb815..03e994586d0c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -38,6 +39,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -73,6 +75,10 @@ static cl::opt<int> "use for creating a floating-point immediate value"), cl::init(2)); +static cl::opt<bool> + RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, + cl::desc("Make i32 a legal type for SelectionDAG on RV64.")); + RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -113,6 +119,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Set up the register classes. addRegisterClass(XLenVT, &RISCV::GPRRegClass); + if (Subtarget.is64Bit() && RV64LegalI32) + addRegisterClass(MVT::i32, &RISCV::GPRRegClass); if (Subtarget.hasStdExtZfhOrZfhmin()) addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); @@ -145,6 +153,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, static const MVT::SimpleValueType F16VecVTs[] = { MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; + static const MVT::SimpleValueType BF16VecVTs[] = { + MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16, + MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16}; static const MVT::SimpleValueType F32VecVTs[] = { MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; static const MVT::SimpleValueType F64VecVTs[] = { @@ -154,7 +165,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, auto addRegClassForRVV = [this](MVT VT) { // Disable the smallest fractional LMUL types if ELEN is less than // RVVBitsPerBlock. 
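// Worked example of the guard below, assuming Zve32x (ELen == 32):
// MinElts = 64 / 32 = 2, so a type such as nxv1i8 (one element per 64-bit
// block, i.e. LMUL = 1/8) is skipped, since LMUL = mf8 is only available when
// ELEN is 64. With ELen == 64, MinElts is 1 and nothing is filtered out.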
- unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN(); + unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen(); if (VT.getVectorMinNumElements() < MinElts) return; @@ -183,10 +194,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, addRegClassForRVV(VT); } - if (Subtarget.hasVInstructionsF16()) + if (Subtarget.hasVInstructionsF16Minimal()) for (MVT VT : F16VecVTs) addRegClassForRVV(VT); + if (Subtarget.hasVInstructionsBF16()) + for (MVT VT : BF16VecVTs) + addRegClassForRVV(VT); + if (Subtarget.hasVInstructionsF32()) for (MVT VT : F32VecVTs) addRegClassForRVV(VT); @@ -228,8 +243,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_CC, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::SELECT_CC, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setCondCodeAction(ISD::SETLE, XLenVT, Expand); setCondCodeAction(ISD::SETGT, XLenVT, Custom); @@ -238,6 +257,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setCondCodeAction(ISD::SETUGT, XLenVT, Custom); setCondCodeAction(ISD::SETUGE, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::SETCC, MVT::i32, Promote); + setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -253,14 +275,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) { setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); - setOperationAction(ISD::LOAD, MVT::i32, Custom); - - setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, - MVT::i32, Custom); - - setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, - MVT::i32, Custom); + if (!RV64LegalI32) { + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, + MVT::i32, Custom); + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, + MVT::i32, Custom); + } } else { setLibcallName( {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128}, @@ -268,19 +290,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::MULO_I64, nullptr); } - if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) + if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); - else if (Subtarget.is64Bit()) - setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom); - else + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::MUL, MVT::i32, Promote); + } else if (Subtarget.is64Bit()) { + setOperationAction(ISD::MUL, MVT::i128, Custom); + if (!RV64LegalI32) + setOperationAction(ISD::MUL, MVT::i32, Custom); + } else { setOperationAction(ISD::MUL, MVT::i64, Custom); + } - if (!Subtarget.hasStdExtM()) + if (!Subtarget.hasStdExtM()) { setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, XLenVT, Expand); - else if (Subtarget.is64Bit()) - setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, - {MVT::i8, MVT::i16, MVT::i32}, Custom); + if (RV64LegalI32 && Subtarget.is64Bit()) + 
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32, + Promote); + } else if (Subtarget.is64Bit()) { + if (!RV64LegalI32) + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, + {MVT::i8, MVT::i16, MVT::i32}, Custom); + } + + if (RV64LegalI32 && Subtarget.is64Bit()) { + setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand); + setOperationAction( + {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, + Expand); + } setOperationAction( {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, @@ -290,14 +329,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, Custom); if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { - if (Subtarget.is64Bit()) + if (!RV64LegalI32 && Subtarget.is64Bit()) setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); } else if (Subtarget.hasVendorXTHeadBb()) { if (Subtarget.is64Bit()) setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom); + } else if (Subtarget.hasVendorXCVbitmanip()) { + setOperationAction(ISD::ROTL, XLenVT, Expand); } else { setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand); } // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll @@ -307,37 +350,74 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, Subtarget.hasVendorXTHeadBb()) ? Legal : Expand); - // Zbkb can use rev8+brev8 to implement bitreverse. - setOperationAction(ISD::BITREVERSE, XLenVT, - Subtarget.hasStdExtZbkb() ? Custom : Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::BSWAP, MVT::i32, + (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || + Subtarget.hasVendorXTHeadBb()) + ? Promote + : Expand); + + + if (Subtarget.hasVendorXCVbitmanip()) { + setOperationAction(ISD::BITREVERSE, XLenVT, Legal); + } else { + // Zbkb can use rev8+brev8 to implement bitreverse. + setOperationAction(ISD::BITREVERSE, XLenVT, + Subtarget.hasStdExtZbkb() ? Custom : Expand); + } if (Subtarget.hasStdExtZbb()) { setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, Legal); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32, + Promote); - if (Subtarget.is64Bit()) - setOperationAction( - {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, - MVT::i32, Custom); - } else { - setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand); + if (Subtarget.is64Bit()) { + if (RV64LegalI32) + setOperationAction(ISD::CTTZ, MVT::i32, Legal); + else + setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); + } + } else if (!Subtarget.hasVendorXCVbitmanip()) { + setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand); } - if (Subtarget.hasVendorXTHeadBb()) { - setOperationAction(ISD::CTLZ, XLenVT, Legal); - + if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || + Subtarget.hasVendorXCVbitmanip()) { // We need the custom lowering to make sure that the resulting sequence // for the 32bit case is efficient on 64bit targets. 
- if (Subtarget.is64Bit()) - setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); + if (Subtarget.is64Bit()) { + if (RV64LegalI32) { + setOperationAction(ISD::CTLZ, MVT::i32, + Subtarget.hasStdExtZbb() ? Legal : Promote); + if (!Subtarget.hasStdExtZbb()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); + } else + setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); + } + } else { + setOperationAction(ISD::CTLZ, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::CTLZ, MVT::i32, Expand); } - if (Subtarget.is64Bit()) + if (!RV64LegalI32 && Subtarget.is64Bit() && + !Subtarget.hasShortForwardBranchOpt()) setOperationAction(ISD::ABS, MVT::i32, Custom); + // We can use PseudoCCSUB to implement ABS. + if (Subtarget.hasShortForwardBranchOpt()) + setOperationAction(ISD::ABS, XLenVT, Legal); + if (!Subtarget.hasVendorXTHeadCondMov()) setOperationAction(ISD::SELECT, XLenVT, Custom); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::SELECT, MVT::i32, Promote); + static const unsigned FPLegalNodeTypes[] = { ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND, @@ -361,7 +441,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) setOperationAction(ISD::BITCAST, MVT::i16, Custom); - + + static const unsigned ZfhminZfbfminPromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, + ISD::FSUB, ISD::FMUL, ISD::FMA, + ISD::FDIV, ISD::FSQRT, ISD::FABS, + ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, + ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, + ISD::FTRUNC, ISD::FRINT, ISD::FROUND, + ISD::FROUNDEVEN, ISD::SELECT}; + if (Subtarget.hasStdExtZfbfmin()) { setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::bf16, Custom); @@ -369,6 +460,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); setOperationAction(ISD::ConstantFP, MVT::bf16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); + setOperationAction(ISD::BR_CC, MVT::bf16, Expand); + setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote); + setOperationAction(ISD::FREM, MVT::bf16, Promote); + // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the + // DAGCombiner::visitFP_ROUND probably needs improvements first. 
+ setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); } if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) { @@ -379,18 +477,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); } else { - static const unsigned ZfhminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, - ISD::FSUB, ISD::FMUL, ISD::FMA, - ISD::FDIV, ISD::FSQRT, ISD::FABS, - ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, - ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, - ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, - ISD::FTRUNC, ISD::FRINT, ISD::FROUND, - ISD::FROUNDEVEN, ISD::SELECT}; - - setOperationAction(ZfhminPromoteOps, MVT::f16, Promote); + setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote); setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, MVT::f16, Legal); @@ -409,7 +496,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, Subtarget.hasStdExtZfa() ? Legal : Promote); setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, - ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10}, + ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10}, MVT::f16, Promote); // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have @@ -439,6 +527,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(FPOpToExpand, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); + setTruncStoreAction(MVT::f32, MVT::bf16, Expand); setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom); setOperationAction(ISD::FP_TO_BF16, MVT::f32, @@ -481,6 +571,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(FPOpToExpand, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); + setTruncStoreAction(MVT::f64, MVT::bf16, Expand); setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom); setOperationAction(ISD::FP_TO_BF16, MVT::f64, @@ -504,6 +596,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, XLenVT, Legal); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, + ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, + MVT::i32, Legal); + setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom); setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } @@ -548,6 +645,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrOneBooleanContent); setOperationAction(ISD::VSCALE, XLenVT, Custom); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::VSCALE, MVT::i32, Custom); // RVV intrinsics may have illegal operands. // We also need to custom legalize vmv.x.s. 
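// The awkward case here is presumably RV32 with 64-bit elements: a vmv.x.s
// from an i64 element cannot be selected directly because the scalar result
// is wider than XLEN, so the custom lowering has to assemble it from two
// 32-bit pieces (e.g. a vmv.x.s, a shift of the element right by 32, and a
// second vmv.x.s).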
@@ -576,7 +675,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, - ISD::VP_ABS}; + ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, @@ -588,7 +687,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, - ISD::VP_FRINT, ISD::VP_FNEARBYINT}; + ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, + ISD::EXPERIMENTAL_VP_REVERSE}; static const unsigned IntegerVecReduceOps[] = { ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, @@ -659,9 +759,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Expand all extending loads to types larger than this, and truncating // stores from types larger than this. for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { - setTruncStoreAction(OtherVT, VT, Expand); - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT, - VT, Expand); + setTruncStoreAction(VT, OtherVT, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, + OtherVT, Expand); } setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, @@ -673,6 +773,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); + setOperationPromotedToType( ISD::VECTOR_SPLICE, VT, MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); @@ -695,8 +797,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, Legal); - setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand); - // Custom-lower extensions and truncations from/to mask types. 
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); @@ -712,7 +812,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, Custom); - + setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); setOperationAction( {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); @@ -751,8 +851,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { setTruncStoreAction(VT, OtherVT, Expand); - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT, - VT, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, + OtherVT, Expand); } setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); @@ -761,15 +861,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Splice setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); + if (Subtarget.hasStdExtZvkb()) { + setOperationAction(ISD::BSWAP, VT, Legal); + setOperationAction(ISD::VP_BSWAP, VT, Custom); + } else { + setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand); + setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); + } + if (Subtarget.hasStdExtZvbb()) { - setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal); - setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom); + setOperationAction(ISD::BITREVERSE, VT, Legal); + setOperationAction(ISD::VP_BITREVERSE, VT, Custom); setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, VT, Custom); } else { - setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand); - setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand); + setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand); setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand); setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, @@ -784,8 +891,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF}, VT, Custom); } - - setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); } } @@ -802,6 +907,27 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, }; + // TODO: support more ops. + static const unsigned ZvfhminPromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, + ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, + ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, + ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM, + ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, + ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; + + // TODO: support more vp ops. + static const unsigned ZvfhminPromoteVPOps[] = { + ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, + ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, + ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, + ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, + ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, + ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, + ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, + ISD::VP_FNEARBYINT, ISD::VP_SETCC}; + // Sets common operation actions on RVV floating-point vector types. 
const auto SetCommonVFPActions = [&](MVT VT) { setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); @@ -817,6 +943,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setCondCodeAction(VFPCCToExpand, VT, Expand); setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, @@ -833,6 +960,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FEXP10, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); @@ -891,6 +1019,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, continue; SetCommonVFPActions(VT); } + } else if (Subtarget.hasVInstructionsF16Minimal()) { + for (MVT VT : F16VecVTs) { + if (!isTypeLegal(VT)) + continue; + setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); + setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, + Custom); + setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); + setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, + Custom); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, + ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, + VT, Custom); + setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, + VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + // load/store + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + + // Custom split nxv32f16 since nxv32f32 if not legal. + if (VT == MVT::nxv32f16) { + setOperationAction(ZvfhminPromoteOps, VT, Custom); + setOperationAction(ZvfhminPromoteVPOps, VT, Custom); + continue; + } + // Add more promote ops. + MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); + setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); + setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); + } } if (Subtarget.hasVInstructionsF32()) { @@ -922,8 +1082,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(Op, VT, Expand); for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, OtherVT, Expand); - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, - OtherVT, VT, Expand); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, + OtherVT, Expand); } // Custom lower fixed vector undefs to scalable vector undefs to avoid @@ -986,6 +1146,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_TRUNCATE}, VT, Custom); + + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); continue; } @@ -1039,13 +1201,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(IntegerVPOps, VT, Custom); - // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the - // range of f32. 
- EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); - if (isTypeLegal(FloatVT)) - setOperationAction( - {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, - Custom); + if (Subtarget.hasStdExtZvkb()) + setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); + + if (Subtarget.hasStdExtZvbb()) { + setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, + ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, + VT, Custom); + } else { + // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the + // range of f32. + EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); + if (isTypeLegal(FloatVT)) + setOperationAction( + {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, + Custom); + } } for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { @@ -1066,6 +1237,34 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // expansion to a build_vector of 0s. setOperationAction(ISD::UNDEF, VT, Custom); + if (VT.getVectorElementType() == MVT::f16 && + !Subtarget.hasVInstructionsF16()) { + setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); + setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, + Custom); + setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); + setOperationAction( + {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, + Custom); + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, + ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, + VT, Custom); + setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, + VT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); + // Don't promote f16 vector operations to f32 if f32 vector type is + // not legal. + // TODO: could split the f16 vector into two vectors and do promotion. + if (!isTypeLegal(F32VecVT)) + continue; + setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); + setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); + continue; + } + // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, Custom); @@ -1088,7 +1287,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, - ISD::IS_FPCLASS}, + ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); @@ -1132,14 +1331,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } } + if (Subtarget.hasStdExtA()) { + setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); + if (RV64LegalI32 && Subtarget.is64Bit()) + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + } + if (Subtarget.hasForcedAtomics()) { - // Set atomic rmw/cas operations to expand to force __sync libcalls. + // Force __sync libcalls to be emitted for atomic rmw/cas operations. 
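// Rough idea of what LibCall means in practice: with forced-atomics, an IR
// operation such as
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
// is not expanded inline but emitted as a call to the corresponding __sync_*
// helper (here __sync_fetch_and_add_4), which the platform runtime is
// expected to provide.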
setOperationAction( {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, - XLenVT, Expand); + XLenVT, LibCall); } if (Subtarget.hasVendorXTHeadMemIdx()) { @@ -1166,11 +1371,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); - setMinimumJumpTableEntries(5); - - // Jumps are expensive, compared to logic - setJumpIsExpensive(); - setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); @@ -1197,7 +1397,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, - ISD::CONCAT_VECTORS}); + ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, + ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, + ISD::INSERT_VECTOR_ELT}); if (Subtarget.hasVendorXTHeadMemPair()) setTargetDAGCombine({ISD::LOAD, ISD::STORE}); if (Subtarget.useRVVForFixedLengthVectors()) @@ -1239,7 +1441,7 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, return true; // Don't allow VF=1 if those types are't legal. - if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN()) + if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen()) return true; // VLEN=32 support is incomplete. @@ -1602,11 +1804,12 @@ bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { } bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { - return Subtarget.hasStdExtZbb(); + return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); } bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { - return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb(); + return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || + Subtarget.hasVendorXCVbitmanip(); } bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( @@ -1677,7 +1880,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, // replace. If we don't support unaligned scalar mem, prefer the constant // pool. // TODO: Can the caller pass down the alignment? - if (!Subtarget.enableUnalignedScalarMem()) + if (!Subtarget.hasFastUnalignedAccess()) return true; // Prefer to keep the load if it would require many instructions. @@ -1686,8 +1889,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, // TODO: Should we keep the load only when we're definitely going to emit a // constant pool? - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits()); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget); return Seq.size() <= Subtarget.getMaxBuildIntsCost(); } @@ -1844,8 +2046,11 @@ bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { // If the vector op is supported, but the scalar op is not, the transform may // not be worthwhile. + // Permit a vector binary operation can be converted to scalar binary + // operation which is custom lowered with illegal type. 
EVT ScalarVT = VecVT.getScalarType(); - return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); + return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) || + isOperationCustom(Opc, ScalarVT); } bool RISCVTargetLowering::isOffsetFoldingLegal( @@ -1857,11 +2062,17 @@ bool RISCVTargetLowering::isOffsetFoldingLegal( return false; } -// Returns 0-31 if the fli instruction is available for the type and this is -// legal FP immediate for the type. Returns -1 otherwise. -int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const { +// Return one of the followings: +// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value. +// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its +// positive counterpart, which will be materialized from the first returned +// element. The second returned element indicated that there should be a FNEG +// followed. +// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. +std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, + EVT VT) const { if (!Subtarget.hasStdExtZfa()) - return -1; + return std::make_pair(-1, false); bool IsSupportedVT = false; if (VT == MVT::f16) { @@ -1874,9 +2085,14 @@ int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const { } if (!IsSupportedVT) - return -1; + return std::make_pair(-1, false); - return RISCVLoadFPImm::getLoadFPImm(Imm); + int Index = RISCVLoadFPImm::getLoadFPImm(Imm); + if (Index < 0 && Imm.isNegative()) + // Try the combination of its positive counterpart + FNEG. + return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true); + else + return std::make_pair(Index, false); } bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, @@ -1888,11 +2104,13 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, IsLegalVT = Subtarget.hasStdExtFOrZfinx(); else if (VT == MVT::f64) IsLegalVT = Subtarget.hasStdExtDOrZdinx(); + else if (VT == MVT::bf16) + IsLegalVT = Subtarget.hasStdExtZfbfmin(); if (!IsLegalVT) return false; - if (getLegalZfaFPImm(Imm, VT) >= 0) + if (getLegalZfaFPImm(Imm, VT).first >= 0) return true; // Cannot create a 64 bit floating-point immediate value for rv32. @@ -1901,14 +2119,17 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, // -0.0 can be created by fmv + fneg. return Imm.isZero(); } - // Special case: the cost for -0.0 is 1. - int Cost = Imm.isNegZero() - ? 1 - : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), - Subtarget.getXLen(), - Subtarget.getFeatureBits()); - // If the constantpool data is already in cache, only Cost 1 is cheaper. - return Cost < FPImmCost; + + // Special case: fmv + fneg + if (Imm.isNegZero()) + return true; + + // Building an integer and then converting requires a fmv at the end of + // the integer sequence. + const int Cost = + 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(), + Subtarget); + return Cost <= FPImmCost; } // TODO: This is very conservative. 
@@ -1953,7 +2174,12 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) return MVT::f32; - return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + + if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) + return MVT::i64; + + return PartVT; } unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, @@ -1968,6 +2194,21 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } +unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( + Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); + + if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) + IntermediateVT = MVT::i64; + + if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) + RegisterVT = MVT::i64; + + return NumRegs; +} + // Changes the condition code and swaps operands if necessary, so the SetCC // operation matches one of the comparisons supported directly by branches // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare @@ -2010,7 +2251,7 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, } break; case ISD::SETLT: - // Convert X < 1 to 0 <= X. + // Convert X < 1 to 0 >= X. if (C == 1) { RHS = LHS; LHS = DAG.getConstant(0, DL, RHS.getValueType()); @@ -2228,7 +2469,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT, return false; break; case MVT::f16: - if (!Subtarget.hasVInstructionsF16()) + if (!Subtarget.hasVInstructionsF16Minimal()) return false; break; case MVT::f32: @@ -2242,7 +2483,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT, } // Reject elements larger than ELEN. - if (EltVT.getSizeInBits() > Subtarget.getELEN()) + if (EltVT.getSizeInBits() > Subtarget.getELen()) return false; unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); @@ -2271,7 +2512,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, "Expected legal fixed length vector!"); unsigned MinVLen = Subtarget.getRealMinVLen(); - unsigned MaxELen = Subtarget.getELEN(); + unsigned MaxELen = Subtarget.getELen(); MVT EltVT = VT.getVectorElementType(); switch (EltVT.SimpleTy) { @@ -2348,16 +2589,32 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); } -static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { +static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, + SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { + // If we know the exact VLEN, our VL is exactly equal to VLMAX, and + // we can't encode the AVL as an immediate, use the VLMAX encoding. 
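// Worked example, assuming the exact VLEN is known to be 128 (min == max):
// for ContainerVT = nxv16i8 (LMUL=2), VLMAX = 128*2/8 = 32. An AVL of 32
// therefore equals VLMAX but does not fit vsetivli's 5-bit unsigned immediate
// (maximum 31), so returning X0 lets the vsetvli form encode "VL = VLMAX"
// without materializing 32 in a scalar register.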
+ const auto [MinVLMAX, MaxVLMAX] = + RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); + if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31) + return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); + return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); } static std::pair<SDValue, SDValue> +getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(VecVT.isScalableVector() && "Expecting a scalable vector"); + SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); + SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); + return {Mask, VL}; +} + +static std::pair<SDValue, SDValue> getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); - SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget); + SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); return {Mask, VL}; } @@ -2373,18 +2630,7 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); - MVT XLenVT = Subtarget.getXLenVT(); - SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); - SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); - return {Mask, VL}; -} - -// As above but assuming the given type is a scalable vector type. -static std::pair<SDValue, SDValue> -getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(VecVT.isScalableVector() && "Expecting a scalable vector"); - return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget); + return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget); } SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, @@ -2394,6 +2640,25 @@ SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, VecVT.getVectorElementCount()); } +std::pair<unsigned, unsigned> +RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, + const RISCVSubtarget &Subtarget) { + assert(VecVT.isScalableVector() && "Expected scalable vector"); + + unsigned EltSize = VecVT.getScalarSizeInBits(); + unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); + + unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); + unsigned MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + + unsigned VectorBitsMin = Subtarget.getRealMinVLen(); + unsigned MinVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); + + return std::make_pair(MinVLMAX, MaxVLMAX); +} + // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few // of either is (currently) supported. This can get us into an infinite loop // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR @@ -2407,6 +2672,51 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( return false; } +InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { + // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is + // implementation-defined. 
+ if (!VT.isVector()) + return InstructionCost::getInvalid(); + unsigned DLenFactor = Subtarget.getDLenFactor(); + unsigned Cost; + if (VT.isScalableVector()) { + unsigned LMul; + bool Fractional; + std::tie(LMul, Fractional) = + RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT)); + if (Fractional) + Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; + else + Cost = (LMul * DLenFactor); + } else { + Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor); + } + return Cost; +} + + +/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv +/// is generally quadratic in the number of vreg implied by LMUL. Note that +/// operand (index and possibly mask) are handled separately. +InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { + return getLMULCost(VT) * getLMULCost(VT); +} + +/// Return the cost of a vrgather.vi (or vx) instruction for the type VT. +/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, +/// or may track the vrgather.vv cost. It is implementation-dependent. +InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { + return getLMULCost(VT); +} + +/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction +/// for the type VT. (This does not cover the vslide1up or vslide1down +/// variants.) Slides may be linear in the number of vregs implied by LMUL, +/// or may track the vrgather.vv cost. It is implementation-dependent. +InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const { + return getLMULCost(VT); +} + static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { // RISC-V FP-to-int conversions saturate to the destination register size, but @@ -2420,9 +2730,10 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; if (!DstVT.isVector()) { - // In absense of Zfh, promote f16 to f32, then saturate the result. - if (Src.getSimpleValueType() == MVT::f16 && - !Subtarget.hasStdExtZfhOrZhinx()) { + // For bf16 or for f16 in absense of Zfh, promote to f32, then saturate + // the result. + if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || + Src.getValueType() == MVT::bf16) { Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src); } @@ -2778,6 +3089,31 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); } +// Expand vector LRINT and LLRINT by converting to the integer domain. 
+static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isVector() && "Unexpected type"); + + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + MVT ContainerVT = VT; + + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); + } + + auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + SDValue Truncated = + DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL); + + if (!VT.isFixedLengthVector()) + return Truncated; + + return convertFromScalableVector(VT, Truncated, DAG, Subtarget); +} + static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, @@ -2802,6 +3138,14 @@ getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); } +static MVT getLMUL1VT(MVT VT) { + assert(VT.getVectorElementType().getSizeInBits() <= 64 && + "Unexpected vector MVT"); + return MVT::getScalableVectorVT( + VT.getVectorElementType(), + RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); +} + struct VIDSequence { int64_t StepNumerator; unsigned StepDenominator; @@ -2975,8 +3319,124 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, return convertFromScalableVector(VT, Gather, DAG, Subtarget); } -static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + +/// Try and optimize BUILD_VECTORs with "dominant values" - these are values +/// which constitute a large proportion of the elements. In such cases we can +/// splat a vector with the dominant element and make up the shortfall with +/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable. +/// Note that this includes vectors of 2 elements by association. The +/// upper-most element is the "dominant" one, allowing us to use a splat to +/// "insert" the upper element, and an insert of the lower element at position +/// 0, which improves codegen. +static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isFixedLengthVector() && "Unexpected vector!"); + + MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + + SDLoc DL(Op); + auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + MVT XLenVT = Subtarget.getXLenVT(); + unsigned NumElts = Op.getNumOperands(); + + SDValue DominantValue; + unsigned MostCommonCount = 0; + DenseMap<SDValue, unsigned> ValueCounts; + unsigned NumUndefElts = + count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); + + // Track the number of scalar loads we know we'd be inserting, estimated as + // any non-zero floating-point constant. Other kinds of element are either + // already in registers or are materialized on demand. The threshold at which + // a vector load is more desirable than several scalar materializion and + // vector-insertion instructions is not known. + unsigned NumScalarLoads = 0; + + for (SDValue V : Op->op_values()) { + if (V.isUndef()) + continue; + + ValueCounts.insert(std::make_pair(V, 0)); + unsigned &Count = ValueCounts[V]; + if (0 == Count) + if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) + NumScalarLoads += !CFP->isExactlyValue(+0.0); + + // Is this value dominant? 
In case of a tie, prefer the highest element as + // it's cheaper to insert near the beginning of a vector than it is at the + // end. + if (++Count >= MostCommonCount) { + DominantValue = V; + MostCommonCount = Count; + } + } + + assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); + unsigned NumDefElts = NumElts - NumUndefElts; + unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; + + // Don't perform this optimization when optimizing for size, since + // materializing elements and inserting them tends to cause code bloat. + if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && + (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) && + ((MostCommonCount > DominantValueCountThreshold) || + (ValueCounts.size() <= Log2_32(NumDefElts)))) { + // Start by splatting the most common element. + SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); + + DenseSet<SDValue> Processed{DominantValue}; + + // We can handle an insert into the last element (of a splat) via + // v(f)slide1down. This is slightly better than the vslideup insert + // lowering as it avoids the need for a vector group temporary. It + // is also better than using vmerge.vx as it avoids the need to + // materialize the mask in a vector register. + if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1); + !LastOp.isUndef() && ValueCounts[LastOp] == 1 && + LastOp != DominantValue) { + Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + auto OpCode = + VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; + if (!VT.isFloatingPoint()) + LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp); + Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, + LastOp, Mask, VL); + Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget); + Processed.insert(LastOp); + } + + MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); + for (const auto &OpIdx : enumerate(Op->ops())) { + const SDValue &V = OpIdx.value(); + if (V.isUndef() || !Processed.insert(V).second) + continue; + if (ValueCounts[V] == 1) { + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, + DAG.getConstant(OpIdx.index(), DL, XLenVT)); + } else { + // Blend in all instances of this value using a VSELECT, using a + // mask where each bit signals whether that element is the one + // we're after. + SmallVector<SDValue> Ops; + transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { + return DAG.getConstant(V == V1, DL, XLenVT); + }); + Vec = DAG.getNode(ISD::VSELECT, DL, VT, + DAG.getBuildVector(SelMaskTy, DL, Ops), + DAG.getSplatBuildVector(VT, DL, V), Vec); + } + } + + return Vec; + } + + return SDValue(); +} + +static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert(VT.isFixedLengthVector() && "Unexpected vector!"); @@ -3008,94 +3468,68 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // XLenVT if we're producing a v8i1. This results in more consistent // codegen across RV32 and RV64. unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen()); - NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN()); - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - // If we have to use more than one INSERT_VECTOR_ELT then this - // optimization is likely to increase code size; avoid peforming it in - // such a case. We can use a load from a constant pool in this case. 
- if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) - return SDValue(); - // Now we can create our integer vector type. Note that it may be larger - // than the resulting mask type: v4i1 would use v1i8 as its integer type. - unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits); - MVT IntegerViaVecVT = - MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), - IntegerViaVecElts); - - uint64_t Bits = 0; - unsigned BitPos = 0, IntegerEltIdx = 0; - SmallVector<SDValue, 8> Elts(IntegerViaVecElts); - - for (unsigned I = 0; I < NumElts;) { - SDValue V = Op.getOperand(I); - bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue(); - Bits |= ((uint64_t)BitValue << BitPos); - ++BitPos; - ++I; - - // Once we accumulate enough bits to fill our scalar type or process the - // last element, insert into our vector and clear our accumulated data. - if (I % NumViaIntegerBits == 0 || I == NumElts) { - if (NumViaIntegerBits <= 32) - Bits = SignExtend64<32>(Bits); - SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); - Elts[IntegerEltIdx] = Elt; - Bits = 0; - BitPos = 0; - IntegerEltIdx++; - } - } - - SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts); - - if (NumElts < NumViaIntegerBits) { - // If we're producing a smaller vector than our minimum legal integer - // type, bitcast to the equivalent (known-legal) mask type, and extract - // our final mask. - assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); - Vec = DAG.getBitcast(MVT::v8i1, Vec); - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, - DAG.getConstant(0, DL, XLenVT)); - } else { - // Else we must have produced an integer type with the same size as the - // mask type; bitcast for the final result. - assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); - Vec = DAG.getBitcast(VT, Vec); + NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen()); + // If we have to use more than one INSERT_VECTOR_ELT then this + // optimization is likely to increase code size; avoid peforming it in + // such a case. We can use a load from a constant pool in this case. + if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) + return SDValue(); + // Now we can create our integer vector type. Note that it may be larger + // than the resulting mask type: v4i1 would use v1i8 as its integer type. + unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits); + MVT IntegerViaVecVT = + MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), + IntegerViaVecElts); + + uint64_t Bits = 0; + unsigned BitPos = 0, IntegerEltIdx = 0; + SmallVector<SDValue, 8> Elts(IntegerViaVecElts); + + for (unsigned I = 0; I < NumElts;) { + SDValue V = Op.getOperand(I); + bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue(); + Bits |= ((uint64_t)BitValue << BitPos); + ++BitPos; + ++I; + + // Once we accumulate enough bits to fill our scalar type or process the + // last element, insert into our vector and clear our accumulated data. + if (I % NumViaIntegerBits == 0 || I == NumElts) { + if (NumViaIntegerBits <= 32) + Bits = SignExtend64<32>(Bits); + SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); + Elts[IntegerEltIdx] = Elt; + Bits = 0; + BitPos = 0; + IntegerEltIdx++; } - - return Vec; } - // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask - // vector type, we have a legal equivalently-sized i8 type, so we can use - // that. 
- MVT WideVecVT = VT.changeVectorElementType(MVT::i8); - SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); + SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts); - SDValue WideVec; - if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { - // For a splat, perform a scalar truncate before creating the wider - // vector. - assert(Splat.getValueType() == XLenVT && - "Unexpected type for i1 splat value"); - Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat, - DAG.getConstant(1, DL, XLenVT)); - WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); + if (NumElts < NumViaIntegerBits) { + // If we're producing a smaller vector than our minimum legal integer + // type, bitcast to the equivalent (known-legal) mask type, and extract + // our final mask. + assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); + Vec = DAG.getBitcast(MVT::v8i1, Vec); + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, + DAG.getConstant(0, DL, XLenVT)); } else { - SmallVector<SDValue, 8> Ops(Op->op_values()); - WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); - SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); - WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); + // Else we must have produced an integer type with the same size as the + // mask type; bitcast for the final result. + assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); + Vec = DAG.getBitcast(VT, Vec); } - return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); + return Vec; } if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { - if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) - return Gather; unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; + if (!VT.isFloatingPoint()) + Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); return convertFromScalableVector(VT, Splat, DAG, Subtarget); @@ -3142,18 +3576,16 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget); if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || (StepOpcode == ISD::SHL && SplatStepVal != 0)) { - SDValue SplatStep = DAG.getSplatBuildVector( - VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT)); + SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT); VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep); } if (StepDenominator != 1) { - SDValue SplatStep = DAG.getSplatBuildVector( - VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT)); + SDValue SplatStep = + DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT); VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep); } if (Addend != 0 || Negate) { - SDValue SplatAddend = DAG.getSplatBuildVector( - VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT)); + SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT); VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend, VID); } @@ -3165,6 +3597,48 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } } + // For very small build_vectors, use a single scalar insert of a constant. + // TODO: Base this on constant rematerialization cost, not size. 
+ const unsigned EltBitSize = VT.getScalarSizeInBits(); + if (VT.getSizeInBits() <= 32 && + ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits()); + assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && + "Unexpected sequence type"); + // If we can use the original VL with the modified element type, this + // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this + // be moved into InsertVSETVLI? + unsigned ViaVecLen = + (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1; + MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); + + uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); + uint64_t SplatValue = 0; + // Construct the amalgamated value at this larger vector type. + for (const auto &OpIdx : enumerate(Op->op_values())) { + const auto &SeqV = OpIdx.value(); + if (!SeqV.isUndef()) + SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask) + << (OpIdx.index() * EltBitSize)); + } + + // On RV64, sign-extend from 32 to 64 bits where possible in order to + // achieve better constant materializion. + if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) + SplatValue = SignExtend64<32>(SplatValue); + + SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT, + DAG.getUNDEF(ViaVecVT), + DAG.getConstant(SplatValue, DL, XLenVT), + DAG.getConstant(0, DL, XLenVT)); + if (ViaVecLen != 1) + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, + MVT::getVectorVT(ViaIntVT, 1), Vec, + DAG.getConstant(0, DL, XLenVT)); + return DAG.getBitcast(VT, Vec); + } + + // Attempt to detect "hidden" splats, which only reveal themselves as splats // when re-interpreted as a vector with a larger element type. For example, // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 @@ -3173,7 +3647,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // TODO: This optimization could also work on non-constant splats, but it // would require bit-manipulation instructions to construct the splat value. SmallVector<SDValue> Sequence; - unsigned EltBitSize = VT.getScalarSizeInBits(); const auto *BV = cast<BuildVectorSDNode>(Op); if (VT.isInteger() && EltBitSize < 64 && ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && @@ -3181,11 +3654,19 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, (Sequence.size() * EltBitSize) <= 64) { unsigned SeqLen = Sequence.size(); MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); - MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen); assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || ViaIntVT == MVT::i64) && "Unexpected sequence type"); + // If we can use the original VL with the modified element type, this + // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this + // be moved into InsertVSETVLI? + const unsigned RequiredVL = NumElts / SeqLen; + const unsigned ViaVecLen = + (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? 
+ NumElts : RequiredVL; + MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); + unsigned EltIdx = 0; uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); uint64_t SplatValue = 0; @@ -3219,94 +3700,171 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, DAG.getUNDEF(ViaContainerVT), DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); + if (ViaVecLen != RequiredVL) + Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, + MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, + DAG.getConstant(0, DL, XLenVT)); return DAG.getBitcast(VT, Splat); } } - // Try and optimize BUILD_VECTORs with "dominant values" - these are values - // which constitute a large proportion of the elements. In such cases we can - // splat a vector with the dominant element and make up the shortfall with - // INSERT_VECTOR_ELTs. - // Note that this includes vectors of 2 elements by association. The - // upper-most element is the "dominant" one, allowing us to use a splat to - // "insert" the upper element, and an insert of the lower element at position - // 0, which improves codegen. - SDValue DominantValue; - unsigned MostCommonCount = 0; - DenseMap<SDValue, unsigned> ValueCounts; - unsigned NumUndefElts = - count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); + // If the number of signbits allows, see if we can lower as a <N x i8>. + // Our main goal here is to reduce LMUL (and thus work) required to + // build the constant, but we will also narrow if the resulting + // narrow vector is known to materialize cheaply. + // TODO: We really should be costing the smaller vector. There are + // profitable cases this misses. + if (EltBitSize > 8 && VT.isInteger() && + (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { + unsigned SignBits = DAG.ComputeNumSignBits(Op); + if (EltBitSize - SignBits < 8) { + SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), + DL, Op->ops()); + Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), + Source, DAG, Subtarget); + SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL); + return convertFromScalableVector(VT, Res, DAG, Subtarget); + } + } - // Track the number of scalar loads we know we'd be inserting, estimated as - // any non-zero floating-point constant. Other kinds of element are either - // already in registers or are materialized on demand. The threshold at which - // a vector load is more desirable than several scalar materializion and - // vector-insertion instructions is not known. - unsigned NumScalarLoads = 0; + if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) + return Res; - for (SDValue V : Op->op_values()) { - if (V.isUndef()) - continue; + // For constant vectors, use generic constant pool lowering. Otherwise, + // we'd have to materialize constants in GPRs just to move them into the + // vector. + return SDValue(); +} - ValueCounts.insert(std::make_pair(V, 0)); - unsigned &Count = ValueCounts[V]; - if (0 == Count) - if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) - NumScalarLoads += !CFP->isExactlyValue(+0.0); +static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isFixedLengthVector() && "Unexpected vector!"); - // Is this value dominant? In case of a tie, prefer the highest element as - // it's cheaper to insert near the beginning of a vector than it is at the - // end. 
- if (++Count >= MostCommonCount) { - DominantValue = V; - MostCommonCount = Count; + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return lowerBuildVectorOfConstants(Op, DAG, Subtarget); + + MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + + SDLoc DL(Op); + auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + MVT XLenVT = Subtarget.getXLenVT(); + + if (VT.getVectorElementType() == MVT::i1) { + // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask + // vector type, we have a legal equivalently-sized i8 type, so we can use + // that. + MVT WideVecVT = VT.changeVectorElementType(MVT::i8); + SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); + + SDValue WideVec; + if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { + // For a splat, perform a scalar truncate before creating the wider + // vector. + Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat, + DAG.getConstant(1, DL, Splat.getValueType())); + WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); + } else { + SmallVector<SDValue, 8> Ops(Op->op_values()); + WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); + SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); + WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); } + + return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); } - assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); - unsigned NumDefElts = NumElts - NumUndefElts; - unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; + if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { + if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) + return Gather; + unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL + : RISCVISD::VMV_V_X_VL; + if (!VT.isFloatingPoint()) + Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); + Splat = + DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); + return convertFromScalableVector(VT, Splat, DAG, Subtarget); + } - // Don't perform this optimization when optimizing for size, since - // materializing elements and inserting them tends to cause code bloat. - if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && - (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) && - ((MostCommonCount > DominantValueCountThreshold) || - (ValueCounts.size() <= Log2_32(NumDefElts)))) { - // Start by splatting the most common element. - SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); + if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) + return Res; - DenseSet<SDValue> Processed{DominantValue}; - MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); - for (const auto &OpIdx : enumerate(Op->ops())) { - const SDValue &V = OpIdx.value(); - if (V.isUndef() || !Processed.insert(V).second) - continue; - if (ValueCounts[V] == 1) { - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, - DAG.getConstant(OpIdx.index(), DL, XLenVT)); - } else { - // Blend in all instances of this value using a VSELECT, using a - // mask where each bit signals whether that element is the one - // we're after. 
- SmallVector<SDValue> Ops; - transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { - return DAG.getConstant(V == V1, DL, XLenVT); - }); - Vec = DAG.getNode(ISD::VSELECT, DL, VT, - DAG.getBuildVector(SelMaskTy, DL, Ops), - DAG.getSplatBuildVector(VT, DL, V), Vec); - } + // If we're compiling for an exact VLEN value, we can split our work per + // register in the register group. + const unsigned MinVLen = Subtarget.getRealMinVLen(); + const unsigned MaxVLen = Subtarget.getRealMaxVLen(); + if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) { + MVT ElemVT = VT.getVectorElementType(); + unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); + MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); + assert(M1VT == getLMUL1VT(M1VT)); + + // The following semantically builds up a fixed length concat_vector + // of the component build_vectors. We eagerly lower to scalable and + // insert_subvector here to avoid DAG combining it back to a large + // build_vector. + SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); + unsigned NumOpElts = M1VT.getVectorMinNumElements(); + SDValue Vec = DAG.getUNDEF(ContainerVT); + for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { + auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg); + SDValue SubBV = + DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps); + SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget); + unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV, + DAG.getVectorIdxConstant(InsertIdx, DL)); } + return convertFromScalableVector(VT, Vec, DAG, Subtarget); + } - return Vec; + // Cap the cost at a value linear to the number of elements in the vector. + // The default lowering is to use the stack. The vector store + scalar loads + // is linear in VL. However, at high lmuls vslide1down and vslidedown end up + // being (at least) linear in LMUL. As a result, using the vslidedown + // lowering for every element ends up being VL*LMUL.. + // TODO: Should we be directly costing the stack alternative? Doing so might + // give us a more accurate upper bound. + InstructionCost LinearBudget = VT.getVectorNumElements() * 2; + + // TODO: unify with TTI getSlideCost. + InstructionCost PerSlideCost = 1; + switch (RISCVTargetLowering::getLMUL(ContainerVT)) { + default: break; + case RISCVII::VLMUL::LMUL_2: + PerSlideCost = 2; + break; + case RISCVII::VLMUL::LMUL_4: + PerSlideCost = 4; + break; + case RISCVII::VLMUL::LMUL_8: + PerSlideCost = 8; + break; } - // For constant vectors, use generic constant pool lowering. Otherwise, - // we'd have to materialize constants in GPRs just to move them into the - // vector. - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || - ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + // TODO: Should we be using the build instseq then cost + evaluate scheme + // we use for integer constants here? 
+ unsigned UndefCount = 0; + for (const SDValue &V : Op->ops()) { + if (V.isUndef()) { + UndefCount++; + continue; + } + if (UndefCount) { + LinearBudget -= PerSlideCost; + UndefCount = 0; + } + LinearBudget -= PerSlideCost; + } + if (UndefCount) { + LinearBudget -= PerSlideCost; + } + + if (LinearBudget < 0) return SDValue(); assert((!VT.isFloatingPoint() || @@ -3315,13 +3873,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; - SDValue Vec = DAG.getUNDEF(ContainerVT); - unsigned UndefCount = 0; - for (const SDValue &V : Op->ops()) { + SDValue Vec; + UndefCount = 0; + for (SDValue V : Op->ops()) { if (V.isUndef()) { UndefCount++; continue; } + + // Start our sequence with a TA splat in the hopes that hardware is able to + // recognize there's no dependency on the prior value of our temporary + // register. + if (!Vec) { + Vec = DAG.getSplatVector(VT, DL, V); + Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + UndefCount = 0; + continue; + } + if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), @@ -3330,6 +3899,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } auto OpCode = VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; + if (!VT.isFloatingPoint()) + V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL); } @@ -3354,19 +3925,43 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, if ((LoC >> 31) == HiC) return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); - // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use - // vmv.v.x whose EEW = 32 to lower it. - if (LoC == HiC && isAllOnesConstant(VL)) { - MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); - // TODO: if vl <= min(VLMAX), we can also do this. But we could not - // access the subtarget here now. - auto InterVec = DAG.getNode( - RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo, - DAG.getRegister(RISCV::X0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); + // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, + // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use + // vlmax vsetvli or vsetivli to change the VL. + // FIXME: Support larger constants? + // FIXME: Support non-constant VLs by saturating? + if (LoC == HiC) { + SDValue NewVL; + if (isAllOnesConstant(VL) || + (isa<RegisterSDNode>(VL) && + cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) + NewVL = DAG.getRegister(RISCV::X0, MVT::i32); + else if (isa<ConstantSDNode>(VL) && + isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue())) + NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL); + + if (NewVL) { + MVT InterVT = + MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); + auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, + DAG.getUNDEF(InterVT), Lo, + DAG.getRegister(RISCV::X0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); + } } } + // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 
+ if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && + isa<ConstantSDNode>(Hi.getOperand(1)) && + Hi.getConstantOperandVal(1) == 31) + return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); + + // If the hi bits of the splat are undefined, then it's fine to just splat Lo + // even if it might be sign extended. + if (Hi.isUndef()) + return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); + // Fall back to a stack store and stride x0 vector load. return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, Hi, VL); @@ -3393,12 +3988,8 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, bool HasPassthru = Passthru && !Passthru.isUndef(); if (!HasPassthru && !Passthru) Passthru = DAG.getUNDEF(VT); - if (VT.isFloatingPoint()) { - // If VL is 1, we could use vfmv.s.f. - if (isOneConstant(VL)) - return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); + if (VT.isFloatingPoint()) return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); - } MVT XLenVT = Subtarget.getXLenVT(); @@ -3411,12 +4002,6 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, unsigned ExtOpc = isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); - // If VL is 1 and the scalar value won't benefit from immediate, we could - // use vmv.s.x. - if (isOneConstant(VL) && - (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue()))) - return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); } @@ -3431,14 +4016,6 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); } -static MVT getLMUL1VT(MVT VT) { - assert(VT.getVectorElementType().getSizeInBits() <= 64 && - "Unexpected vector MVT"); - return MVT::getScalableVectorVT( - VT.getVectorElementType(), - RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); -} - // This function lowers an insert of a scalar operand Scalar into lane // 0 of the vector regardless of the value of VL. The contents of the // remaining lanes of the result vector are unspecified. VL is assumed @@ -3446,24 +4023,34 @@ static MVT getLMUL1VT(MVT VT) { static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - const MVT XLenVT = Subtarget.getXLenVT(); + assert(VT.isScalableVector() && "Expect VT is scalable vector type."); + const MVT XLenVT = Subtarget.getXLenVT(); SDValue Passthru = DAG.getUNDEF(VT); - if (VT.isFloatingPoint()) { - // TODO: Use vmv.v.i for appropriate constants - // Use M1 or smaller to avoid over constraining register allocation - const MVT M1VT = getLMUL1VT(VT); - auto InnerVT = VT.bitsLE(M1VT) ? 
VT : M1VT; - SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT, - DAG.getUNDEF(InnerVT), Scalar, VL); - if (VT != InnerVT) - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - DAG.getUNDEF(VT), - Result, DAG.getConstant(0, DL, XLenVT)); - return Result; + + if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isNullConstant(Scalar.getOperand(1))) { + SDValue ExtractedVal = Scalar.getOperand(0); + MVT ExtractedVT = ExtractedVal.getSimpleValueType(); + MVT ExtractedContainerVT = ExtractedVT; + if (ExtractedContainerVT.isFixedLengthVector()) { + ExtractedContainerVT = getContainerForFixedLengthVector( + DAG, ExtractedContainerVT, Subtarget); + ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal, + DAG, Subtarget); + } + if (ExtractedContainerVT.bitsLE(VT)) + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal, + DAG.getConstant(0, DL, XLenVT)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, + DAG.getConstant(0, DL, XLenVT)); } + if (VT.isFloatingPoint()) + return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, + DAG.getUNDEF(VT), Scalar, VL); + // Avoid the tricky legalization cases by falling back to using the // splat code which already handles it gracefully. if (!Scalar.getValueType().bitsLE(XLenVT)) @@ -3478,24 +4065,8 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, unsigned ExtOpc = isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); - // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or - // higher would involve overly constraining the register allocator for - // no purpose. - if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) { - if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) && - VT.bitsLE(getLMUL1VT(VT))) - return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); - } - // Use M1 or smaller to avoid over constraining register allocation - const MVT M1VT = getLMUL1VT(VT); - auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT; - SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT, - DAG.getUNDEF(InnerVT), Scalar, VL); - if (VT != InnerVT) - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - DAG.getUNDEF(VT), - Result, DAG.getConstant(0, DL, XLenVT)); - return Result; + return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, + DAG.getUNDEF(VT), Scalar, VL); } // Is this a shuffle extracts either the even or odd elements of a vector? @@ -3509,7 +4080,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef<int> Mask, const RISCVSubtarget &Subtarget) { // Need to be able to widen the vector. - if (VT.getScalarSizeInBits() >= Subtarget.getELEN()) + if (VT.getScalarSizeInBits() >= Subtarget.getELen()) return false; // Both input must be extracts. @@ -3553,7 +4124,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget) { // We need to be able to widen elements to the next larger integer type. - if (VT.getScalarSizeInBits() >= Subtarget.getELEN()) + if (VT.getScalarSizeInBits() >= Subtarget.getELen()) return false; int Size = Mask.size(); @@ -3882,6 +4453,8 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, auto OpCode = IsVSlidedown ? (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : (VT.isFloatingPoint() ? 
RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); + if (!VT.isFloatingPoint()) + Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat); auto Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), convertToScalableVector(ContainerVT, V2, DAG, Subtarget), @@ -3904,7 +4477,7 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget); } - assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN()); + assert(VecVT.getScalarSizeInBits() < Subtarget.getELen()); // We're working with a vector of the same size as the resulting // interleaved vector, but with half the number of elements and @@ -3925,24 +4498,37 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget); SDValue Passthru = DAG.getUNDEF(WideContainerVT); - // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with - // vwaddu.vv - SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, - EvenV, OddV, Passthru, Mask, VL); - - // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1) - SDValue AllOnesVec = DAG.getSplatVector( - VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT())); - SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV, - AllOnesVec, Passthru, Mask, VL); - - // Add the two together so we get - // (OddV * 0xff...ff) + (OddV + EvenV) - // = (OddV * 0x100...00) + EvenV - // = (OddV << VecVT.getScalarSizeInBits()) + EvenV - // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx - Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved, - OddsMul, Passthru, Mask, VL); + SDValue Interleaved; + if (Subtarget.hasStdExtZvbb()) { + // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. + SDValue OffsetVec = + DAG.getSplatVector(VecContainerVT, DL, + DAG.getConstant(VecVT.getScalarSizeInBits(), DL, + Subtarget.getXLenVT())); + Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV, + OffsetVec, Passthru, Mask, VL); + Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT, + Interleaved, EvenV, Passthru, Mask, VL); + } else { + // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with + // vwaddu.vv + Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV, + OddV, Passthru, Mask, VL); + + // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1) + SDValue AllOnesVec = DAG.getSplatVector( + VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT())); + SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, + OddV, AllOnesVec, Passthru, Mask, VL); + + // Add the two together so we get + // (OddV * 0xff...ff) + (OddV + EvenV) + // = (OddV * 0x100...00) + EvenV + // = (OddV << VecVT.getScalarSizeInBits()) + EvenV + // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx + Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, + Interleaved, OddsMul, Passthru, Mask, VL); + } // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty> MVT ResultContainerVT = MVT::getVectorVT( @@ -3961,6 +4547,96 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, return Interleaved; } +// If we have a vector of bits that we want to reverse, we can use a vbrev on a +// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. 
+static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(SVN); + MVT VT = SVN->getSimpleValueType(0); + SDValue V = SVN->getOperand(0); + unsigned NumElts = VT.getVectorNumElements(); + + assert(VT.getVectorElementType() == MVT::i1); + + if (!ShuffleVectorInst::isReverseMask(SVN->getMask(), + SVN->getMask().size()) || + !SVN->getOperand(1).isUndef()) + return SDValue(); + + unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts)); + EVT ViaVT = EVT::getVectorVT( + *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1); + EVT ViaBitVT = + EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits()); + + // If we don't have zvbb or the larger element type > ELEN, the operation will + // be illegal. + if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE, + ViaVT) || + !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT)) + return SDValue(); + + // If the bit vector doesn't fit exactly into the larger element type, we need + // to insert it into the larger vector and then shift up the reversed bits + // afterwards to get rid of the gap introduced. + if (ViaEltSize > NumElts) + V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT), + V, DAG.getVectorIdxConstant(0, DL)); + + SDValue Res = + DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V)); + + // Shift up the reversed bits if the vector didn't exactly fit into the larger + // element type. + if (ViaEltSize > NumElts) + Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res, + DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT)); + + Res = DAG.getBitcast(ViaBitVT, Res); + + if (ViaEltSize > NumElts) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getVectorIdxConstant(0, DL)); + return Res; +} + +// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can +// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this +// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. +static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(SVN); + + EVT VT = SVN->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSubElts, RotateAmt; + if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2, + NumElts, NumSubElts, RotateAmt)) + return SDValue(); + MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts), + NumElts / NumSubElts); + + // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. + if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT)) + return SDValue(); + + SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0)); + + SDValue Rotate; + // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, + // so canonicalize to vrev8. 
+ if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) + Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op); + else + Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op, + DAG.getConstant(RotateAmt, DL, RotateVT)); + + return DAG.getBitcast(VT, Rotate); +} + static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { SDValue V1 = Op.getOperand(0); @@ -3971,8 +4647,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, unsigned NumElts = VT.getVectorNumElements(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - // Promote i1 shuffle to i8 shuffle. if (VT.getVectorElementType() == MVT::i1) { + // Lower to a vror.vi of a larger element type if possible before we promote + // i1s to i8s. + if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) + return V; + if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) + return V; + + // Promote i1 shuffle to i8 shuffle. MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1); V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT) @@ -4008,8 +4691,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { auto *Ld = cast<LoadSDNode>(V); Offset *= SVT.getStoreSize(); - SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), - TypeSize::Fixed(Offset), DL); + SDValue NewAddr = DAG.getMemBasePlusOffset( + Ld->getBasePtr(), TypeSize::getFixed(Offset), DL); // If this is SEW=64 on RV32, use a strided load with a stride of x0. if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { @@ -4071,6 +4754,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; + // A bitrotate will be one instruction on Zvkb, so try to lower to it first if + // available. + if (Subtarget.hasStdExtZvkb()) + if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) + return V; + // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may // be undef which can be handled with a single SLIDEDOWN/UP. int LoSrc, HiSrc; @@ -4197,6 +4886,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, if (IsSelect) return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); + // We might be able to express the shuffle as a bitrotate. But even if we + // don't have Zvkb and have to expand, the expanded sequence of approx. 2 + // shifts and a vor will have a higher throughput than a vrgather. + if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) + return V; + if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { // On such a large vector we're unable to use i8 as the index type. // FIXME: We could promote the index to i16 and use vrgatherei16, but that @@ -4216,6 +4911,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, IndexVT = IndexVT.changeVectorElementType(MVT::i16); } + // If the mask allows, we can do all the index computation in 16 bits. This + // requires less work and less register pressure at high LMUL, and creates + // smaller constants which may be cheaper to materialize. 
+ if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) && + (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) { + GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; + IndexVT = IndexVT.changeVectorElementType(MVT::i16); + } + MVT IndexContainerVT = ContainerVT.changeVectorElementType(IndexVT.getScalarType()); @@ -4490,26 +5194,26 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, if (!Subtarget.useConstantPoolForLargeInts()) return Op; - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) return Op; - // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do + // Optimizations below are disabled for opt size. If we're optimizing for + // size, use a constant pool. + if (DAG.shouldOptForSize()) + return SDValue(); + + // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do // that if it will avoid a constant pool. // It will require an extra temporary register though. - if (!DAG.shouldOptForSize()) { - int64_t LoVal = SignExtend64<32>(Imm); - int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32); - if (LoVal == HiVal) { - RISCVMatInt::InstSeq SeqLo = - RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); - if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) - return Op; - } - } + // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where + // low and high 32 bits are the same and bit 31 and 63 are set. + unsigned ShiftAmt, AddOpc; + RISCVMatInt::InstSeq SeqLo = + RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); + if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) + return Op; - // Expand to a constant pool using the default expansion code. 
return SDValue(); } @@ -4547,8 +5251,7 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); MVT XLenVT = Subtarget.getXLenVT(); - auto CNode = cast<ConstantSDNode>(Op.getOperand(1)); - unsigned Check = CNode->getZExtValue(); + unsigned Check = Op.getConstantOperandVal(1); unsigned TDCMask = 0; if (Check & fcSNan) TDCMask |= RISCV::FPMASK_Signaling_NaN; @@ -4582,6 +5285,10 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, if (VT.isScalableVector()) { MVT DstVT = VT0.changeVectorElementTypeToInteger(); auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget); + if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { + Mask = Op.getOperand(2); + VL = Op.getOperand(3); + } SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask, VL, Op->getFlags()); if (IsOneBitMask) @@ -4598,7 +5305,13 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, MVT ContainerVT = getContainerForFixedLengthVector(VT); MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget); - + if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { + Mask = Op.getOperand(2); + MVT MaskContainerVT = + getContainerForFixedLengthVector(Mask.getSimpleValueType()); + Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); + VL = Op.getOperand(3); + } Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget); SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0, @@ -4616,7 +5329,7 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS, TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL); - SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); + SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, DAG.getUNDEF(ContainerDstVT), SplatZero, VL); @@ -4626,10 +5339,11 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, return convertFromScalableVector(VT, VMSNE, DAG, Subtarget); } - SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0)); - SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV); - return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT), - ISD::CondCode::SETNE); + SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0)); + SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV); + SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT), + ISD::CondCode::SETNE); + return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); } // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these @@ -4637,38 +5351,88 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { SDLoc DL(Op); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); - MVT XLenVT = Subtarget.getXLenVT(); + if (!VT.isVector()) { + MVT XLenVT = Subtarget.getXLenVT(); - // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This - // ensures that when one input is a nan, the other will also be a nan allowing - // the nan to propagate. If both inputs are nan, this will swap the inputs - // which is harmless. - // FIXME: Handle nonans FMF and use isKnownNeverNaN. 
- SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); - SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); + // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This + // ensures that when one input is a nan, the other will also be a nan + // allowing the nan to propagate. If both inputs are nan, this will swap the + // inputs which is harmless. - SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); - SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); + SDValue NewY = Y; + if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { + SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); + NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); + } + + SDValue NewX = X; + if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { + SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); + NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); + } + + unsigned Opc = + Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; + return DAG.getNode(Opc, DL, VT, NewX, NewY); + } + + // Check no NaNs before converting to fixed vector scalable. + bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X); + bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); + Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); + } + + auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + SDValue NewY = Y; + if (!XIsNeverNan) { + SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), + {X, X, DAG.getCondCode(ISD::SETOEQ), + DAG.getUNDEF(ContainerVT), Mask, VL}); + NewY = + DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL); + } + + SDValue NewX = X; + if (!YIsNeverNan) { + SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), + {Y, Y, DAG.getCondCode(ISD::SETOEQ), + DAG.getUNDEF(ContainerVT), Mask, VL}); + NewX = + DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL); + } unsigned Opc = - Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; - return DAG.getNode(Opc, DL, VT, NewX, NewY); + Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL; + SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, + DAG.getUNDEF(ContainerVT), Mask, VL); + if (VT.isFixedLengthVector()) + Res = convertFromScalableVector(VT, Res, DAG, Subtarget); + return Res; } -/// Get a RISCV target specified VL op for a given SDNode. +/// Get a RISC-V target specified VL op for a given SDNode. 
static unsigned getRISCVVLOp(SDValue Op) { #define OP_CASE(NODE) \ case ISD::NODE: \ return RISCVISD::NODE##_VL; +#define VP_CASE(NODE) \ + case ISD::VP_##NODE: \ + return RISCVISD::NODE##_VL; + // clang-format off switch (Op.getOpcode()) { default: llvm_unreachable("don't have RISC-V specified VL op for this SDNode"); - // clang-format off OP_CASE(ADD) OP_CASE(SUB) OP_CASE(MUL) @@ -4681,6 +5445,13 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SHL) OP_CASE(SRA) OP_CASE(SRL) + OP_CASE(ROTL) + OP_CASE(ROTR) + OP_CASE(BSWAP) + OP_CASE(CTTZ) + OP_CASE(CTLZ) + OP_CASE(CTPOP) + OP_CASE(BITREVERSE) OP_CASE(SADDSAT) OP_CASE(UADDSAT) OP_CASE(SSUBSAT) @@ -4696,47 +5467,113 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SMAX) OP_CASE(UMIN) OP_CASE(UMAX) - OP_CASE(FMINNUM) - OP_CASE(FMAXNUM) OP_CASE(STRICT_FADD) OP_CASE(STRICT_FSUB) OP_CASE(STRICT_FMUL) OP_CASE(STRICT_FDIV) OP_CASE(STRICT_FSQRT) - // clang-format on -#undef OP_CASE + VP_CASE(ADD) // VP_ADD + VP_CASE(SUB) // VP_SUB + VP_CASE(MUL) // VP_MUL + VP_CASE(SDIV) // VP_SDIV + VP_CASE(SREM) // VP_SREM + VP_CASE(UDIV) // VP_UDIV + VP_CASE(UREM) // VP_UREM + VP_CASE(SHL) // VP_SHL + VP_CASE(FADD) // VP_FADD + VP_CASE(FSUB) // VP_FSUB + VP_CASE(FMUL) // VP_FMUL + VP_CASE(FDIV) // VP_FDIV + VP_CASE(FNEG) // VP_FNEG + VP_CASE(FABS) // VP_FABS + VP_CASE(SMIN) // VP_SMIN + VP_CASE(SMAX) // VP_SMAX + VP_CASE(UMIN) // VP_UMIN + VP_CASE(UMAX) // VP_UMAX + VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN + VP_CASE(SETCC) // VP_SETCC + VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP + VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP + VP_CASE(BITREVERSE) // VP_BITREVERSE + VP_CASE(BSWAP) // VP_BSWAP + VP_CASE(CTLZ) // VP_CTLZ + VP_CASE(CTTZ) // VP_CTTZ + VP_CASE(CTPOP) // VP_CTPOP + case ISD::CTLZ_ZERO_UNDEF: + case ISD::VP_CTLZ_ZERO_UNDEF: + return RISCVISD::CTLZ_VL; + case ISD::CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ_ZERO_UNDEF: + return RISCVISD::CTTZ_VL; case ISD::FMA: + case ISD::VP_FMA: return RISCVISD::VFMADD_VL; case ISD::STRICT_FMA: return RISCVISD::STRICT_VFMADD_VL; case ISD::AND: + case ISD::VP_AND: if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) return RISCVISD::VMAND_VL; return RISCVISD::AND_VL; case ISD::OR: + case ISD::VP_OR: if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) return RISCVISD::VMOR_VL; return RISCVISD::OR_VL; case ISD::XOR: + case ISD::VP_XOR: if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) return RISCVISD::VMXOR_VL; return RISCVISD::XOR_VL; + case ISD::VP_SELECT: + return RISCVISD::VSELECT_VL; + case ISD::VP_MERGE: + return RISCVISD::VP_MERGE_VL; + case ISD::VP_ASHR: + return RISCVISD::SRA_VL; + case ISD::VP_LSHR: + return RISCVISD::SRL_VL; + case ISD::VP_SQRT: + return RISCVISD::FSQRT_VL; + case ISD::VP_SIGN_EXTEND: + return RISCVISD::VSEXT_VL; + case ISD::VP_ZERO_EXTEND: + return RISCVISD::VZEXT_VL; + case ISD::VP_FP_TO_SINT: + return RISCVISD::VFCVT_RTZ_X_F_VL; + case ISD::VP_FP_TO_UINT: + return RISCVISD::VFCVT_RTZ_XU_F_VL; + case ISD::FMINNUM: + case ISD::VP_FMINNUM: + return RISCVISD::VFMIN_VL; + case ISD::FMAXNUM: + case ISD::VP_FMAXNUM: + return RISCVISD::VFMAX_VL; } + // clang-format on +#undef OP_CASE +#undef VP_CASE } /// Return true if a RISC-V target specified op has a merge operand. 
static bool hasMergeOp(unsigned Opcode) { assert(Opcode > RISCVISD::FIRST_NUMBER && - Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL && + Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); - assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 && - "adding target specific op should update this function"); - if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL) + static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == + 125 && + RISCVISD::LAST_RISCV_STRICTFP_OPCODE - + ISD::FIRST_TARGET_STRICTFP_OPCODE == + 21 && + "adding target specific op should update this function"); + if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) return true; if (Opcode == RISCVISD::FCOPYSIGN_VL) return true; if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) return true; + if (Opcode == RISCVISD::SETCC_VL) + return true; if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) return true; return false; @@ -4745,10 +5582,14 @@ static bool hasMergeOp(unsigned Opcode) { /// Return true if a RISC-V target specified op has a mask operand. static bool hasMaskOp(unsigned Opcode) { assert(Opcode > RISCVISD::FIRST_NUMBER && - Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL && + Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); - assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 && - "adding target specific op should update this function"); + static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == + 125 && + RISCVISD::LAST_RISCV_STRICTFP_OPCODE - + ISD::FIRST_TARGET_STRICTFP_OPCODE == + 21 && + "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) return true; if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) @@ -4759,6 +5600,112 @@ static bool hasMaskOp(unsigned Opcode) { return false; } +static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); + SDLoc DL(Op); + + SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); + SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (!Op.getOperand(j).getValueType().isVector()) { + LoOperands[j] = Op.getOperand(j); + HiOperands[j] = Op.getOperand(j); + continue; + } + std::tie(LoOperands[j], HiOperands[j]) = + DAG.SplitVector(Op.getOperand(j), DL); + } + + SDValue LoRes = + DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); + SDValue HiRes = + DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); +} + +static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { + assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op"); + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); + SDLoc DL(Op); + + SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); + SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) { + std::tie(LoOperands[j], HiOperands[j]) = + DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL); + continue; + } + if (!Op.getOperand(j).getValueType().isVector()) { + LoOperands[j] = Op.getOperand(j); + HiOperands[j] = Op.getOperand(j); + continue; + } + std::tie(LoOperands[j], 
HiOperands[j]) = + DAG.SplitVector(Op.getOperand(j), DL); + } + + SDValue LoRes = + DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); + SDValue HiRes = + DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); +} + +static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + + auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL); + auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL); + auto [EVLLo, EVLHi] = + DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL); + + SDValue ResLo = + DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), + {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags()); + return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), + {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags()); +} + +static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { + + assert(Op->isStrictFPOpcode()); + + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0)); + + SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1)); + SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1)); + + SDLoc DL(Op); + + SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); + SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (!Op.getOperand(j).getValueType().isVector()) { + LoOperands[j] = Op.getOperand(j); + HiOperands[j] = Op.getOperand(j); + continue; + } + std::tie(LoOperands[j], HiOperands[j]) = + DAG.SplitVector(Op.getOperand(j), DL); + } + + SDValue LoRes = + DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags()); + HiOperands[0] = LoRes.getValue(1); + SDValue HiRes = + DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags()); + + SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0), + LoRes.getValue(0), HiRes.getValue(0)); + return DAG.getMergeValues({V, HiRes.getValue(1)}, DL); +} + SDValue RISCVTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -4796,6 +5743,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerShiftRightParts(Op, DAG, false); case ISD::ROTL: case ISD::ROTR: + if (Op.getValueType().isFixedLengthVector()) { + assert(Subtarget.hasStdExtZvkb()); + return lowerToScalableOp(Op, DAG); + } assert(Subtarget.hasVendorXTHeadBb() && !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && "Unexpected custom legalization"); @@ -4889,6 +5840,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return LowerIS_FPCLASS(Op, DAG); case ISD::BITREVERSE: { MVT VT = Op.getSimpleValueType(); + if (VT.isFixedLengthVector()) { + assert(Subtarget.hasStdExtZvbb()); + return lowerToScalableOp(Op, DAG); + } SDLoc DL(Op); assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); @@ -4931,6 +5886,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, if (VT.isFixedLengthVector()) ContainerVT = getContainerForFixedLengthVector(VT); SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; + Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar); SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Scalar, VL); if (VT.isFixedLengthVector()) @@ -4938,9 +5894,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return V; } case ISD::VSCALE: { + MVT XLenVT = Subtarget.getXLenVT(); MVT VT = 
Op.getSimpleValueType(); SDLoc DL(Op); - SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); + SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); // We define our scalable vector types for lmul=1 to use a 64 bit known // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate // vscale as VLENB / 8. @@ -4953,22 +5910,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, if (isPowerOf2_64(Val)) { uint64_t Log2 = Log2_64(Val); if (Log2 < 3) - return DAG.getNode(ISD::SRL, DL, VT, VLENB, - DAG.getConstant(3 - Log2, DL, VT)); - if (Log2 > 3) - return DAG.getNode(ISD::SHL, DL, VT, VLENB, - DAG.getConstant(Log2 - 3, DL, VT)); - return VLENB; - } - // If the multiplier is a multiple of 8, scale it down to avoid needing - // to shift the VLENB value. - if ((Val % 8) == 0) - return DAG.getNode(ISD::MUL, DL, VT, VLENB, - DAG.getConstant(Val / 8, DL, VT)); - - SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, - DAG.getConstant(3, DL, VT)); - return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); + Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res, + DAG.getConstant(3 - Log2, DL, VT)); + else if (Log2 > 3) + Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res, + DAG.getConstant(Log2 - 3, DL, XLenVT)); + } else if ((Val % 8) == 0) { + // If the multiplier is a multiple of 8, scale it down to avoid needing + // to shift the VLENB value. + Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res, + DAG.getConstant(Val / 8, DL, XLenVT)); + } else { + SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res, + DAG.getConstant(3, DL, XLenVT)); + Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale, + DAG.getConstant(Val, DL, XLenVT)); + } + return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); } case ISD::FPOWI: { // Custom promote f16 powi with illegal i32 integer type on RV64. 
Once @@ -4986,6 +5944,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, } case ISD::FMAXIMUM: case ISD::FMINIMUM: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVectorOp(Op, DAG); return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); case ISD::FP_EXTEND: { SDLoc DL(Op); @@ -5026,10 +5988,42 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: return lowerStrictFPExtendOrRoundLike(Op, DAG); - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + if (Op.getValueType().isVector() && + Op.getValueType().getScalarType() == MVT::f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + if (Op.getValueType() == MVT::nxv32f16) + return SplitVectorOp(Op, DAG); + // int -> f32 + SDLoc DL(Op); + MVT NVT = + MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); + SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); + // f32 -> f16 + return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); + } + [[fallthrough]]; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (SDValue Op1 = Op.getOperand(0); + Op1.getValueType().isVector() && + Op1.getValueType().getScalarType() == MVT::f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + if (Op1.getValueType() == MVT::nxv32f16) + return SplitVectorOp(Op, DAG); + // f16 -> f32 + SDLoc DL(Op); + MVT NVT = MVT::getVectorVT(MVT::f32, + Op1.getValueType().getVectorElementCount()); + SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); + // f32 -> int + return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec); + } + [[fallthrough]]; case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_SINT_TO_FP: @@ -5180,7 +6174,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); SDValue Res = makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; - if (Subtarget.is64Bit()) + if (Subtarget.is64Bit() && !RV64LegalI32) return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); return DAG.getBitcast(MVT::i32, Res); } @@ -5209,7 +6203,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); SDValue Res = makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; - if (Subtarget.is64Bit()) + if (Subtarget.is64Bit() && !RV64LegalI32) return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); return DAG.getBitcast(MVT::i32, Res); } @@ -5236,6 +6230,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FROUND: case ISD::FROUNDEVEN: return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::LRINT: + case ISD::LLRINT: + return lowerVectorXRINT(Op, DAG, Subtarget); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMAX: @@ -5262,6 +6259,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_REDUCE_SEQ_FADD: case ISD::VP_REDUCE_FMIN: case ISD::VP_REDUCE_FMAX: + if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVectorReductionOp(Op, DAG); return lowerVPREDUCE(Op, DAG); case ISD::VP_REDUCE_AND: case ISD::VP_REDUCE_OR: @@ -5291,6 +6292,21 @@ SDValue 
RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::SPLAT_VECTOR: + if (Op.getValueType().getScalarType() == MVT::f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + if (Op.getValueType() == MVT::nxv32f16) + return SplitVectorOp(Op, DAG); + SDLoc DL(Op); + SDValue NewScalar = + DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); + SDValue NewSplat = DAG.getNode( + ISD::SPLAT_VECTOR, DL, + MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()), + NewScalar); + return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); + } if (Op.getValueType().getVectorElementType() == MVT::i1) return lowerVectorMaskSplat(Op, DAG); return SDValue(); @@ -5387,6 +6403,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); } + if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVectorOp(Op, DAG); + return lowerFixedLengthVectorSetccToRVV(Op, DAG); } case ISD::ADD: @@ -5401,6 +6422,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::SREM: case ISD::UDIV: case ISD::UREM: + case ISD::BSWAP: + case ISD::CTPOP: return lowerToScalableOp(Op, DAG); case ISD::SHL: case ISD::SRA: @@ -5411,10 +6434,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); return SDValue(); - case ISD::SADDSAT: - case ISD::UADDSAT: - case ISD::SSUBSAT: - case ISD::USUBSAT: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -5423,23 +6442,40 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FABS: case ISD::FSQRT: case ISD::FMA: + case ISD::FMINNUM: + case ISD::FMAXNUM: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVectorOp(Op, DAG); + [[fallthrough]]; + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: - case ISD::FMINNUM: - case ISD::FMAXNUM: return lowerToScalableOp(Op, DAG); case ISD::ABS: case ISD::VP_ABS: return lowerABS(Op, DAG); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: + if (Subtarget.hasStdExtZvbb()) + return lowerToScalableOp(Op, DAG); + assert(Op.getOpcode() != ISD::CTTZ); return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); case ISD::VSELECT: return lowerFixedLengthVectorSelectToRVV(Op, DAG); case ISD::FCOPYSIGN: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVectorOp(Op, DAG); return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: @@ -5447,6 +6483,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitStrictFPVectorOp(Op, DAG); return lowerToScalableOp(Op, DAG); case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: @@ -5472,106 +6512,115 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::EH_DWARF_CFA: return lowerEH_DWARF_CFA(Op, DAG); case 
ISD::VP_SELECT: - return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); case ISD::VP_MERGE: - return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL); case ISD::VP_ADD: - return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true); case ISD::VP_SUB: - return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true); case ISD::VP_MUL: - return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true); case ISD::VP_SDIV: - return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true); case ISD::VP_UDIV: - return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true); case ISD::VP_SREM: - return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true); case ISD::VP_UREM: - return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true); + return lowerVPOp(Op, DAG); case ISD::VP_AND: - return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL); case ISD::VP_OR: - return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL); case ISD::VP_XOR: - return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL); - case ISD::VP_ASHR: - return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true); - case ISD::VP_LSHR: - return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true); - case ISD::VP_SHL: - return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true); + return lowerLogicVPOp(Op, DAG); case ISD::VP_FADD: - return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true); case ISD::VP_FSUB: - return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true); case ISD::VP_FMUL: - return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true); case ISD::VP_FDIV: - return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true); case ISD::VP_FNEG: - return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL); case ISD::VP_FABS: - return lowerVPOp(Op, DAG, RISCVISD::FABS_VL); case ISD::VP_SQRT: - return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL); case ISD::VP_FMA: - return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL); case ISD::VP_FMINNUM: - return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true); case ISD::VP_FMAXNUM: - return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true); case ISD::VP_FCOPYSIGN: - return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true); + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVPOp(Op, DAG); + [[fallthrough]]; + case ISD::VP_ASHR: + case ISD::VP_LSHR: + case ISD::VP_SHL: + return lowerVPOp(Op, DAG); + case ISD::VP_IS_FPCLASS: + return LowerIS_FPCLASS(Op, DAG); case ISD::VP_SIGN_EXTEND: case ISD::VP_ZERO_EXTEND: if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) return lowerVPExtMaskOp(Op, DAG); - return lowerVPOp(Op, DAG, - Op.getOpcode() == ISD::VP_SIGN_EXTEND - ? 
RISCVISD::VSEXT_VL - : RISCVISD::VZEXT_VL); + return lowerVPOp(Op, DAG); case ISD::VP_TRUNCATE: return lowerVectorTruncLike(Op, DAG); case ISD::VP_FP_EXTEND: case ISD::VP_FP_ROUND: return lowerVectorFPExtendOrRoundLike(Op, DAG); - case ISD::VP_FP_TO_SINT: - return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL); - case ISD::VP_FP_TO_UINT: - return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL); case ISD::VP_SINT_TO_FP: - return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL); case ISD::VP_UINT_TO_FP: - return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL); + if (Op.getValueType().isVector() && + Op.getValueType().getScalarType() == MVT::f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + if (Op.getValueType() == MVT::nxv32f16) + return SplitVPOp(Op, DAG); + // int -> f32 + SDLoc DL(Op); + MVT NVT = + MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); + auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); + // f32 -> f16 + return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); + } + [[fallthrough]]; + case ISD::VP_FP_TO_SINT: + case ISD::VP_FP_TO_UINT: + if (SDValue Op1 = Op.getOperand(0); + Op1.getValueType().isVector() && + Op1.getValueType().getScalarType() == MVT::f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + if (Op1.getValueType() == MVT::nxv32f16) + return SplitVPOp(Op, DAG); + // f16 -> f32 + SDLoc DL(Op); + MVT NVT = MVT::getVectorVT(MVT::f32, + Op1.getValueType().getVectorElementCount()); + SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); + // f32 -> int + return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), + {WidenVec, Op.getOperand(1), Op.getOperand(2)}); + } + return lowerVPFPIntConvOp(Op, DAG); case ISD::VP_SETCC: + if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVPOp(Op, DAG); if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) return lowerVPSetCCMaskOp(Op, DAG); - return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true); + [[fallthrough]]; case ISD::VP_SMIN: - return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true); case ISD::VP_SMAX: - return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true); case ISD::VP_UMIN: - return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true); case ISD::VP_UMAX: - return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true); case ISD::VP_BITREVERSE: - return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true); case ISD::VP_BSWAP: - return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true); + return lowerVPOp(Op, DAG); case ISD::VP_CTLZ: case ISD::VP_CTLZ_ZERO_UNDEF: if (Subtarget.hasStdExtZvbb()) - return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true); + return lowerVPOp(Op, DAG); return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); case ISD::VP_CTTZ: case ISD::VP_CTTZ_ZERO_UNDEF: if (Subtarget.hasStdExtZvbb()) - return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true); + return lowerVPOp(Op, DAG); return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); case ISD::VP_CTPOP: - return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true); + return lowerVPOp(Op, DAG); case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: return lowerVPStridedLoad(Op, DAG); case ISD::EXPERIMENTAL_VP_STRIDED_STORE: @@ -5583,7 +6632,13 @@ SDValue 
RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_FROUND: case ISD::VP_FROUNDEVEN: case ISD::VP_FROUNDTOZERO: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVPOp(Op, DAG); return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::EXPERIMENTAL_VP_REVERSE: + return lowerVPReverseExperimental(Op, DAG); } } @@ -5630,15 +6685,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, // Use PC-relative addressing to access the GOT for this symbol, then load // the address from the GOT. This generates the pattern (PseudoLGA sym), // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MemOp = MF.getMachineMemOperand( MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); - SDValue Load = - DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other), - {DAG.getEntryNode(), Addr}, Ty, MemOp); + DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); return Load; } @@ -5660,16 +6715,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, // not be within 2GiB of PC, so use GOT-indirect addressing to access the // symbol. This generates the pattern (PseudoLGA sym), which expands to // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MemOp = MF.getMachineMemOperand( MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); - SDValue Load = - DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, - DAG.getVTList(Ty, MVT::Other), - {DAG.getEntryNode(), Addr}, Ty, MemOp); + DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); return Load; } @@ -5724,15 +6778,15 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, // the pattern (PseudoLA_TLS_IE sym), which expands to // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MemOp = MF.getMachineMemOperand( MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); - SDValue Load = DAG.getMemIntrinsicNode( - RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other), - {DAG.getEntryNode(), Addr}, Ty, MemOp); + DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); // Add the thread pointer. SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); @@ -5768,7 +6822,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, // This generates the pattern (PseudoLA_TLS_GD sym), which expands to // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); - SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr); + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); // Prepare argument list to generate call. ArgListTy Args; @@ -5904,56 +6959,6 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// RISC-V doesn't have general instructions for integer setne/seteq, but we can -/// check for equality with 0. This function emits nodes that convert the -/// seteq/setne into something that can be compared with 0. -/// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce -/// target-independent SelectionDAG nodes rather than machine nodes. -static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, - SelectionDAG &DAG) { - assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && - "Unexpected condition code!"); - - // We're looking for a setcc. - if (N->getOpcode() != ISD::SETCC) - return SDValue(); - - // Must be an equality comparison. - ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); - if (CCVal != ExpectedCCVal) - return SDValue(); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - - if (!LHS.getValueType().isScalarInteger()) - return SDValue(); - - // If the RHS side is 0, we don't need any extra instructions, return the LHS. - if (isNullConstant(RHS)) - return LHS; - - SDLoc DL(N); - - if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { - int64_t CVal = C->getSExtValue(); - // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and - // non-zero otherwise. - if (CVal == -2048) - return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, - DAG.getConstant(CVal, DL, N->getValueType(0))); - // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the - // LHS is equal to the RHS and non-zero otherwise. - if (isInt<12>(CVal) || CVal == 2048) - return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS, - DAG.getConstant(-CVal, DL, N->getValueType(0))); - } - - // If nothing else we can XOR the LHS and RHS to produce zero if they are - // equal and a non-zero value if they aren't. - return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS); -} - // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. // For now we only consider transformation profitable if `binOp(c0, c1)` ends up @@ -6041,35 +7046,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { // sequence or RISCVISD::SELECT_CC node (branch-based select). 
if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && VT.isScalarInteger()) { - if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) { - // (select (riscv_setne c), t, 0) -> (czero_eqz t, c) - if (isNullConstant(FalseV)) - return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV); - // (select (riscv_setne c), 0, f) -> (czero_nez f, c) - if (isNullConstant(TrueV)) - return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV); - // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f, - // c) - return DAG.getNode( - ISD::OR, DL, VT, - DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV), - DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV)); - } - if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) { - // (select (riscv_seteq c), t, 0) -> (czero_nez t, c) - if (isNullConstant(FalseV)) - return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV); - // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c) - if (isNullConstant(TrueV)) - return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV); - // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t, - // c) - return DAG.getNode( - ISD::OR, DL, VT, - DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV), - DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV)); - } - // (select c, t, 0) -> (czero_eqz t, c) if (isNullConstant(FalseV)) return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); @@ -6090,10 +7066,17 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { ISD::OR, DL, VT, FalseV, DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); + // Try some other optimizations before falling back to generic lowering. + if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) + return V; + // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) - return DAG.getNode(ISD::OR, DL, VT, - DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), - DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); + // Unless we have the short forward branch optimization. 
+ if (!Subtarget.hasShortForwardBranchOpt()) + return DAG.getNode( + ISD::OR, DL, VT, + DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); } if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) @@ -6297,7 +7280,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, // if Shamt-XLEN < 0: // Shamt < XLEN // Lo = Lo << Shamt - // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt)) + // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) // else: // Lo = 0 // Hi = Lo << (Shamt-XLEN) @@ -6336,7 +7319,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, // SRA expansion: // if Shamt-XLEN < 0: // Shamt < XLEN - // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) // Hi = Hi >>s Shamt // else: // Lo = Hi >>s (Shamt-XLEN); @@ -6344,7 +7327,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, // // SRL expansion: // if Shamt-XLEN < 0: // Shamt < XLEN - // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) // Hi = Hi >>u Shamt // else: // Lo = Hi >>u (Shamt-XLEN); @@ -6394,12 +7377,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); } - MVT XLenVT = Subtarget.getXLenVT(); - assert(SplatVal.getValueType() == XLenVT && - "Unexpected type for i1 splat value"); MVT InterVT = VT.changeVectorElementType(MVT::i8); - SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal, - DAG.getConstant(1, DL, XLenVT)); + SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal, + DAG.getConstant(1, DL, SplatVal.getValueType())); SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); SDValue Zero = DAG.getConstant(0, DL, InterVT); return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); @@ -6420,37 +7400,19 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); - if (VecVT.isFixedLengthVector()) { - MVT ContainerVT = getContainerForFixedLengthVector(VecVT); - SDLoc DL(Op); - auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; + MVT ContainerVT = VecVT; + if (VecVT.isFixedLengthVector()) + ContainerVT = getContainerForFixedLengthVector(VecVT); - SDValue Res = - splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); - return convertFromScalableVector(VecVT, Res, DAG, Subtarget); - } + auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; - if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { - int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); - int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); - // If Hi constant is all the same sign bit as Lo, lower this as a custom - // node in order to try and match RVV vector/scalar instructions. - if ((LoC >> 31) == HiC) - return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), - Lo, DAG.getRegister(RISCV::X0, MVT::i32)); - } + SDValue Res = + splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); - // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 
- if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && - isa<ConstantSDNode>(Hi.getOperand(1)) && - Hi.getConstantOperandVal(1) == 31) - return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo, - DAG.getRegister(RISCV::X0, MVT::i32)); + if (VecVT.isFixedLengthVector()) + Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget); - // Fall back to use a stack store and stride x0 vector load. Use X0 as VL. - return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, - DAG.getUNDEF(VecVT), Lo, Hi, - DAG.getRegister(RISCV::X0, MVT::i32)); + return Res; } // Custom-lower extensions from mask vectors by using a vselect either with 1 @@ -6754,6 +7716,32 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, return Result; } +// Given a scalable vector type and an index into it, returns the type for the +// smallest subvector that the index fits in. This can be used to reduce LMUL +// for operations like vslidedown. +// +// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. +static std::optional<MVT> +getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(VecVT.isScalableVector()); + const unsigned EltSize = VecVT.getScalarSizeInBits(); + const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); + const unsigned MinVLMAX = VectorBitsMin / EltSize; + MVT SmallerVT; + if (MaxIdx < MinVLMAX) + SmallerVT = getLMUL1VT(VecVT); + else if (MaxIdx < MinVLMAX * 2) + SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT(); + else if (MaxIdx < MinVLMAX * 4) + SmallerVT = getLMUL1VT(VecVT) + .getDoubleNumVectorElementsVT() + .getDoubleNumVectorElementsVT(); + if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT)) + return std::nullopt; + return SmallerVT; +} + // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the // first position of a vector, and that vector is slid up to the insert index. // By limiting the active vector length to index+1 and merging with the @@ -6784,6 +7772,43 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); } + // If we know the index we're going to insert at, we can shrink Vec so that + // we're performing the scalar inserts and slideup on a smaller LMUL. + MVT OrigContainerVT = ContainerVT; + SDValue OrigVec = Vec; + SDValue AlignedIdx; + if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) { + const unsigned OrigIdx = IdxC->getZExtValue(); + // Do we know an upper bound on LMUL? + if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx, + DL, DAG, Subtarget)) { + ContainerVT = *ShrunkVT; + AlignedIdx = DAG.getVectorIdxConstant(0, DL); + } + + // If we're compiling for an exact VLEN value, we can always perform + // the insert in m1 as we can determine the register corresponding to + // the index in the register group. 
+ const unsigned MinVLen = Subtarget.getRealMinVLen(); + const unsigned MaxVLen = Subtarget.getRealMaxVLen(); + const MVT M1VT = getLMUL1VT(ContainerVT); + if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) { + EVT ElemVT = VecVT.getVectorElementType(); + unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); + unsigned RemIdx = OrigIdx % ElemsPerVReg; + unsigned SubRegIdx = OrigIdx / ElemsPerVReg; + unsigned ExtractIdx = + SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); + AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL); + Idx = DAG.getVectorIdxConstant(RemIdx, DL); + ContainerVT = M1VT; + } + + if (AlignedIdx) + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, + AlignedIdx); + } + MVT XLenVT = Subtarget.getXLenVT(); bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; @@ -6807,7 +7832,13 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, unsigned Opc = VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; if (isNullConstant(Idx)) { + if (!VecVT.isFloatingPoint()) + Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val); Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); + + if (AlignedIdx) + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, + Vec, AlignedIdx); if (!VecVT.isFixedLengthVector()) return Vec; return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); @@ -6840,6 +7871,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, // Bitcast back to the right container type. ValInVec = DAG.getBitcast(ContainerVT, ValInVec); + if (AlignedIdx) + ValInVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, + ValInVec, AlignedIdx); if (!VecVT.isFixedLengthVector()) return ValInVec; return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); @@ -6870,6 +7905,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, Policy = RISCVII::TAIL_AGNOSTIC; SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, Idx, Mask, InsertVL, Policy); + + if (AlignedIdx) + Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, + Slideup, AlignedIdx); if (!VecVT.isFixedLengthVector()) return Slideup; return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); @@ -6899,8 +7938,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); SDValue Vfirst = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); - return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT), - ISD::SETEQ); + SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst, + DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); + return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); } if (VecVT.isFixedLengthVector()) { unsigned NumElts = VecVT.getVectorNumElements(); @@ -6909,7 +7949,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, unsigned WidenVecLen; SDValue ExtractElementIdx; SDValue ExtractBitIdx; - unsigned MaxEEW = Subtarget.getELEN(); + unsigned MaxEEW = Subtarget.getELen(); MVT LargestEltVT = MVT::getIntegerVT( std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); if (NumElts <= LargestEltVT.getSizeInBits()) { @@ -6938,8 +7978,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, // Extract the bit from GPR. 
SDValue ShiftRight = DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); - return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, - DAG.getConstant(1, DL, XLenVT)); + SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, + DAG.getConstant(1, DL, XLenVT)); + return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); } } // Otherwise, promote to an i8 vector and extract from that. @@ -6955,6 +7996,61 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); } + // If we're compiling for an exact VLEN value and we have a known + // constant index, we can always perform the extract in m1 (or + // smaller) as we can determine the register corresponding to + // the index in the register group. + const unsigned MinVLen = Subtarget.getRealMinVLen(); + const unsigned MaxVLen = Subtarget.getRealMaxVLen(); + if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx); + IdxC && MinVLen == MaxVLen && + VecVT.getSizeInBits().getKnownMinValue() > MinVLen) { + MVT M1VT = getLMUL1VT(ContainerVT); + unsigned OrigIdx = IdxC->getZExtValue(); + EVT ElemVT = VecVT.getVectorElementType(); + unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); + unsigned RemIdx = OrigIdx % ElemsPerVReg; + unsigned SubRegIdx = OrigIdx / ElemsPerVReg; + unsigned ExtractIdx = + SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, + DAG.getVectorIdxConstant(ExtractIdx, DL)); + Idx = DAG.getVectorIdxConstant(RemIdx, DL); + ContainerVT = M1VT; + } + + // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which + // contains our index. + std::optional<uint64_t> MaxIdx; + if (VecVT.isFixedLengthVector()) + MaxIdx = VecVT.getVectorNumElements() - 1; + if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) + MaxIdx = IdxC->getZExtValue(); + if (MaxIdx) { + if (auto SmallerVT = + getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) { + ContainerVT = *SmallerVT; + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, + DAG.getConstant(0, DL, XLenVT)); + } + } + + // If after narrowing, the required slide is still greater than LMUL2, + // fallback to generic expansion and go through the stack. This is done + // for a subtle reason: extracting *all* elements out of a vector is + // widely expected to be linear in vector size, but because vslidedown + // is linear in LMUL, performing N extracts using vslidedown becomes + // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack + // seems to have the same problem (the store is linear in LMUL), but the + // generic expansion *memoizes* the store, and thus for many extracts of + // the same vector we end up with one store and a bunch of loads. + // TODO: We don't have the same code for insert_vector_elt because we + // have BUILD_VECTOR and handle the degenerate case there. Should we + // consider adding an inverse BUILD_VECTOR node? + MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT(); + if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector()) + return SDValue(); + // If the index is 0, the vector is already in the right position. if (!isNullConstant(Idx)) { // Use a VL of 1 to avoid processing more elements than we need. 
@@ -7062,16 +8158,8 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, // Optimize for constant AVL if (isa<ConstantSDNode>(AVL)) { - unsigned EltSize = VT.getScalarSizeInBits(); - unsigned MinSize = VT.getSizeInBits().getKnownMinValue(); - - unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); - unsigned MaxVLMAX = - RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); - - unsigned VectorBitsMin = Subtarget.getRealMinVLen(); - unsigned MinVLMAX = - RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); + const auto [MinVLMAX, MaxVLMAX] = + RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget); uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue(); if (AVLInt <= MinVLMAX) { @@ -7182,7 +8270,7 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, // Determine the VF that corresponds to LMUL 1 for ElementWidth. unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth; // We don't support VF==1 with ELEN==32. - unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN(); + unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen(); unsigned VF = N->getConstantOperandVal(2); assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) && @@ -7202,7 +8290,39 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); + SDValue Res = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); +} + +static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG, + SmallVector<SDValue> &Ops) { + SDLoc DL(Op); + + const RISCVSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); + for (const SDValue &V : Op->op_values()) { + EVT ValType = V.getValueType(); + if (ValType.isScalableVector() && ValType.isFloatingPoint()) { + MVT InterimIVT = + MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()), + ValType.getVectorElementCount()); + Ops.push_back(DAG.getBitcast(InterimIVT, V)); + } else if (ValType.isFixedLengthVector()) { + MVT OpContainerVT = getContainerForFixedLengthVector( + DAG, V.getSimpleValueType(), Subtarget); + Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget)); + } else + Ops.push_back(V); + } +} + +// LMUL * VLEN should be greater than or equal to EGS * SEW +static inline bool isValidEGW(int EGS, EVT VT, + const RISCVSubtarget &Subtarget) { + return (Subtarget.getRealMinVLen() * + VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= + EGS * VT.getScalarSizeInBits(); } SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, @@ -7238,12 +8358,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; } + if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { + SDValue NewOp = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); + } + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); } case Intrinsic::riscv_sm4ks: case Intrinsic::riscv_sm4ed: { unsigned Opc = IntNo == Intrinsic::riscv_sm4ks ? 
RISCVISD::SM4KS : RISCVISD::SM4ED; + + if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); + SDValue Res = + DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); + } + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } @@ -7254,20 +8392,43 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); } case Intrinsic::riscv_clmul: + if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); + SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); + } return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::riscv_clmulh: - return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1), - Op.getOperand(2)); - case Intrinsic::riscv_clmulr: - return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1), - Op.getOperand(2)); + case Intrinsic::riscv_clmulr: { + unsigned Opc = + IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR; + if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); + NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, + DAG.getConstant(32, DL, MVT::i64)); + NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, + DAG.getConstant(32, DL, MVT::i64)); + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); + Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, + DAG.getConstant(32, DL, MVT::i64)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); + } + + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); + } case Intrinsic::experimental_get_vector_length: return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); - case Intrinsic::riscv_vmv_x_s: - assert(Op.getValueType() == XLenVT && "Unexpected VT!"); - return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), - Op.getOperand(1)); + case Intrinsic::riscv_vmv_x_s: { + SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); + } case Intrinsic::riscv_vfmv_f_s: return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), Op.getOperand(1), DAG.getConstant(0, DL, XLenVT)); @@ -7325,6 +8486,86 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, Vec, VL); } + // EGS * EEW >= 128 bits + case Intrinsic::riscv_vaesdf_vv: + case Intrinsic::riscv_vaesdf_vs: + case Intrinsic::riscv_vaesdm_vv: + case Intrinsic::riscv_vaesdm_vs: + case Intrinsic::riscv_vaesef_vv: + case Intrinsic::riscv_vaesef_vs: + case Intrinsic::riscv_vaesem_vv: + case Intrinsic::riscv_vaesem_vs: + case Intrinsic::riscv_vaeskf1: + case Intrinsic::riscv_vaeskf2: + case Intrinsic::riscv_vaesz_vs: + case Intrinsic::riscv_vsm4k: + case Intrinsic::riscv_vsm4r_vv: + case Intrinsic::riscv_vsm4r_vs: { + if 
(!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || + !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || + !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) + report_fatal_error("EGW should be greater than or equal to 4 * SEW."); + return Op; + } + // EGS * EEW >= 256 bits + case Intrinsic::riscv_vsm3c: + case Intrinsic::riscv_vsm3me: { + if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) || + !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget)) + report_fatal_error("EGW should be greater than or equal to 8 * SEW."); + return Op; + } + // zvknha(SEW=32)/zvknhb(SEW=[32|64]) + case Intrinsic::riscv_vsha2ch: + case Intrinsic::riscv_vsha2cl: + case Intrinsic::riscv_vsha2ms: { + if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 && + !Subtarget.hasStdExtZvknhb()) + report_fatal_error("SEW=64 needs Zvknhb to be enabled."); + if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || + !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || + !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) + report_fatal_error("EGW should be greater than or equal to 4 * SEW."); + return Op; + } + case Intrinsic::riscv_sf_vc_v_x: + case Intrinsic::riscv_sf_vc_v_i: + case Intrinsic::riscv_sf_vc_v_xv: + case Intrinsic::riscv_sf_vc_v_iv: + case Intrinsic::riscv_sf_vc_v_vv: + case Intrinsic::riscv_sf_vc_v_fv: + case Intrinsic::riscv_sf_vc_v_xvv: + case Intrinsic::riscv_sf_vc_v_ivv: + case Intrinsic::riscv_sf_vc_v_vvv: + case Intrinsic::riscv_sf_vc_v_fvv: + case Intrinsic::riscv_sf_vc_v_xvw: + case Intrinsic::riscv_sf_vc_v_ivw: + case Intrinsic::riscv_sf_vc_v_vvw: + case Intrinsic::riscv_sf_vc_v_fvw: { + MVT VT = Op.getSimpleValueType(); + + SmallVector<SDValue> Ops; + getVCIXOperands(Op, DAG, Ops); + + MVT RetVT = VT; + if (VT.isFixedLengthVector()) + RetVT = getContainerForFixedLengthVector(VT); + else if (VT.isFloatingPoint()) + RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), + VT.getVectorElementCount()); + + SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops); + + if (VT.isFixedLengthVector()) + NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget); + else if (VT.isFloatingPoint()) + NewNode = DAG.getBitcast(VT, NewNode); + + if (Op == NewNode) + break; + + return NewNode; + } } return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); @@ -7425,7 +8666,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, MVT VT = Op->getSimpleValueType(0); MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); + SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, + Subtarget); SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); auto *Load = cast<MemIntrinsicSDNode>(Op); SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); @@ -7445,6 +8687,49 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Results.push_back(Result.getValue(NF)); return DAG.getMergeValues(Results, DL); } + case Intrinsic::riscv_sf_vc_v_x_se: + case Intrinsic::riscv_sf_vc_v_i_se: + case Intrinsic::riscv_sf_vc_v_xv_se: + case Intrinsic::riscv_sf_vc_v_iv_se: + case Intrinsic::riscv_sf_vc_v_vv_se: + case Intrinsic::riscv_sf_vc_v_fv_se: + case Intrinsic::riscv_sf_vc_v_xvv_se: + case Intrinsic::riscv_sf_vc_v_ivv_se: + case Intrinsic::riscv_sf_vc_v_vvv_se: + case Intrinsic::riscv_sf_vc_v_fvv_se: + case Intrinsic::riscv_sf_vc_v_xvw_se: + case Intrinsic::riscv_sf_vc_v_ivw_se: + case 
Intrinsic::riscv_sf_vc_v_vvw_se: + case Intrinsic::riscv_sf_vc_v_fvw_se: { + MVT VT = Op.getSimpleValueType(); + SDLoc DL(Op); + SmallVector<SDValue> Ops; + getVCIXOperands(Op, DAG, Ops); + + MVT RetVT = VT; + if (VT.isFixedLengthVector()) + RetVT = getContainerForFixedLengthVector(VT); + else if (VT.isFloatingPoint()) + RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()), + RetVT.getVectorElementCount()); + + SDVTList VTs = DAG.getVTList({RetVT, MVT::Other}); + SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops); + + if (VT.isFixedLengthVector()) { + SDValue FixedVector = + convertFromScalableVector(VT, NewNode, DAG, Subtarget); + NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL); + } else if (VT.isFloatingPoint()) { + SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0)); + NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL); + } + + if (Op == NewNode) + break; + + return NewNode; + } } return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); @@ -7517,7 +8802,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, MVT VT = Op->getOperand(2).getSimpleValueType(); MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); + SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, + Subtarget); SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); SDValue Ptr = Op->getOperand(NF + 2); @@ -7532,6 +8818,73 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); } + case Intrinsic::riscv_sf_vc_x_se_e8mf8: + case Intrinsic::riscv_sf_vc_x_se_e8mf4: + case Intrinsic::riscv_sf_vc_x_se_e8mf2: + case Intrinsic::riscv_sf_vc_x_se_e8m1: + case Intrinsic::riscv_sf_vc_x_se_e8m2: + case Intrinsic::riscv_sf_vc_x_se_e8m4: + case Intrinsic::riscv_sf_vc_x_se_e8m8: + case Intrinsic::riscv_sf_vc_x_se_e16mf4: + case Intrinsic::riscv_sf_vc_x_se_e16mf2: + case Intrinsic::riscv_sf_vc_x_se_e16m1: + case Intrinsic::riscv_sf_vc_x_se_e16m2: + case Intrinsic::riscv_sf_vc_x_se_e16m4: + case Intrinsic::riscv_sf_vc_x_se_e16m8: + case Intrinsic::riscv_sf_vc_x_se_e32mf2: + case Intrinsic::riscv_sf_vc_x_se_e32m1: + case Intrinsic::riscv_sf_vc_x_se_e32m2: + case Intrinsic::riscv_sf_vc_x_se_e32m4: + case Intrinsic::riscv_sf_vc_x_se_e32m8: + case Intrinsic::riscv_sf_vc_x_se_e64m1: + case Intrinsic::riscv_sf_vc_x_se_e64m2: + case Intrinsic::riscv_sf_vc_x_se_e64m4: + case Intrinsic::riscv_sf_vc_x_se_e64m8: + case Intrinsic::riscv_sf_vc_i_se_e8mf8: + case Intrinsic::riscv_sf_vc_i_se_e8mf4: + case Intrinsic::riscv_sf_vc_i_se_e8mf2: + case Intrinsic::riscv_sf_vc_i_se_e8m1: + case Intrinsic::riscv_sf_vc_i_se_e8m2: + case Intrinsic::riscv_sf_vc_i_se_e8m4: + case Intrinsic::riscv_sf_vc_i_se_e8m8: + case Intrinsic::riscv_sf_vc_i_se_e16mf4: + case Intrinsic::riscv_sf_vc_i_se_e16mf2: + case Intrinsic::riscv_sf_vc_i_se_e16m1: + case Intrinsic::riscv_sf_vc_i_se_e16m2: + case Intrinsic::riscv_sf_vc_i_se_e16m4: + case Intrinsic::riscv_sf_vc_i_se_e16m8: + case Intrinsic::riscv_sf_vc_i_se_e32mf2: + case Intrinsic::riscv_sf_vc_i_se_e32m1: + case Intrinsic::riscv_sf_vc_i_se_e32m2: + case Intrinsic::riscv_sf_vc_i_se_e32m4: + case Intrinsic::riscv_sf_vc_i_se_e32m8: + case Intrinsic::riscv_sf_vc_i_se_e64m1: + case Intrinsic::riscv_sf_vc_i_se_e64m2: + case Intrinsic::riscv_sf_vc_i_se_e64m4: + case Intrinsic::riscv_sf_vc_i_se_e64m8: + case 
Intrinsic::riscv_sf_vc_xv_se: + case Intrinsic::riscv_sf_vc_iv_se: + case Intrinsic::riscv_sf_vc_vv_se: + case Intrinsic::riscv_sf_vc_fv_se: + case Intrinsic::riscv_sf_vc_xvv_se: + case Intrinsic::riscv_sf_vc_ivv_se: + case Intrinsic::riscv_sf_vc_vvv_se: + case Intrinsic::riscv_sf_vc_fvv_se: + case Intrinsic::riscv_sf_vc_xvw_se: + case Intrinsic::riscv_sf_vc_ivw_se: + case Intrinsic::riscv_sf_vc_vvw_se: + case Intrinsic::riscv_sf_vc_fvw_se: { + SmallVector<SDValue> Ops; + getVCIXOperands(Op, DAG, Ops); + + SDValue NewNode = + DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops); + + if (Op == NewNode) + break; + + return NewNode; + } } return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); @@ -7541,23 +8894,40 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) { switch (ISDOpcode) { default: llvm_unreachable("Unhandled reduction"); + case ISD::VP_REDUCE_ADD: case ISD::VECREDUCE_ADD: return RISCVISD::VECREDUCE_ADD_VL; + case ISD::VP_REDUCE_UMAX: case ISD::VECREDUCE_UMAX: return RISCVISD::VECREDUCE_UMAX_VL; + case ISD::VP_REDUCE_SMAX: case ISD::VECREDUCE_SMAX: return RISCVISD::VECREDUCE_SMAX_VL; + case ISD::VP_REDUCE_UMIN: case ISD::VECREDUCE_UMIN: return RISCVISD::VECREDUCE_UMIN_VL; + case ISD::VP_REDUCE_SMIN: case ISD::VECREDUCE_SMIN: return RISCVISD::VECREDUCE_SMIN_VL; + case ISD::VP_REDUCE_AND: case ISD::VECREDUCE_AND: return RISCVISD::VECREDUCE_AND_VL; + case ISD::VP_REDUCE_OR: case ISD::VECREDUCE_OR: return RISCVISD::VECREDUCE_OR_VL; + case ISD::VP_REDUCE_XOR: case ISD::VECREDUCE_XOR: return RISCVISD::VECREDUCE_XOR_VL; + case ISD::VP_REDUCE_FADD: + return RISCVISD::VECREDUCE_FADD_VL; + case ISD::VP_REDUCE_SEQ_FADD: + return RISCVISD::VECREDUCE_SEQ_FADD_VL; + case ISD::VP_REDUCE_FMAX: + return RISCVISD::VECREDUCE_FMAX_VL; + case ISD::VP_REDUCE_FMIN: + return RISCVISD::VECREDUCE_FMIN_VL; } + } SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, @@ -7575,8 +8945,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, "Unexpected reduction lowering"); MVT XLenVT = Subtarget.getXLenVT(); - assert(Op.getValueType() == XLenVT && - "Expected reduction output to be legalized to XLenVT"); MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { @@ -7630,6 +8998,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, } SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); + SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC); if (!IsVP) return SetCC; @@ -7640,7 +9009,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, // 0 for an inactive vector, and so we've already received the neutral value: // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we // can simply include the start value. 
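To make the neutral-value argument in the comment above concrete, here is a minimal scalar model of the vp.reduce.or path: a popcount over the active lanes, compared against zero, then folded with the start value by a plain scalar OR. The helper name and test values are illustrative assumptions, not code from the patch.

#include <cassert>

// Rough scalar stand-in for the i1 VP reduction lowering: vcpop of the active
// lanes, setcc against zero, then the start value is combined with a scalar OR.
static bool vpReduceOrModel(const bool *V, const bool *Mask, unsigned EVL,
                            bool Start) {
  unsigned CPop = 0;
  for (unsigned I = 0; I < EVL; ++I) // lanes at or past EVL contribute nothing
    CPop += (Mask[I] && V[I]) ? 1 : 0;
  bool SetCC = (CPop != 0);          // EVL == 0 yields the neutral value 0
  return SetCC | Start;              // so including the start value is safe
}

int main() {
  bool V[4] = {false, true, false, false};
  bool M[4] = {true, true, true, true};
  assert(vpReduceOrModel(V, M, 4, false) == true);
  assert(vpReduceOrModel(V, M, 0, true) == true);   // no active lanes: start value
  assert(vpReduceOrModel(V, M, 0, false) == false);
}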
- return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0)); + return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0)); } static bool isNonZeroAVL(SDValue AVL) { @@ -7716,9 +9085,19 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - SDValue NeutralElem = - DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); - return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec, + SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); + switch (BaseOpc) { + case ISD::AND: + case ISD::OR: + case ISD::UMAX: + case ISD::UMIN: + case ISD::SMAX: + case ISD::SMIN: + MVT XLenVT = Subtarget.getXLenVT(); + StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec, + DAG.getConstant(0, DL, XLenVT)); + } + return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec, Mask, VL, DL, DAG, Subtarget); } @@ -7726,11 +9105,11 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, // the vector SDValue and the scalar SDValue required to lower this to a // RISCVISD node. static std::tuple<unsigned, SDValue, SDValue> -getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { +getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, + const RISCVSubtarget &Subtarget) { SDLoc DL(Op); auto Flags = Op->getFlags(); unsigned Opcode = Op.getOpcode(); - unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode); switch (Opcode) { default: llvm_unreachable("Unhandled reduction"); @@ -7744,11 +9123,16 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), Op.getOperand(0)); case ISD::VECREDUCE_FMIN: - return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); - case ISD::VECREDUCE_FMAX: - return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); + case ISD::VECREDUCE_FMAX: { + MVT XLenVT = Subtarget.getXLenVT(); + SDValue Front = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0), + DAG.getConstant(0, DL, XLenVT)); + unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN) + ? 
RISCVISD::VECREDUCE_FMIN_VL + : RISCVISD::VECREDUCE_FMAX_VL; + return std::make_tuple(RVVOpc, Op.getOperand(0), Front); + } } } @@ -7760,7 +9144,7 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, unsigned RVVOpcode; SDValue VectorVal, ScalarVal; std::tie(RVVOpcode, VectorVal, ScalarVal) = - getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); + getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget); MVT VecVT = VectorVal.getSimpleValueType(); MVT ContainerVT = VecVT; @@ -7774,37 +9158,6 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, VectorVal, Mask, VL, DL, DAG, Subtarget); } -static unsigned getRVVVPReductionOp(unsigned ISDOpcode) { - switch (ISDOpcode) { - default: - llvm_unreachable("Unhandled reduction"); - case ISD::VP_REDUCE_ADD: - return RISCVISD::VECREDUCE_ADD_VL; - case ISD::VP_REDUCE_UMAX: - return RISCVISD::VECREDUCE_UMAX_VL; - case ISD::VP_REDUCE_SMAX: - return RISCVISD::VECREDUCE_SMAX_VL; - case ISD::VP_REDUCE_UMIN: - return RISCVISD::VECREDUCE_UMIN_VL; - case ISD::VP_REDUCE_SMIN: - return RISCVISD::VECREDUCE_SMIN_VL; - case ISD::VP_REDUCE_AND: - return RISCVISD::VECREDUCE_AND_VL; - case ISD::VP_REDUCE_OR: - return RISCVISD::VECREDUCE_OR_VL; - case ISD::VP_REDUCE_XOR: - return RISCVISD::VECREDUCE_XOR_VL; - case ISD::VP_REDUCE_FADD: - return RISCVISD::VECREDUCE_FADD_VL; - case ISD::VP_REDUCE_SEQ_FADD: - return RISCVISD::VECREDUCE_SEQ_FADD_VL; - case ISD::VP_REDUCE_FMAX: - return RISCVISD::VECREDUCE_FMAX_VL; - case ISD::VP_REDUCE_FMIN: - return RISCVISD::VECREDUCE_FMIN_VL; - } -} - SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -7817,7 +9170,7 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, return SDValue(); MVT VecVT = VecEVT.getSimpleVT(); - unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode()); + unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); if (VecVT.isFixedLengthVector()) { auto ContainerVT = getContainerForFixedLengthVector(VecVT); @@ -7892,19 +9245,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, ContainerVT = getContainerForFixedLengthVector(VecVT); Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); } - SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, - DAG.getUNDEF(ContainerVT), SubVec, - DAG.getConstant(0, DL, XLenVT)); + if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { + SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SubVec, + DAG.getConstant(0, DL, XLenVT)); SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); return DAG.getBitcast(Op.getValueType(), SubVec); } + + SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), SubVec, + DAG.getConstant(0, DL, XLenVT)); SDValue Mask = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; // Set the vector length to only the number of elements we care about. Note // that for slideup this includes the offset. unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); - SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget); + SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget); // Use tail agnostic policy if we're inserting over Vec's tail. unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; @@ -8051,26 +9409,38 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, } } + // With an index of 0 this is a cast-like subvector, which can be performed + // with subregister operations. 
+ if (OrigIdx == 0) + return Op; + // If the subvector vector is a fixed-length type, we cannot use subregister // manipulation to simplify the codegen; we don't know which register of a // LMUL group contains the specific subvector as we only know the minimum // register size. Therefore we must slide the vector group down the full // amount. if (SubVecVT.isFixedLengthVector()) { - // With an index of 0 this is a cast-like subvector, which can be performed - // with subregister operations. - if (OrigIdx == 0) - return Op; MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); } + + // Shrink down Vec so we're performing the slidedown on a smaller LMUL. + unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; + if (auto ShrunkVT = + getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) { + ContainerVT = *ShrunkVT; + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, + DAG.getVectorIdxConstant(0, DL)); + } + SDValue Mask = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; // Set the vector length to only the number of elements we care about. This // avoids sliding down elements we're going to discard straight away. - SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget); + SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG, + Subtarget); SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); SDValue Slidedown = getVSlidedown(DAG, Subtarget, DL, ContainerVT, @@ -8092,17 +9462,18 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, if (RemIdx == 0) return Op; - // Else we must shift our vector register directly to extract the subvector. - // Do this using VSLIDEDOWN. + // Else SubVecVT is a fractional LMUL and may need to be slid down. + assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second); // If the vector type is an LMUL-group type, extract a subvector equal to the - // nearest full vector register type. This should resolve to a EXTRACT_SUBREG - // instruction. + // nearest full vector register type. MVT InterSubVT = VecVT; if (VecVT.bitsGT(getLMUL1VT(VecVT))) { + // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and + // we should have successfully decomposed the extract into a subregister. 
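A rough scalar picture of the slidedown path used earlier in this extract-subvector lowering, with VL limited to the subvector length; plain arrays stand in for RVV register groups, and the sizes and indices below are illustrative only.

#include <array>
#include <cassert>

// vslidedown-based extract: with VL = SubN, only the lanes we keep are moved.
template <unsigned SubN, unsigned N>
std::array<int, SubN> extractSubvectorModel(const std::array<int, N> &Vec,
                                            unsigned OrigIdx) {
  std::array<int, SubN> Out{};
  for (unsigned I = 0; I < SubN; ++I)
    Out[I] = Vec[OrigIdx + I];
  return Out;
}

int main() {
  std::array<int, 8> V{0, 1, 2, 3, 4, 5, 6, 7};
  auto Sub = extractSubvectorModel<2>(V, 4);  // OrigIdx = 4: slide down by 4
  assert(Sub[0] == 4 && Sub[1] == 5);
  auto Cast = extractSubvectorModel<2>(V, 0); // OrigIdx = 0: a no-op "cast"
  assert(Cast[0] == 0 && Cast[1] == 1);
}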
+ assert(SubRegIdx != RISCV::NoSubRegister); InterSubVT = getLMUL1VT(VecVT); - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, - DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); + Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); } // Slide this vector register down by the desired number of elements in order @@ -8200,7 +9571,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, // We can deinterleave through vnsrl.wi if the element type is smaller than // ELEN - if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) { + if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { SDValue Even = getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG); SDValue Odd = @@ -8269,7 +9640,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, // If the element type is smaller than ELEN, then we can interleave with // vwaddu.vv and vwmaccu.vx - if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) { + if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL, DAG, Subtarget); } else { @@ -8476,7 +9847,20 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, MVT XLenVT = Subtarget.getXLenVT(); MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); + // If we know the exact VLEN and our fixed length vector completely fills + // the container, use a whole register load instead. + const auto [MinVLMAX, MaxVLMAX] = + RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); + if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && + getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { + SDValue NewLoad = + DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), + Load->getMemOperand()); + SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); + return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); + } + + SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); bool IsMaskOp = VT.getVectorElementType() == MVT::i1; SDValue IntID = DAG.getTargetConstant( @@ -8520,11 +9904,22 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, MVT ContainerVT = getContainerForFixedLengthVector(VT); - SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); - SDValue NewValue = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); + + // If we know the exact VLEN and our fixed length vector completely fills + // the container, use a whole register store instead. + const auto [MinVLMAX, MaxVLMAX] = + RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); + if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && + getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) + return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), + Store->getMemOperand()); + + SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, + Subtarget); + bool IsMaskOp = VT.getVectorElementType() == MVT::i1; SDValue IntID = DAG.getTargetConstant( IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); @@ -8902,9 +10297,10 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, // * The EVL operand is promoted from i32 to i64 on RV64. // * Fixed-length vectors are converted to their scalable-vector container // types. 
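Looping back to the interleave lowering above: the vwaddu.vv / vwmaccu.vx sequence works because a + b * 2^SEW packs the pair (a, b) into one element of twice the width. A quick scalar check of that identity for SEW = 8; the multiplier and sample bytes are illustrative.

#include <cassert>
#include <cstdint>

int main() {
  uint8_t A = 0x12, B = 0x34;
  uint16_t WAdd = (uint16_t)A + (uint16_t)B;      // vwaddu.vv: zero-extended add
  uint16_t Packed = WAdd + (uint16_t)(0xFFu * B); // vwmaccu.vx with scalar 2^8 - 1
  assert(Packed == (uint16_t)(A | (B << 8)));     // low half A, high half B: interleaved
}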
-SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, - unsigned RISCVISDOpc, - bool HasMergeOp) const { +SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { + unsigned RISCVISDOpc = getRISCVVLOp(Op); + bool HasMergeOp = hasMergeOp(RISCVISDOpc); + SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SmallVector<SDValue, 4> Ops; @@ -9053,13 +10449,14 @@ SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, } // Lower Floating-Point/Integer Type-Convert VP SDNodes -SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, - unsigned RISCVISDOpc) const { +SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Src = Op.getOperand(0); SDValue Mask = Op.getOperand(1); SDValue VL = Op.getOperand(2); + unsigned RISCVISDOpc = getRISCVVLOp(Op); MVT DstVT = Op.getSimpleValueType(); MVT SrcVT = Src.getSimpleValueType(); @@ -9185,12 +10582,132 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, Result, DAG, Subtarget); } -SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, - unsigned MaskOpc, - unsigned VecOpc) const { +SDValue +RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + + SDValue Op1 = Op.getOperand(0); + SDValue Mask = Op.getOperand(1); + SDValue EVL = Op.getOperand(2); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + + MVT GatherVT = ContainerVT; + MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); + // Check if we are working with mask vectors + bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; + if (IsMaskVector) { + GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); + + // Expand input operand + SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, + DAG.getUNDEF(IndicesVT), + DAG.getConstant(1, DL, XLenVT), EVL); + SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, + DAG.getUNDEF(IndicesVT), + DAG.getConstant(0, DL, XLenVT), EVL); + Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne, + SplatZero, EVL); + } + + unsigned EltSize = GatherVT.getScalarSizeInBits(); + unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue(); + unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); + unsigned MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + + unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; + // If this is SEW=8 and VLMAX is unknown or more than 256, we need + // to use vrgatherei16.vv. + // TODO: It's also possible to use vrgatherei16.vv for other types to + // decrease register width for the index calculation. + // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. + if (MaxVLMAX > 256 && EltSize == 8) { + // If this is LMUL=8, we have to split before using vrgatherei16.vv. + // Split the vector in half and reverse each half using a full register + // reverse. + // Swap the halves and concatenate them. + // Slide the concatenated result by (VLMax - VL). 
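A scalar walk-through of the split-and-swap sequence just described, as a sketch only: a std::vector stands in for the register group, and the element values and EVL below are arbitrary.

#include <algorithm>
#include <cassert>
#include <vector>

// LMUL=8/SEW=8 vp.reverse path: reverse each half, concatenate them swapped,
// then slide down by (VLMAX - EVL).
static std::vector<int> vpReverseModel(std::vector<int> V, unsigned EVL) {
  unsigned VLMax = V.size(), Half = VLMax / 2;
  std::vector<int> Lo(V.begin(), V.begin() + Half), Hi(V.begin() + Half, V.end());
  std::reverse(Lo.begin(), Lo.end());
  std::reverse(Hi.begin(), Hi.end());
  std::vector<int> Cat;                                         // concat(HiRev, LoRev)
  Cat.insert(Cat.end(), Hi.begin(), Hi.end());
  Cat.insert(Cat.end(), Lo.begin(), Lo.end());
  std::vector<int> Out(Cat.begin() + (VLMax - EVL), Cat.end()); // vslidedown
  Out.resize(VLMax);
  return Out;
}

int main() {
  std::vector<int> V{0, 1, 2, 3, 4, 5, 6, 7};
  std::vector<int> R = vpReverseModel(V, 6); // EVL = 6
  for (int I = 0; I < 6; ++I)
    assert(R[I] == 5 - I);                   // the first EVL lanes come out reversed
}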
+ if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT); + auto [Lo, Hi] = DAG.SplitVector(Op1, DL); + + SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); + SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); + + // Reassemble the low and high pieces reversed. + // NOTE: this Result is unmasked (because we do not need masks for + // shuffles). If in the future this has to change, we can use a SELECT_VL + // between Result and UNDEF using the mask originally passed to VP_REVERSE + SDValue Result = + DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev); + + // Slide off any elements from past EVL that were reversed into the low + // elements. + unsigned MinElts = GatherVT.getVectorMinNumElements(); + SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, + DAG.getConstant(MinElts, DL, XLenVT)); + SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); + + Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, + DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL); + + if (IsMaskVector) { + // Truncate Result back to a mask vector + Result = + DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, + {Result, DAG.getConstant(0, DL, GatherVT), + DAG.getCondCode(ISD::SETNE), + DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); + } + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); + } + + // Just promote the int type to i16 which will double the LMUL. + IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); + GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; + } + + SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); + SDValue VecLen = + DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); + SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, + DAG.getUNDEF(IndicesVT), VecLen, EVL); + SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID, + DAG.getUNDEF(IndicesVT), Mask, EVL); + SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB, + DAG.getUNDEF(GatherVT), Mask, EVL); + + if (IsMaskVector) { + // Truncate Result back to a mask vector + Result = DAG.getNode( + RISCVISD::SETCC_VL, DL, ContainerVT, + {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE), + DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); + } + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + +SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, + SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); if (VT.getVectorElementType() != MVT::i1) - return lowerVPOp(Op, DAG, VecOpc, true); + return lowerVPOp(Op, DAG); // It is safe to drop mask parameter as masked-off elements are undef. 
SDValue Op1 = Op->getOperand(0); @@ -9206,7 +10723,7 @@ SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, } SDLoc DL(Op); - SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL); + SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL); if (!IsFixed) return Val; return convertFromScalableVector(VT, Val, DAG, Subtarget); @@ -9366,10 +10883,7 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { IndexVT = IndexVT.changeVectorElementType(XLenVT); - SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), - VL); - Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, - TrueMask, VL); + Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); } unsigned IntID = @@ -9468,10 +10982,7 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { IndexVT = IndexVT.changeVectorElementType(XLenVT); - SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), - VL); - Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, - TrueMask, VL); + Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); } unsigned IntID = @@ -9539,6 +11050,8 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); + RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue); + SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue, DAG.getConstant(2, DL, XLenVT)); SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, @@ -9653,8 +11166,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res.getValue(1)); return; } - // In absense of Zfh, promote f16 to f32, then convert. - if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) + // For bf16, or f16 in absense of Zfh, promote [b]f16 to f32 and then + // convert. + if ((Op0.getValueType() == MVT::f16 && + !Subtarget.hasStdExtZfhOrZhinx()) || + Op0.getValueType() == MVT::bf16) Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; @@ -10281,6 +11797,136 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, } } +/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP +/// which corresponds to it. +static unsigned getVecReduceOpcode(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Unhandled binary to transfrom reduction"); + case ISD::ADD: + return ISD::VECREDUCE_ADD; + case ISD::UMAX: + return ISD::VECREDUCE_UMAX; + case ISD::SMAX: + return ISD::VECREDUCE_SMAX; + case ISD::UMIN: + return ISD::VECREDUCE_UMIN; + case ISD::SMIN: + return ISD::VECREDUCE_SMIN; + case ISD::AND: + return ISD::VECREDUCE_AND; + case ISD::OR: + return ISD::VECREDUCE_OR; + case ISD::XOR: + return ISD::VECREDUCE_XOR; + case ISD::FADD: + // Note: This is the associative form of the generic reduction opcode. + return ISD::VECREDUCE_FADD; + } +} + +/// Perform two related transforms whose purpose is to incrementally recognize +/// an explode_vector followed by scalar reduction as a vector reduction node. +/// This exists to recover from a deficiency in SLP which can't handle +/// forests with multiple roots sharing common nodes. 
In some cases, one +/// of the trees will be vectorized, and the other will remain (unprofitably) +/// scalarized. +static SDValue +combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + + // This transforms need to run before all integer types have been legalized + // to i64 (so that the vector element type matches the add type), and while + // it's safe to introduce odd sized vector types. + if (DAG.NewNodesMustHaveLegalTypes) + return SDValue(); + + // Without V, this transform isn't useful. We could form the (illegal) + // operations and let them be scalarized again, but there's really no point. + if (!Subtarget.hasVInstructions()) + return SDValue(); + + const SDLoc DL(N); + const EVT VT = N->getValueType(0); + const unsigned Opc = N->getOpcode(); + + // For FADD, we only handle the case with reassociation allowed. We + // could handle strict reduction order, but at the moment, there's no + // known reason to, and the complexity isn't worth it. + // TODO: Handle fminnum and fmaxnum here + if (!VT.isInteger() && + (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation())) + return SDValue(); + + const unsigned ReduceOpc = getVecReduceOpcode(Opc); + assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) && + "Inconsistent mappings"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + if (!LHS.hasOneUse() || !RHS.hasOneUse()) + return SDValue(); + + if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + std::swap(LHS, RHS); + + if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(RHS.getOperand(1))) + return SDValue(); + + uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue(); + SDValue SrcVec = RHS.getOperand(0); + EVT SrcVecVT = SrcVec.getValueType(); + assert(SrcVecVT.getVectorElementType() == VT); + if (SrcVecVT.isScalableVector()) + return SDValue(); + + if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen()) + return SDValue(); + + // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to + // reduce_op (extract_subvector [2 x VT] from V). This will form the + // root of our reduction tree. TODO: We could extend this to any two + // adjacent aligned constant indices if desired. + if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) { + uint64_t LHSIdx = + cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue(); + if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) { + EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2); + SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, + DAG.getVectorIdxConstant(0, DL)); + return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags()); + } + } + + // Match (binop (reduce (extract_subvector V, 0), + // (extract_vector_elt V, sizeof(SubVec)))) + // into a reduction of one more element from the original vector V. + if (LHS.getOpcode() != ReduceOpc) + return SDValue(); + + SDValue ReduceVec = LHS.getOperand(0); + if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && + ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) && + isNullConstant(ReduceVec.getOperand(1)) && + ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { + // For illegal types (e.g. 3xi32), most will be combined again into a + // wider (hopefully legal) type. If this is a terminal state, we are + // relying on type legalization here to produce something reasonable + // and this lowering quality could probably be improved. 
(TODO) + EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1); + SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, + DAG.getVectorIdxConstant(0, DL)); + auto Flags = ReduceVec->getFlags(); + Flags.intersectWith(N->getFlags()); + return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags); + } + + return SDValue(); +} + + // Try to fold (<bop> x, (reduction.<bop> vec, start)) static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { @@ -10453,8 +12099,23 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, if (VT.isVector()) return SDValue(); - if (!Subtarget.hasShortForwardBranchOpt() || - (Slct.getOpcode() != ISD::SELECT && + if (!Subtarget.hasShortForwardBranchOpt()) { + // (select cond, x, (and x, c)) has custom lowering with Zicond. + if ((!Subtarget.hasStdExtZicond() && + !Subtarget.hasVendorXVentanaCondOps()) || + N->getOpcode() != ISD::AND) + return SDValue(); + + // Maybe harmful when condition code has multiple use. + if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse()) + return SDValue(); + + // Maybe harmful when VT is wider than XLen. + if (VT.getSizeInBits() > Subtarget.getXLen()) + return SDValue(); + } + + if ((Slct.getOpcode() != ISD::SELECT && Slct.getOpcode() != RISCVISD::SELECT_CC) || !Slct.hasOneUse()) return SDValue(); @@ -10573,7 +12234,7 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); } -// Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)). +// Try to turn (add (xor bool, 1) -1) into (neg bool). static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -10584,9 +12245,13 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { if (!isAllOnesConstant(N1)) return SDValue(); - // Look for an (xor (setcc X, Y), 1). - if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) || - N0.getOperand(0).getOpcode() != ISD::SETCC) + // Look for (xor X, 1). + if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1))) + return SDValue(); + + // First xor input should be 0 or 1. + APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); + if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask)) return SDValue(); // Emit a negate of the setcc. @@ -10604,6 +12269,9 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, return V; if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; + if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) + return V; + // fold (add (select lhs, rhs, cc, 0, y), x) -> // (select lhs, rhs, cc, x, (add x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); @@ -10732,7 +12400,7 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, // shift amounts larger than 31 would produce poison. If we wait until // type legalization, we'll create RISCVISD::SRLW and we can't recover it // to use a BEXT instruction. - if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && + if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { SDLoc DL(N0); @@ -10759,7 +12427,7 @@ static SDValue performANDCombine(SDNode *N, // shift amounts larger than 31 would produce poison. 
If we wait until // type legalization, we'll create RISCVISD::SRLW and we can't recover it // to use a BEXT instruction. - if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && + if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { @@ -10774,6 +12442,8 @@ static SDValue performANDCombine(SDNode *N, if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; + if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) + return V; if (DCI.isAfterLegalizeDAG()) if (SDValue V = combineDeMorganOfBoolean(N, DAG)) @@ -10784,17 +12454,64 @@ static SDValue performANDCombine(SDNode *N, return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); } +// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. +// FIXME: Generalize to other binary operators with same operand. +static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, + SelectionDAG &DAG) { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); + + if (N0.getOpcode() != RISCVISD::CZERO_EQZ || + N1.getOpcode() != RISCVISD::CZERO_NEZ || + !N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + + // Should have the same condition. + SDValue Cond = N0.getOperand(1); + if (Cond != N1.getOperand(1)) + return SDValue(); + + SDValue TrueV = N0.getOperand(0); + SDValue FalseV = N1.getOperand(0); + + if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR || + TrueV.getOperand(1) != FalseV.getOperand(1) || + !isOneConstant(TrueV.getOperand(1)) || + !TrueV.hasOneUse() || !FalseV.hasOneUse()) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0), + Cond); + SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), + Cond); + SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1); + return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1)); +} + static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; + if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) + return V; if (DCI.isAfterLegalizeDAG()) if (SDValue V = combineDeMorganOfBoolean(N, DAG)) return V; + // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom. + // We may be able to pull a common operation out of the true and false value. + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG)) + return V; + if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG)) + return V; + // fold (or (select cond, 0, y), x) -> // (select cond, x, (or x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); @@ -10805,6 +12522,21 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use + // (ADDI (BSET X0, X), -1). If we wait until/ type legalization, we'll create + // RISCVISD:::SLLW and we can't recover it to use a BSET instruction. 
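The XOR pre-promotion above leans on the identity ~(-1 << x) == (1 << x) - 1, which is why (ADDI (BSET X0, X), -1) can materialize the same mask. A small stand-alone check of that identity; the loop bound simply covers all legal i32 shift amounts.

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 32; ++X) {
    uint32_t ViaXor = ~(UINT32_C(0xFFFFFFFF) << X); // (xor (shl -1, X), -1)
    uint32_t ViaBset = (UINT32_C(1) << X) - 1;      // (addi (bset zero, X), -1)
    assert(ViaXor == ViaBset);
  }
}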
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && + N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) && + N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) && + !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { + SDLoc DL(N); + SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); + SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1); + SDValue And = DAG.getNOT(DL, Shl, MVT::i64); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); + } + // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) // NOTE: Assumes ROL being legal means ROLW is legal. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -10817,7 +12549,7 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, } // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) - if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) { + if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) { auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0)); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); if (ConstN00 && CC == ISD::SETLT) { @@ -10832,32 +12564,102 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; + if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) + return V; + // fold (xor (select cond, 0, y), x) -> // (select cond, x, (xor x, y)) return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); } -// According to the property that indexed load/store instructions -// zero-extended their indices, \p narrowIndex tries to narrow the type of index -// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C < -// bits(ty). -static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { - if (N.getOpcode() != ISD::SHL || !N->hasOneUse()) +static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + if (!VT.isVector()) return SDValue(); + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue MulOper; + unsigned AddSubOpc; + + // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) + // (mul x, add (y, 1)) -> (add x, (mul x, y)) + // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) + // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) + auto IsAddSubWith1 = [&](SDValue V) -> bool { + AddSubOpc = V->getOpcode(); + if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { + SDValue Opnd = V->getOperand(1); + MulOper = V->getOperand(0); + if (AddSubOpc == ISD::SUB) + std::swap(Opnd, MulOper); + if (isOneOrOneSplat(Opnd)) + return true; + } + return false; + }; + + if (IsAddSubWith1(N0)) { + SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper); + return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal); + } + + if (IsAddSubWith1(N1)) { + SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper); + return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal); + } + + return SDValue(); +} + +/// According to the property that indexed load/store instructions zero-extend +/// their indices, try to narrow the type of index operand. 
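As a concrete instance of the bound this helper relies on: an index built as (shl (zext i8 X), 2) needs only 8 + 2 = 10 bits, so it can be computed in an i16 element rather than the original wider type. A quick scalar check; the value and shift amount are illustrative.

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xAB;                               // 8 source bits
  unsigned C = 2;                                 // shift, e.g. scaling to 4-byte elements
  uint16_t Narrow = (uint16_t)((uint16_t)X << C); // bits(X) + C = 10 <= 16
  uint64_t Wide = (uint64_t)X << C;               // what the unnarrowed index computes
  assert(Narrow == Wide);                         // no index bits are lost by narrowing
}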
+static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { + if (isIndexTypeSigned(IndexType)) + return false; + + if (!N->hasOneUse()) + return false; + + EVT VT = N.getValueType(); + SDLoc DL(N); + + // In general, what we're doing here is seeing if we can sink a truncate to + // a smaller element type into the expression tree building our index. + // TODO: We can generalize this and handle a bunch more cases if useful. + + // Narrow a buildvector to the narrowest element type. This requires less + // work and less register pressure at high LMUL, and creates smaller constants + // which may be cheaper to materialize. + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) { + KnownBits Known = DAG.computeKnownBits(N); + unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits()); + LLVMContext &C = *DAG.getContext(); + EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C); + if (ResultVT.bitsLT(VT.getVectorElementType())) { + N = DAG.getNode(ISD::TRUNCATE, DL, + VT.changeVectorElementType(ResultVT), N); + return true; + } + } + + // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). + if (N.getOpcode() != ISD::SHL) + return false; + SDValue N0 = N.getOperand(0); if (N0.getOpcode() != ISD::ZERO_EXTEND && N0.getOpcode() != RISCVISD::VZEXT_VL) - return SDValue(); + return false;; if (!N0->hasOneUse()) - return SDValue(); + return false;; APInt ShAmt; SDValue N1 = N.getOperand(1); if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) - return SDValue(); + return false;; - SDLoc DL(N); SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned SrcElen = SrcVT.getScalarSizeInBits(); @@ -10867,14 +12669,15 @@ static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { // Skip if NewElen is not narrower than the original extended type. if (NewElen >= N0.getValueType().getScalarSizeInBits()) - return SDValue(); + return false; EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); - return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); + N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); + return true; } // Replace (seteq (i64 (and X, 0xffffffff)), C1) with @@ -11949,10 +13752,18 @@ static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { VL); } -static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) return V; + if (N->getValueType(0).isScalableVector() && + N->getValueType(0).getVectorElementType() == MVT::f32 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + return SDValue(); + } + // FIXME: Ignore strict opcodes for now. 
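Looking back at the performMULCombine rewrite above, the vmadd/vnmsub folds rest on two small identities: (x + 1) * y == x * y + y and (1 - x) * y == y - x * y. A brute-force scalar check over a few values; the ranges are arbitrary.

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X = -3; X <= 3; ++X)
    for (int64_t Y = -3; Y <= 3; ++Y) {
      assert((X + 1) * Y == X * Y + Y); // the vmadd form
      assert((1 - X) * Y == Y - X * Y); // the vnmsub form
    }
}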
if (N->isTargetStrictFPOpcode()) return SDValue(); @@ -12003,7 +13814,15 @@ static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) { N->getOperand(2), Mask, VL); } -static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (N->getValueType(0).isScalableVector() && + N->getValueType(0).getVectorElementType() == MVT::f32 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + return SDValue(); + } + // FIXME: Ignore strict opcodes for now. assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode"); @@ -12036,7 +13855,15 @@ static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) { Op1, Merge, Mask, VL); } -static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (N->getValueType(0).isScalableVector() && + N->getValueType(0).getVectorElementType() == MVT::f32 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) { + return SDValue(); + } + SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Merge = N->getOperand(2); @@ -12267,12 +14094,10 @@ static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, // shift can be omitted. // Fold setlt (sra X, N), 0 -> setlt X, 0 and // setge (sra X, N), 0 -> setge X, 0 - if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) { - if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) && - LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) { - LHS = LHS.getOperand(0); - return true; - } + if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) && + LHS.getOpcode() == ISD::SRA) { + LHS = LHS.getOperand(0); + return true; } if (!ISD::isIntEqualitySetCC(CCVal)) @@ -12358,9 +14183,13 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped) { bool Commutative = true; - switch (TrueVal.getOpcode()) { + unsigned Opc = TrueVal.getOpcode(); + switch (Opc) { default: return SDValue(); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: case ISD::SUB: Commutative = false; break; @@ -12383,12 +14212,18 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDLoc DL(N); - SDValue Zero = DAG.getConstant(0, DL, VT); SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); + EVT OtherOpVT = OtherOp->getValueType(0); + SDValue IdentityOperand = + DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags()); + if (!Commutative) + IdentityOperand = DAG.getConstant(0, DL, OtherOpVT); + assert(IdentityOperand && "No identity operand!"); if (Swapped) - std::swap(OtherOp, Zero); - SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero); + std::swap(OtherOp, IdentityOperand); + SDValue NewSel = + DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand); return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); } @@ -12453,11 +14288,45 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0)); } +static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDValue Cond = N->getOperand(0); + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT CondVT = Cond.getValueType(); + + 
if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) + return SDValue(); + + // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate + // BEXTI, where C is power of 2. + if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && + (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && + isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) { + uint64_t MaskVal = LHS.getConstantOperandVal(1); + if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal)) + return DAG.getSelect(DL, VT, + DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), + False, True); + } + } + return SDValue(); +} + static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) return Folded; + if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) + return V; + if (Subtarget.hasShortForwardBranchOpt()) return SDValue(); @@ -12468,6 +14337,132 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true); } +/// If we have a build_vector where each lane is binop X, C, where C +/// is a constant (but not necessarily the same constant on all lanes), +/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..). +/// We assume that materializing a constant build vector will be no more +/// expensive that performing O(n) binops. +static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, + const RISCVTargetLowering &TLI) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + assert(!VT.isScalableVector() && "unexpected build vector"); + + if (VT.getVectorNumElements() == 1) + return SDValue(); + + const unsigned Opcode = N->op_begin()->getNode()->getOpcode(); + if (!TLI.isBinOp(Opcode)) + return SDValue(); + + if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT)) + return SDValue(); + + SmallVector<SDValue> LHSOps; + SmallVector<SDValue> RHSOps; + for (SDValue Op : N->ops()) { + if (Op.isUndef()) { + // We can't form a divide or remainder from undef. + if (!DAG.isSafeToSpeculativelyExecute(Opcode)) + return SDValue(); + + LHSOps.push_back(Op); + RHSOps.push_back(Op); + continue; + } + + // TODO: We can handle operations which have an neutral rhs value + // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track + // of profit in a more explicit manner. + if (Op.getOpcode() != Opcode || !Op.hasOneUse()) + return SDValue(); + + LHSOps.push_back(Op.getOperand(0)); + if (!isa<ConstantSDNode>(Op.getOperand(1)) && + !isa<ConstantFPSDNode>(Op.getOperand(1))) + return SDValue(); + // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may + // have different LHS and RHS types. 
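Returning to the useInversedSetcc combine earlier: flipping (setcc eq (and x, C), 0) to the ne form with swapped select arms lets a single-bit test become BEXTI feeding the czero pair. A scalar equivalence check for C = 0x1000, a power of two outside the 12-bit immediate range; the helper names and values are made up for illustration.

#include <cassert>
#include <cstdint>
#include <initializer_list>

static int64_t SelectEq(uint64_t X, uint64_t C, int64_t T, int64_t F) {
  return ((X & C) == 0) ? T : F;   // the original select
}
static int64_t SelectNeInverted(uint64_t X, int64_t T, int64_t F) {
  uint64_t Bit = (X >> 12) & 1;    // bexti X, 12 for C = 0x1000
  return Bit ? F : T;              // czero.eqz / czero.nez pick the swapped arms
}

int main() {
  for (uint64_t X : {UINT64_C(0), UINT64_C(0xFFF), UINT64_C(0x1000), UINT64_C(0x1FFF)})
    assert(SelectEq(X, 0x1000, 7, 9) == SelectNeInverted(X, 7, 9));
}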
+ if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) + return SDValue(); + RHSOps.push_back(Op.getOperand(1)); + } + + return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps), + DAG.getBuildVector(VT, DL, RHSOps)); +} + +static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, + const RISCVTargetLowering &TLI) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + SDLoc DL(N); + + EVT VT = InVec.getValueType(); + if (VT.isScalableVector()) + return SDValue(); + + if (!InVec.hasOneUse()) + return SDValue(); + + // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt + // move the insert_vector_elts into the arms of the binop. Note that + // the new RHS must be a constant. + const unsigned InVecOpcode = InVec->getOpcode(); + if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) && + InVal.hasOneUse()) { + SDValue InVecLHS = InVec->getOperand(0); + SDValue InVecRHS = InVec->getOperand(1); + SDValue InValLHS = InVal->getOperand(0); + SDValue InValRHS = InVal->getOperand(1); + + if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode())) + return SDValue(); + if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS)) + return SDValue(); + // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may + // have different LHS and RHS types. + if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType()) + return SDValue(); + SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, + InVecLHS, InValLHS, EltNo); + SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, + InVecRHS, InValRHS, EltNo); + return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS); + } + + // Given insert_vector_elt (concat_vectors ...), InVal, Elt + // move the insert_vector_elt to the source operand of the concat_vector. + if (InVec.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); + if (!IndexC) + return SDValue(); + unsigned Elt = IndexC->getZExtValue(); + + EVT ConcatVT = InVec.getOperand(0).getValueType(); + if (ConcatVT.getVectorElementType() != InVal.getValueType()) + return SDValue(); + unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); + SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL, + EltNo.getValueType()); + + unsigned ConcatOpIdx = Elt / ConcatNumElts; + SDValue ConcatOp = InVec.getOperand(ConcatOpIdx); + ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT, + ConcatOp, InVal, NewIdx); + + SmallVector<SDValue> ConcatOps; + ConcatOps.append(InVec->op_begin(), InVec->op_end()); + ConcatOps[ConcatOpIdx] = ConcatOp; + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); +} + // If we're concatenating a series of vector loads like // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 
// Then we can turn this into a strided load by widening the vector elements @@ -12492,13 +14487,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT BaseLdVT = BaseLd->getValueType(0); - SDValue BasePtr = BaseLd->getBasePtr(); // Go through the loads and check that they're strided - SDValue CurPtr = BasePtr; - SDValue Stride; + SmallVector<LoadSDNode *> Lds; + Lds.push_back(BaseLd); Align Align = BaseLd->getAlign(); - for (SDValue Op : N->ops().drop_front()) { auto *Ld = dyn_cast<LoadSDNode>(Op); if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || @@ -12506,42 +14499,46 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, Ld->getValueType(0) != BaseLdVT) return SDValue(); - SDValue Ptr = Ld->getBasePtr(); - // Check that each load's pointer is (add CurPtr, Stride) - if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr) - return SDValue(); - SDValue Offset = Ptr.getOperand(1); - if (!Stride) - Stride = Offset; - else if (Offset != Stride) - return SDValue(); + Lds.push_back(Ld); // The common alignment is the most restrictive (smallest) of all the loads Align = std::min(Align, Ld->getAlign()); - - CurPtr = Ptr; } - // A special case is if the stride is exactly the width of one of the loads, - // in which case it's contiguous and can be combined into a regular vle - // without changing the element size - if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); - ConstStride && - ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) { - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), - VT.getStoreSize(), Align); - // Can't do the combine if the load isn't naturally aligned with the element - // type - if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), - DAG.getDataLayout(), VT, *MMO)) + using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; + auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, + LoadSDNode *Ld2) -> std::optional<PtrDiff> { + // If the load ptrs can be decomposed into a common (Base + Index) with a + // common constant stride, then return the constant stride. + BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG); + BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG); + if (BIO1.equalBaseIndex(BIO2, DAG)) + return {{BIO2.getOffset() - BIO1.getOffset(), false}}; + + // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) + SDValue P1 = Ld1->getBasePtr(); + SDValue P2 = Ld2->getBasePtr(); + if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) + return {{P2.getOperand(1), false}}; + if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2) + return {{P1.getOperand(1), true}}; + + return std::nullopt; + }; + + // Get the distance between the first and second loads + auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); + if (!BaseDiff) + return SDValue(); + + // Check all the loads are the same distance apart + for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) + if (GetPtrDiff(*It, *std::next(It)) != BaseDiff) return SDValue(); - SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO); - for (SDValue Ld : N->ops()) - DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad); - return WideLoad; - } + // TODO: At this point, we've successfully matched a generalized gather + // load. Maybe we should emit that, and then move the specialized + // matchers above and below into a DAG combine? // Get the widened scalar type, e.g. 
v4i8 -> i64 unsigned WideScalarBitWidth = @@ -12557,21 +14554,29 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) return SDValue(); - MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT); - SDValue VL = - getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second; - SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); + auto [StrideVariant, MustNegateStride] = *BaseDiff; + SDValue Stride = std::holds_alternative<SDValue>(StrideVariant) + ? std::get<SDValue>(StrideVariant) + : DAG.getConstant(std::get<int64_t>(StrideVariant), DL, + Lds[0]->getOffset().getValueType()); + if (MustNegateStride) + Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); + + SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other}); SDValue IntID = - DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT()); - SDValue Ops[] = {BaseLd->getChain(), - IntID, - DAG.getUNDEF(ContainerVT), - BasePtr, - Stride, - VL}; + DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, + Subtarget.getXLenVT()); + + SDValue AllOneMask = + DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, + DAG.getConstant(1, DL, MVT::i1)); + + SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT), + BaseLd->getBasePtr(), Stride, AllOneMask}; uint64_t MemSize; - if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride)) + if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); + ConstStride && ConstStride->getSExtValue() >= 0) // total size = (elsize * n) + (stride - elsize) * (n-1) // = elsize + stride * (n-1) MemSize = WideScalarVT.getSizeInBits() + @@ -12589,11 +14594,7 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, for (SDValue Ld : N->ops()) DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad); - // Note: Perform the bitcast before the convertFromScalableVector so we have - // balanced pairs of convertFromScalable/convertToScalable - SDValue Res = DAG.getBitcast( - TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad); - return convertFromScalableVector(VT, Res, DAG, Subtarget); + return DAG.getBitcast(VT.getSimpleVT(), StridedLoad); } static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, @@ -12653,9 +14654,121 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, return DAG.getNode(Opc, DL, VT, Ops); } +static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, + ISD::MemIndexType &IndexType, + RISCVTargetLowering::DAGCombinerInfo &DCI) { + if (!DCI.isBeforeLegalize()) + return false; + + SelectionDAG &DAG = DCI.DAG; + const MVT XLenVT = + DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); + + const EVT IndexVT = Index.getValueType(); + + // RISC-V indexed loads only support the "unsigned unscaled" addressing + // mode, so anything else must be manually legalized. + if (!isIndexTypeSigned(IndexType)) + return false; + + if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { + // Any index legalization should first promote to XLenVT, so we don't lose + // bits when scaling. This may create an illegal index type so we let + // LLVM's legalization take care of the splitting. + // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 
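For intuition on the MemSize computation in the concat-of-loads combine above, here is a small standalone C++ sketch (not LLVM code; the names are invented for the example) of the footprint formula elsize + stride * (n - 1), which applies when the constant stride is non-negative:

// strided_footprint.cpp: standalone model of the MemSize computation in the
// concat-of-loads combine above. For a non-negative constant stride the bytes
// touched run from the first element's first byte to the last element's last
// byte: elementBytes + strideBytes * (numElements - 1).
#include <cassert>
#include <cstdint>
#include <iostream>

uint64_t stridedFootprint(uint64_t elementBytes, uint64_t numElements,
                          uint64_t strideBytes) {
  assert(numElements >= 1);
  return elementBytes + strideBytes * (numElements - 1);
}

int main() {
  // Four 4-byte loads spaced 16 bytes apart: 4 + 16 * 3 = 52 bytes.
  std::cout << stridedFootprint(4, 4, 16) << "\n";
  // Stride equal to the element size is the contiguous case: 4 + 4 * 3 = 16,
  // i.e. numElements * elementBytes, which a unit-stride load covers exactly.
  std::cout << stridedFootprint(4, 4, 4) << "\n";
  return 0;
}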
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, + IndexVT.changeVectorElementType(XLenVT), Index); + } + IndexType = ISD::UNSIGNED_SCALED; + return true; +} + +/// Match the index vector of a scatter or gather node as the shuffle mask +/// which performs the rearrangement if possible. Will only match if +/// all lanes are touched, and thus replacing the scatter or gather with +/// a unit strided access and shuffle is legal. +static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, + SmallVector<int> &ShuffleMask) { + if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) + return false; + if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) + return false; + + const unsigned ElementSize = VT.getScalarStoreSize(); + const unsigned NumElems = VT.getVectorNumElements(); + + // Create the shuffle mask and check all bits active + assert(ShuffleMask.empty()); + BitVector ActiveLanes(NumElems); + for (unsigned i = 0; i < Index->getNumOperands(); i++) { + // TODO: We've found an active bit of UB, and could be + // more aggressive here if desired. + if (Index->getOperand(i)->isUndef()) + return false; + uint64_t C = Index->getConstantOperandVal(i); + if (C % ElementSize != 0) + return false; + C = C / ElementSize; + if (C >= NumElems) + return false; + ShuffleMask.push_back(C); + ActiveLanes.set(C); + } + return ActiveLanes.all(); +} + +/// Match the index of a gather or scatter operation as an operation +/// with twice the element width and half the number of elements. This is +/// generally profitable (if legal) because these operations are linear +/// in VL, so even if we cause some extract VTYPE/VL toggles, we still +/// come out ahead. +static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, + Align BaseAlign, const RISCVSubtarget &ST) { + if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) + return false; + if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) + return false; + + // Attempt a doubling. If we can use a element type 4x or 8x in + // size, this will happen via multiply iterations of the transform. + const unsigned NumElems = VT.getVectorNumElements(); + if (NumElems % 2 != 0) + return false; + + const unsigned ElementSize = VT.getScalarStoreSize(); + const unsigned WiderElementSize = ElementSize * 2; + if (WiderElementSize > ST.getELen()/8) + return false; + + if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize) + return false; + + for (unsigned i = 0; i < Index->getNumOperands(); i++) { + // TODO: We've found an active bit of UB, and could be + // more aggressive here if desired. + if (Index->getOperand(i)->isUndef()) + return false; + // TODO: This offset check is too strict if we support fully + // misaligned memory operations. + uint64_t C = Index->getConstantOperandVal(i); + if (i % 2 == 0) { + if (C % WiderElementSize != 0) + return false; + continue; + } + uint64_t Last = Index->getConstantOperandVal(i-1); + if (C != Last + ElementSize) + return false; + } + return true; +} + + SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; + const MVT XLenVT = Subtarget.getXLenVT(); + SDLoc DL(N); // Helper to call SimplifyDemandedBits on an operand of N where only some low // bits are demanded. N will be added to the Worklist if it was not deleted. 
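matchIndexAsShuffle above only fires when the constant byte offsets select whole, in-range elements and cover every lane, so a unit-stride load plus a shuffle reproduces the gather exactly. Below is a standalone sketch of that test on plain integers rather than SelectionDAG nodes; it assumes the mask has already been checked to be all ones and the indices to be constants, and all names are invented for the example.

// index_as_shuffle.cpp: sketch of the matchIndexAsShuffle test above.
// Offsets are byte offsets under unsigned-unscaled addressing.
#include <cstdint>
#include <optional>
#include <vector>

std::optional<std::vector<int>>
matchIndexAsShuffle(const std::vector<uint64_t> &byteOffsets,
                    unsigned elementBytes, unsigned numElems) {
  std::vector<int> mask;
  std::vector<bool> active(numElems, false);
  for (uint64_t off : byteOffsets) {
    if (off % elementBytes != 0)
      return std::nullopt;           // does not select a whole element
    uint64_t lane = off / elementBytes;
    if (lane >= numElems)
      return std::nullopt;           // reads outside the unit-stride region
    mask.push_back(static_cast<int>(lane));
    active[lane] = true;
  }
  for (bool a : active)
    if (!a)
      return std::nullopt;           // some lane is never touched
  return mask;
}

int main() {
  // Byte offsets {8, 0, 24, 16} over i64 elements are the permutation
  // {1, 0, 3, 2}: a unit-stride load of 4 elements plus that shuffle
  // reproduces the gather.
  auto mask = matchIndexAsShuffle({8, 0, 24, 16}, 8, 4);
  return (mask && *mask == std::vector<int>{1, 0, 3, 2}) ? 0 : 1;
}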
@@ -12687,8 +14800,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DCI.CombineTo(N, Lo, Hi); } - SDLoc DL(N); - // It's cheaper to materialise two 32-bit integers than to load a double // from the constant pool and transfer it to integer registers through the // stack. @@ -12795,14 +14906,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return performORCombine(N, DCI, Subtarget); case ISD::XOR: return performXORCombine(N, DAG, Subtarget); + case ISD::MUL: + return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: case ISD::UMIN: case ISD::SMAX: case ISD::SMIN: case ISD::FMAXNUM: - case ISD::FMINNUM: - return combineBinOpToReduce(N, DAG, Subtarget); + case ISD::FMINNUM: { + if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) + return V; + if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) + return V; + return SDValue(); + } case ISD::SETCC: return performSETCCCombine(N, DAG, Subtarget); case ISD::SIGN_EXTEND_INREG: @@ -12829,6 +14947,56 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } } return SDValue(); + case RISCVISD::TRUNCATE_VECTOR_VL: { + // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) + // This would be benefit for the cases where X and Y are both the same value + // type of low precision vectors. Since the truncate would be lowered into + // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate + // restriction, such pattern would be expanded into a series of "vsetvli" + // and "vnsrl" instructions later to reach this point. + auto IsTruncNode = [](SDValue V) { + if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) + return false; + SDValue VL = V.getOperand(2); + auto *C = dyn_cast<ConstantSDNode>(VL); + // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand + bool IsVLMAXForVMSET = (C && C->isAllOnes()) || + (isa<RegisterSDNode>(VL) && + cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); + return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && + IsVLMAXForVMSET; + }; + + SDValue Op = N->getOperand(0); + + // We need to first find the inner level of TRUNCATE_VECTOR_VL node + // to distinguish such pattern. + while (IsTruncNode(Op)) { + if (!Op.hasOneUse()) + return SDValue(); + Op = Op.getOperand(0); + } + + if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { + SDValue N00 = N0.getOperand(0); + SDValue N10 = N1.getOperand(0); + if (N00.getValueType().isVector() && + N00.getValueType() == N10.getValueType() && + N->getValueType(0) == N10.getValueType()) { + unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; + SDValue SMin = DAG.getNode( + ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, + DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); + return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); + } + } + } + break; + } case ISD::TRUNCATE: return performTRUNCATECombine(N, DAG, Subtarget); case ISD::SELECT: @@ -12939,6 +15107,19 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } } + // If both true/false are an xor with 1, pull through the select. + // This can occur after op legalization if both operands are setccs that + // require an xor to invert. + // FIXME: Generalize to other binary ops with identical operand? 
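The TRUNCATE_VECTOR_VL combine above rests on a scalar identity: truncating sext(X) >> Y back to the narrow type gives the same result as shifting X by min(Y, narrow_bits - 1). A standalone exhaustive check for the i8/i16 case follows; it assumes arithmetic right shift of signed values and two's-complement narrowing, which is what the vector sra provides and what C++20 guarantees.

// narrow_sra.cpp: exhaustive check of the scalar identity behind the
// TRUNCATE_VECTOR_VL combine above. Truncating (sext(x) >> y) back to the
// narrow type equals shifting x arithmetically by min(y, narrow_bits - 1).
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x) {
    for (unsigned y = 0; y < 16; ++y) {    // any shift valid for the i16 type
      int16_t wide = static_cast<int16_t>(x);            // sext i8 -> i16
      int8_t viaWide = static_cast<int8_t>(wide >> y);   // sra, then trunc
      unsigned clamped = std::min(y, 7u);                // smin(y, 8 - 1)
      int8_t direct = static_cast<int8_t>(static_cast<int8_t>(x) >> clamped);
      assert(viaWide == direct);
    }
  }
  return 0;
}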
+ if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && + TrueV.getOperand(1) == FalseV.getOperand(1) && + isOneConstant(TrueV.getOperand(1)) && + TrueV.hasOneUse() && FalseV.hasOneUse()) { + SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC, + TrueV.getOperand(0), FalseV.getOperand(0)); + return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1)); + } + return SDValue(); } case RISCVISD::BR_CC: { @@ -12985,75 +15166,187 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); } - case ISD::MGATHER: - case ISD::MSCATTER: - case ISD::VP_GATHER: - case ISD::VP_SCATTER: { - if (!DCI.isBeforeLegalize()) - break; - SDValue Index, ScaleOp; - bool IsIndexSigned = false; - if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) { - Index = VPGSN->getIndex(); - ScaleOp = VPGSN->getScale(); - IsIndexSigned = VPGSN->isIndexSigned(); - assert(!VPGSN->isIndexScaled() && - "Scaled gather/scatter should not be formed"); - } else { - const auto *MGSN = cast<MaskedGatherScatterSDNode>(N); - Index = MGSN->getIndex(); - ScaleOp = MGSN->getScale(); - IsIndexSigned = MGSN->isIndexSigned(); - assert(!MGSN->isIndexScaled() && - "Scaled gather/scatter should not be formed"); + case ISD::MGATHER: { + const auto *MGN = dyn_cast<MaskedGatherSDNode>(N); + const EVT VT = N->getValueType(0); + SDValue Index = MGN->getIndex(); + SDValue ScaleOp = MGN->getScale(); + ISD::MemIndexType IndexType = MGN->getIndexType(); + assert(!MGN->isIndexScaled() && + "Scaled gather/scatter should not be formed"); + SDLoc DL(N); + if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) + return DAG.getMaskedGather( + N->getVTList(), MGN->getMemoryVT(), DL, + {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), + MGN->getBasePtr(), Index, ScaleOp}, + MGN->getMemOperand(), IndexType, MGN->getExtensionType()); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getMaskedGather( + N->getVTList(), MGN->getMemoryVT(), DL, + {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), + MGN->getBasePtr(), Index, ScaleOp}, + MGN->getMemOperand(), IndexType, MGN->getExtensionType()); + + if (Index.getOpcode() == ISD::BUILD_VECTOR && + MGN->getExtensionType() == ISD::NON_EXTLOAD) { + if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index); + SimpleVID && SimpleVID->StepDenominator == 1) { + const int64_t StepNumerator = SimpleVID->StepNumerator; + const int64_t Addend = SimpleVID->Addend; + + // Note: We don't need to check alignment here since (by assumption + // from the existance of the gather), our offsets must be sufficiently + // aligned. 
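When the gather's constant indices form the arithmetic sequence addend + k * step (byte offsets under unsigned-unscaled addressing), lane k reads from (base + addend) + k * step, so the whole gather collapses to a strided load. A standalone sketch of that detection is below; the real isSimpleVIDSequence also models fractional steps, which this combine rejects by requiring StepDenominator == 1, and the names here are invented for the example.

// vid_gather.cpp: sketch of the index-sequence test used above.
#include <cstdint>
#include <optional>
#include <vector>

struct VIDSeq {
  int64_t addend;
  int64_t step;
};

std::optional<VIDSeq> matchVIDSequence(const std::vector<int64_t> &offsets) {
  if (offsets.size() < 2)
    return std::nullopt;
  int64_t addend = offsets[0];
  int64_t step = offsets[1] - offsets[0];
  for (size_t k = 0; k < offsets.size(); ++k)
    if (offsets[k] != addend + static_cast<int64_t>(k) * step)
      return std::nullopt;
  return VIDSeq{addend, step};
}

int main() {
  // {12, 28, 44, 60} = 12 + k * 16, so gather(base, idx) can become a
  // strided load from base + 12 with stride 16.
  auto seq = matchVIDSequence({12, 28, 44, 60});
  return (seq && seq->addend == 12 && seq->step == 16) ? 0 : 1;
}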
+ + const EVT PtrVT = getPointerTy(DAG.getDataLayout()); + assert(MGN->getBasePtr()->getValueType(0) == PtrVT); + assert(IndexType == ISD::UNSIGNED_SCALED); + SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(), + DAG.getConstant(Addend, DL, PtrVT)); + + SDVTList VTs = DAG.getVTList({VT, MVT::Other}); + SDValue IntID = + DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, + XLenVT); + SDValue Ops[] = + {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr, + DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()}; + return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, + Ops, VT, MGN->getMemOperand()); + } } - EVT IndexVT = Index.getValueType(); - MVT XLenVT = Subtarget.getXLenVT(); - // RISC-V indexed loads only support the "unsigned unscaled" addressing - // mode, so anything else must be manually legalized. - bool NeedsIdxLegalization = - (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT)); - if (!NeedsIdxLegalization) - break; - SDLoc DL(N); + SmallVector<int> ShuffleMask; + if (MGN->getExtensionType() == ISD::NON_EXTLOAD && + matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) { + SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(), + MGN->getBasePtr(), DAG.getUNDEF(XLenVT), + MGN->getMask(), DAG.getUNDEF(VT), + MGN->getMemoryVT(), MGN->getMemOperand(), + ISD::UNINDEXED, ISD::NON_EXTLOAD); + SDValue Shuffle = + DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask); + return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL); + } - // Any index legalization should first promote to XLenVT, so we don't lose - // bits when scaling. This may create an illegal index type so we let - // LLVM's legalization take care of the splitting. - // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. - if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { - IndexVT = IndexVT.changeVectorElementType(XLenVT); - Index = DAG.getNode(IsIndexSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - DL, IndexVT, Index); + if (MGN->getExtensionType() == ISD::NON_EXTLOAD && + matchIndexAsWiderOp(VT, Index, MGN->getMask(), + MGN->getMemOperand()->getBaseAlign(), Subtarget)) { + SmallVector<SDValue> NewIndices; + for (unsigned i = 0; i < Index->getNumOperands(); i += 2) + NewIndices.push_back(Index.getOperand(i)); + EVT IndexVT = Index.getValueType() + .getHalfNumVectorElementsVT(*DAG.getContext()); + Index = DAG.getBuildVector(IndexVT, DL, NewIndices); + + unsigned ElementSize = VT.getScalarStoreSize(); + EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2); + auto EltCnt = VT.getVectorElementCount(); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT, + EltCnt.divideCoefficientBy(2)); + SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru()); + EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + EltCnt.divideCoefficientBy(2)); + SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1)); + + SDValue Gather = + DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL, + {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), + Index, ScaleOp}, + MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD); + SDValue Result = DAG.getBitcast(VT, Gather.getValue(0)); + return DAG.getMergeValues({Result, Gather.getValue(1)}, DL); + } + break; + } + case ISD::MSCATTER:{ + const auto *MSN = dyn_cast<MaskedScatterSDNode>(N); + SDValue Index = MSN->getIndex(); + SDValue ScaleOp = MSN->getScale(); + ISD::MemIndexType IndexType = MSN->getIndexType(); + assert(!MSN->isIndexScaled() && + "Scaled gather/scatter should not be formed"); + + SDLoc DL(N); + if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) + return DAG.getMaskedScatter( + N->getVTList(), MSN->getMemoryVT(), DL, + {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), + Index, ScaleOp}, + MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getMaskedScatter( + N->getVTList(), MSN->getMemoryVT(), DL, + {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), + Index, ScaleOp}, + MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); + + EVT VT = MSN->getValue()->getValueType(0); + SmallVector<int> ShuffleMask; + if (!MSN->isTruncatingStore() && + matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) { + SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(), + DAG.getUNDEF(VT), ShuffleMask); + return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(), + DAG.getUNDEF(XLenVT), MSN->getMask(), + MSN->getMemoryVT(), MSN->getMemOperand(), + ISD::UNINDEXED, false); } + break; + } + case ISD::VP_GATHER: { + const auto *VPGN = dyn_cast<VPGatherSDNode>(N); + SDValue Index = VPGN->getIndex(); + SDValue ScaleOp = VPGN->getScale(); + ISD::MemIndexType IndexType = VPGN->getIndexType(); + assert(!VPGN->isIndexScaled() && + "Scaled gather/scatter should not be formed"); + + SDLoc DL(N); + if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) + return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, + {VPGN->getChain(), VPGN->getBasePtr(), Index, + ScaleOp, VPGN->getMask(), + VPGN->getVectorLength()}, + VPGN->getMemOperand(), IndexType); - ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED; - if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N)) + if (narrowIndex(Index, IndexType, DAG)) return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 
{VPGN->getChain(), VPGN->getBasePtr(), Index, ScaleOp, VPGN->getMask(), VPGN->getVectorLength()}, - VPGN->getMemOperand(), NewIndexTy); - if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N)) + VPGN->getMemOperand(), IndexType); + + break; + } + case ISD::VP_SCATTER: { + const auto *VPSN = dyn_cast<VPScatterSDNode>(N); + SDValue Index = VPSN->getIndex(); + SDValue ScaleOp = VPSN->getScale(); + ISD::MemIndexType IndexType = VPSN->getIndexType(); + assert(!VPSN->isIndexScaled() && + "Scaled gather/scatter should not be formed"); + + SDLoc DL(N); + if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, {VPSN->getChain(), VPSN->getValue(), VPSN->getBasePtr(), Index, ScaleOp, VPSN->getMask(), VPSN->getVectorLength()}, - VPSN->getMemOperand(), NewIndexTy); - if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) - return DAG.getMaskedGather( - N->getVTList(), MGN->getMemoryVT(), DL, - {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), - MGN->getBasePtr(), Index, ScaleOp}, - MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); - const auto *MSN = cast<MaskedScatterSDNode>(N); - return DAG.getMaskedScatter( - N->getVTList(), MSN->getMemoryVT(), DL, - {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), - Index, ScaleOp}, - MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); + VPSN->getMemOperand(), IndexType); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, + {VPSN->getChain(), VPSN->getValue(), + VPSN->getBasePtr(), Index, ScaleOp, + VPSN->getMask(), VPSN->getVectorLength()}, + VPSN->getMemOperand(), IndexType); + break; } case RISCVISD::SRA_VL: case RISCVISD::SRL_VL: @@ -13062,7 +15355,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { // We don't need the upper 32 bits of a 64-bit element for a shift amount. 
SDLoc DL(N); - SDValue VL = N->getOperand(3); + SDValue VL = N->getOperand(4); EVT VT = N->getValueType(0); ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), ShAmt.getOperand(1), VL); @@ -13108,12 +15401,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::STRICT_VFNMADD_VL: case RISCVISD::STRICT_VFMSUB_VL: case RISCVISD::STRICT_VFNMSUB_VL: - return performVFMADD_VLCombine(N, DAG); + return performVFMADD_VLCombine(N, DAG, Subtarget); case RISCVISD::FMUL_VL: - return performVFMUL_VLCombine(N, DAG); + return performVFMUL_VLCombine(N, DAG, Subtarget); case RISCVISD::FADD_VL: case RISCVISD::FSUB_VL: - return performFADDSUB_VLCombine(N, DAG); + return performFADDSUB_VLCombine(N, DAG, Subtarget); case ISD::LOAD: case ISD::STORE: { if (DCI.isAfterLegalizeDAG()) @@ -13149,16 +15442,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) { // Get the constant vector bits APInt NewC(Val.getValueSizeInBits(), 0); + uint64_t EltSize = Val.getScalarValueSizeInBits(); for (unsigned i = 0; i < Val.getNumOperands(); i++) { if (Val.getOperand(i).isUndef()) continue; - NewC.insertBits(Val.getConstantOperandAPInt(i), - i * Val.getScalarValueSizeInBits()); + NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize), + i * EltSize); } MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); - if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), - Subtarget.getFeatureBits(), true) <= 2 && + if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget, + true) <= 2 && allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), NewVT, *Store->getMemOperand())) { SDValue NewV = DAG.getConstant(NewC, DL, NewVT); @@ -13201,7 +15495,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SDValue Src = Val.getOperand(0); MVT VecVT = Src.getSimpleValueType(); // VecVT should be scalable and memory VT should match the element type. - if (VecVT.isScalableVector() && + if (!Store->isIndexed() && VecVT.isScalableVector() && MemVT == VecVT.getVectorElementType()) { SDLoc DL(N); MVT MaskVT = getMaskTypeFor(VecVT); @@ -13226,19 +15520,51 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Gather; break; } + case ISD::BUILD_VECTOR: + if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this)) + return V; + break; case ISD::CONCAT_VECTORS: if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) return V; break; + case ISD::INSERT_VECTOR_ELT: + if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this)) + return V; + break; + case RISCVISD::VFMV_V_F_VL: { + const MVT VT = N->getSimpleValueType(0); + SDValue Passthru = N->getOperand(0); + SDValue Scalar = N->getOperand(1); + SDValue VL = N->getOperand(2); + + // If VL is 1, we can use vfmv.s.f. + if (isOneConstant(VL)) + return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); + break; + } case RISCVISD::VMV_V_X_VL: { + const MVT VT = N->getSimpleValueType(0); + SDValue Passthru = N->getOperand(0); + SDValue Scalar = N->getOperand(1); + SDValue VL = N->getOperand(2); + // Tail agnostic VMV.V.X only demands the vector element bitwidth from the // scalar input. 
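Earlier in this hunk, a store of a constant fixed-length build_vector is folded into a single scalar store when the packed constant is cheap to materialize (getIntMatCost of at most 2) and the access is sufficiently aligned. Below is a standalone sketch of just the packing step, which places element 0 in the low bits to match the vector's little-endian layout in memory; it is illustrative only and limited to a 64-bit total width.

// pack_constant_store.cpp: sketch of the constant-packing step above.
#include <cstdint>
#include <vector>

uint64_t packElements(const std::vector<uint64_t> &elems, unsigned eltBits) {
  uint64_t packed = 0;
  for (size_t i = 0; i < elems.size(); ++i) {
    uint64_t masked =
        eltBits >= 64 ? elems[i] : (elems[i] & ((1ULL << eltBits) - 1));
    packed |= masked << (i * eltBits);   // insertBits(elem_i, i * eltBits)
  }
  return packed;
}

int main() {
  // <4 x i8> <1, 2, 3, 4> becomes the i32 constant 0x04030201.
  return packElements({1, 2, 3, 4}, 8) == 0x04030201 ? 0 : 1;
}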
- unsigned ScalarSize = N->getOperand(1).getValueSizeInBits(); - unsigned EltWidth = N->getValueType(0).getScalarSizeInBits(); - if (ScalarSize > EltWidth && N->getOperand(0).isUndef()) + unsigned ScalarSize = Scalar.getValueSizeInBits(); + unsigned EltWidth = VT.getScalarSizeInBits(); + if (ScalarSize > EltWidth && Passthru.isUndef()) if (SimplifyDemandedLowBitsHelper(1, EltWidth)) return SDValue(N, 0); + // If VL is 1 and the scalar value won't benefit from immediate, we can + // use vmv.s.x. + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); + if (isOneConstant(VL) && + (!Const || Const->isZero() || + !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5))) + return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); + break; } case RISCVISD::VFMV_S_F_VL: { @@ -13258,6 +15584,35 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Src.getOperand(0); // TODO: Use insert_subvector/extract_subvector to change widen/narrow? } + [[fallthrough]]; + } + case RISCVISD::VMV_S_X_VL: { + const MVT VT = N->getSimpleValueType(0); + SDValue Passthru = N->getOperand(0); + SDValue Scalar = N->getOperand(1); + SDValue VL = N->getOperand(2); + + // Use M1 or smaller to avoid over constraining register allocation + const MVT M1VT = getLMUL1VT(VT); + if (M1VT.bitsLT(VT)) { + SDValue M1Passthru = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru, + DAG.getVectorIdxConstant(0, DL)); + SDValue Result = + DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL); + Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result, + DAG.getConstant(0, DL, XLenVT)); + return Result; + } + + // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or + // higher would involve overly constraining the register allocator for + // no purpose. + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); + Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) && + VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef()) + return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); + break; } case ISD::INTRINSIC_VOID: @@ -13269,6 +15624,43 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // By default we do not combine any intrinsic. default: return SDValue(); + case Intrinsic::riscv_masked_strided_load: { + MVT VT = N->getSimpleValueType(0); + auto *Load = cast<MemIntrinsicSDNode>(N); + SDValue PassThru = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue Stride = N->getOperand(4); + SDValue Mask = N->getOperand(5); + + // If the stride is equal to the element size in bytes, we can use + // a masked.load. + const unsigned ElementSize = VT.getScalarStoreSize(); + if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); + StrideC && StrideC->getZExtValue() == ElementSize) + return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base, + DAG.getUNDEF(XLenVT), Mask, PassThru, + Load->getMemoryVT(), Load->getMemOperand(), + ISD::UNINDEXED, ISD::NON_EXTLOAD); + return SDValue(); + } + case Intrinsic::riscv_masked_strided_store: { + auto *Store = cast<MemIntrinsicSDNode>(N); + SDValue Value = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue Stride = N->getOperand(4); + SDValue Mask = N->getOperand(5); + + // If the stride is equal to the element size in bytes, we can use + // a masked.store. 
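Both strided-intrinsic folds in this block rely on the same observation: a byte stride equal to the element size addresses consecutive elements, so the access is an ordinary contiguous (masked) load or store. A standalone sketch under that assumption follows, with names invented for the example.

// unit_stride.cpp: a strided copy whose byte stride equals the element size
// touches consecutive elements, i.e. it is a plain contiguous copy. This is
// the condition under which the strided intrinsics above can be rewritten as
// masked.load / masked.store.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

void stridedLoad(const uint8_t *base, size_t strideBytes, uint32_t *dst,
                 size_t n) {
  for (size_t i = 0; i < n; ++i)
    std::memcpy(&dst[i], base + i * strideBytes, sizeof(uint32_t));
}

int main() {
  std::vector<uint32_t> src = {10, 20, 30, 40};
  std::vector<uint32_t> viaStride(4), viaContiguous(4);
  // Stride of 4 bytes == sizeof(uint32_t): both paths read the same elements.
  stridedLoad(reinterpret_cast<const uint8_t *>(src.data()), sizeof(uint32_t),
              viaStride.data(), 4);
  std::memcpy(viaContiguous.data(), src.data(), 4 * sizeof(uint32_t));
  assert(viaStride == viaContiguous);
  return 0;
}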
+ const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); + if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); + StrideC && StrideC->getZExtValue() == ElementSize) + return DAG.getMaskedStore(Store->getChain(), DL, Value, Base, + DAG.getUNDEF(XLenVT), Mask, + Store->getMemoryVT(), Store->getMemOperand(), + ISD::UNINDEXED, false); + return SDValue(); + } case Intrinsic::riscv_vcpop: case Intrinsic::riscv_vcpop_mask: case Intrinsic::riscv_vfirst: @@ -13287,23 +15679,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getConstant(-1, DL, VT); return DAG.getConstant(0, DL, VT); } - case Intrinsic::riscv_vloxei: - case Intrinsic::riscv_vloxei_mask: - case Intrinsic::riscv_vluxei: - case Intrinsic::riscv_vluxei_mask: - case Intrinsic::riscv_vsoxei: - case Intrinsic::riscv_vsoxei_mask: - case Intrinsic::riscv_vsuxei: - case Intrinsic::riscv_vsuxei_mask: - if (SDValue V = narrowIndex(N->getOperand(4), DAG)) { - SmallVector<SDValue, 8> Ops(N->ops()); - Ops[4] = V; - const auto *MemSD = cast<MemIntrinsicSDNode>(N); - return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), - Ops, MemSD->getMemoryVT(), - MemSD->getMemOperand()); - } - return SDValue(); } } case ISD::BITCAST: { @@ -13386,12 +15761,12 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( // Neither constant will fit into an immediate, so find materialisation // costs. - int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), - Subtarget.getFeatureBits(), - /*CompressionCost*/true); + int C1Cost = + RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget, + /*CompressionCost*/ true); int ShiftedC1Cost = RISCVMatInt::getIntMatCost( - ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(), - /*CompressionCost*/true); + ShiftedC1Int, Ty.getSizeInBits(), Subtarget, + /*CompressionCost*/ true); // Materialising `c1` is cheaper than materialising `c1 << c2`, so the // combine should be prevented. @@ -13562,6 +15937,15 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known = Known.sext(BitWidth); break; } + case RISCVISD::SLLW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } case RISCVISD::CTZW: { KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); @@ -13600,7 +15984,7 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.One.setBit(Log2_32(MinVLenB)); break; } - case RISCVISD::FPCLASS: { + case RISCVISD::FCLASS: { // fclass will only set one of the low 10 bits. Known.Zero.setBitsFrom(10); break; @@ -13615,7 +15999,7 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; case Intrinsic::riscv_vsetvli: case Intrinsic::riscv_vsetvlimax: - // Assume that VL output is >= 65536. + // Assume that VL output is <= 65536. // TODO: Take SEW and LMUL into account. 
if (BitWidth > 17) Known.Zero.setBitsFrom(17); @@ -13705,6 +16089,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( assert(Subtarget.hasStdExtA()); return 33; } + break; } } @@ -14187,47 +16572,6 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, return TailMBB; } -static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB, - unsigned Opcode) { - DebugLoc DL = MI.getDebugLoc(); - - const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass); - - assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7); - unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3; - - // Update FRM and save the old value. - BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM) - .addImm(MI.getOperand(FRMIdx).getImm()); - - // Emit an VFCVT with the FRM == DYN - auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode)); - - for (unsigned I = 0; I < MI.getNumOperands(); I++) - if (I != FRMIdx) - MIB = MIB.add(MI.getOperand(I)); - else - MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN - - MIB.add(MachineOperand::CreateReg(RISCV::FRM, - /*IsDef*/ false, - /*IsImp*/ true)); - - if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) - MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); - - // Restore FRM. - BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM)) - .addReg(SavedFRM, RegState::Kill); - - // Erase the pseudoinstruction. - MI.eraseFromParent(); - return BB; -} - static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc, @@ -14472,43 +16816,6 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, Subtarget); -#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \ - case RISCV::RMOpc##_##LMUL: \ - return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \ - case RISCV::RMOpc##_##LMUL##_MASK: \ - return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK); - -#define PseudoVFCVT_RM_CASE(RMOpc, Opc) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4) - -#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \ - PseudoVFCVT_RM_CASE(RMOpc, Opc) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8) - -#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \ - PseudoVFCVT_RM_CASE(RMOpc, Opc) \ - PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8) - - // VFCVT - PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V) - PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V) - PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V) - PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V) - - // VFWCVT - PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V); - PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V); - - // VFNCVT - PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W); - PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W); - PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W); - PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W); - case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK, RISCV::PseudoVFCVT_F_X_V_M1_MASK); @@ -14535,41 +16842,26 @@ 
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case RISCV::PseudoFROUND_D_INX: case RISCV::PseudoFROUND_D_IN32X: return emitFROUND(MI, BB, Subtarget); + case TargetOpcode::STATEPOINT: + case TargetOpcode::STACKMAP: + case TargetOpcode::PATCHPOINT: + if (!Subtarget.is64Bit()) + report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only " + "supported on 64-bit targets"); + return emitPatchPoint(MI, BB); } } -// Returns the index to the rounding mode immediate value if any, otherwise the -// function will return None. -static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) { - uint64_t TSFlags = MI.getDesc().TSFlags; - if (!RISCVII::hasRoundModeOp(TSFlags)) - return std::nullopt; - - // The operand order - // ------------------------------------- - // | n-1 (if any) | n-2 | n-3 | n-4 | - // | policy | sew | vl | rm | - // ------------------------------------- - return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3; -} - void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { - // Add FRM dependency to vector floating-point instructions with dynamic - // rounding mode. - if (auto RoundModeIdx = getRoundModeIdx(MI)) { - unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm(); - if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) { - MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, - /*isImp*/ true)); - } - } - // Add FRM dependency to any instructions with dynamic rounding mode. - unsigned Opc = MI.getOpcode(); - auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm); - if (Idx < 0) - return; + int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm); + if (Idx < 0) { + // Vector pseudos have FRM index indicated by TSFlags. + Idx = RISCVII::getFRMOpNum(MI.getDesc()); + if (Idx < 0) + return; + } if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) return; // If the instruction already reads FRM, don't add another read. @@ -14604,10 +16896,6 @@ void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // register-size fields in the same situations they would be for fixed // arguments. -static const MCPhysReg ArgGPRs[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, - RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 -}; static const MCPhysReg ArgFPR16s[] = { RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H @@ -14632,6 +16920,14 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; +ArrayRef<MCPhysReg> RISCV::getArgGPRs() { + static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; + + return ArrayRef(ArgGPRs); +} + // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, @@ -14639,6 +16935,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; + ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(); if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. 
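CC_RISCVAssign2XLen, continued below, places a 2*XLEN value in the next one or two argument GPRs and spills whatever does not fit to the stack. The following is a standalone toy model of that decision on RV32; it ignores the alignment and vararg rules handled elsewhere in the real convention, a0..a7 stand in for X10..X17, and all names are invented for the example.

// split_arg.cpp: toy model of passing a 2*XLEN argument on RV32: two GPRs if
// available, one GPR plus a 4-byte stack slot, or an 8-byte stack slot when
// no argument registers remain.
#include <cstdio>

struct ArgAllocState {
  unsigned nextGPR = 0;      // how many of a0..a7 are already taken
  unsigned stackOffset = 0;  // next free byte in the outgoing argument area
};

void assign2XLen(ArgAllocState &st) {
  const unsigned numArgGPRs = 8;
  if (st.nextGPR + 1 < numArgGPRs) {
    std::printf("lo -> a%u, hi -> a%u\n", st.nextGPR, st.nextGPR + 1);
    st.nextGPR += 2;
  } else if (st.nextGPR < numArgGPRs) {
    std::printf("lo -> a%u, hi -> stack+%u\n", st.nextGPR, st.stackOffset);
    st.nextGPR += 1;
    st.stackOffset += 4;
  } else {
    std::printf("lo/hi -> stack+%u\n", st.stackOffset);
    st.stackOffset += 8;
  }
}

int main() {
  ArgAllocState st;
  st.nextGPR = 7;   // seven earlier XLEN arguments already used a0..a6
  assign2XLen(st);  // prints: lo -> a7, hi -> stack+0
  return 0;
}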
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, @@ -14759,6 +17056,8 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, LocInfo = CCValAssign::BCvt; } + ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(); + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -14785,23 +17084,29 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // Handle passing f64 on RV32D with a soft float ABI or when floating point // registers are exhausted. if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { - assert(!ArgFlags.isSplit() && PendingLocs.empty() && - "Can't lower f64 if it is split"); + assert(PendingLocs.empty() && "Can't lower f64 if it is split"); // Depending on available argument GPRS, f64 may be passed in a pair of // GPRs, split between a GPR and the stack, or passed completely on the // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. Register Reg = State.AllocateReg(ArgGPRs); - LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, Align(8)); State.addLoc( CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); return false; } - if (!State.AllocateReg(ArgGPRs)) - State.AllocateStack(4, Align(4)); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + LocVT = MVT::i32; + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + Register HiReg = State.AllocateReg(ArgGPRs); + if (HiReg) { + State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); + } else { + unsigned StackOffset = State.AllocateStack(4, Align(4)); + State.addLoc( + CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + } return false; } @@ -15002,12 +17307,18 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, break; case CCValAssign::BCvt: if (VA.getLocVT().isInteger() && - (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) + (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); - else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) - Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); - else + } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + if (RV64LegalI32) { + Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); + } else { + Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); + } + } else { Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); + } break; } return Val; @@ -15061,13 +17372,19 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); break; case CCValAssign::BCvt: - if (VA.getLocVT().isInteger() && - (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) - Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); - else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) - Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); - else + if (LocVT.isInteger() && + (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); + } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { + if (RV64LegalI32) { + Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, 
Val); + Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val); + } else { + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); + } + } else { Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); + } break; } return Val; @@ -15110,38 +17427,32 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, } static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, - const CCValAssign &VA, const SDLoc &DL) { + const CCValAssign &VA, + const CCValAssign &HiVA, + const SDLoc &DL) { assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && "Unexpected VA"); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - if (VA.isMemLoc()) { - // f64 is passed on the stack. - int FI = - MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true); - SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - return DAG.getLoad(MVT::f64, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(MF, FI)); - } - assert(VA.isRegLoc() && "Expected register VA assignment"); Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; - if (VA.getLocReg() == RISCV::X17) { + if (HiVA.isMemLoc()) { // Second half of f64 is passed on the stack. - int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true); + int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), + /*IsImmutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } else { // Second half of f64 is passed in another GPR. Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); - RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); + RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); } return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); @@ -15346,6 +17657,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments( report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: + case CallingConv::SPIR_KERNEL: + case CallingConv::GRAAL: break; case CallingConv::GHC: if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) @@ -15384,15 +17697,16 @@ SDValue RISCVTargetLowering::LowerFormalArguments( CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { CCValAssign &VA = ArgLocs[i]; SDValue ArgValue; // Passing f64 on RV32D with a soft float ABI must be handled as a special // case. - if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) - ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); - else if (VA.isRegLoc()) - ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this); + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.needsCustom()); + ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); + } else if (VA.isRegLoc()) + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); else ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); @@ -15404,12 +17718,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // stores are relative to that. 
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo())); - unsigned ArgIndex = Ins[i].OrigArgIndex; - unsigned ArgPartOffset = Ins[i].PartOffset; + unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; + unsigned ArgPartOffset = Ins[InsIdx].PartOffset; assert(VA.getValVT().isVector() || ArgPartOffset == 0); - while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { + while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { CCValAssign &PartVA = ArgLocs[i + 1]; - unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; + unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); if (PartVA.getValVT().isScalableVector()) Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); @@ -15417,6 +17731,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, MachinePointerInfo())); ++i; + ++InsIdx; } continue; } @@ -15428,57 +17743,56 @@ SDValue RISCVTargetLowering::LowerFormalArguments( MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); if (IsVarArg) { - ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); + ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); - // Offset of the first variable argument from stack pointer, and size of - // the vararg save area. For now, the varargs save area is either zero or - // large enough to hold a0-a7. - int VaArgOffset, VarArgsSaveSize; + // Size of the vararg save area. For now, the varargs save area is either + // zero or large enough to hold a0-a7. + int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); + int FI; // If all registers are allocated, then all varargs must be passed on the // stack and we don't need to save any argregs. - if (ArgRegs.size() == Idx) { - VaArgOffset = CCInfo.getStackSize(); - VarArgsSaveSize = 0; + if (VarArgsSaveSize == 0) { + int VaArgOffset = CCInfo.getStackSize(); + FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); } else { - VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); - VaArgOffset = -VarArgsSaveSize; + int VaArgOffset = -VarArgsSaveSize; + FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true); + + // If saving an odd number of registers then create an extra stack slot to + // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures + // offsets to even-numbered registered remain 2*XLEN-aligned. + if (Idx % 2) { + MFI.CreateFixedObject( + XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true); + VarArgsSaveSize += XLenInBytes; + } + + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + for (unsigned I = Idx; I < ArgRegs.size(); ++I) { + const Register Reg = RegInfo.createVirtualRegister(RC); + RegInfo.addLiveIn(ArgRegs[I], Reg); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); + SDValue Store = DAG.getStore( + Chain, DL, ArgValue, FIN, + MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes)); + OutChains.push_back(Store); + FIN = + DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL); + } } // Record the frame index of the first variable argument // which is a value necessary to VASTART. 
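The rewritten vararg handling above sizes the register save area as XLenInBytes times the number of still-unallocated argument GPRs, and adds one pad slot when an odd number of registers is saved so the area stays 2*XLEN aligned. A standalone arithmetic sketch of that sizing follows, using RV32 numbers and names invented for the example.

// vararg_save_area.cpp: arithmetic sketch of the varargs save-area sizing.
#include <cstdio>

int main() {
  const unsigned xlenBytes = 4;   // RV32
  const unsigned numArgGPRs = 8;  // a0..a7
  for (unsigned firstUnalloc = 0; firstUnalloc <= numArgGPRs; ++firstUnalloc) {
    unsigned saveSize = xlenBytes * (numArgGPRs - firstUnalloc);
    if (saveSize == 0) {
      std::printf("idx=%u: nothing to save, varargs live in the caller's "
                  "argument area\n", firstUnalloc);
      continue;
    }
    int areaOffset = -static_cast<int>(saveSize);  // area sits below the frame
    if (firstUnalloc % 2 != 0)
      saveSize += xlenBytes;  // extra slot keeps 2*XLEN alignment
    std::printf("idx=%u: save %u bytes, first saved register at offset %d\n",
                firstUnalloc, saveSize, areaOffset);
  }
  return 0;
}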
- int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); RVFI->setVarArgsFrameIndex(FI); - - // If saving an odd number of registers then create an extra stack slot to - // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures - // offsets to even-numbered registered remain 2*XLEN-aligned. - if (Idx % 2) { - MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); - VarArgsSaveSize += XLenInBytes; - } - - // Copy the integer registers that may have been used for passing varargs - // to the vararg save area. - for (unsigned I = Idx; I < ArgRegs.size(); - ++I, VaArgOffset += XLenInBytes) { - const Register Reg = RegInfo.createVirtualRegister(RC); - RegInfo.addLiveIn(ArgRegs[I], Reg); - SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); - FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, - MachinePointerInfo::getFixedStack(MF, FI)); - cast<StoreSDNode>(Store.getNode()) - ->getMemOperand() - ->setValue((Value *)nullptr); - OutChains.push_back(Store); - } RVFI->setVarArgsSaveSize(VarArgsSaveSize); } @@ -15632,15 +17946,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; SDValue StackPtr; - for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; + ++i, ++OutIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue ArgValue = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue ArgValue = OutVals[OutIdx]; + ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; // Handle passing f64 on RV32D with a soft float ABI as a special case. - bool IsF64OnRV32DSoftABI = - VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; - if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.isRegLoc() && "Expected register VA assignment"); + assert(VA.needsCustom()); SDValue SplitF64 = DAG.getNode( RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); SDValue Lo = SplitF64.getValue(0); @@ -15649,32 +17964,33 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); - if (RegLo == RISCV::X17) { + // Get the CCValAssign for the Hi part. + CCValAssign &HiVA = ArgLocs[++i]; + + if (HiVA.isMemLoc()) { // Second half of f64 is passed on the stack. - // Work out the address of the stack slot. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); // Emit the store. MemOpChains.push_back( - DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); + DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo())); } else { // Second half of f64 is passed in another GPR. - assert(RegLo < RISCV::X31 && "Invalid register pair"); - Register RegHigh = RegLo + 1; + Register RegHigh = HiVA.getLocReg(); RegsToPass.push_back(std::make_pair(RegHigh, Hi)); } continue; } - // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way - // as any other MemLoc. - // Promote the value if needed. // For now, only handle fully promoted and indirect arguments. 
if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. Align StackAlign = - std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), + std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), getPrefTypeAlign(ArgValue.getValueType(), DAG)); TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); // If the original argument was split (e.g. i128), we need @@ -15682,16 +17998,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Vectors may be partly split to registers and partly to the stack, in // which case the base address is partly offset and subsequent stores are // relative to that. - unsigned ArgIndex = Outs[i].OrigArgIndex; - unsigned ArgPartOffset = Outs[i].PartOffset; + unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; + unsigned ArgPartOffset = Outs[OutIdx].PartOffset; assert(VA.getValVT().isVector() || ArgPartOffset == 0); // Calculate the total size to store. We don't have access to what we're // actually storing other than performing the loop and collecting the // info. SmallVector<std::pair<SDValue, SDValue>> Parts; - while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { - SDValue PartValue = OutVals[i + 1]; - unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; + while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[OutIdx + 1]; + unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); EVT PartVT = PartValue.getValueType(); if (PartVT.isScalableVector()) @@ -15700,6 +18016,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); Parts.push_back(std::make_pair(PartValue, Offset)); ++i; + ++OutIdx; } SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); @@ -15841,7 +18158,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); // Copy all of the result registers out of their specified physreg. - for (auto &VA : RVLocs) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + auto &VA = RVLocs[i]; // Copy the value out SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); @@ -15850,9 +18168,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { - assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); - SDValue RetValue2 = - DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); + assert(VA.needsCustom()); + SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), + MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, @@ -15915,21 +18233,21 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. - for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { - SDValue Val = OutVals[i]; + for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { + SDValue Val = OutVals[OutIdx]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { // Handle returning f64 on RV32D with a soft float ABI. 
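The f64-on-RV32D soft-float paths in this patch split a double into two 32-bit halves for GPR passing (SplitF64) and reassemble it on the other side (BuildPairF64). Below is a standalone bit-level model of that round trip, assuming IEEE-754 binary64 doubles; it is not LLVM code.

// split_f64.cpp: bit-level model of SplitF64/BuildPairF64. An f64 travels
// through two 32-bit GPRs (lo holds the low word, hi the high word) and is
// reassembled losslessly.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

std::pair<uint32_t, uint32_t> splitF64(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));
  return {static_cast<uint32_t>(bits), static_cast<uint32_t>(bits >> 32)};
}

double buildPairF64(uint32_t lo, uint32_t hi) {
  uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
  double d;
  std::memcpy(&d, &bits, sizeof(d));
  return d;
}

int main() {
  double x = -123.456;
  auto [lo, hi] = splitF64(x);  // e.g. lo in a0, hi in a1 (or a stack slot)
  assert(buildPairF64(lo, hi) == x);
  return 0;
}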
assert(VA.isRegLoc() && "Expected return via registers"); + assert(VA.needsCustom()); SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), Val); SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); Register RegLo = VA.getLocReg(); - assert(RegLo < RISCV::X31 && "Invalid register pair"); - Register RegHi = RegLo + 1; + Register RegHi = RVLocs[++i].getLocReg(); if (STI.isRegisterReservedByUser(RegLo) || STI.isRegisterReservedByUser(RegHi)) @@ -16067,10 +18385,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ADD_LO) NODE_NAME_CASE(HI) NODE_NAME_CASE(LLA) - NODE_NAME_CASE(LGA) NODE_NAME_CASE(ADD_TPREL) - NODE_NAME_CASE(LA_TLS_IE) - NODE_NAME_CASE(LA_TLS_GD) NODE_NAME_CASE(MULHSU) NODE_NAME_CASE(SLLW) NODE_NAME_CASE(SRAW) @@ -16097,7 +18412,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FP_ROUND_BF16) NODE_NAME_CASE(FP_EXTEND_BF16) NODE_NAME_CASE(FROUND) - NODE_NAME_CASE(FPCLASS) + NODE_NAME_CASE(FCLASS) NODE_NAME_CASE(FMAX) NODE_NAME_CASE(FMIN) NODE_NAME_CASE(READ_CYCLE_WIDE) @@ -16159,6 +18474,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SREM_VL) NODE_NAME_CASE(SRA_VL) NODE_NAME_CASE(SRL_VL) + NODE_NAME_CASE(ROTL_VL) + NODE_NAME_CASE(ROTR_VL) NODE_NAME_CASE(SUB_VL) NODE_NAME_CASE(UDIV_VL) NODE_NAME_CASE(UREM_VL) @@ -16193,8 +18510,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CTLZ_VL) NODE_NAME_CASE(CTTZ_VL) NODE_NAME_CASE(CTPOP_VL) - NODE_NAME_CASE(FMINNUM_VL) - NODE_NAME_CASE(FMAXNUM_VL) + NODE_NAME_CASE(VFMIN_VL) + NODE_NAME_CASE(VFMAX_VL) NODE_NAME_CASE(MULHS_VL) NODE_NAME_CASE(MULHU_VL) NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) @@ -16241,6 +18558,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VWADDU_W_VL) NODE_NAME_CASE(VWSUB_W_VL) NODE_NAME_CASE(VWSUBU_W_VL) + NODE_NAME_CASE(VWSLL_VL) NODE_NAME_CASE(VFWMUL_VL) NODE_NAME_CASE(VFWADD_VL) NODE_NAME_CASE(VFWSUB_VL) @@ -16314,6 +18632,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // TODO: Support fixed vectors up to XLen for P extension? if (VT.isVector()) break; + if (VT == MVT::f16 && Subtarget.hasStdExtZhinxOrZhinxmin()) + return std::make_pair(0U, &RISCV::GPRF16RegClass); + if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) + return std::make_pair(0U, &RISCV::GPRF32RegClass); + if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) + return std::make_pair(0U, &RISCV::GPRPF64RegClass); return std::make_pair(0U, &RISCV::GPRNoX0RegClass); case 'f': if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) @@ -16501,13 +18825,13 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return Res; } -unsigned +InlineAsm::ConstraintCode RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { // Currently only support length 1 constraints. 
if (ConstraintCode.size() == 1) { switch (ConstraintCode[0]) { case 'A': - return InlineAsm::Constraint_A; + return InlineAsm::ConstraintCode::A; default: break; } @@ -16517,10 +18841,10 @@ RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { } void RISCVTargetLowering::LowerAsmOperandForConstraint( - SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { // Currently only support length 1 constraints. - if (Constraint.length() == 1) { + if (Constraint.size() == 1) { switch (Constraint[0]) { case 'I': // Validate & create a 12-bit signed immediate operand. @@ -16581,8 +18905,11 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const { - if (Subtarget.hasStdExtZtso()) + if (Subtarget.hasStdExtZtso()) { + if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(Ord); return nullptr; + } if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Acquire); @@ -16666,6 +18993,22 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { + // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace + // the atomic instruction with an AtomicRMWInst::And/Or with appropriate + // mask, as this produces better code than the LR/SC loop emitted by + // int_riscv_masked_atomicrmw_xchg. + if (AI->getOperation() == AtomicRMWInst::Xchg && + isa<ConstantInt>(AI->getValOperand())) { + ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); + if (CVal->isZero()) + return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, + Builder.CreateNot(Mask, "Inv_Mask"), + AI->getAlign(), Ord); + if (CVal->isMinusOne()) + return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, + AI->getAlign(), Ord); + } + unsigned XLen = Subtarget.getXLen(); Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); @@ -16741,9 +19084,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( return Result; } -bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, +bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { - return false; + // We have indexed loads for all legal index types. Indices are always + // zero extended + return Extend.getOpcode() == ISD::ZERO_EXTEND && + isTypeLegal(Extend.getValueType()) && + isTypeLegal(Extend.getOperand(0).getValueType()); } bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, @@ -16999,8 +19346,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( unsigned *Fast) const { if (!VT.isVector()) { if (Fast) - *Fast = Subtarget.enableUnalignedScalarMem(); - return Subtarget.enableUnalignedScalarMem(); + *Fast = Subtarget.hasFastUnalignedAccess(); + return Subtarget.hasFastUnalignedAccess(); } // All vector implementations must support element alignment @@ -17016,8 +19363,51 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( // misaligned accesses. TODO: Work through the codegen implications of // allowing such accesses to be formed, and considered fast. 
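The emitMaskedAtomicRMWIntrinsic hunk above replaces an atomicrmw xchg whose operand is the constant 0 or -1 with an AtomicRMWInst::And on the inverted lane mask, or an AtomicRMWInst::Or on the mask, avoiding the LR/SC loop. The rewrite rests on a bit identity over the naturally aligned word that the masked sequence actually operates on; a small self-contained check of that identity, with plain integers standing in for the atomic word (the values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // Word models the naturally aligned 32-bit location the masked atomic
  // sequence operates on; Mask selects the i8 lane the original
  // atomicrmw xchg targeted.
  const uint32_t Word = 0x11223344;
  const uint32_t Mask = 0x0000FF00;

  // Exchanging the lane with 0 keeps the other lanes and clears the
  // selected lane -- exactly Word & ~Mask (the And rewrite).
  uint32_t XchgZero = (Word & ~Mask) | (0x00u << 8);
  assert(XchgZero == (Word & ~Mask));

  // Exchanging the lane with -1 (0xFF) sets every bit of the lane --
  // exactly Word | Mask (the Or rewrite).
  uint32_t XchgAllOnes = (Word & ~Mask) | (0xFFu << 8);
  assert(XchgAllOnes == (Word | Mask));
  return 0;
}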
if (Fast) - *Fast = Subtarget.enableUnalignedVectorMem(); - return Subtarget.enableUnalignedVectorMem(); + *Fast = Subtarget.hasFastUnalignedAccess(); + return Subtarget.hasFastUnalignedAccess(); +} + + +EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const { + if (!Subtarget.hasVInstructions()) + return MVT::Other; + + if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) + return MVT::Other; + + // We use LMUL1 memory operations here for a non-obvious reason. Our caller + // has an expansion threshold, and we want the number of hardware memory + // operations to correspond roughly to that threshold. LMUL>1 operations + // are typically expanded linearly internally, and thus correspond to more + // than one actual memory operation. Note that store merging and load + // combining will typically form larger LMUL operations from the LMUL1 + // operations emitted here, and that's okay because combining isn't + // introducing new memory operations; it's just merging existing ones. + const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; + if (Op.size() < MinVLenInBytes) + // TODO: Figure out short memops. For the moment, do the default thing + // which ends up using scalar sequences. + return MVT::Other; + + // Prefer i8 for non-zero memset as it allows us to avoid materializing + // a large scalar constant and instead use vmv.v.x/i to do the + // broadcast. For everything else, prefer ELenVT to minimize VL and thus + // maximize the chance we can encode the size in the vsetvli. + MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen()); + MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; + + // Do we have sufficient alignment for our preferred VT? If not, revert + // to largest size allowed by our alignment criteria. 
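The clamping step just below falls back from the ELEN-sized element type to the widest width the known alignment permits whenever fast unaligned access is unavailable. With assumed numbers (VLEN = 128 and a memcpy whose destination is only 4-byte aligned) the arithmetic works out as in this sketch:

#include <algorithm>
#include <cassert>

int main() {
  const unsigned MinVLenInBytes = 128 / 8; // assumed getRealMinVLen() / 8
  const unsigned ELen = 64;                // assumed Subtarget.getELen()
  const unsigned DstAlign = 4;             // assumed destination alignment

  // Start from the ELEN-sized element, then clamp to the alignment we can
  // actually rely on, mirroring the RequiredAlign logic below.
  unsigned RequiredAlign = std::min(ELen / 8, DstAlign); // 8 bytes -> 4 bytes
  unsigned PreferredBits = RequiredAlign * 8;            // i64 demoted to i32
  unsigned NumElts = MinVLenInBytes / (PreferredBits / 8);

  assert(PreferredBits == 32 && NumElts == 4); // i.e. a v4i32, one LMUL1 op
  return 0;
}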
+ if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) { + Align RequiredAlign(PreferredVT.getStoreSize()); + if (Op.isFixedDstAlign()) + RequiredAlign = std::min(RequiredAlign, Op.getDstAlign()); + if (Op.isMemcpy()) + RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign()); + PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8); + } + return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize()); } bool RISCVTargetLowering::splitValueIntoRegisterParts( @@ -17142,10 +19532,8 @@ static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) { Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Function *ThreadPointerFunc = Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); - return IRB.CreatePointerCast( - IRB.CreateConstGEP1_32(IRB.getInt8Ty(), - IRB.CreateCall(ThreadPointerFunc), Offset), - IRB.getInt8PtrTy()->getPointerTo(0)); + return IRB.CreateConstGEP1_32(IRB.getInt8Ty(), + IRB.CreateCall(ThreadPointerFunc), Offset); } Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { @@ -17203,7 +19591,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, if (!isLegalElementTypeForRVV(ScalarType)) return false; - if (!Subtarget.enableUnalignedVectorMem() && + if (!Subtarget.hasFastUnalignedAccess() && Alignment < ScalarType.getStoreSize()) return false; @@ -17503,6 +19891,72 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY); } +bool RISCVTargetLowering::isCtpopFast(EVT VT) const { + if (VT.isScalableVector()) + return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); + if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) + return true; + return Subtarget.hasStdExtZbb() && + (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); +} + +unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, + ISD::CondCode Cond) const { + return isCtpopFast(VT) ? 0 : 1; +} + +bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { + // At the moment, the only scalable instruction GISel knows how to lower is + // ret with scalable argument. + + if (Inst.getType()->isScalableTy()) + return true; + + for (unsigned i = 0; i < Inst.getNumOperands(); ++i) + if (Inst.getOperand(i)->getType()->isScalableTy() && + !isa<ReturnInst>(&Inst)) + return true; + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { + if (AI->getAllocatedType()->isScalableTy()) + return true; + } + + return false; +} + +SDValue +RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N, 0); // Lower SDIV as SDIV + + // Only perform this transform if short forward branch opt is supported. + if (!Subtarget.hasShortForwardBranchOpt()) + return SDValue(); + EVT VT = N->getValueType(0); + if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) + return SDValue(); + + // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw. 
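BuildSDIVPow2 above only fires when a short forward branch (or conditional move) is available, because the power-of-two sdiv lowering needs one conditional add of (2**k - 1) before the arithmetic shift; the 2048 bound that follows keeps that addend inside a single 12-bit addi/addiw immediate. A self-contained sketch of the identity being selected (the helper name is made up for illustration):

#include <cassert>
#include <cstdint>

// Signed division by 2^K via a conditional add of (2^K - 1) followed by an
// arithmetic shift -- the pattern buildSDIVPow2WithCMov turns into a
// conditional move / short forward branch.
static int32_t SDivPow2(int32_t X, unsigned K) {
  int32_t Adjust = X < 0 ? (int32_t(1) << K) - 1 : 0;
  return (X + Adjust) >> K; // arithmetic shift on a two's-complement target
}

int main() {
  for (int32_t X : {-100000, -7, -1, 0, 1, 7, 100000})
    for (unsigned K : {1u, 4u, 11u}) // K = 11 gives the largest addend, 2047
      assert(SDivPow2(X, K) == X / (int32_t(1) << K));
  return 0;
}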
+ if (Divisor.sgt(2048) || Divisor.slt(-2048)) + return SDValue(); + return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created); +} + +bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( + EVT VT, const APInt &AndMask) const { + if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) + return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024); + return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); +} + +unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { + return Subtarget.getMinimumJumpTableEntries(); +} + namespace llvm::RISCVVIntrinsicsTable { #define GET_RISCVVIntrinsicsTable_IMPL diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h index 164ded95a1b5..41a2dc5771c8 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -22,9 +22,12 @@ #include <optional> namespace llvm { +class InstructionCost; class RISCVSubtarget; struct RISCVRegisterInfo; + namespace RISCVISD { +// clang-format off enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, RET_GLUE, @@ -54,9 +57,6 @@ enum NodeType : unsigned { // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation. ADD_TPREL, - // Load address. - LA_TLS_GD, - // Multiply high for signedxunsigned. MULHSU, // RV64I shifts, directly matching the semantics of the named RISC-V @@ -121,7 +121,7 @@ enum NodeType : unsigned { // inserter. FROUND, - FPCLASS, + FCLASS, // Floating point fmax and fmin matching the RISC-V instruction semantics. FMAX, FMIN, @@ -143,10 +143,11 @@ enum NodeType : unsigned { SM3P0, SM3P1, // Vector Extension + FIRST_VL_VECTOR_OP, // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand // for the VL value to be used for the operation. The first operand is // passthru operand. - VMV_V_V_VL, + VMV_V_V_VL = FIRST_VL_VECTOR_OP, // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand // for the VL value to be used for the operation. The first operand is // passthru operand. @@ -166,15 +167,13 @@ enum NodeType : unsigned { // expanded late to two scalar stores and a stride 0 vector load. // The first operand is passthru operand. SPLAT_VECTOR_SPLIT_I64_VL, - // Read VLENB CSR - READ_VLENB, // Truncates a RVV integer vector by one power-of-two. Carries both an extra // mask and VL operand. TRUNCATE_VECTOR_VL, // Matches the semantics of vslideup/vslidedown. The first operand is the - // pass-thru operand, the second is the source vector, the third is the - // XLenVT index (either constant or non-constant), the fourth is the mask - // and the fifth the VL. + // pass-thru operand, the second is the source vector, the third is the XLenVT + // index (either constant or non-constant), the fourth is the mask, the fifth + // is the VL and the sixth is the policy. VSLIDEUP_VL, VSLIDEDOWN_VL, // Matches the semantics of vslide1up/slide1down. The first operand is @@ -232,6 +231,8 @@ enum NodeType : unsigned { SREM_VL, SRA_VL, SRL_VL, + ROTL_VL, + ROTR_VL, SUB_VL, UDIV_VL, UREM_VL, @@ -258,8 +259,8 @@ enum NodeType : unsigned { FSUB_VL, FMUL_VL, FDIV_VL, - FMINNUM_VL, - FMAXNUM_VL, + VFMIN_VL, + VFMAX_VL, // Vector unary ops with a mask as a second operand and VL as a third operand. 
FNEG_VL, @@ -307,6 +308,7 @@ enum NodeType : unsigned { VWADDU_W_VL, VWSUB_W_VL, VWSUBU_W_VL, + VWSLL_VL, VFWMUL_VL, VFWADD_VL, @@ -360,6 +362,10 @@ enum NodeType : unsigned { // vfirst.m with additional mask and VL operands. VFIRST_VL, + LAST_VL_VECTOR_OP = VFIRST_VL, + + // Read VLENB CSR + READ_VLENB, // Reads value of CSR. // The first operand is a chain pointer. The second specifies address of the // required CSR. Two results are produced, the read value and the new chain @@ -405,22 +411,19 @@ enum NodeType : unsigned { STRICT_FSETCC_VL, STRICT_FSETCCS_VL, STRICT_VFROUND_NOEXCEPT_VL, + LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL, // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all // opcodes will be thought as target memory ops! - // Represents an AUIPC+L[WD] pair. Selected to PseudoLGA. - LGA = ISD::FIRST_TARGET_MEMORY_OPCODE, - // Load initial exec thread-local address. - LA_TLS_IE, - - TH_LWD, + TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE, TH_LWUD, TH_LDD, TH_SWD, TH_SDD, }; +// clang-format on } // namespace RISCVISD class RISCVTargetLowering : public TargetLowering { @@ -464,7 +467,7 @@ public: SmallVectorImpl<Use *> &Ops) const override; bool shouldScalarizeBinop(SDValue VecOp) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const; + std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, @@ -487,6 +490,12 @@ public: CallingConv::ID CC, EVT VT) const override; + unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const override; + bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override; @@ -514,6 +523,13 @@ public: shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; + /// Return the cost of LMUL for linear operations. + InstructionCost getLMULCost(MVT VT) const; + + InstructionCost getVRGatherVVCost(MVT VT) const; + InstructionCost getVRGatherVICost(MVT VT) const; + InstructionCost getVSlideCost(MVT VT) const; + // Provide custom lowering hooks for some operations. 
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -552,13 +568,14 @@ public: ConstraintType getConstraintType(StringRef Constraint) const override; - unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override; + InlineAsm::ConstraintCode + getInlineAsmMemConstraint(StringRef ConstraintCode) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; - void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; @@ -592,6 +609,10 @@ public: } bool convertSelectOfConstantsToMath(EVT VT) const override { return true; } + bool isCtpopFast(EVT VT) const override; + + unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override; + bool preferZeroCompareBranch() const override { return true; } bool shouldInsertFencesForAtomic(const Instruction *I) const override { @@ -698,6 +719,9 @@ public: MachineMemOperand::Flags Flags = MachineMemOperand::MONone, unsigned *Fast = nullptr) const override; + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; + bool splitValueIntoRegisterParts( SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) @@ -720,7 +744,13 @@ public: // The following equations have been reordered to prevent loss of precision // when calculating fractional LMUL. return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; - }; + } + + // Return inclusive (low, high) bounds on the value of VLMAX for the + // given scalable container type given known bounds on VLEN. 
+ static std::pair<unsigned, unsigned> + computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget); + static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul); static unsigned getSubregIndexByMVT(MVT VT, unsigned Index); static unsigned getRegClassIDForVecVT(MVT VT); @@ -730,7 +760,7 @@ public: const RISCVRegisterInfo *TRI); MVT getContainerForFixedLengthVector(MVT VT) const; - bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override; + bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; bool isLegalElementTypeForRVV(EVT ScalarTy) const; @@ -777,6 +807,8 @@ public: unsigned getMaxSupportedInterleaveFactor() const override { return 8; } + bool fallBackToDAGISel(const Instruction &Inst) const override; + bool lowerInterleavedLoad(LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, ArrayRef<unsigned> Indices, @@ -874,14 +906,12 @@ private: SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc, - bool HasMergeOp = false) const; - SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, - unsigned VecOpc) const; + SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, - unsigned RISCVISDOpc) const; + SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, @@ -935,6 +965,14 @@ private: /// For available scheduling models FDIV + two independent FMULs are much /// faster than two FDIVs. unsigned combineRepeatedFPDivisors() const override; + + SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const override; + + bool shouldFoldSelectWithSingleBitTest(EVT VT, + const APInt &AndMask) const override; + + unsigned getMinimumJumpTableEntries() const override; }; namespace RISCV { @@ -954,6 +992,9 @@ bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); + +ArrayRef<MCPhysReg> getArgGPRs(); + } // end namespace RISCV namespace RISCVVIntrinsicsTable { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp index 4b26c27bb4f8..b807abcc5681 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp @@ -8,8 +8,9 @@ // This file implements the machine function pass to insert read/write of CSR-s // of the RISC-V instructions. // -// Currently the pass implements naive insertion of a write to vxrm before an -// RVV fixed-point instruction. +// Currently the pass implements: +// -Writing and saving frm before an RVV floating-point instruction with a +// static rounding mode and restores the value after. 
// //===----------------------------------------------------------------------===// @@ -30,9 +31,7 @@ class RISCVInsertReadWriteCSR : public MachineFunctionPass { public: static char ID; - RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) { - initializeRISCVInsertReadWriteCSRPass(*PassRegistry::getPassRegistry()); - } + RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -56,60 +55,36 @@ char RISCVInsertReadWriteCSR::ID = 0; INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE, RISCV_INSERT_READ_WRITE_CSR_NAME, false, false) -// Returns the index to the rounding mode immediate value if any, otherwise the -// function will return None. -static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) { - uint64_t TSFlags = MI.getDesc().TSFlags; - if (!RISCVII::hasRoundModeOp(TSFlags)) - return std::nullopt; - - // The operand order - // ------------------------------------- - // | n-1 (if any) | n-2 | n-3 | n-4 | - // | policy | sew | vl | rm | - // ------------------------------------- - return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3; -} - -// This function inserts a write to vxrm when encountering an RVV fixed-point -// instruction. +// This function also swaps frm and restores it when encountering an RVV +// floating point instruction with a static rounding mode. bool RISCVInsertReadWriteCSR::emitWriteRoundingMode(MachineBasicBlock &MBB) { bool Changed = false; for (MachineInstr &MI : MBB) { - if (auto RoundModeIdx = getRoundModeIdx(MI)) { - if (RISCVII::usesVXRM(MI.getDesc().TSFlags)) { - unsigned VXRMImm = MI.getOperand(*RoundModeIdx).getImm(); - - Changed = true; - - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm)) - .addImm(VXRMImm); - MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false, - /*IsImp*/ true)); - } else { // FRM - unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm(); - - // The value is a hint to this pass to not alter the frm value. - if (FRMImm == RISCVFPRndMode::DYN) - continue; - - Changed = true; - - // Save - MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo(); - Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), - SavedFRM) - .addImm(FRMImm); - MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false, - /*IsImp*/ true)); - // Restore - MachineInstrBuilder MIB = - BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM)) - .addReg(SavedFRM); - MBB.insertAfter(MI, MIB); - } - } + int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc()); + if (FRMIdx < 0) + continue; + + unsigned FRMImm = MI.getOperand(FRMIdx).getImm(); + + // The value is a hint to this pass to not alter the frm value. 
+ if (FRMImm == RISCVFPRndMode::DYN) + continue; + + Changed = true; + + // Save + MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo(); + Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm), + SavedFRM) + .addImm(FRMImm); + MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false, + /*IsImp*/ true)); + // Restore + MachineInstrBuilder MIB = + BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM)) + .addReg(SavedFRM); + MBB.insertAfter(MI, MIB); } return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index f1ebe63cfa14..3400b24e0abb 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -67,16 +67,28 @@ static bool isVLPreservingConfig(const MachineInstr &MI) { return RISCV::X0 == MI.getOperand(0).getReg(); } -static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) { - const RISCVVPseudosTable::PseudoInfo *RVV = - RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); - if (!RVV) - return 0; - return RVV->BaseInstr; +static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { + default: + return false; + case RISCV::VFMV_S_F: + case RISCV::VFMV_V_F: + return true; + } +} + +static bool isScalarExtractInstr(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { + default: + return false; + case RISCV::VMV_X_S: + case RISCV::VFMV_F_S: + return true; + } } -static bool isScalarMoveInstr(const MachineInstr &MI) { - switch (getRVVMCOpcode(MI.getOpcode())) { +static bool isScalarInsertInstr(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: return false; case RISCV::VMV_S_X: @@ -86,7 +98,7 @@ static bool isScalarMoveInstr(const MachineInstr &MI) { } static bool isScalarSplatInstr(const MachineInstr &MI) { - switch (getRVVMCOpcode(MI.getOpcode())) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: return false; case RISCV::VMV_V_I: @@ -97,7 +109,7 @@ static bool isScalarSplatInstr(const MachineInstr &MI) { } static bool isVSlideInstr(const MachineInstr &MI) { - switch (getRVVMCOpcode(MI.getOpcode())) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: return false; case RISCV::VSLIDEDOWN_VX: @@ -111,7 +123,7 @@ static bool isVSlideInstr(const MachineInstr &MI) { /// Get the EEW for a load or store instruction. Return std::nullopt if MI is /// not a load or store which ignores SEW. static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { - switch (getRVVMCOpcode(MI.getOpcode())) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: return std::nullopt; case RISCV::VLE8_V: @@ -137,6 +149,13 @@ static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { } } +static bool isNonZeroLoadImmediate(MachineInstr &MI) { + return MI.getOpcode() == RISCV::ADDI && + MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && + MI.getOperand(1).getReg() == RISCV::X0 && + MI.getOperand(2).getImm() != 0; +} + /// Return true if this is an operation on mask registers. Note that /// this includes both arithmetic/logical ops and load/store (vlm/vsm). static bool isMaskRegOp(const MachineInstr &MI) { @@ -160,9 +179,13 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI, // lanes are undefined. 
return true; - // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands - // are solely IMPLICIT_DEFS), the pass through lanes are undefined. + // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose + // operands are solely IMPLICIT_DEFS, then the pass through lanes are + // undefined. const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == RISCV::NoRegister) + return true; + if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) { if (UseMI->isImplicitDef()) return true; @@ -188,10 +211,14 @@ struct DemandedFields { bool VLZeroness = false; // What properties of SEW we need to preserve. enum : uint8_t { - SEWEqual = 2, // The exact value of SEW needs to be preserved. - SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater + SEWEqual = 3, // The exact value of SEW needs to be preserved. + SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater // than or equal to the original value. - SEWNone = 0 // We don't need to preserve SEW at all. + SEWGreaterThanOrEqualAndLessThan64 = + 1, // SEW can be changed as long as it's greater + // than or equal to the original value, but must be less + // than 64. + SEWNone = 0 // We don't need to preserve SEW at all. } SEW = SEWNone; bool LMUL = false; bool SEWLMULRatio = false; @@ -243,6 +270,9 @@ struct DemandedFields { case SEWGreaterThanOrEqual: OS << "SEWGreaterThanOrEqual"; break; + case SEWGreaterThanOrEqualAndLessThan64: + OS << "SEWGreaterThanOrEqualAndLessThan64"; + break; case SEWNone: OS << "SEWNone"; break; @@ -270,13 +300,23 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) { /// of instructions) which use only the Used subfields and properties. static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, const DemandedFields &Used) { - if (Used.SEW == DemandedFields::SEWEqual && - RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) - return false; - - if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual && - RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) - return false; + switch (Used.SEW) { + case DemandedFields::SEWNone: + break; + case DemandedFields::SEWEqual: + if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) + return false; + break; + case DemandedFields::SEWGreaterThanOrEqual: + if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) + return false; + break; + case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: + if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || + RISCVVType::getSEW(NewVType) >= 64) + return false; + break; + } if (Used.LMUL && RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) @@ -302,7 +342,8 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, /// Return the fields and properties demanded by the provided instruction. DemandedFields getDemanded(const MachineInstr &MI, - const MachineRegisterInfo *MRI) { + const MachineRegisterInfo *MRI, + const RISCVSubtarget *ST) { // Warning: This function has to work on both the lowered (i.e. post // emitVSETVLIs) and pre-lowering forms. The main implication of this is // that it can't use the value of a SEW, VL, or Policy operand as they might @@ -354,7 +395,7 @@ DemandedFields getDemanded(const MachineInstr &MI, } // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. 
- if (isScalarMoveInstr(MI)) { + if (isScalarInsertInstr(MI)) { Res.LMUL = false; Res.SEWLMULRatio = false; Res.VLAny = false; @@ -365,11 +406,23 @@ DemandedFields getDemanded(const MachineInstr &MI, // tail lanes to either be the original value or -1. We are writing // unknown bits to the lanes here. if (hasUndefinedMergeOp(MI, *MRI)) { - Res.SEW = DemandedFields::SEWGreaterThanOrEqual; + if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64()) + Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; + else + Res.SEW = DemandedFields::SEWGreaterThanOrEqual; Res.TailPolicy = false; } } + // vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW. + if (isScalarExtractInstr(MI)) { + assert(!RISCVII::hasVLOp(TSFlags)); + Res.LMUL = false; + Res.SEWLMULRatio = false; + Res.TailPolicy = false; + Res.MaskPolicy = false; + } + return Res; } @@ -431,8 +484,22 @@ public: return AVLImm; } + void setAVL(VSETVLIInfo Info) { + assert(Info.isValid()); + if (Info.isUnknown()) + setUnknown(); + else if (Info.hasAVLReg()) + setAVLReg(Info.getAVLReg()); + else { + assert(Info.hasAVLImm()); + setAVLImm(Info.getAVLImm()); + } + } + unsigned getSEW() const { return SEW; } RISCVII::VLMUL getVLMUL() const { return VLMul; } + bool getTailAgnostic() const { return TailAgnostic; } + bool getMaskAgnostic() const { return MaskAgnostic; } bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const { if (hasAVLImm()) @@ -441,10 +508,7 @@ public: if (getAVLReg() == RISCV::X0) return true; if (MachineInstr *MI = MRI.getVRegDef(getAVLReg()); - MI && MI->getOpcode() == RISCV::ADDI && - MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && - MI->getOperand(1).getReg() == RISCV::X0 && - MI->getOperand(2).getImm() != 0) + MI && isNonZeroLoadImmediate(*MI)) return true; return false; } @@ -485,6 +549,8 @@ public: MaskAgnostic = MA; } + void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; } + unsigned encodeVTYPE() const { assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && "Can't encode VTYPE for uninitialized or unknown"); @@ -545,12 +611,6 @@ public: if (SEWLMULRatioOnly) return false; - // If the instruction doesn't need an AVLReg and the SEW matches, consider - // it compatible. - if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister) - if (SEW == Require.SEW) - return true; - if (Used.VLAny && !hasSameAVL(Require)) return false; @@ -661,10 +721,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) { #endif struct BlockData { - // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers - // made by this block. Calculated in Phase 1. - VSETVLIInfo Change; - // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this // block. Calculated in Phase 2. 
VSETVLIInfo Exit; @@ -680,6 +736,7 @@ struct BlockData { }; class RISCVInsertVSETVLI : public MachineFunctionPass { + const RISCVSubtarget *ST; const TargetInstrInfo *TII; MachineRegisterInfo *MRI; @@ -689,9 +746,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { public: static char ID; - RISCVInsertVSETVLI() : MachineFunctionPass(ID) { - initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry()); - } + RISCVInsertVSETVLI() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -712,9 +767,10 @@ private: MachineBasicBlock::iterator InsertPt, DebugLoc DL, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); - void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI); - void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI); - bool computeVLVTYPEChanges(const MachineBasicBlock &MBB); + void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const; + void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const; + bool computeVLVTYPEChanges(const MachineBasicBlock &MBB, + VSETVLIInfo &Info) const; void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doLocalPostpass(MachineBasicBlock &MBB); @@ -729,6 +785,25 @@ char RISCVInsertVSETVLI::ID = 0; INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, false, false) +// Return a VSETVLIInfo representing the changes made by this VSETVLI or +// VSETIVLI instruction. +static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { + VSETVLIInfo NewInfo; + if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { + NewInfo.setAVLImm(MI.getOperand(1).getImm()); + } else { + assert(MI.getOpcode() == RISCV::PseudoVSETVLI || + MI.getOpcode() == RISCV::PseudoVSETVLIX0); + Register AVLReg = MI.getOperand(1).getReg(); + assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && + "Can't handle X0, X0 vsetvli yet"); + NewInfo.setAVLReg(AVLReg); + } + NewInfo.setVTYPE(MI.getOperand(2).getImm()); + + return NewInfo; +} + static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, const MachineRegisterInfo *MRI) { VSETVLIInfo InstrInfo; @@ -779,6 +854,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, InstrInfo.setAVLReg(VLOp.getReg()); } } else { + assert(isScalarExtractInstr(MI)); InstrInfo.setAVLReg(RISCV::NoRegister); } #ifndef NDEBUG @@ -788,6 +864,21 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, #endif InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); + // If AVL is defined by a vsetvli with the same VLMAX, we can replace the + // AVL operand with the AVL of the defining vsetvli. We avoid general + // register AVLs to avoid extending live ranges without being sure we can + // kill the original source reg entirely. 
+ if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) { + MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg()); + if (DefMI && isVectorConfigInstr(*DefMI)) { + VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI); + if (DefInstrInfo.hasSameVLMAX(InstrInfo) && + (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) { + InstrInfo.setAVL(DefInstrInfo); + } + } + } + return InstrInfo; } @@ -798,25 +889,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo); } -// Return a VSETVLIInfo representing the changes made by this VSETVLI or -// VSETIVLI instruction. -static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { - VSETVLIInfo NewInfo; - if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { - NewInfo.setAVLImm(MI.getOperand(1).getImm()); - } else { - assert(MI.getOpcode() == RISCV::PseudoVSETVLI || - MI.getOpcode() == RISCV::PseudoVSETVLIX0); - Register AVLReg = MI.getOperand(1).getReg(); - assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && - "Can't handle X0, X0 vsetvli yet"); - NewInfo.setAVLReg(AVLReg); - } - NewInfo.setVTYPE(MI.getOperand(2).getImm()); - - return NewInfo; -} - void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc DL, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { @@ -875,10 +947,10 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, .addReg(RISCV::VL, RegState::Implicit); return; } - // Otherwise use an AVL of 0 to avoid depending on previous vl. + // Otherwise use an AVL of 1 to avoid depending on previous vl. BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(0) + .addImm(1) .addImm(Info.encodeVTYPE()); return; } @@ -916,7 +988,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) return true; - DemandedFields Used = getDemanded(MI, MRI); + DemandedFields Used = getDemanded(MI, MRI, ST); // A slidedown/slideup with an *undefined* merge op can freely clobber // elements not copied from the source vector (e.g. masked off, tail, or @@ -944,7 +1016,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, Used.LMUL = false; Used.SEWLMULRatio = false; Used.VLAny = false; - Used.SEW = DemandedFields::SEWGreaterThanOrEqual; + if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64()) + Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; + else + Used.SEW = DemandedFields::SEWGreaterThanOrEqual; Used.TailPolicy = false; } @@ -969,67 +1044,82 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, return true; } -// Given an incoming state reaching MI, modifies that state so that it is minimally -// compatible with MI. The resulting state is guaranteed to be semantically legal -// for MI, but may not be the state requested by MI. -void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) { +// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we +// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more +// places. 
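adjustIncoming, defined just below, rescales LMUL so that the SEW/LMUL ratio (and hence VLMAX) is preserved whenever the instruction does not itself demand a particular LMUL, which lets more vsetvlis be dropped or become VL-preserving. With LMUL counted in eighths of a register group, the arithmetic behind getSameRatioLMUL looks like this (the helper below is a standalone illustration, not the LLVM API, and assumes the resulting SEW/LMUL pair is legal):

#include <cassert>

// LMUL encoded in eighths: 1 = mf8, 2 = mf4, 4 = mf2, 8 = m1, 16 = m2, ...
// The SEW/LMUL ratio (VLMAX for a given VLEN) stays fixed when LMUL scales
// proportionally with SEW.
static unsigned sameRatioLMULInEighths(unsigned PrevSEW, unsigned PrevLMUL8,
                                       unsigned NewSEW) {
  unsigned Ratio = PrevSEW * 8 / PrevLMUL8; // SEW divided by LMUL
  return NewSEW * 8 / Ratio;
}

int main() {
  // Previous state: SEW=32, LMUL=1 (ratio 32). A following SEW=16 op that
  // doesn't care about LMUL can run at LMUL=1/2 with the same VLMAX, so no
  // VL toggle is needed.
  assert(sameRatioLMULInEighths(/*PrevSEW=*/32, /*PrevLMUL8=*/8,
                                /*NewSEW=*/16) == 4); // 4 eighths == mf2
  // SEW=64 at the same ratio needs LMUL=2.
  assert(sameRatioLMULInEighths(32, 8, 64) == 16);    // 16 eighths == m2
  return 0;
}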
+static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo, + DemandedFields &Demanded) { + VSETVLIInfo Info = NewInfo; + + if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() && + !PrevInfo.isUnknown()) { + if (auto NewVLMul = RISCVVType::getSameRatioLMUL( + PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW())) + Info.setVLMul(*NewVLMul); + Demanded.LMUL = true; + } + + return Info; +} + +// Given an incoming state reaching MI, minimally modifies that state so that it +// is compatible with MI. The resulting state is guaranteed to be semantically +// legal for MI, but may not be the state requested by MI. +void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, + const MachineInstr &MI) const { uint64_t TSFlags = MI.getDesc().TSFlags; if (!RISCVII::hasSEWOp(TSFlags)) return; const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); + assert(NewInfo.isValid() && !NewInfo.isUnknown()); if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info)) return; const VSETVLIInfo PrevInfo = Info; - Info = NewInfo; - - if (!RISCVII::hasVLOp(TSFlags)) - return; - - // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and - // VL > 0. We can discard the user requested AVL and just use the last - // one if we can prove it equally zero. This removes a vsetvli entirely - // if the types match or allows use of cheaper avl preserving variant - // if VLMAX doesn't change. If VLMAX might change, we couldn't use - // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to - // prevent extending live range of an avl register operand. + if (!Info.isValid() || Info.isUnknown()) + Info = NewInfo; + + DemandedFields Demanded = getDemanded(MI, MRI, ST); + const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded); + + // If MI only demands that VL has the same zeroness, we only need to set the + // AVL if the zeroness differs. This removes a vsetvli entirely if the types + // match or allows use of cheaper avl preserving variant if VLMAX doesn't + // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype" + // variant, so we avoid the transform to prevent extending live range of an + // avl register operand. // TODO: We can probably relax this for immediates. - if (isScalarMoveInstr(MI) && PrevInfo.isValid() && - PrevInfo.hasEquallyZeroAVL(Info, *MRI) && - Info.hasSameVLMAX(PrevInfo)) { - if (PrevInfo.hasAVLImm()) - Info.setAVLImm(PrevInfo.getAVLImm()); - else - Info.setAVLReg(PrevInfo.getAVLReg()); - return; - } - - // If AVL is defined by a vsetvli with the same VLMAX, we can - // replace the AVL operand with the AVL of the defining vsetvli. - // We avoid general register AVLs to avoid extending live ranges - // without being sure we can kill the original source reg entirely. - if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual()) - return; - MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg()); - if (!DefMI || !isVectorConfigInstr(*DefMI)) - return; - - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameVLMAX(Info) && - (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) { - if (DefInfo.hasAVLImm()) - Info.setAVLImm(DefInfo.getAVLImm()); - else - Info.setAVLReg(DefInfo.getAVLReg()); - return; + bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) && + IncomingInfo.hasSameVLMAX(PrevInfo); + if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero)) + Info.setAVL(IncomingInfo); + + Info.setVTYPE( + ((Demanded.LMUL || Demanded.SEWLMULRatio) ? 
IncomingInfo : Info) + .getVLMUL(), + ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(), + // Prefer tail/mask agnostic since it can be relaxed to undisturbed later + // if needed. + (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || + IncomingInfo.getTailAgnostic(), + (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || + IncomingInfo.getMaskAgnostic()); + + // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep + // the AVL. + if (Info.hasSEWLMULRatioOnly()) { + VSETVLIInfo RatiolessInfo = IncomingInfo; + RatiolessInfo.setAVL(Info); + Info = RatiolessInfo; } } // Given a state with which we evaluated MI (see transferBefore above for why // this might be different that the state MI requested), modify the state to // reflect the changes MI might make. -void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) { +void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, + const MachineInstr &MI) const { if (isVectorConfigInstr(MI)) { Info = getInfoForVSETVLI(MI); return; @@ -1048,18 +1138,18 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI Info = VSETVLIInfo::getUnknown(); } -bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { +bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, + VSETVLIInfo &Info) const { bool HadVectorOp = false; - BlockData &BBInfo = BlockInfo[MBB.getNumber()]; - BBInfo.Change = BBInfo.Pred; + Info = BlockInfo[MBB.getNumber()].Pred; for (const MachineInstr &MI : MBB) { - transferBefore(BBInfo.Change, MI); + transferBefore(Info, MI); if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags)) HadVectorOp = true; - transferAfter(BBInfo.Change, MI); + transferAfter(Info, MI); } return HadVectorOp; @@ -1098,8 +1188,8 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { // compatibility checks performed a blocks output state can change based on // the input state. To cache, we'd have to add logic for finding // never-compatible state changes. - computeVLVTYPEChanges(MBB); - VSETVLIInfo TmpStatus = BBInfo.Change; + VSETVLIInfo TmpStatus; + computeVLVTYPEChanges(MBB, TmpStatus); // If the new exit value matches the old exit value, we don't need to revisit // any blocks. @@ -1205,9 +1295,20 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (RISCVII::hasVLOp(TSFlags)) { MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (VLOp.isReg()) { + Register Reg = VLOp.getReg(); + MachineInstr *VLOpDef = MRI->getVRegDef(Reg); + // Erase the AVL operand from the instruction. VLOp.setReg(RISCV::NoRegister); VLOp.setIsKill(false); + + // If the AVL was an immediate > 31, then it would have been emitted + // as an ADDI. However, the ADDI might not have been used in the + // vsetvli, or a vsetvli might not have been emitted, so it may be + // dead now. + if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) && + MRI->use_nodbg_empty(Reg)) + VLOpDef->eraseFromParent(); } MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, /*isImp*/ true)); @@ -1251,36 +1352,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { } } -/// Return true if the VL value configured must be equal to the requested one. -static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { - if (!Info.hasAVLImm()) - // VLMAX is always the same value. 
- // TODO: Could extend to other registers by looking at the associated vreg - // def placement. - return RISCV::X0 == Info.getAVLReg(); - - unsigned AVL = Info.getAVLImm(); - unsigned SEW = Info.getSEW(); - unsigned AVLInBits = AVL * SEW; - - unsigned LMul; - bool Fractional; - std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL()); - - if (Fractional) - return ST.getRealMinVLen() / LMul >= AVLInBits; - return ST.getRealMinVLen() * LMul >= AVLInBits; -} - /// Perform simple partial redundancy elimination of the VSETVLI instructions /// we're about to insert by looking for cases where we can PRE from the /// beginning of one block to the end of one of its predecessors. Specifically, /// this is geared to catch the common case of a fixed length vsetvl in a single /// block loop when it could execute once in the preheader instead. void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { - const MachineFunction &MF = *MBB.getParent(); - const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); - if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) return; @@ -1308,9 +1385,21 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { if (UnavailablePred->succ_size() != 1) return; - // If VL can be less than AVL, then we can't reduce the frequency of exec. - if (!hasFixedResult(AvailableInfo, ST)) - return; + // If the AVL value is a register (other than our VLMAX sentinel), + // we need to prove the value is available at the point we're going + // to insert the vsetvli at. + if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) { + MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg()); + if (!AVLDefMI) + return; + // This is an inline dominance check which covers the case of + // UnavailablePred being the preheader of a loop. + if (AVLDefMI->getParent() != UnavailablePred) + return; + for (auto &TermMI : UnavailablePred->terminators()) + if (&TermMI == AVLDefMI) + return; + } // Model the effect of changing the input state of the block MBB to // AvailableInfo. We're looking for two issues here; one legality, @@ -1370,9 +1459,16 @@ static void doUnion(DemandedFields &A, DemandedFields B) { A.MaskPolicy |= B.MaskPolicy; } -static bool isNonZeroAVL(const MachineOperand &MO) { - if (MO.isReg()) - return RISCV::X0 == MO.getReg(); +static bool isNonZeroAVL(const MachineOperand &MO, + const MachineRegisterInfo &MRI) { + if (MO.isReg()) { + if (MO.getReg() == RISCV::X0) + return true; + if (MachineInstr *MI = MRI.getVRegDef(MO.getReg()); + MI && isNonZeroLoadImmediate(*MI)) + return true; + return false; + } assert(MO.isImm()); return 0 != MO.getImm(); } @@ -1381,7 +1477,8 @@ static bool isNonZeroAVL(const MachineOperand &MO) { // fields which would be observed. static bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI, - const DemandedFields &Used) { + const DemandedFields &Used, + const MachineRegisterInfo &MRI) { // If the VL values aren't equal, return false if either a) the former is // demanded, or b) we can't rewrite the former to be the later for // implementation reasons. @@ -1389,29 +1486,21 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, if (Used.VLAny) return false; - // TODO: Requires more care in the mutation... - if (isVLPreservingConfig(PrevMI)) - return false; - // We don't bother to handle the equally zero case here as it's largely // uninteresting. 
- if (Used.VLZeroness && - (!isNonZeroAVL(MI.getOperand(1)) || - !isNonZeroAVL(PrevMI.getOperand(1)))) - return false; + if (Used.VLZeroness) { + if (isVLPreservingConfig(PrevMI)) + return false; + if (!isNonZeroAVL(MI.getOperand(1), MRI) || + !isNonZeroAVL(PrevMI.getOperand(1), MRI)) + return false; + } // TODO: Track whether the register is defined between // PrevMI and MI. if (MI.getOperand(1).isReg() && RISCV::X0 != MI.getOperand(1).getReg()) return false; - - // TODO: We need to change the result register to allow this rewrite - // without the result forming a vl preserving vsetvli which is not - // a correct state merge. - if (PrevMI.getOperand(0).getReg() == RISCV::X0 && - MI.getOperand(1).isReg()) - return false; } if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) @@ -1433,7 +1522,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { if (!isVectorConfigInstr(MI)) { - doUnion(Used, getDemanded(MI, MRI)); + doUnion(Used, getDemanded(MI, MRI, ST)); continue; } @@ -1447,25 +1536,32 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { ToDelete.push_back(&MI); // Leave NextMI unchanged continue; - } else if (canMutatePriorConfig(MI, *NextMI, Used)) { + } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) { if (!isVLPreservingConfig(*NextMI)) { + MI.getOperand(0).setReg(NextMI->getOperand(0).getReg()); + MI.getOperand(0).setIsDead(false); + Register OldVLReg; + if (MI.getOperand(1).isReg()) + OldVLReg = MI.getOperand(1).getReg(); if (NextMI->getOperand(1).isImm()) MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm()); else MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false); + if (OldVLReg) { + MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg); + if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) && + MRI->use_nodbg_empty(OldVLReg)) + VLOpDef->eraseFromParent(); + } MI.setDesc(NextMI->getDesc()); } MI.getOperand(2).setImm(NextMI->getOperand(2).getImm()); - // Don't delete a vsetvli if its result might be used. - Register NextVRefDef = NextMI->getOperand(0).getReg(); - if (NextVRefDef == RISCV::X0 || - (NextVRefDef.isVirtual() && MRI->use_nodbg_empty(NextVRefDef))) - ToDelete.push_back(NextMI); + ToDelete.push_back(NextMI); // fallthrough } } NextMI = &MI; - Used = getDemanded(MI, MRI); + Used = getDemanded(MI, MRI, ST); } for (auto *MI : ToDelete) @@ -1488,13 +1584,13 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. - const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); - if (!ST.hasVInstructions()) + ST = &MF.getSubtarget<RISCVSubtarget>(); + if (!ST->hasVInstructions()) return false; LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n"); - TII = ST.getInstrInfo(); + TII = ST->getInstrInfo(); MRI = &MF.getRegInfo(); assert(BlockInfo.empty() && "Expect empty block infos"); @@ -1504,10 +1600,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Phase 1 - determine how VL/VTYPE are affected by the each block. for (const MachineBasicBlock &MBB : MF) { - HaveVectorOp |= computeVLVTYPEChanges(MBB); + VSETVLIInfo TmpStatus; + HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus); // Initial exit state is whatever change we found in the block. 
BlockData &BBInfo = BlockInfo[MBB.getNumber()]; - BBInfo.Exit = BBInfo.Change; + BBInfo.Exit = TmpStatus; LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) << " is " << BBInfo.Exit << "\n"); @@ -1552,22 +1649,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) doLocalPostpass(MBB); - // Once we're fully done rewriting all the instructions, do a final pass - // through to check for VSETVLIs which write to an unused destination. - // For the non X0, X0 variant, we can replace the destination register - // with X0 to reduce register pressure. This is really a generic - // optimization which can be applied to any dead def (TODO: generalize). - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == RISCV::PseudoVSETVLI || - MI.getOpcode() == RISCV::PseudoVSETIVLI) { - Register VRegDef = MI.getOperand(0).getReg(); - if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef)) - MI.getOperand(0).setReg(RISCV::X0); - } - } - } - // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output // of VLEFF/VLSEGFF. for (MachineBasicBlock &MBB : MF) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp new file mode 100644 index 000000000000..de2227f82192 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -0,0 +1,458 @@ +//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass inserts writes to the VXRM CSR as needed by vector instructions. +// Each instruction that uses VXRM carries an operand that contains its required +// VXRM value. This pass tries to optimize placement to avoid redundant writes +// to VXRM. +// +// This is done using 2 dataflow algorithms. The first is a forward data flow +// to calculate where a VXRM value is available. The second is a backwards +// dataflow to determine where a VXRM value is anticipated. +// +// Finally, we use the results of these two dataflows to insert VXRM writes +// where a value is anticipated, but not available. +// +// FIXME: This pass does not split critical edges, so there can still be some +// redundancy. +// +// FIXME: If we are willing to have writes that aren't always needed, we could +// reduce the number of VXRM writes in some cases. 
+//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/RISCVBaseInfo.h" +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include <queue> + +using namespace llvm; + +#define DEBUG_TYPE "riscv-insert-write-vxrm" +#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass" + +namespace { + +class VXRMInfo { + uint8_t VXRMImm = 0; + + enum : uint8_t { + Uninitialized, + Static, + Unknown, + } State = Uninitialized; + +public: + VXRMInfo() {} + + static VXRMInfo getUnknown() { + VXRMInfo Info; + Info.setUnknown(); + return Info; + } + + bool isValid() const { return State != Uninitialized; } + void setUnknown() { State = Unknown; } + bool isUnknown() const { return State == Unknown; } + + bool isStatic() const { return State == Static; } + + void setVXRMImm(unsigned Imm) { + assert(Imm <= 3 && "Unexpected VXRM value"); + VXRMImm = Imm; + State = Static; + } + unsigned getVXRMImm() const { + assert(isStatic() && VXRMImm <= 3 && "Unexpected state"); + return VXRMImm; + } + + bool operator==(const VXRMInfo &Other) const { + // Uninitialized is only equal to another Uninitialized. + if (State != Other.State) + return false; + + if (isStatic()) + return VXRMImm == Other.VXRMImm; + + assert((isValid() || isUnknown()) && "Unexpected state"); + return true; + } + + bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); } + + // Calculate the VXRMInfo visible to a block assuming this and Other are + // both predecessors. + VXRMInfo intersect(const VXRMInfo &Other) const { + // If the new value isn't valid, ignore it. + if (!Other.isValid()) + return *this; + + // If this value isn't valid, this must be the first predecessor, use it. + if (!isValid()) + return Other; + + // If either is unknown, the result is unknown. + if (isUnknown() || Other.isUnknown()) + return VXRMInfo::getUnknown(); + + // If we have an exact match, return this. + if (*this == Other) + return *this; + + // Otherwise the result is unknown. + return VXRMInfo::getUnknown(); + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Support for debugging, callable in GDB: V->dump() + LLVM_DUMP_METHOD void dump() const { + print(dbgs()); + dbgs() << "\n"; + } + + void print(raw_ostream &OS) const { + OS << '{'; + if (!isValid()) + OS << "Uninitialized"; + else if (isUnknown()) + OS << "Unknown"; + else + OS << getVXRMImm(); + OS << '}'; + } +#endif +}; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_ATTRIBUTE_USED +inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) { + V.print(OS); + return OS; +} +#endif + +struct BlockData { + // Indicates if the block uses VXRM. Uninitialized means no use. + VXRMInfo VXRMUse; + + // Indicates the VXRM output from the block. Unitialized means transparent. + VXRMInfo VXRMOut; + + // Keeps track of the available VXRM value at the start of the basic bloc. + VXRMInfo AvailableIn; + + // Keeps track of the available VXRM value at the end of the basic block. + VXRMInfo AvailableOut; + + // Keeps track of what VXRM is anticipated at the start of the basic block. + VXRMInfo AnticipatedIn; + + // Keeps track of what VXRM is anticipated at the end of the basic block. + VXRMInfo AnticipatedOut; + + // Keeps track of whether the block is already in the queue. 
+ bool InQueue; + + BlockData() = default; +}; + +class RISCVInsertWriteVXRM : public MachineFunctionPass { + const TargetInstrInfo *TII; + + std::vector<BlockData> BlockInfo; + std::queue<const MachineBasicBlock *> WorkList; + +public: + static char ID; + + RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return RISCV_INSERT_WRITE_VXRM_NAME; + } + +private: + bool computeVXRMChanges(const MachineBasicBlock &MBB); + void computeAvailable(const MachineBasicBlock &MBB); + void computeAnticipated(const MachineBasicBlock &MBB); + void emitWriteVXRM(MachineBasicBlock &MBB); +}; + +} // end anonymous namespace + +char RISCVInsertWriteVXRM::ID = 0; + +INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, + false, false) + +bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + + bool NeedVXRMWrite = false; + for (const MachineInstr &MI : MBB) { + int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); + if (VXRMIdx >= 0) { + unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); + + if (!BBInfo.VXRMUse.isValid()) + BBInfo.VXRMUse.setVXRMImm(NewVXRMImm); + + BBInfo.VXRMOut.setVXRMImm(NewVXRMImm); + NeedVXRMWrite = true; + continue; + } + + if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) { + if (!BBInfo.VXRMUse.isValid()) + BBInfo.VXRMUse.setUnknown(); + + BBInfo.VXRMOut.setUnknown(); + } + } + + return NeedVXRMWrite; +} + +void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) { + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + + BBInfo.InQueue = false; + + VXRMInfo Available; + if (MBB.pred_empty()) { + Available.setUnknown(); + } else { + for (const MachineBasicBlock *P : MBB.predecessors()) + Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut); + } + + // If we don't have any valid available info, wait until we do. + if (!Available.isValid()) + return; + + if (Available != BBInfo.AvailableIn) { + BBInfo.AvailableIn = Available; + LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB) + << " changed to " << BBInfo.AvailableIn << "\n"); + } + + if (BBInfo.VXRMOut.isValid()) + Available = BBInfo.VXRMOut; + + if (Available == BBInfo.AvailableOut) + return; + + BBInfo.AvailableOut = Available; + LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB) + << " changed to " << BBInfo.AvailableOut << "\n"); + + // Add the successors to the work list so that we can propagate. + for (MachineBasicBlock *S : MBB.successors()) { + if (!BlockInfo[S->getNumber()].InQueue) { + BlockInfo[S->getNumber()].InQueue = true; + WorkList.push(S); + } + } +} + +void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) { + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + + BBInfo.InQueue = false; + + VXRMInfo Anticipated; + if (MBB.succ_empty()) { + Anticipated.setUnknown(); + } else { + for (const MachineBasicBlock *S : MBB.successors()) + Anticipated = + Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn); + } + + // If we don't have any valid anticipated info, wait until we do. 
+ if (!Anticipated.isValid()) + return; + + if (Anticipated != BBInfo.AnticipatedOut) { + BBInfo.AnticipatedOut = Anticipated; + LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB) + << " changed to " << BBInfo.AnticipatedOut << "\n"); + } + + // If this block reads VXRM, copy it. + if (BBInfo.VXRMUse.isValid()) + Anticipated = BBInfo.VXRMUse; + + if (Anticipated == BBInfo.AnticipatedIn) + return; + + BBInfo.AnticipatedIn = Anticipated; + LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB) + << " changed to " << BBInfo.AnticipatedIn << "\n"); + + // Add the predecessors to the work list so that we can propagate. + for (MachineBasicBlock *P : MBB.predecessors()) { + if (!BlockInfo[P->getNumber()].InQueue) { + BlockInfo[P->getNumber()].InQueue = true; + WorkList.push(P); + } + } +} + +void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { + const BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + + VXRMInfo Info = BBInfo.AvailableIn; + + // Flag to indicates we need to insert a VXRM write. We want to delay it as + // late as possible in this block. + bool PendingInsert = false; + + // Insert VXRM write if anticipated and not available. + if (BBInfo.AnticipatedIn.isStatic()) { + // If this is the entry block and the value is anticipated, insert. + if (MBB.isEntryBlock()) { + PendingInsert = true; + } else { + // Search for any predecessors that wouldn't satisfy our requirement and + // insert a write VXRM if needed. + // NOTE: If one predecessor is able to provide the requirement, but + // another isn't, it means we have a critical edge. The better placement + // would be to split the critical edge. + for (MachineBasicBlock *P : MBB.predecessors()) { + const BlockData &PInfo = BlockInfo[P->getNumber()]; + // If it's available out of the predecessor, then we're ok. + if (PInfo.AvailableOut.isStatic() && + PInfo.AvailableOut.getVXRMImm() == + BBInfo.AnticipatedIn.getVXRMImm()) + continue; + // If the predecessor anticipates this value for all its succesors, + // then a write to VXRM would have already occured before this block is + // executed. + if (PInfo.AnticipatedOut.isStatic() && + PInfo.AnticipatedOut.getVXRMImm() == + BBInfo.AnticipatedIn.getVXRMImm()) + continue; + PendingInsert = true; + break; + } + } + + Info = BBInfo.AnticipatedIn; + } + + for (MachineInstr &MI : MBB) { + int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); + if (VXRMIdx >= 0) { + unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); + + if (PendingInsert || !Info.isStatic() || + Info.getVXRMImm() != NewVXRMImm) { + assert((!PendingInsert || + (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) && + "Pending VXRM insertion mismatch"); + LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs())); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm)) + .addImm(NewVXRMImm); + PendingInsert = false; + } + + MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false, + /*IsImp*/ true)); + Info.setVXRMImm(NewVXRMImm); + continue; + } + + if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) + Info.setUnknown(); + } + + // If all our successors anticipate a value, do the insert. + // NOTE: It's possible that not all predecessors of our successor provide the + // correct value. This can occur on critical edges. If we don't split the + // critical edge we'll also have a write vxrm in the succesor that is + // redundant with this one. 
+ if (PendingInsert || + (BBInfo.AnticipatedOut.isStatic() && + (!Info.isStatic() || + Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) { + assert((!PendingInsert || + (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() && + Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) && + "Pending VXRM insertion mismatch"); + LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB) + << " changing to " << BBInfo.AnticipatedOut << "\n"); + BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(), + TII->get(RISCV::WriteVXRMImm)) + .addImm(BBInfo.AnticipatedOut.getVXRMImm()); + } +} + +bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) { + // Skip if the vector extension is not enabled. + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + if (!ST.hasVInstructions()) + return false; + + TII = ST.getInstrInfo(); + + assert(BlockInfo.empty() && "Expect empty block infos"); + BlockInfo.resize(MF.getNumBlockIDs()); + + // Phase 1 - collect block information. + bool NeedVXRMChange = false; + for (const MachineBasicBlock &MBB : MF) + NeedVXRMChange |= computeVXRMChanges(MBB); + + if (!NeedVXRMChange) { + BlockInfo.clear(); + return false; + } + + // Phase 2 - Compute available VXRM using a forward walk. + for (const MachineBasicBlock &MBB : MF) { + WorkList.push(&MBB); + BlockInfo[MBB.getNumber()].InQueue = true; + } + while (!WorkList.empty()) { + const MachineBasicBlock &MBB = *WorkList.front(); + WorkList.pop(); + computeAvailable(MBB); + } + + // Phase 3 - Compute anticipated VXRM using a backwards walk. + for (const MachineBasicBlock &MBB : llvm::reverse(MF)) { + WorkList.push(&MBB); + BlockInfo[MBB.getNumber()].InQueue = true; + } + while (!WorkList.empty()) { + const MachineBasicBlock &MBB = *WorkList.front(); + WorkList.pop(); + computeAnticipated(MBB); + } + + // Phase 4 - Emit VXRM writes at the earliest place possible. + for (MachineBasicBlock &MBB : MF) + emitWriteVXRM(MBB); + + BlockInfo.clear(); + + return true; +} + +FunctionPass *llvm::createRISCVInsertWriteVXRMPass() { + return new RISCVInsertWriteVXRM(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 504952b6bd2f..e80ba26800a1 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -241,8 +241,8 @@ class PseudoQuietFCMP<DAGOperand Ty> } // Pseudo load instructions. -class PseudoLoad<string opcodestr, RegisterClass rdty = GPR> - : Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> { +class PseudoLoad<string opcodestr> + : Pseudo<(outs GPR:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> { let hasSideEffects = 0; let mayLoad = 1; let mayStore = 0; @@ -250,7 +250,7 @@ class PseudoLoad<string opcodestr, RegisterClass rdty = GPR> let isAsmParserOnly = 1; } -class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR> +class PseudoFloatLoad<string opcodestr, RegisterClass rdty> : Pseudo<(outs GPR:$tmp, rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> { let hasSideEffects = 0; let mayLoad = 1; @@ -270,20 +270,51 @@ class PseudoStore<string opcodestr, RegisterClass rsty = GPR> } // Instruction formats are listed in the order they appear in the RISC-V -// instruction set manual (R, I, S, B, U, J) with sub-formats (e.g. RVInstR4, -// RVInstRAtomic) sorted alphabetically. +// instruction set manual (R, R4, I, S, B, U, J). 
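The hunk below introduces RVInstRBase, which pins down the field layout shared by every R-type encoding. As a quick cross-check of those Inst{...} slices, here is how the fields pack into a 32-bit word (a standalone sketch, not code from the patch):

#include <cstdint>

// funct7[31:25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0],
// matching the bit slices assigned in RVInstRBase/RVInstR.
static uint32_t encodeRType(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                            uint32_t funct3, uint32_t rd, uint32_t opcode) {
  return (funct7 & 0x7f) << 25 | (rs2 & 0x1f) << 20 | (rs1 & 0x1f) << 15 |
         (funct3 & 0x7) << 12 | (rd & 0x1f) << 7 | (opcode & 0x7f);
}

// Example: "add a0, a1, a2" has funct7=0, rs2=x12, rs1=x11, funct3=0, rd=x10,
// opcode=0b0110011, which packs to 0x00C58533.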
+ +// Common base class for R format instructions. Bits {31-25} should be set by +// the subclasses. +class RVInstRBase<bits<3> funct3, RISCVOpcode opcode, dag outs, + dag ins, string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { + bits<5> rs2; + bits<5> rs1; + bits<5> rd; + + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = funct3; + let Inst{11-7} = rd; + let Inst{6-0} = opcode.Value; +} class RVInstR<bits<7> funct7, bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> + : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31-25} = funct7; +} + +class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3, + RISCVOpcode opcode, dag outs, dag ins, string opcodestr, + string argstr> + : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31-27} = funct5; + let Inst{26} = aq; + let Inst{25} = rl; +} + +class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs2; bits<5> rs1; + bits<3> frm; bits<5> rd; let Inst{31-25} = funct7; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = funct3; + let Inst{14-12} = frm; let Inst{11-7} = rd; let Inst{6-0} = opcode.Value; } @@ -323,83 +354,51 @@ class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins, let Inst{6-0} = opcode.Value; } -class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3, - RISCVOpcode opcode, dag outs, dag ins, string opcodestr, - string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { - bits<5> rs2; +// Common base class for I format instructions. Bits {31-20} should be set by +// the subclasses. 
+class RVInstIBase<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { bits<5> rs1; bits<5> rd; - let Inst{31-27} = funct5; - let Inst{26} = aq; - let Inst{25} = rl; - let Inst{24-20} = rs2; let Inst{19-15} = rs1; let Inst{14-12} = funct3; let Inst{11-7} = rd; let Inst{6-0} = opcode.Value; } -class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins, - string opcodestr, string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { - bits<5> rs2; - bits<5> rs1; - bits<3> frm; - bits<5> rd; - - let Inst{31-25} = funct7; - let Inst{24-20} = rs2; - let Inst{19-15} = rs1; - let Inst{14-12} = frm; - let Inst{11-7} = rd; - let Inst{6-0} = opcode.Value; -} - class RVInstI<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { bits<12> imm12; - bits<5> rs1; - bits<5> rd; let Inst{31-20} = imm12; - let Inst{19-15} = rs1; - let Inst{14-12} = funct3; - let Inst{11-7} = rd; - let Inst{6-0} = opcode.Value; } class RVInstIShift<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { bits<6> shamt; - bits<5> rs1; - bits<5> rd; let Inst{31-27} = imm11_7; let Inst{26} = 0; let Inst{25-20} = shamt; - let Inst{19-15} = rs1; - let Inst{14-12} = funct3; - let Inst{11-7} = rd; - let Inst{6-0} = opcode.Value; } class RVInstIShiftW<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { bits<5> shamt; - bits<5> rs1; - bits<5> rd; let Inst{31-25} = imm11_5; let Inst{24-20} = shamt; - let Inst{19-15} = rs1; - let Inst{14-12} = funct3; - let Inst{11-7} = rd; - let Inst{6-0} = opcode.Value; +} + +class RVInstIUnary<bits<12> imm12, bits<3> funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31-20} = imm12; } class RVInstS<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td new file mode 100644 index 000000000000..ede8c9809833 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td @@ -0,0 +1,26 @@ +//===-- RISCVInstrGISel.td - RISC-V GISel target pseudos ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +// RISC-V GlobalISel target pseudo instruction definitions. This is kept +// separately from the other tablegen files for organizational purposes, but +// share the same infrastructure. +// +//===----------------------------------------------------------------------===// + +class RISCVGenericInstruction : GenericInstruction { + let Namespace = "RISCV"; +} + +// Pseudo equivalent to a RISCVISD::FCLASS. 
+def G_FCLASS : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = false; +} +def : GINodeEquiv<G_FCLASS, riscv_fclass>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index c1065f73000f..1dcff7eb563e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineCombinerPattern.h" @@ -27,6 +28,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/TargetRegistry.h" @@ -293,6 +295,112 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI, return false; } +void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, + MCRegister SrcReg, bool KillSrc, + unsigned Opc, unsigned NF) const { + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + RISCVII::VLMUL LMul; + unsigned SubRegIdx; + unsigned VVOpc, VIOpc; + switch (Opc) { + default: + llvm_unreachable("Impossible LMUL for vector register copy."); + case RISCV::VMV1R_V: + LMul = RISCVII::LMUL_1; + SubRegIdx = RISCV::sub_vrm1_0; + VVOpc = RISCV::PseudoVMV_V_V_M1; + VIOpc = RISCV::PseudoVMV_V_I_M1; + break; + case RISCV::VMV2R_V: + LMul = RISCVII::LMUL_2; + SubRegIdx = RISCV::sub_vrm2_0; + VVOpc = RISCV::PseudoVMV_V_V_M2; + VIOpc = RISCV::PseudoVMV_V_I_M2; + break; + case RISCV::VMV4R_V: + LMul = RISCVII::LMUL_4; + SubRegIdx = RISCV::sub_vrm4_0; + VVOpc = RISCV::PseudoVMV_V_V_M4; + VIOpc = RISCV::PseudoVMV_V_I_M4; + break; + case RISCV::VMV8R_V: + assert(NF == 1); + LMul = RISCVII::LMUL_8; + SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0. 
+ VVOpc = RISCV::PseudoVMV_V_V_M8; + VIOpc = RISCV::PseudoVMV_V_I_M8; + break; + } + + bool UseVMV_V_V = false; + bool UseVMV_V_I = false; + MachineBasicBlock::const_iterator DefMBBI; + if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) { + UseVMV_V_V = true; + Opc = VVOpc; + + if (DefMBBI->getOpcode() == VIOpc) { + UseVMV_V_I = true; + Opc = VIOpc; + } + } + + if (NF == 1) { + auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg); + if (UseVMV_V_V) + MIB.addReg(DstReg, RegState::Undef); + if (UseVMV_V_I) + MIB = MIB.add(DefMBBI->getOperand(2)); + else + MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc)); + if (UseVMV_V_V) { + const MCInstrDesc &Desc = DefMBBI->getDesc(); + MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL + MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW + MIB.addImm(0); // tu, mu + MIB.addReg(RISCV::VL, RegState::Implicit); + MIB.addReg(RISCV::VTYPE, RegState::Implicit); + } + return; + } + + int I = 0, End = NF, Incr = 1; + unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); + unsigned DstEncoding = TRI->getEncodingValue(DstReg); + unsigned LMulVal; + bool Fractional; + std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul); + assert(!Fractional && "It is impossible be fractional lmul here."); + if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) { + I = NF - 1; + End = -1; + Incr = -1; + } + + for (; I != End; I += Incr) { + auto MIB = + BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I)); + if (UseVMV_V_V) + MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef); + if (UseVMV_V_I) + MIB = MIB.add(DefMBBI->getOperand(2)); + else + MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I), + getKillRegState(KillSrc)); + if (UseVMV_V_V) { + const MCInstrDesc &Desc = DefMBBI->getDesc(); + MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL + MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW + MIB.addImm(0); // tu, mu + MIB.addReg(RISCV::VL, RegState::Implicit); + MIB.addReg(RISCV::VTYPE, RegState::Implicit); + } + } +} + void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, @@ -329,195 +437,159 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // FPR->FPR copies and VR->VR copies. - unsigned Opc; - bool IsScalableVector = true; - unsigned NF = 1; - RISCVII::VLMUL LMul = RISCVII::LMUL_1; - unsigned SubRegIdx = RISCV::sub_vrm1_0; if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) { + unsigned Opc; if (STI.hasStdExtZfh()) { Opc = RISCV::FSGNJ_H; } else { - assert(STI.hasStdExtF() && STI.hasStdExtZfhmin() && + assert(STI.hasStdExtF() && + (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) && "Unexpected extensions"); - // Zfhmin subset doesn't have FSGNJ_H, replaces FSGNJ_H with FSGNJ_S. + // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S. 
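One detail of copyPhysRegVector above is worth spelling out: when the destination tuple starts inside the source tuple, the per-segment loop runs backwards so that source registers are read before they are overwritten, the same reasoning memmove uses. A small array-based sketch of that rule, with illustrative names only:

#include <cstddef>

// Copy Count consecutive "segments" from Src to Dst within one register-file
// style array. When the destination range starts inside the source range
// (Dst > Src && Dst < Src + Count), a forward copy would overwrite source
// segments before they are read, so copy backwards instead.
static void copySegments(unsigned *File, size_t Dst, size_t Src, size_t Count) {
  bool Backwards = Dst > Src && Dst < Src + Count;
  if (!Backwards) {
    for (size_t I = 0; I != Count; ++I)
      File[Dst + I] = File[Src + I];
  } else {
    for (size_t I = Count; I-- != 0;)
      File[Dst + I] = File[Src + I];
  }
}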
DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16, &RISCV::FPR32RegClass); SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16, &RISCV::FPR32RegClass); Opc = RISCV::FSGNJ_S; } - IsScalableVector = false; - } else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::FSGNJ_S; - IsScalableVector = false; - } else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::FSGNJ_D; - IsScalableVector = false; - } else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV2R_V; - LMul = RISCVII::LMUL_2; - } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV4R_V; - LMul = RISCVII::LMUL_4; - } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV8R_V; - LMul = RISCVII::LMUL_8; - } else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 2; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV2R_V; - SubRegIdx = RISCV::sub_vrm2_0; - NF = 2; - LMul = RISCVII::LMUL_2; - } else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV4R_V; - SubRegIdx = RISCV::sub_vrm4_0; - NF = 2; - LMul = RISCVII::LMUL_4; - } else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 3; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV2R_V; - SubRegIdx = RISCV::sub_vrm2_0; - NF = 3; - LMul = RISCVII::LMUL_2; - } else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 4; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV2R_V; - SubRegIdx = RISCV::sub_vrm2_0; - NF = 4; - LMul = RISCVII::LMUL_2; - } else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 5; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 6; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 7; - LMul = RISCVII::LMUL_1; - } else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) { - Opc = RISCV::VMV1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - NF = 8; - LMul = RISCVII::LMUL_1; - } else { - llvm_unreachable("Impossible reg-to-reg copy"); + BuildMI(MBB, MBBI, DL, get(Opc), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (IsScalableVector) { - bool UseVMV_V_V = false; - bool UseVMV_V_I = false; - MachineBasicBlock::const_iterator DefMBBI; - if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) { - UseVMV_V_V = true; - // We only need to handle LMUL = 1/2/4/8 here because we only define - // vector register classes for LMUL = 1/2/4/8. 
- unsigned VIOpc; - switch (LMul) { - default: - llvm_unreachable("Impossible LMUL for vector register copy."); - case RISCVII::LMUL_1: - Opc = RISCV::PseudoVMV_V_V_M1; - VIOpc = RISCV::PseudoVMV_V_I_M1; - break; - case RISCVII::LMUL_2: - Opc = RISCV::PseudoVMV_V_V_M2; - VIOpc = RISCV::PseudoVMV_V_I_M2; - break; - case RISCVII::LMUL_4: - Opc = RISCV::PseudoVMV_V_V_M4; - VIOpc = RISCV::PseudoVMV_V_I_M4; - break; - case RISCVII::LMUL_8: - Opc = RISCV::PseudoVMV_V_V_M8; - VIOpc = RISCV::PseudoVMV_V_I_M8; - break; - } + if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - if (DefMBBI->getOpcode() == VIOpc) { - UseVMV_V_I = true; - Opc = VIOpc; - } - } + if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - if (NF == 1) { - auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg); - if (UseVMV_V_V) - MIB.addReg(DstReg, RegState::Undef); - if (UseVMV_V_I) - MIB = MIB.add(DefMBBI->getOperand(2)); - else - MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc)); - if (UseVMV_V_V) { - const MCInstrDesc &Desc = DefMBBI->getDesc(); - MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL - MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW - MIB.addImm(0); // tu, mu - MIB.addReg(RISCV::VL, RegState::Implicit); - MIB.addReg(RISCV::VTYPE, RegState::Implicit); - } - } else { - int I = 0, End = NF, Incr = 1; - unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); - unsigned DstEncoding = TRI->getEncodingValue(DstReg); - unsigned LMulVal; - bool Fractional; - std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul); - assert(!Fractional && "It is impossible be fractional lmul here."); - if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) { - I = NF - 1; - End = -1; - Incr = -1; - } + if (RISCV::FPR32RegClass.contains(DstReg) && + RISCV::GPRRegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - for (; I != End; I += Incr) { - auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), - TRI->getSubReg(DstReg, SubRegIdx + I)); - if (UseVMV_V_V) - MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), - RegState::Undef); - if (UseVMV_V_I) - MIB = MIB.add(DefMBBI->getOperand(2)); - else - MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I), - getKillRegState(KillSrc)); - if (UseVMV_V_V) { - const MCInstrDesc &Desc = DefMBBI->getDesc(); - MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL - MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW - MIB.addImm(0); // tu, mu - MIB.addReg(RISCV::VL, RegState::Implicit); - MIB.addReg(RISCV::VTYPE, RegState::Implicit); - } - } - } - } else { - BuildMI(MBB, MBBI, DL, get(Opc), DstReg) - .addReg(SrcReg, getKillRegState(KillSrc)) + if (RISCV::GPRRegClass.contains(DstReg) && + RISCV::FPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (RISCV::FPR64RegClass.contains(DstReg) && + RISCV::GPRRegClass.contains(SrcReg)) { + assert(STI.getXLen() == 64 && "Unexpected GPR size"); + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (RISCV::GPRRegClass.contains(DstReg) && + 
RISCV::FPR64RegClass.contains(SrcReg)) { + assert(STI.getXLen() == 64 && "Unexpected GPR size"); + BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + // VR->VR copies. + if (RISCV::VRRegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V); + return; + } + + if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V); + return; + } + + if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V); + return; + } + + if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V); + return; + } + + if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/2); + return; + } + + if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, + /*NF=*/2); + return; + } + + if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V, + /*NF=*/2); + return; + } + + if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/3); + return; + } + + if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, + /*NF=*/3); + return; + } + + if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/4); + return; + } + + if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, + /*NF=*/4); + return; + } + + if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/5); + return; + } + + if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/6); + return; + } + + if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/7); + return; + } + + if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) { + copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, + /*NF=*/8); + return; } + + llvm_unreachable("Impossible reg-to-reg copy"); } void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -526,10 +598,6 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const { - DebugLoc DL; - if (I != MBB.end()) - DL = I->getDebugLoc(); - MachineFunction *MF = MBB.getParent(); MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -590,7 +658,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); MFI.setStackID(FI, TargetStackID::ScalableVector); - BuildMI(MBB, I, DL, get(Opcode)) + BuildMI(MBB, I, DebugLoc(), get(Opcode)) .addReg(SrcReg, getKillRegState(IsKill)) .addFrameIndex(FI) .addMemOperand(MMO); @@ -599,7 +667,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), 
MFI.getObjectAlign(FI)); - BuildMI(MBB, I, DL, get(Opcode)) + BuildMI(MBB, I, DebugLoc(), get(Opcode)) .addReg(SrcReg, getKillRegState(IsKill)) .addFrameIndex(FI) .addImm(0) @@ -613,10 +681,6 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const { - DebugLoc DL; - if (I != MBB.end()) - DL = I->getDebugLoc(); - MachineFunction *MF = MBB.getParent(); MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -677,7 +741,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); MFI.setStackID(FI, TargetStackID::ScalableVector); - BuildMI(MBB, I, DL, get(Opcode), DstReg) + BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg) .addFrameIndex(FI) .addMemOperand(MMO); } else { @@ -685,7 +749,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); - BuildMI(MBB, I, DL, get(Opcode), DstReg) + BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg) .addFrameIndex(FI) .addImm(0) .addMemOperand(MMO); @@ -704,8 +768,7 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( if (MF.getDataLayout().isBigEndian()) return nullptr; - // Fold load from stack followed by sext.w into lw. - // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w? + // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w. if (Ops.size() != 1 || Ops[0] != 1) return nullptr; @@ -753,38 +816,50 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, - MachineInstr::MIFlag Flag) const { + MachineInstr::MIFlag Flag, bool DstRenamable, + bool DstIsDead) const { Register SrcReg = RISCV::X0; if (!STI.is64Bit() && !isInt<32>(Val)) report_fatal_error("Should only materialize 32-bit constants for RV32"); - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits()); + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI); assert(!Seq.empty()); + bool SrcRenamable = false; + unsigned Num = 0; + for (const RISCVMatInt::Inst &Inst : Seq) { + bool LastItem = ++Num == Seq.size(); + unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) | + getRenamableRegState(DstRenamable); + unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) | + getRenamableRegState(SrcRenamable); switch (Inst.getOpndKind()) { case RISCVMatInt::Imm: - BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg) + BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | DstRegState) .addImm(Inst.getImm()) .setMIFlag(Flag); break; case RISCVMatInt::RegX0: - BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg) - .addReg(SrcReg, RegState::Kill) + BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | DstRegState) + .addReg(SrcReg, SrcRegState) .addReg(RISCV::X0) .setMIFlag(Flag); break; case RISCVMatInt::RegReg: - BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg) - .addReg(SrcReg, RegState::Kill) - .addReg(SrcReg, RegState::Kill) + BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | DstRegState) + .addReg(SrcReg, SrcRegState) + .addReg(SrcReg, SrcRegState) .setMIFlag(Flag); break; case RISCVMatInt::RegImm: - BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg) - .addReg(SrcReg, RegState::Kill) + BuildMI(MBB, MBBI, DL, 
get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | DstRegState) + .addReg(SrcReg, SrcRegState) .addImm(Inst.getImm()) .setMIFlag(Flag); break; @@ -792,6 +867,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, // Only the first instruction has X0 as its source. SrcReg = DstReg; + SrcRenamable = DstRenamable; } } @@ -829,25 +905,29 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target, Cond.push_back(LastInst.getOperand(1)); } -const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const { +unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case RISCVCC::COND_EQ: - return get(RISCV::BEQ); + return RISCV::BEQ; case RISCVCC::COND_NE: - return get(RISCV::BNE); + return RISCV::BNE; case RISCVCC::COND_LT: - return get(RISCV::BLT); + return RISCV::BLT; case RISCVCC::COND_GE: - return get(RISCV::BGE); + return RISCV::BGE; case RISCVCC::COND_LTU: - return get(RISCV::BLTU); + return RISCV::BLTU; case RISCVCC::COND_GEU: - return get(RISCV::BGEU); + return RISCV::BGEU; } } +const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const { + return get(RISCVCC::getBrCond(CC)); +} + RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) { switch (CC) { default: @@ -907,6 +987,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, if (I->getDesc().isIndirectBranch()) return true; + // We can't handle Generic branch opcodes from Global ISel. + if (I->isPreISelOpcode()) + return true; + // We can't handle blocks with more than 2 terminators. if (NumTerminators > 2) return true; @@ -1079,6 +1163,125 @@ bool RISCVInstrInfo::reverseBranchCondition( return false; } +bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + MachineBasicBlock *TBB, *FBB; + SmallVector<MachineOperand, 3> Cond; + if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false)) + return false; + (void)FBB; + + RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); + assert(CC != RISCVCC::COND_INVALID); + + if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE) + return false; + + // For two constants C0 and C1 from + // ``` + // li Y, C0 + // li Z, C1 + // ``` + // 1. if C1 = C0 + 1 + // we can turn: + // (a) blt Y, X -> bge X, Z + // (b) bge Y, X -> blt X, Z + // + // 2. if C1 = C0 - 1 + // we can turn: + // (a) blt X, Y -> bge Z, X + // (b) bge X, Y -> blt Z, X + // + // To make sure this optimization is really beneficial, we only + // optimize for cases where Y had only one use (i.e. only used by the branch). + + // Right now we only care about LI (i.e. ADDI x0, imm) + auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool { + if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() && + MI->getOperand(1).getReg() == RISCV::X0) { + Imm = MI->getOperand(2).getImm(); + return true; + } + return false; + }; + // Either a load from immediate instruction or X0. + auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool { + if (!Op.isReg()) + return false; + Register Reg = Op.getReg(); + if (Reg == RISCV::X0) { + Imm = 0; + return true; + } + if (!Reg.isVirtual()) + return false; + return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm); + }; + + MachineOperand &LHS = MI.getOperand(0); + MachineOperand &RHS = MI.getOperand(1); + // Try to find the register for constant Z; return + // invalid register otherwise. 
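A concrete instance of the rewrite described in the comment above, with C0 = 7: case 1 turns "blt Y, X" (taken when 7 < X) into "bge X, Z" with Z holding 8, and case 2 turns "blt X, Y" (taken when X < 7) into "bge Z, X" with Z holding 6. A brute-force check of the underlying identities:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t C0 = 7;
  for (int64_t X = -100; X <= 100; ++X) {
    // Case 1: Y holds C0, Z holds C0 + 1.  blt Y, X  <=>  bge X, Z.
    assert((C0 < X) == (X >= C0 + 1));
    // Case 2: Y holds C0, Z holds C0 - 1.  blt X, Y  <=>  bge Z, X.
    assert((X < C0) == (C0 - 1 >= X));
  }
  return 0;
}

The guards in the code (C0 < INT64_MAX, and a nonzero C0 for the unsigned comparisons) exist precisely because these identities break when C0 + 1 or C0 - 1 wraps.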
+ auto searchConst = [&](int64_t C1) -> Register { + MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend(); + auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool { + int64_t Imm; + return isLoadImm(&I, Imm) && Imm == C1; + }); + if (DefC1 != E) + return DefC1->getOperand(0).getReg(); + + return Register(); + }; + + bool Modify = false; + int64_t C0; + if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) { + // Might be case 1. + // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need + // to worry about unsigned overflow here) + if (C0 < INT64_MAX) + if (Register RegZ = searchConst(C0 + 1)) { + reverseBranchCondition(Cond); + Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false); + Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); + // We might extend the live range of Z, clear its kill flag to + // account for this. + MRI.clearKillFlags(RegZ); + Modify = true; + } + } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) { + // Might be case 2. + // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX + // when C0 is zero. + if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0) + if (Register RegZ = searchConst(C0 - 1)) { + reverseBranchCondition(Cond); + Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); + Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false); + // We might extend the live range of Z, clear its kill flag to + // account for this. + MRI.clearKillFlags(RegZ); + Modify = true; + } + } + + if (!Modify) + return false; + + // Build the new branch and remove the old one. + BuildMI(*MBB, MI, MI.getDebugLoc(), + getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm()))) + .add(Cond[1]) + .add(Cond[2]) + .addMBB(TBB); + MI.eraseFromParent(); + + return true; +} + MachineBasicBlock * RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { assert(MI.getDesc().isBranch() && "Unexpected opcode!"); @@ -1118,12 +1321,31 @@ unsigned getPredicatedOpcode(unsigned Opcode) { switch (Opcode) { case RISCV::ADD: return RISCV::PseudoCCADD; break; case RISCV::SUB: return RISCV::PseudoCCSUB; break; + case RISCV::SLL: return RISCV::PseudoCCSLL; break; + case RISCV::SRL: return RISCV::PseudoCCSRL; break; + case RISCV::SRA: return RISCV::PseudoCCSRA; break; case RISCV::AND: return RISCV::PseudoCCAND; break; case RISCV::OR: return RISCV::PseudoCCOR; break; case RISCV::XOR: return RISCV::PseudoCCXOR; break; + case RISCV::ADDI: return RISCV::PseudoCCADDI; break; + case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; + case RISCV::SRLI: return RISCV::PseudoCCSRLI; break; + case RISCV::SRAI: return RISCV::PseudoCCSRAI; break; + case RISCV::ANDI: return RISCV::PseudoCCANDI; break; + case RISCV::ORI: return RISCV::PseudoCCORI; break; + case RISCV::XORI: return RISCV::PseudoCCXORI; break; + case RISCV::ADDW: return RISCV::PseudoCCADDW; break; case RISCV::SUBW: return RISCV::PseudoCCSUBW; break; + case RISCV::SLLW: return RISCV::PseudoCCSLLW; break; + case RISCV::SRLW: return RISCV::PseudoCCSRLW; break; + case RISCV::SRAW: return RISCV::PseudoCCSRAW; break; + + case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break; + case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break; + case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break; + case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break; } return RISCV::INSTRUCTION_LIST_END; @@ -1144,6 +1366,10 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg, // Check if MI can be predicated and folded into the CCMOV. 
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END) return nullptr; + // Don't predicate li idiom. + if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() && + MI->getOperand(1).getReg() == RISCV::X0) + return nullptr; // Check if MI has any other defs or physreg uses. for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) { // Reject frame index operands, PEI can't handle the predicated pseudos. @@ -1290,7 +1516,20 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { if (isCompressibleInst(MI, STI)) return 2; } - return get(Opcode).getSize(); + + switch (Opcode) { + case TargetOpcode::STACKMAP: + // The upper bound for a stackmap intrinsic is the full length of its shadow + return StackMapOpers(&MI).getNumPatchBytes(); + case TargetOpcode::PATCHPOINT: + // The size of the patchpoint intrinsic is the number of bytes requested + return PatchPointOpers(&MI).getNumPatchBytes(); + case TargetOpcode::STATEPOINT: + // The size of the statepoint intrinsic is the number of bytes requested + return StatepointOpers(&MI).getNumPatchBytes(); + default: + return get(Opcode).getSize(); + } } unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const { @@ -1372,15 +1611,6 @@ MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const { return ForceMachineCombinerStrategy; } -void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, - MachineInstr &OldMI2, - MachineInstr &NewMI1, - MachineInstr &NewMI2) const { - uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); - NewMI1.setFlags(IntersectedFlags); - NewMI2.setFlags(IntersectedFlags); -} - void RISCVInstrInfo::finalizeInsInstrs( MachineInstr &Root, MachineCombinerPattern &P, SmallVectorImpl<MachineInstr *> &InsInstrs) const { @@ -1896,8 +2126,174 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, return true; } -// Return true if get the base operand, byte offset of an instruction and the -// memory width. Width is the size of memory that is being loaded/stored. 
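The next hunk adds canFoldIntoAddrMode and emitLdStWithAddr, which merge an ADDI that feeds the base register of a scalar load/store into the memory instruction's displacement when the combined offset still fits the 12-bit signed field. A rough sketch of just that offset check, with invented names:

#include <cstdint>

// Given "mem rd, OldOffset(reg)" whose base comes from "addi reg, base, Disp",
// the pair can become "mem rd, NewOffset(base)" if NewOffset fits isInt<12>.
// On RV32 the sum is first sign-extended from 32 bits, as in the patch.
static bool canMergeOffsets(int64_t OldOffset, int64_t Disp, bool IsRV64,
                            int64_t &NewOffset) {
  NewOffset = OldOffset + Disp;
  if (!IsRV64)
    NewOffset = (int64_t)(int32_t)NewOffset;
  return NewOffset >= -2048 && NewOffset <= 2047;
}

// E.g. OldOffset = 8 and Disp = 16 give NewOffset = 24, so
//   addi a1, a0, 16 ; lw a2, 8(a1)   can fold to   lw a2, 24(a0).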
+bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const { + switch (MemI.getOpcode()) { + default: + return false; + case RISCV::LB: + case RISCV::LBU: + case RISCV::LH: + case RISCV::LHU: + case RISCV::LW: + case RISCV::LWU: + case RISCV::LD: + case RISCV::FLH: + case RISCV::FLW: + case RISCV::FLD: + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FSH: + case RISCV::FSW: + case RISCV::FSD: + break; + } + + if (MemI.getOperand(0).getReg() == Reg) + return false; + + if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || + !AddrI.getOperand(2).isImm()) + return false; + + int64_t OldOffset = MemI.getOperand(2).getImm(); + int64_t Disp = AddrI.getOperand(2).getImm(); + int64_t NewOffset = OldOffset + Disp; + if (!STI.is64Bit()) + NewOffset = SignExtend64<32>(NewOffset); + + if (!isInt<12>(NewOffset)) + return false; + + AM.BaseReg = AddrI.getOperand(1).getReg(); + AM.ScaledReg = 0; + AM.Scale = 0; + AM.Displacement = NewOffset; + AM.Form = ExtAddrMode::Formula::Basic; + return true; +} + +MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const { + + const DebugLoc &DL = MemI.getDebugLoc(); + MachineBasicBlock &MBB = *MemI.getParent(); + + assert(AM.ScaledReg == 0 && AM.Scale == 0 && + "Addressing mode not supported for folding"); + + return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) + .addReg(MemI.getOperand(0).getReg(), + MemI.mayLoad() ? RegState::Define : 0) + .addReg(AM.BaseReg) + .addImm(AM.Displacement) + .setMemRefs(MemI.memoperands()) + .setMIFlags(MemI.getFlags()); +} + +bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( + const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, + int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const { + if (!LdSt.mayLoadOrStore()) + return false; + + // Conservatively, only handle scalar loads/stores for now. + switch (LdSt.getOpcode()) { + case RISCV::LB: + case RISCV::LBU: + case RISCV::SB: + case RISCV::LH: + case RISCV::LHU: + case RISCV::FLH: + case RISCV::SH: + case RISCV::FSH: + case RISCV::LW: + case RISCV::LWU: + case RISCV::FLW: + case RISCV::SW: + case RISCV::FSW: + case RISCV::LD: + case RISCV::FLD: + case RISCV::SD: + case RISCV::FSD: + break; + default: + return false; + } + const MachineOperand *BaseOp; + OffsetIsScalable = false; + if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) + return false; + BaseOps.push_back(BaseOp); + return true; +} + +// TODO: This was copied from SIInstrInfo. Could it be lifted to a common +// helper? +static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, + ArrayRef<const MachineOperand *> BaseOps1, + const MachineInstr &MI2, + ArrayRef<const MachineOperand *> BaseOps2) { + // Only examine the first "base" operand of each instruction, on the + // assumption that it represents the real base address of the memory access. + // Other operands are typically offsets or indices from this base address. 
+ if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) + return true; + + if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) + return false; + + auto MO1 = *MI1.memoperands_begin(); + auto MO2 = *MI2.memoperands_begin(); + if (MO1->getAddrSpace() != MO2->getAddrSpace()) + return false; + + auto Base1 = MO1->getValue(); + auto Base2 = MO2->getValue(); + if (!Base1 || !Base2) + return false; + Base1 = getUnderlyingObject(Base1); + Base2 = getUnderlyingObject(Base2); + + if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) + return false; + + return Base1 == Base2; +} + +bool RISCVInstrInfo::shouldClusterMemOps( + ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, + bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, + int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, + unsigned NumBytes) const { + // If the mem ops (to be clustered) do not have the same base ptr, then they + // should not be clustered + if (!BaseOps1.empty() && !BaseOps2.empty()) { + const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); + const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); + if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) + return false; + } else if (!BaseOps1.empty() || !BaseOps2.empty()) { + // If only one base op is empty, they do not have the same base ptr + return false; + } + + // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets + // indicate they likely share a cache line. + return ClusterSize <= 4; +} + +// Set BaseReg (the base register operand), Offset (the byte offset being +// accessed) and the access Width of the passed instruction that reads/writes +// memory. Returns false if the instruction does not read/write memory or the +// BaseReg/Offset/Width can't be determined. Is not guaranteed to always +// recognise base operands and offsets in all cases. +// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 +// function) and set it as appropriate. bool RISCVInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { @@ -1906,10 +2302,11 @@ bool RISCVInstrInfo::getMemOperandWithOffsetWidth( // Here we assume the standard RISC-V ISA, which uses a base+offset // addressing mode. You'll need to relax these conditions to support custom - // load/stores instructions. + // load/store instructions. if (LdSt.getNumExplicitOperands() != 3) return false; - if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm()) + if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || + !LdSt.getOperand(2).isImm()) return false; if (!LdSt.hasOneMemOperand()) @@ -2132,6 +2529,23 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( return It; } +std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, + Register Reg) const { + // TODO: Handle cases where Reg is a super- or sub-register of the + // destination register. + const MachineOperand &Op0 = MI.getOperand(0); + if (!Op0.isReg() || Reg != Op0.getReg()) + return std::nullopt; + + // Don't consider ADDIW as a candidate because the caller may not be aware + // of its sign extension behaviour. + if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && + MI.getOperand(2).isImm()) + return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; + + return std::nullopt; +} + // MIR printer helper function to annotate Operands with a comment. 
std::string RISCVInstrInfo::createMIROperandComment( const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, @@ -2202,9 +2616,9 @@ std::string RISCVInstrInfo::createMIROperandComment( case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) #define CASE_VFMA_SPLATS(OP) \ - CASE_VFMA_OPCODE_LMULS_MF4(OP, VF16): \ - case CASE_VFMA_OPCODE_LMULS_MF2(OP, VF32): \ - case CASE_VFMA_OPCODE_LMULS_M1(OP, VF64) + CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \ + case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \ + case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64) // clang-format on bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, @@ -2365,9 +2779,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ - CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VF16) \ - CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VF32) \ - CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VF64) + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, @@ -2591,6 +3005,7 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(MI.getOperand(3)) .add(MI.getOperand(4)) .add(MI.getOperand(5)); + break; } } MIB.copyImplicitOps(MI); @@ -2836,3 +3251,123 @@ bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); return FrmOp1.getImm() == FrmOp2.getImm(); } + +std::optional<unsigned> +RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { + // TODO: Handle Zvbb instructions + switch (Opcode) { + default: + return std::nullopt; + + // 11.6. Vector Single-Width Shift Instructions + case RISCV::VSLL_VX: + case RISCV::VSRL_VX: + case RISCV::VSRA_VX: + // 12.4. Vector Single-Width Scaling Shift Instructions + case RISCV::VSSRL_VX: + case RISCV::VSSRA_VX: + // Only the low lg2(SEW) bits of the shift-amount value are used. + return Log2SEW; + + // 11.7 Vector Narrowing Integer Right Shift Instructions + case RISCV::VNSRL_WX: + case RISCV::VNSRA_WX: + // 12.5. Vector Narrowing Fixed-Point Clip Instructions + case RISCV::VNCLIPU_WX: + case RISCV::VNCLIP_WX: + // Only the low lg2(2*SEW) bits of the shift-amount value are used. + return Log2SEW + 1; + + // 11.1. Vector Single-Width Integer Add and Subtract + case RISCV::VADD_VX: + case RISCV::VSUB_VX: + case RISCV::VRSUB_VX: + // 11.2. Vector Widening Integer Add/Subtract + case RISCV::VWADDU_VX: + case RISCV::VWSUBU_VX: + case RISCV::VWADD_VX: + case RISCV::VWSUB_VX: + case RISCV::VWADDU_WX: + case RISCV::VWSUBU_WX: + case RISCV::VWADD_WX: + case RISCV::VWSUB_WX: + // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions + case RISCV::VADC_VXM: + case RISCV::VADC_VIM: + case RISCV::VMADC_VXM: + case RISCV::VMADC_VIM: + case RISCV::VMADC_VX: + case RISCV::VSBC_VXM: + case RISCV::VMSBC_VXM: + case RISCV::VMSBC_VX: + // 11.5 Vector Bitwise Logical Instructions + case RISCV::VAND_VX: + case RISCV::VOR_VX: + case RISCV::VXOR_VX: + // 11.8. Vector Integer Compare Instructions + case RISCV::VMSEQ_VX: + case RISCV::VMSNE_VX: + case RISCV::VMSLTU_VX: + case RISCV::VMSLT_VX: + case RISCV::VMSLEU_VX: + case RISCV::VMSLE_VX: + case RISCV::VMSGTU_VX: + case RISCV::VMSGT_VX: + // 11.9. 
Vector Integer Min/Max Instructions + case RISCV::VMINU_VX: + case RISCV::VMIN_VX: + case RISCV::VMAXU_VX: + case RISCV::VMAX_VX: + // 11.10. Vector Single-Width Integer Multiply Instructions + case RISCV::VMUL_VX: + case RISCV::VMULH_VX: + case RISCV::VMULHU_VX: + case RISCV::VMULHSU_VX: + // 11.11. Vector Integer Divide Instructions + case RISCV::VDIVU_VX: + case RISCV::VDIV_VX: + case RISCV::VREMU_VX: + case RISCV::VREM_VX: + // 11.12. Vector Widening Integer Multiply Instructions + case RISCV::VWMUL_VX: + case RISCV::VWMULU_VX: + case RISCV::VWMULSU_VX: + // 11.13. Vector Single-Width Integer Multiply-Add Instructions + case RISCV::VMACC_VX: + case RISCV::VNMSAC_VX: + case RISCV::VMADD_VX: + case RISCV::VNMSUB_VX: + // 11.14. Vector Widening Integer Multiply-Add Instructions + case RISCV::VWMACCU_VX: + case RISCV::VWMACC_VX: + case RISCV::VWMACCSU_VX: + case RISCV::VWMACCUS_VX: + // 11.15. Vector Integer Merge Instructions + case RISCV::VMERGE_VXM: + // 11.16. Vector Integer Move Instructions + case RISCV::VMV_V_X: + // 12.1. Vector Single-Width Saturating Add and Subtract + case RISCV::VSADDU_VX: + case RISCV::VSADD_VX: + case RISCV::VSSUBU_VX: + case RISCV::VSSUB_VX: + // 12.2. Vector Single-Width Averaging Add and Subtract + case RISCV::VAADDU_VX: + case RISCV::VAADD_VX: + case RISCV::VASUBU_VX: + case RISCV::VASUB_VX: + // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation + case RISCV::VSMUL_VX: + // 16.1. Integer Scalar Move Instructions + case RISCV::VMV_S_X: + return 1U << Log2SEW; + } +} + +unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { + const RISCVVPseudosTable::PseudoInfo *RVV = + RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); + if (!RVV) + return 0; + return RVV->BaseInstr; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 99c907a98121..7e1d3f311806 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -43,6 +43,7 @@ enum CondCode { }; CondCode getOppositeBranchCondition(CondCode); +unsigned getBrCond(CondCode CC); } // end of namespace RISCVCC @@ -63,6 +64,10 @@ public: unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex, unsigned &MemBytes) const override; + void copyPhysRegVector(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + MCRegister DstReg, MCRegister SrcReg, bool KillSrc, + unsigned Opc, unsigned NF = 1) const; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc) const override; @@ -91,7 +96,8 @@ public: // Materializes the given integer Val into DstReg. 
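movImm below hands the value to RISCVMatInt::generateInstSeq to pick an LUI/ADDI style sequence. The heart of such sequences is the usual split of a constant into a 20-bit upper part and a sign-extended 12-bit lower part; a standalone sketch of that split (not the library routine itself):

#include <cassert>
#include <cstdint>

// Split a 32-bit value into the LUI immediate (Hi20) and ADDI immediate
// (Lo12) that rebuild it: Val == (Hi20 << 12) + signext(Lo12) (mod 2^32).
// The +0x800 rounding compensates for Lo12 being sign-extended.
static void splitConstant(uint32_t Val, uint32_t &Hi20, int32_t &Lo12) {
  Lo12 = (int32_t)(Val & 0xFFF);
  if (Lo12 >= 0x800)
    Lo12 -= 0x1000;
  Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
}

int main() {
  uint32_t Hi20; int32_t Lo12;
  splitConstant(0x12345678, Hi20, Lo12);
  assert(Hi20 == 0x12345 && Lo12 == 0x678); // lui rd, 0x12345 ; addi rd, rd, 0x678
  splitConstant(0x12345FFF, Hi20, Lo12);
  assert(Hi20 == 0x12346 && Lo12 == -1);    // lui rd, 0x12346 ; addi rd, rd, -1
  return 0;
}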
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, - MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; + MachineInstr::MIFlag Flag = MachineInstr::NoFlags, + bool DstRenamable = false, bool DstIsDead = false) const; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; @@ -116,6 +122,8 @@ public: bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + bool optimizeCondBranch(MachineInstr &MI) const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; bool isBranchOffsetInRange(unsigned BranchOpc, @@ -137,6 +145,25 @@ public: bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; + bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const override; + + MachineInstr *emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const override; + + bool getMemOperandsWithOffsetWidth( + const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps, + int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const override; + + bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, + int64_t Offset1, bool OffsetIsScalable1, + ArrayRef<const MachineOperand *> BaseOps2, + int64_t Offset2, bool OffsetIsScalable2, + unsigned ClusterSize, + unsigned NumBytes) const override; + bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, unsigned &Width, @@ -182,6 +209,9 @@ public: MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override; + std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI, + Register Reg) const override; + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, @@ -206,9 +236,6 @@ public: MachineTraceStrategy getMachineCombinerTraceStrategy() const override; - void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, - MachineInstr &NewMI1, - MachineInstr &NewMI2) const override; bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, @@ -265,6 +292,15 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); // one of the instructions does not have rounding mode, false will be returned. bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2); +// If \p Opcode is a .vx vector instruction, returns the lower number of bits +// that are used from the scalar .x operand for a given \p Log2SEW. Otherwise +// returns null. +std::optional<unsigned> getVectorLowDemandedScalarBits(uint16_t Opcode, + unsigned Log2SEW); + +// Returns the MC opcode of RVV pseudo instruction. +unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode); + // Special immediate for AVL operand of V pseudo instructions to indicate VLMax. 
static constexpr int64_t VLMaxSentinel = -1LL; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td index e58e3412aea3..edc08187d8f7 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -84,18 +84,12 @@ def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE", def riscv_add_lo : SDNode<"RISCVISD::ADD_LO", SDTIntBinOp>; def riscv_hi : SDNode<"RISCVISD::HI", SDTIntUnaryOp>; def riscv_lla : SDNode<"RISCVISD::LLA", SDTIntUnaryOp>; -def riscv_lga : SDNode<"RISCVISD::LGA", SDTLoad, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>]>>; -def riscv_la_tls_ie : SDNode<"RISCVISD::LA_TLS_IE", SDTLoad, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def riscv_la_tls_gd : SDNode<"RISCVISD::LA_TLS_GD", SDTIntUnaryOp>; - //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// @@ -149,18 +143,40 @@ class UImmAsmOperand<int width, string suffix = ""> : ImmAsmOperand<"U", width, suffix> { } +class RISCVOp<ValueType vt = XLenVT> : Operand<vt> { + let OperandNamespace = "RISCVOp"; +} + +class RISCVUImmOp<int bitsNum> : RISCVOp { + let ParserMatchClass = UImmAsmOperand<bitsNum>; + let DecoderMethod = "decodeUImmOperand<" # bitsNum # ">"; + let OperandType = "OPERAND_UIMM" # bitsNum; +} + +class RISCVUImmLeafOp<int bitsNum> : + RISCVUImmOp<bitsNum>, ImmLeaf<XLenVT, "return isUInt<" # bitsNum # ">(Imm);">; + +class RISCVSImmOp<int bitsNum> : RISCVOp { + let ParserMatchClass = SImmAsmOperand<bitsNum>; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmOperand<" # bitsNum # ">"; + let OperandType = "OPERAND_SIMM" # bitsNum; +} + +class RISCVSImmLeafOp<int bitsNum> : + RISCVSImmOp<bitsNum>, ImmLeaf<XLenVT, "return isInt<" # bitsNum # ">(Imm);">; + def FenceArg : AsmOperandClass { let Name = "FenceArg"; let RenderMethod = "addFenceArgOperands"; let ParserMethod = "parseFenceArg"; } -def fencearg : Operand<XLenVT> { +def fencearg : RISCVOp { let ParserMatchClass = FenceArg; let PrintMethod = "printFenceArg"; let DecoderMethod = "decodeUImmOperand<4>"; let OperandType = "OPERAND_UIMM4"; - let OperandNamespace = "RISCVOp"; } def UImmLog2XLenAsmOperand : AsmOperandClass { @@ -169,7 +185,7 @@ def UImmLog2XLenAsmOperand : AsmOperandClass { let DiagnosticType = "InvalidUImmLog2XLen"; } -def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{ +def uimmlog2xlen : RISCVOp, ImmLeaf<XLenVT, [{ if (Subtarget->is64Bit()) return isUInt<6>(Imm); return isUInt<5>(Imm); @@ -186,21 +202,17 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{ return isUInt<5>(Imm); }]; let OperandType = "OPERAND_UIMMLOG2XLEN"; - let OperandNamespace = "RISCVOp"; } -def uimm1 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<1>; - let DecoderMethod = "decodeUImmOperand<1>"; - let OperandType = "OPERAND_UIMM1"; - let OperandNamespace = "RISCVOp"; +def InsnDirectiveOpcode : AsmOperandClass { + let Name = "InsnDirectiveOpcode"; + let ParserMethod = "parseInsnDirectiveOpcode"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImm"; } -def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return 
isUInt<2>(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<2>; - let DecoderMethod = "decodeUImmOperand<2>"; - let OperandType = "OPERAND_UIMM2"; - let OperandNamespace = "RISCVOp"; +def uimm1 : RISCVUImmLeafOp<1>; +def uimm2 : RISCVUImmLeafOp<2> { let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -208,75 +220,22 @@ def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> { return isUInt<2>(Imm); }]; } - -def uimm3 : Operand<XLenVT> { - let ParserMatchClass = UImmAsmOperand<3>; - let DecoderMethod = "decodeUImmOperand<3>"; - let OperandType = "OPERAND_UIMM3"; - let OperandNamespace = "RISCVOp"; -} - -def uimm4 : Operand<XLenVT> { - let ParserMatchClass = UImmAsmOperand<4>; - let DecoderMethod = "decodeUImmOperand<4>"; - let OperandType = "OPERAND_UIMM4"; - let OperandNamespace = "RISCVOp"; -} - -def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<5>; - let DecoderMethod = "decodeUImmOperand<5>"; - let OperandType = "OPERAND_UIMM5"; - let OperandNamespace = "RISCVOp"; -} - -def InsnDirectiveOpcode : AsmOperandClass { - let Name = "InsnDirectiveOpcode"; - let ParserMethod = "parseInsnDirectiveOpcode"; - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isImm"; -} - -def uimm6 : Operand<XLenVT> { - let ParserMatchClass = UImmAsmOperand<6>; - let DecoderMethod = "decodeUImmOperand<6>"; - let OperandType = "OPERAND_UIMM6"; - let OperandNamespace = "RISCVOp"; -} - -def uimm7_opcode : Operand<XLenVT> { +def uimm3 : RISCVUImmOp<3>; +def uimm4 : RISCVUImmOp<4>; +def uimm5 : RISCVUImmLeafOp<5>; +def uimm6 : RISCVUImmLeafOp<6>; +def uimm7_opcode : RISCVUImmOp<7> { let ParserMatchClass = InsnDirectiveOpcode; - let DecoderMethod = "decodeUImmOperand<7>"; - let OperandType = "OPERAND_UIMM7"; - let OperandNamespace = "RISCVOp"; } - -def uimm7 : Operand<XLenVT> { - let ParserMatchClass = UImmAsmOperand<7>; - let DecoderMethod = "decodeUImmOperand<7>"; - let OperandType = "OPERAND_UIMM7"; - let OperandNamespace = "RISCVOp"; -} - -def uimm8 : Operand<XLenVT> { - let ParserMatchClass = UImmAsmOperand<8>; - let DecoderMethod = "decodeUImmOperand<8>"; - let OperandType = "OPERAND_UIMM8"; - let OperandNamespace = "RISCVOp"; -} - -def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<12>; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeSImmOperand<12>"; +def uimm7 : RISCVUImmOp<7>; +def uimm8 : RISCVUImmOp<8>; +def simm12 : RISCVSImmLeafOp<12> { let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) return isInt<12>(Imm); return MCOp.isBareSymbolRef(); }]; - let OperandType = "OPERAND_SIMM12"; - let OperandNamespace = "RISCVOp"; } // A 12-bit signed immediate which cannot fit in 6-bit signed immediate, @@ -299,26 +258,38 @@ def simm13_lsb0 : Operand<OtherVT> { let OperandType = "OPERAND_PCREL"; } -class UImm20Operand : Operand<XLenVT> { +class UImm20Operand : RISCVOp { let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<20>"; + let OperandType = "OPERAND_UIMM20"; +} + +class UImm20OperandMaybeSym : UImm20Operand { let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) return isUInt<20>(Imm); return MCOp.isBareSymbolRef(); }]; - let OperandType = "OPERAND_UIMM20"; - let OperandNamespace = "RISCVOp"; } -def uimm20_lui : UImm20Operand { +def uimm20_lui : UImm20OperandMaybeSym { let ParserMatchClass = UImmAsmOperand<20, "LUI">; } -def 
uimm20_auipc : UImm20Operand { +def uimm20_auipc : UImm20OperandMaybeSym { let ParserMatchClass = UImmAsmOperand<20, "AUIPC">; } +def uimm20 : UImm20Operand { + let ParserMatchClass = UImmAsmOperand<20>; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isUInt<20>(Imm); + }]; +} + def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> { let ParserMethod = "parseJALOffset"; } @@ -392,12 +363,11 @@ def CSRSystemRegister : AsmOperandClass { let DiagnosticType = "InvalidCSRSystemRegister"; } -def csr_sysreg : Operand<XLenVT> { +def csr_sysreg : RISCVOp { let ParserMatchClass = CSRSystemRegister; let PrintMethod = "printCSRSystemRegister"; let DecoderMethod = "decodeUImmOperand<12>"; let OperandType = "OPERAND_UIMM12"; - let OperandNamespace = "RISCVOp"; } // A parameterized register class alternative to i32imm/i64imm from Target.td. @@ -1105,6 +1075,10 @@ def : MnemonicAlias<"sbreak", "ebreak">; // that don't support this alias. def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>; +let Predicates = [HasStdExtZicfilp] in { +def : InstAlias<"lpad $imm20", (AUIPC X0, uimm20:$imm20)>; +} + //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// @@ -1209,11 +1183,13 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})", class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> : Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>; -class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> - : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; +class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT, + ValueType vt2 = XLenVT> + : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; -class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType> - : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)), +class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType, + ValueType vt = XLenVT> + : Pat<(vt (OpNode (vt GPR:$rs1), ImmType:$imm)), (Inst GPR:$rs1, ImmType:$imm)>; class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst> : PatGprImm<OpNode, Inst, simm12>; @@ -1232,7 +1208,9 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ }]>; def zexti32 : ComplexPattern<i64, 1, "selectZExtBits<32>">; def zexti16 : ComplexPattern<XLenVT, 1, "selectZExtBits<16>">; +def zexti16i32 : ComplexPattern<i32, 1, "selectZExtBits<16>">; def zexti8 : ComplexPattern<XLenVT, 1, "selectZExtBits<8>">; +def zexti8i32 : ComplexPattern<i32, 1, "selectZExtBits<8>">; def ext : PatFrags<(ops node:$A), [(sext node:$A), (zext node:$A)]>; @@ -1264,6 +1242,10 @@ def anyext_oneuse : unop_oneuse<anyext>; def ext_oneuse : unop_oneuse<ext>; def fpext_oneuse : unop_oneuse<any_fpextend>; +def 33signbits_node : PatLeaf<(i64 GPR:$src), [{ + return CurDAG->ComputeNumSignBits(SDValue(N, 0)) > 32; +}]>; + /// Simple arithmetic operations def : PatGprGpr<add, ADD>; @@ -1421,6 +1403,21 @@ def PseudoCCSUB : Pseudo<(outs GPR:$dst), GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +def PseudoCCSLL : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU, ReadSFBALU]>; +def 
PseudoCCSRL : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU, ReadSFBALU]>; +def PseudoCCSRA : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU, ReadSFBALU]>; def PseudoCCAND : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, @@ -1437,6 +1434,42 @@ def PseudoCCXOR : Pseudo<(outs GPR:$dst), Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +def PseudoCCADDI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCSLLI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCSRLI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCSRAI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCANDI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCORI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCXORI : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; + // RV64I instructions def PseudoCCADDW : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, @@ -1448,6 +1481,42 @@ def PseudoCCSUBW : Pseudo<(outs GPR:$dst), GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +def PseudoCCSLLW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU, ReadSFBALU]>; +def PseudoCCSRLW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU, ReadSFBALU]>; +def PseudoCCSRAW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU, ReadSFBALU]>; + +def PseudoCCADDIW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCSLLIW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCSRLIW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, 
ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; +def PseudoCCSRAIW : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, + ReadSFBALU]>; } multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> { @@ -1603,6 +1672,16 @@ def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), [], "jump", "$target, $rd">, Sched<[WriteIALU, WriteJalr, ReadJalr]>; +// Pseudo for a rematerializable constant materialization sequence. +// This is an experimental feature enabled by +// -riscv-use-rematerializable-movimm in RISCVISelDAGToDAG.cpp +// It will be expanded after register allocation. +// FIXME: The scheduling information does not reflect the multiple instructions. +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 1, + isPseudo = 1, isReMaterializable = 1, IsSignExtendingOpW = 1 in +def PseudoMovImm : Pseudo<(outs GPR:$dst), (ins i32imm:$imm), []>, + Sched<[WriteIALU]>; + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], @@ -1623,8 +1702,6 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, def PseudoLGA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "lga", "$dst, $src">; -def : Pat<(iPTR (riscv_lga tglobaladdr:$in)), (PseudoLGA tglobaladdr:$in)>; - let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], @@ -1641,16 +1718,11 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0, def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.ie", "$dst, $src">; -def : Pat<(iPTR (riscv_la_tls_ie tglobaltlsaddr:$in)), - (PseudoLA_TLS_IE tglobaltlsaddr:$in)>; - let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; -def : Pat<(riscv_la_tls_gd tglobaltlsaddr:$in), - (PseudoLA_TLS_GD tglobaltlsaddr:$in)>; /// Sign/Zero Extends @@ -1680,7 +1752,7 @@ def : LdPat<sextloadi8, LB>; def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb. def : LdPat<sextloadi16, LH>; def : LdPat<extloadi16, LH>; -def : LdPat<load, LW, i32>, Requires<[IsRV32]>; +def : LdPat<load, LW, i32>; def : LdPat<zextloadi8, LBU>; def : LdPat<zextloadi16, LHU>; @@ -1694,7 +1766,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, def : StPat<truncstorei8, SB, GPR, XLenVT>; def : StPat<truncstorei16, SH, GPR, XLenVT>; -def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>; +def : StPat<store, SW, GPR, i32>; /// Fences @@ -1796,6 +1868,12 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)), (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>; } +class binop_allhusers<SDPatternOperator operator> + : PatFrag<(ops node:$lhs, node:$rhs), + (XLenVT (operator node:$lhs, node:$rhs)), [{ + return hasAllHUsers(Node); +}]>; + // PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl // if only the lower 32 bits of their result is used. 
class binop_allwusers<SDPatternOperator operator> @@ -1904,9 +1982,9 @@ def : Pat<(debugtrap), (EBREAK)>; let Predicates = [IsRV64], Uses = [X5], Defs = [X1, X6, X7, X28, X29, X30, X31] in -def HWASAN_CHECK_MEMACCESS_SHORTGRANULES +def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<(outs), (ins GPRJALR:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess_shortgranules X5, GPRJALR:$ptr, + [(int_hwasan_check_memaccess_shortgranules (i64 X5), GPRJALR:$ptr, (i32 timm:$accessinfo))]>; // This gets lowered into a 20-byte instruction sequence (at most) @@ -1928,6 +2006,86 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)), (AddiPairImmSmall AddiPair:$rs2))>; } +let Predicates = [HasShortForwardBranchOpt] in +def : Pat<(XLenVT (abs GPR:$rs1)), + (PseudoCCSUB (XLenVT GPR:$rs1), (XLenVT X0), /* COND_LT */ 2, + (XLenVT GPR:$rs1), (XLenVT X0), (XLenVT GPR:$rs1))>; +let Predicates = [HasShortForwardBranchOpt, IsRV64] in +def : Pat<(sext_inreg (abs 33signbits_node:$rs1), i32), + (PseudoCCSUBW (i64 GPR:$rs1), (i64 X0), /* COND_LT */ 2, + (i64 GPR:$rs1), (i64 X0), (i64 GPR:$rs1))>; + +//===----------------------------------------------------------------------===// +// Experimental RV64 i32 legalization patterns. +//===----------------------------------------------------------------------===// + +def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>; + +// Convert from i32 immediate to i64 target immediate to make SelectionDAG type +// checking happy so we can use ADDIW which expects an XLen immediate. +def as_i64imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); +}]>; + +def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{ + KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0); + return Known.isNonNegative(); +}]>; + +let Predicates = [IsRV64] in { +def : LdPat<sextloadi8, LB, i32>; +def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. +def : LdPat<sextloadi16, LH, i32>; +def : LdPat<extloadi16, LH, i32>; +def : LdPat<zextloadi8, LBU, i32>; +def : LdPat<zextloadi16, LHU, i32>; + +def : StPat<truncstorei8, SB, GPR, i32>; +def : StPat<truncstorei16, SH, GPR, i32>; + +def : Pat<(anyext GPR:$src), (COPY GPR:$src)>; +def : Pat<(sext GPR:$src), (ADDIW GPR:$src, 0)>; +def : Pat<(trunc GPR:$src), (COPY GPR:$src)>; + +def : PatGprGpr<add, ADDW, i32, i32>; +def : PatGprGpr<sub, SUBW, i32, i32>; +def : PatGprGpr<and, AND, i32, i32>; +def : PatGprGpr<or, OR, i32, i32>; +def : PatGprGpr<xor, XOR, i32, i32>; +def : PatGprGpr<shiftopw<shl>, SLLW, i32, i64>; +def : PatGprGpr<shiftopw<srl>, SRLW, i32, i64>; +def : PatGprGpr<shiftopw<sra>, SRAW, i32, i64>; + +def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)), + (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (and GPR:$rs1, simm12i32:$imm)), + (ANDI GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (or GPR:$rs1, simm12i32:$imm)), + (ORI GPR:$rs1, (i64 (as_i64imm $imm)))>; +def : Pat<(i32 (xor GPR:$rs1, simm12i32:$imm)), + (XORI GPR:$rs1, (i64 (as_i64imm $imm)))>; + +def : PatGprImm<shl, SLLIW, uimm5, i32>; +def : PatGprImm<srl, SRLIW, uimm5, i32>; +def : PatGprImm<sra, SRAIW, uimm5, i32>; + +def : Pat<(i32 (and GPR:$rs, TrailingOnesMask:$mask)), + (SRLI (SLLI $rs, (i64 (XLenSubTrailingOnes $mask))), + (i64 (XLenSubTrailingOnes $mask)))>; + +// Use sext if the sign bit of the input is 0. 
+def : Pat<(zext_is_sext GPR:$src), (ADDIW GPR:$src, 0)>; +} + +let Predicates = [IsRV64, NotHasStdExtZba] in { +def : Pat<(zext GPR:$src), (SRLI (SLLI GPR:$src, 32), 32)>; + +// If we're shifting a 32-bit zero extended value left by 0-31 bits, use 2 +// shifts instead of 3. This can occur when unsigned is used to index an array. +def : Pat<(shl (zext GPR:$rs), uimm5:$shamt), + (SRLI (SLLI GPR:$rs, 32), (ImmSubFrom32 uimm5:$shamt))>; +} + //===----------------------------------------------------------------------===// // Standard extensions //===----------------------------------------------------------------------===// @@ -1951,7 +2109,6 @@ include "RISCVInstrInfoZk.td" // Vector include "RISCVInstrInfoV.td" -include "RISCVInstrInfoZvfbf.td" include "RISCVInstrInfoZvk.td" // Integer @@ -1970,3 +2127,9 @@ include "RISCVInstrInfoXVentana.td" include "RISCVInstrInfoXTHead.td" include "RISCVInstrInfoXSf.td" include "RISCVInstrInfoXCV.td" + +//===----------------------------------------------------------------------===// +// Global ISel +//===----------------------------------------------------------------------===// + +include "RISCVInstrGISel.td" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 8421109b8514..c8301fcc6b93 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -44,17 +44,11 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> { def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">; } -class AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, - ValueType vt = XLenVT> - : Pat<(StoreOp (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12), - (vt StTy:$rs2)), - (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>; - //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtA] in { +let Predicates = [HasStdExtA], IsSignExtendingOpW = 1 in { defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>; defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">, Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>; @@ -123,21 +117,21 @@ let Predicates = [HasAtomicLdSt] in { def : LdPat<atomic_load_16, LH>; def : LdPat<atomic_load_32, LW>; - def : AtomicStPat<atomic_store_8, SB, GPR>; - def : AtomicStPat<atomic_store_16, SH, GPR>; - def : AtomicStPat<atomic_store_32, SW, GPR>; + def : StPat<atomic_store_8, SB, GPR, XLenVT>; + def : StPat<atomic_store_16, SH, GPR, XLenVT>; + def : StPat<atomic_store_32, SW, GPR, XLenVT>; } let Predicates = [HasAtomicLdSt, IsRV64] in { def : LdPat<atomic_load_64, LD, i64>; - def : AtomicStPat<atomic_store_64, SD, GPR, i64>; + def : StPat<atomic_store_64, SD, GPR, i64>; } -let Predicates = [HasStdExtA] in { - /// AMOs -multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> { +multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, + list<Predicate> ExtraPreds = []> { +let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in { def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), !cast<RVInst>(BaseInst), vt>; def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), @@ -149,6 +143,19 @@ multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> { def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), 
!cast<RVInst>(BaseInst#"_AQ_RL"), vt>; } +let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in { + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst), vt>; +} +} defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">; defm : AMOPat<"atomic_load_add_32", "AMOADD_W">; @@ -160,16 +167,7 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">; defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">; defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">; -def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)), - (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)), - (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)), - (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)), - (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)), - (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; +let Predicates = [HasStdExtA] in { /// Pseudo AMOs @@ -318,30 +316,17 @@ def : Pat<(int_riscv_masked_cmpxchg_i32 } // Predicates = [HasStdExtA] -let Predicates = [HasStdExtA, IsRV64] in { +defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64>; -defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64>; -defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64>; -defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64>; -defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64>; -defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64>; -defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>; -defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>; -defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>; - -/// 64-bit AMOs - -def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)), - (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)), - (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)), - (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)), - (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; -def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)), - (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>; +let Predicates = [HasStdExtA, IsRV64] in { /// 64-bit pseudo AMOs @@ -387,3 +372,61 @@ def : Pat<(int_riscv_masked_cmpxchg_i64 (PseudoMaskedCmpXchg32 
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; } // Predicates = [HasStdExtA, IsRV64] + +//===----------------------------------------------------------------------===// +// Experimental RV64 i32 legalization patterns. +//===----------------------------------------------------------------------===// + +class PatGprGprA<SDPatternOperator OpNode, RVInst Inst, ValueType vt> + : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass AMOPat2<string AtomicOp, string BaseInst, ValueType vt = XLenVT, + list<Predicate> ExtraPreds = []> { +let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in { + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst#"_AQ"), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst#"_RL"), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst#"_AQ_RL"), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst#"_AQ_RL"), vt>; +} +let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in { + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst), vt>; + def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst), vt>; +} +} + +defm : AMOPat2<"atomic_swap_32", "AMOSWAP_W", i32>; +defm : AMOPat2<"atomic_load_add_32", "AMOADD_W", i32>; +defm : AMOPat2<"atomic_load_and_32", "AMOAND_W", i32>; +defm : AMOPat2<"atomic_load_or_32", "AMOOR_W", i32>; +defm : AMOPat2<"atomic_load_xor_32", "AMOXOR_W", i32>; +defm : AMOPat2<"atomic_load_max_32", "AMOMAX_W", i32>; +defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>; +defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>; +defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>; + +defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>; + +let Predicates = [HasAtomicLdSt] in { + def : LdPat<atomic_load_8, LB, i32>; + def : LdPat<atomic_load_16, LH, i32>; + def : LdPat<atomic_load_32, LW, i32>; + + def : StPat<atomic_store_8, SB, GPR, i32>; + def : StPat<atomic_store_16, SH, GPR, i32>; + def : StPat<atomic_store_32, SW, GPR, i32>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index 74439bb67c61..07137031d9fc 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -18,7 +18,7 @@ def UImmLog2XLenNonZeroAsmOperand : AsmOperandClass { let DiagnosticType = "InvalidUImmLog2XLenNonZero"; } -def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{ +def uimmlog2xlennonzero : RISCVOp, ImmLeaf<XLenVT, [{ if (Subtarget->is64Bit()) return isUInt<6>(Imm) && (Imm != 0); return isUInt<5>(Imm) && (Imm != 0); @@ -27,7 +27,6 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{ // TODO: should ensure invalid shamt is rejected when decoding. 
let DecoderMethod = "decodeUImmNonZeroOperand<6>"; let OperandType = "OPERAND_UIMMLOG2XLEN_NONZERO"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -38,12 +37,7 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{ }]; } -def simm6 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<6>(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<6>; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeSImmOperand<6>"; - let OperandType = "OPERAND_SIMM6"; - let OperandNamespace = "RISCVOp"; +def simm6 : RISCVSImmLeafOp<6> { let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -52,13 +46,12 @@ def simm6 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<6>(Imm);}]> { }]; } -def simm6nonzero : Operand<XLenVT>, +def simm6nonzero : RISCVOp, ImmLeaf<XLenVT, [{return (Imm != 0) && isInt<6>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<6, "NonZero">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeSImmNonZeroOperand<6>"; let OperandType = "OPERAND_SIMM6_NONZERO"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -67,11 +60,10 @@ def simm6nonzero : Operand<XLenVT>, }]; } -def immzero : Operand<XLenVT>, +def immzero : RISCVOp, ImmLeaf<XLenVT, [{return (Imm == 0);}]> { let ParserMatchClass = ImmZeroAsmOperand; let OperandType = "OPERAND_ZERO"; - let OperandNamespace = "RISCVOp"; } def CLUIImmAsmOperand : AsmOperandClass { @@ -86,7 +78,7 @@ def CLUIImmAsmOperand : AsmOperandClass { // loaded in to bits 17-12 of the destination register and sign extended from // bit 17. Therefore, this 6-bit immediate can represent values in the ranges // [1, 31] and [0xfffe0, 0xfffff]. -def c_lui_imm : Operand<XLenVT>, +def c_lui_imm : RISCVOp, ImmLeaf<XLenVT, [{return (Imm != 0) && (isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff));}]> { @@ -94,7 +86,6 @@ def c_lui_imm : Operand<XLenVT>, let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeCLUIImmOperand"; let OperandType = "OPERAND_CLUI_IMM"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -105,13 +96,12 @@ def c_lui_imm : Operand<XLenVT>, } // A 7-bit unsigned immediate where the least significant two bits are zero. -def uimm7_lsb00 : Operand<XLenVT>, +def uimm7_lsb00 : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<7, "Lsb00">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<7>"; let OperandType = "OPERAND_UIMM7_LSB00"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -121,13 +111,12 @@ def uimm7_lsb00 : Operand<XLenVT>, } // A 8-bit unsigned immediate where the least significant two bits are zero. -def uimm8_lsb00 : Operand<XLenVT>, +def uimm8_lsb00 : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<8, "Lsb00">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<8>"; let OperandType = "OPERAND_UIMM8_LSB00"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -137,13 +126,12 @@ def uimm8_lsb00 : Operand<XLenVT>, } // A 8-bit unsigned immediate where the least significant three bits are zero. 
-def uimm8_lsb000 : Operand<XLenVT>, +def uimm8_lsb000 : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<8, "Lsb000">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<8>"; let OperandType = "OPERAND_UIMM8_LSB000"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -170,13 +158,12 @@ def simm9_lsb0 : Operand<OtherVT>, } // A 9-bit unsigned immediate where the least significant three bits are zero. -def uimm9_lsb000 : Operand<XLenVT>, +def uimm9_lsb000 : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<9, "Lsb000">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<9>"; let OperandType = "OPERAND_UIMM9_LSB000"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -187,14 +174,13 @@ def uimm9_lsb000 : Operand<XLenVT>, // A 10-bit unsigned immediate where the least significant two bits are zero // and the immediate can't be zero. -def uimm10_lsb00nonzero : Operand<XLenVT>, +def uimm10_lsb00nonzero : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> { let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmNonZeroOperand<10>"; let OperandType = "OPERAND_UIMM10_LSB00_NONZERO"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -204,14 +190,13 @@ def uimm10_lsb00nonzero : Operand<XLenVT>, } // A 10-bit signed immediate where the least significant four bits are zero. -def simm10_lsb0000nonzero : Operand<XLenVT>, +def simm10_lsb0000nonzero : RISCVOp, ImmLeaf<XLenVT, [{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeSImmNonZeroOperand<10>"; let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -243,11 +228,10 @@ def InsnCDirectiveOpcode : AsmOperandClass { let PredicateMethod = "isImm"; } -def uimm2_opcode : Operand<XLenVT> { +def uimm2_opcode : RISCVOp { let ParserMatchClass = InsnCDirectiveOpcode; let DecoderMethod = "decodeUImmOperand<2>"; let OperandType = "OPERAND_UIMM2"; - let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// @@ -972,8 +956,14 @@ def : CompressPat<(JAL X0, simm12_lsb0:$offset), (C_J simm12_lsb0:$offset)>; def : CompressPat<(BEQ GPRC:$rs1, X0, simm9_lsb0:$imm), (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>; +let isCompressOnly = true in +def : CompressPat<(BEQ X0, GPRC:$rs1, simm9_lsb0:$imm), + (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>; def : CompressPat<(BNE GPRC:$rs1, X0, simm9_lsb0:$imm), (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>; +let isCompressOnly = true in +def : CompressPat<(BNE X0, GPRC:$rs1, simm9_lsb0:$imm), + (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>; } // Predicates = [HasStdExtCOrZca] // Quadrant 2 diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 7a79e3ca6a2f..6af710049a9d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -78,7 +78,7 @@ def FSD : 
FPStore_r<0b011, "fsd", FPR64, WriteFST64>; } // Predicates = [HasStdExtD] foreach Ext = DExts in { - let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in { + let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64Addend] in { defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", Ext>; defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", Ext>; defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", Ext>; @@ -115,8 +115,8 @@ foreach Ext = DExts in { Ext.PrimaryTy, "fcvt.s.d">, Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>; - defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, Ext, Ext.PrimaryTy, - Ext.F32Ty, "fcvt.d.s">, + defm FCVT_D_S : FPUnaryOp_r_frmlegacy_m<0b0100001, 0b00000, Ext, Ext.PrimaryTy, + Ext.F32Ty, "fcvt.d.s">, Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>; let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in { @@ -140,12 +140,12 @@ foreach Ext = DExts in { "fcvt.wu.d">, Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; - defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, Ext, Ext.PrimaryTy, GPR, - "fcvt.d.w">, + defm FCVT_D_W : FPUnaryOp_r_frmlegacy_m<0b1101001, 0b00000, Ext, Ext.PrimaryTy, GPR, + "fcvt.d.w">, Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; - defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, Ext, Ext.PrimaryTy, GPR, - "fcvt.d.wu">, + defm FCVT_D_WU : FPUnaryOp_r_frmlegacy_m<0b1101001, 0b00001, Ext, Ext.PrimaryTy, GPR, + "fcvt.d.wu">, Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; } // foreach Ext = DExts @@ -240,7 +240,7 @@ let Predicates = [HasStdExtD] in { // f64 -> f32, f32 -> f64 def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; +def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1, FRM_RNE)>; } // Predicates = [HasStdExtD] let Predicates = [HasStdExtZdinx, IsRV64] in { @@ -248,7 +248,7 @@ let Predicates = [HasStdExtZdinx, IsRV64] in { // f64 -> f32, f32 -> f64 def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_S_D_INX FPR64INX:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1)>; +def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1, FRM_RNE)>; } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { @@ -256,7 +256,7 @@ let Predicates = [HasStdExtZdinx, IsRV32] in { // f64 -> f32, f32 -> f64 def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1)>; +def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1, FRM_RNE)>; } // Predicates = [HasStdExtZdinx, IsRV32] // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so @@ -277,11 +277,12 @@ def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, FRM_DYN)>; def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>; def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>; -def : Pat<(riscv_fpclass FPR64:$rs1), (FCLASS_D $rs1)>; +def : Pat<(riscv_fclass FPR64:$rs1), (FCLASS_D $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D, FPR64, f64>; def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; -def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>; +def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2, + FRM_RNE))>; def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, FRM_DYN))>; @@ -312,13 +313,13 @@ def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>; def : Pat<(fneg 
FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>; def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>; -def : Pat<(riscv_fpclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>; +def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>; def : Pat<(fcopysign FPR64INX:$rs1, (fneg FPR64INX:$rs2)), (FSGNJN_D_INX $rs1, $rs2)>; def : Pat<(fcopysign FPR64INX:$rs1, FPR32INX:$rs2), - (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2))>; + (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_D_INX $rs2, FRM_DYN))>; @@ -349,13 +350,13 @@ def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)> def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>; def : Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>; -def : Pat<(riscv_fpclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>; +def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>; def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)), (FSGNJN_D_IN32X $rs1, $rs2)>; def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2), - (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2))>; + (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>; @@ -396,12 +397,12 @@ foreach Ext = DExts in { // Match non-signaling FEQ_D foreach Ext = DExts in { - defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext, f64>; - defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext, f64>; - defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext, f64>; - defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext, f64>; - defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext, f64>; - defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext, f64>; + defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext>; + defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext>; } let Predicates = [HasStdExtD] in { @@ -537,7 +538,7 @@ def SplitF64Pseudo_INX [(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64IN32X:$src))]>; } // Predicates = [HasStdExtZdinx, IsRV32] -let Predicates = [HasStdExtD, IsRV32] in { +let Predicates = [HasStdExtD] in { // double->[u]int. Round-to-zero must be used. def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, FRM_RTZ)>; @@ -554,9 +555,9 @@ def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, FRM_DYN)>; def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, FRM_RMM)>; // [u]int->double. -def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; -def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; -} // Predicates = [HasStdExtD, IsRV32] +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1, FRM_RNE)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1, FRM_RNE)>; +} // Predicates = [HasStdExtD] let Predicates = [HasStdExtZdinx, IsRV32] in { @@ -575,8 +576,8 @@ def : Pat<(i32 (any_lrint FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_DYN)>; def : Pat<(i32 (any_lround FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_RMM)>; // [u]int->double. 
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1)>; -def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1, FRM_RNE)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1, FRM_RNE)>; } // Predicates = [HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtD, IsRV64] in { @@ -592,8 +593,8 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR64:$rs1, timm:$frm), (FCVT_W_D $rs1, timm:$ def : Pat<(riscv_any_fcvt_wu_rv64 FPR64:$rs1, timm:$frm), (FCVT_WU_D $rs1, timm:$frm)>; // [u]int32->fp -def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; -def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1, FRM_RNE)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1, FRM_RNE)>; // Saturating double->[u]int64. def : Pat<(i64 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_L_D $rs1, timm:$frm)>; @@ -629,8 +630,8 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_W_D_INX $rs1, def : Pat<(riscv_any_fcvt_wu_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_WU_D_INX $rs1, timm:$frm)>; // [u]int32->fp -def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1)>; -def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1, FRM_RNE)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1, FRM_RNE)>; // Saturating double->[u]int64. def : Pat<(i64 (riscv_fcvt_x FPR64INX:$rs1, timm:$frm)), (FCVT_L_D_INX $rs1, timm:$frm)>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 290c03defc5f..52eadbdec255 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -29,11 +29,11 @@ def SDT_RISCVFCVT_X def SDT_RISCVFROUND : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, XLenVT>]>; -def SDT_RISCVFPCLASS +def SDT_RISCVFCLASS : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>; -def riscv_fpclass - : SDNode<"RISCVISD::FPCLASS", SDT_RISCVFPCLASS>; +def riscv_fclass + : SDNode<"RISCVISD::FCLASS", SDT_RISCVFCLASS>; def riscv_fround : SDNode<"RISCVISD::FROUND", SDT_RISCVFROUND>; @@ -132,6 +132,26 @@ def frmarg : Operand<XLenVT> { let DecoderMethod = "decodeFRMArg"; } +// Variants of the rounding mode operand that default to 'rne'. This is used +// for historical/legacy reasons. fcvt functions where the rounding mode +// doesn't affect the output originally always set it to 0b000 ('rne'). As old +// versions of LLVM and GCC will fail to decode versions of these instructions +// with the rounding mode set to something other than 'rne', we retain this +// default. 
+def FRMArgLegacy : AsmOperandClass { + let Name = "FRMArgLegacy"; + let RenderMethod = "addFRMArgOperands"; + let ParserMethod = "parseFRMArg"; + let IsOptional = 1; + let DefaultMethod = "defaultFRMArgLegacyOp"; +} + +def frmarglegacy : Operand<XLenVT> { + let ParserMatchClass = FRMArgLegacy; + let PrintMethod = "printFRMArgLegacy"; + let DecoderMethod = "decodeFRMArg"; +} + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -227,6 +247,24 @@ multiclass FPUnaryOp_r_frm_m<bits<7> funct7, bits<5> rs2val, } let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in +class FPUnaryOp_r_frmlegacy<bits<7> funct7, bits<5> rs2val, DAGOperand rdty, + DAGOperand rs1ty, string opcodestr> + : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd), + (ins rs1ty:$rs1, frmarglegacy:$frm), opcodestr, + "$rd, $rs1$frm"> { + let rs2 = rs2val; +} +multiclass FPUnaryOp_r_frmlegacy_m<bits<7> funct7, bits<5> rs2val, + ExtInfo Ext, DAGOperand rdty, DAGOperand rs1ty, + string opcodestr, list<Predicate> ExtraPreds = []> { + let Predicates = !listconcat(Ext.Predicates, ExtraPreds), + DecoderNamespace = Ext.Space in + def Ext.Suffix : FPUnaryOp_r_frmlegacy<funct7, rs2val, rdty, rs1ty, + opcodestr>; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, IsSignExtendingOpW = 1 in class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr, DAGOperand rty, bit Commutable = 0> @@ -264,7 +302,7 @@ def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>; } // Predicates = [HasStdExtF] foreach Ext = FExts in { - let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in { + let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in { defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>; defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", Ext>; defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", Ext>; @@ -443,10 +481,10 @@ class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond, RVInst Inst, ValueType vt> : Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>; multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond, - RVInst Inst, ExtInfo Ext, ValueType vt> { + RVInst Inst, ExtInfo Ext> { let Predicates = Ext.Predicates in def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond, - !cast<RVInst>(Inst#Ext.Suffix), vt>; + !cast<RVInst>(Inst#Ext.Suffix), Ext.PrimaryVT>; } class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst, @@ -489,7 +527,7 @@ def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, FRM_DYN)>; def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>; def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>; -def : Pat<(riscv_fpclass FPR32:$rs1), (FCLASS_S $rs1)>; +def : Pat<(riscv_fclass FPR32:$rs1), (FCLASS_S $rs1)>; } // Predicates = [HasStdExtF] let Predicates = [HasStdExtZfinx] in { @@ -498,7 +536,7 @@ def : Pat<(any_fsqrt FPR32INX:$rs1), (FSQRT_S_INX FPR32INX:$rs1, FRM_DYN)>; def : Pat<(fneg FPR32INX:$rs1), (FSGNJN_S_INX $rs1, $rs1)>; def : Pat<(fabs FPR32INX:$rs1), (FSGNJX_S_INX $rs1, $rs1)>; -def : Pat<(riscv_fpclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>; +def : Pat<(riscv_fclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>; } // Predicates = [HasStdExtZfinx] foreach Ext = FExts in @@ -568,12 +606,12 @@ foreach Ext = FExts in { // Match non-signaling FEQ_S foreach Ext = FExts in { - defm : PatSetCC_m<any_fsetcc, SETEQ, 
FEQ_S, Ext, f32>; - defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext, f32>; - defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext, f32>; - defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext, f32>; - defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext, f32>; - defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext, f32>; + defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_S, Ext>; + defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext>; } let Predicates = [HasStdExtF] in { @@ -607,10 +645,10 @@ def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs1, SETOEQ)), } // Predicates = [HasStdExtZfinx] foreach Ext = FExts in { - defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext, f32>; - defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext, f32>; - defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext, f32>; - defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext, f32>; + defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext>; + defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext>; + defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext>; + defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext>; } let Predicates = [HasStdExtF] in { @@ -642,19 +680,19 @@ def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>; } // Predicates = [HasStdExtZfinx] -let Predicates = [HasStdExtF, IsRV32] in { +let Predicates = [HasStdExtF] in { // Moves (no conversion) def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>; def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>; -} // Predicates = [HasStdExtF, IsRV32] +} // Predicates = [HasStdExtF] -let Predicates = [HasStdExtZfinx, IsRV32] in { +let Predicates = [HasStdExtZfinx] in { // Moves (no conversion) def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>; def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>; -} // Predicates = [HasStdExtZfinx, IsRV32] +} // Predicates = [HasStdExtZfinx] -let Predicates = [HasStdExtF, IsRV32] in { +let Predicates = [HasStdExtF] in { // float->[u]int. Round-to-zero must be used. def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RTZ)>; def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, FRM_RTZ)>; @@ -672,9 +710,9 @@ def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RMM)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, FRM_DYN)>; def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, FRM_DYN)>; -} // Predicates = [HasStdExtF, IsRV32] +} // Predicates = [HasStdExtF] -let Predicates = [HasStdExtZfinx, IsRV32] in { +let Predicates = [HasStdExtZfinx] in { // float->[u]int. Round-to-zero must be used. def : Pat<(i32 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RTZ)>; def : Pat<(i32 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_WU_S_INX $rs1, FRM_RTZ)>; @@ -692,7 +730,7 @@ def : Pat<(i32 (any_lround FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RMM)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. 
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W_INX $rs1, FRM_DYN)>; def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU_INX $rs1, FRM_DYN)>; -} // Predicates = [HasStdExtZfinx, IsRV32] +} // Predicates = [HasStdExtZfinx] let Predicates = [HasStdExtF, IsRV64] in { // Moves (no conversion) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index 6c3c9a771d94..f9890ca4b0ee 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -114,3 +114,18 @@ let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in { def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))), (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>; } // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] + +//===----------------------------------------------------------------------===// +// Experimental RV64 i32 legalization patterns. +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtMOrZmmul, IsRV64] in { +def : PatGprGpr<mul, MULW, i32, i32>; +} + +let Predicates = [HasStdExtM, IsRV64] in { +def : PatGprGpr<sdiv, DIVW, i32, i32>; +def : PatGprGpr<udiv, DIVUW, i32, i32>; +def : PatGprGpr<srem, REMW, i32, i32>; +def : PatGprGpr<urem, REMUW, i32, i32>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 6e5ee8043e92..9fc9a29c210d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -24,12 +24,11 @@ class VTypeIAsmOperand<int VTypeINum> : AsmOperandClass { let RenderMethod = "addVTypeIOperands"; } -class VTypeIOp<int VTypeINum> : Operand<XLenVT> { +class VTypeIOp<int VTypeINum> : RISCVOp { let ParserMatchClass = VTypeIAsmOperand<VTypeINum>; let PrintMethod = "printVTypeI"; let DecoderMethod = "decodeUImmOperand<"#VTypeINum#">"; let OperandType = "OPERAND_VTYPEI" # VTypeINum; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -58,12 +57,7 @@ def VMaskOp : RegisterOperand<VMV0> { let DecoderMethod = "decodeVMaskReg"; } -def simm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<5>; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeSImmOperand<5>"; - let OperandType = "OPERAND_SIMM5"; - let OperandNamespace = "RISCVOp"; +def simm5 : RISCVSImmLeafOp<5> { let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -78,11 +72,10 @@ def SImm5Plus1AsmOperand : AsmOperandClass { let DiagnosticType = "InvalidSImm5Plus1"; } -def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, +def simm5_plus1 : RISCVOp, ImmLeaf<XLenVT, [{return (isInt<5>(Imm) && Imm != -16) || Imm == 16;}]> { let ParserMatchClass = SImm5Plus1AsmOperand; let OperandType = "OPERAND_SIMM5_PLUS1"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -98,88 +91,209 @@ def simm5_plus1_nonzero : ImmLeaf<XLenVT, // Scheduling definitions. 
//===----------------------------------------------------------------------===// -class VMVRSched<int n> : Sched<[ - !cast<SchedReadWrite>("WriteVMov" #n #"V"), - !cast<SchedReadWrite>("ReadVMov" #n #"V") -]>; - -class VLESched<string lmul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLDE_" #lmul), - ReadVLDX, ReadVMask -]>; - -class VSESched<string lmul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVSTE_" #lmul), - !cast<SchedReadWrite>("ReadVSTEV_" #lmul), - ReadVSTX, ReadVMask -]>; - -class VLSSched<int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLDS" #eew #"_" #emul), - ReadVLDX, ReadVLDSX, ReadVMask -]>; - -class VSSSched<int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVSTS" #eew #"_" #emul), - !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul), - ReadVSTX, ReadVSTSX, ReadVMask -]>; - -class VLXSched<int dataEEW, string isOrdered, - string dataEMUL = "WorstCase", - string idxEMUL = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLD" #isOrdered #"X" #dataEEW #"_" #dataEMUL), - ReadVLDX, - !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #idxEMUL), ReadVMask -]>; - -class VSXSched<int dataEEW, string isOrdered, - string dataEMUL = "WorstCase", - string idxEMUL = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL), - !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL), - ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #idxEMUL), ReadVMask -]>; - -class VLFSched<string lmul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLDFF_" #lmul), - ReadVLDX, ReadVMask -]>; +// Common class of scheduling definitions. +// `ReadVMergeOp` will be prepended to reads if instruction is masked. +// `ReadVMask` will be appended to reads if instruction is masked. +// Operands: +// `writes` SchedWrites that are listed for each explicit def operand +// in order. +// `reads` SchedReads that are listed for each explicit use operand. +// `forceMasked` Forced to be masked (e.g. Add-with-Carry Instructions). +// `forceMergeOpRead` Force to have read for merge operand. +class SchedCommon<list<SchedWrite> writes, list<SchedRead> reads, + string mx = "WorstCase", int sew = 0, bit forceMasked = 0, + bit forceMergeOpRead = 0> : Sched<[]> { + defvar isMasked = !ne(!find(NAME, "_MASK"), -1); + defvar isMaskedOrForceMasked = !or(forceMasked, isMasked); + defvar mergeRead = !if(!or(!eq(mx, "WorstCase"), !eq(sew, 0)), + !cast<SchedRead>("ReadVMergeOp_" # mx), + !cast<SchedRead>("ReadVMergeOp_" # mx # "_E" #sew)); + defvar needsMergeRead = !or(isMaskedOrForceMasked, forceMergeOpRead); + defvar readsWithMask = + !if(isMaskedOrForceMasked, !listconcat(reads, [ReadVMask]), reads); + defvar allReads = + !if(needsMergeRead, !listconcat([mergeRead], readsWithMask), reads); + let SchedRW = !listconcat(writes, allReads); +} + +// Common class of scheduling definitions for n-ary instructions. +// The scheduling resources are relevant to LMUL and may be relevant to SEW. +class SchedNary<string write, list<string> reads, string mx, int sew = 0, + bit forceMasked = 0, bit forceMergeOpRead = 0> + : SchedCommon<[!cast<SchedWrite>( + !if(sew, + write # "_" # mx # "_E" # sew, + write # "_" # mx))], + !foreach(read, reads, + !cast<SchedRead>(!if(sew, read #"_" #mx #"_E" #sew, + read #"_" #mx))), + mx, sew, forceMasked, forceMergeOpRead>; + +// Classes with postfix "MC" are only used in MC layer. 
+// For these classes, we assume that they are with the worst case costs and +// `ReadVMask` is always needed (with some exceptions). + +// For instructions with no operand. +class SchedNullary<string write, string mx, int sew = 0, bit forceMasked = 0, + bit forceMergeOpRead = 0>: + SchedNary<write, [], mx, sew, forceMasked, forceMergeOpRead>; +class SchedNullaryMC<string write, bit forceMasked = 1>: + SchedNullary<write, "WorstCase", forceMasked=forceMasked>; + +// For instructions with one operand. +class SchedUnary<string write, string read0, string mx, int sew = 0, + bit forceMasked = 0, bit forceMergeOpRead = 0>: + SchedNary<write, [read0], mx, sew, forceMasked, forceMergeOpRead>; +class SchedUnaryMC<string write, string read0, bit forceMasked = 1>: + SchedUnary<write, read0, "WorstCase", forceMasked=forceMasked>; + +// For instructions with two operands. +class SchedBinary<string write, string read0, string read1, string mx, + int sew = 0, bit forceMasked = 0, bit forceMergeOpRead = 0> + : SchedNary<write, [read0, read1], mx, sew, forceMasked, forceMergeOpRead>; +class SchedBinaryMC<string write, string read0, string read1, + bit forceMasked = 1>: + SchedBinary<write, read0, read1, "WorstCase", forceMasked=forceMasked>; + +// For instructions with three operands. +class SchedTernary<string write, string read0, string read1, string read2, + string mx, int sew = 0, bit forceMasked = 0, + bit forceMergeOpRead = 0> + : SchedNary<write, [read0, read1, read2], mx, sew, forceMasked, + forceMergeOpRead>; +class SchedTernaryMC<string write, string read0, string read1, string read2, + int sew = 0, bit forceMasked = 1>: + SchedNary<write, [read0, read1, read2], "WorstCase", sew, forceMasked>; + +// For reduction instructions. +class SchedReduction<string write, string read, string mx, int sew, + bit forceMergeOpRead = 0> + : SchedCommon<[!cast<SchedWrite>(write #"_" #mx #"_E" #sew)], + !listsplat(!cast<SchedRead>(read), 3), mx, sew, forceMergeOpRead>; +class SchedReductionMC<string write, string readV, string readV0>: + SchedCommon<[!cast<SchedWrite>(write # "_WorstCase")], + [!cast<SchedRead>(readV), !cast<SchedRead>(readV0)], + forceMasked=1>; + +// Whole Vector Register Move +class VMVRSched<int n> : SchedCommon< + [!cast<SchedWrite>("WriteVMov" # n # "V")], + [!cast<SchedRead>("ReadVMov" # n # "V")] +>; + +// Vector Unit-Stride Loads and Stores +class VLESched<string lmul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLDE_" # lmul)], + [ReadVLDX], mx=lmul, forceMasked=forceMasked +>; +class VLESchedMC : VLESched<"WorstCase", forceMasked=1>; + +class VSESched<string lmul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVSTE_" # lmul)], + [!cast<SchedRead>("ReadVSTEV_" # lmul), ReadVSTX], mx=lmul, + forceMasked=forceMasked +>; +class VSESchedMC : VSESched<"WorstCase", forceMasked=1>; + +// Vector Strided Loads and Stores +class VLSSched<int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLDS" # eew # "_" # emul)], + [ReadVLDX, ReadVLDSX], emul, eew, forceMasked +>; +class VLSSchedMC<int eew> : VLSSched<eew, "WorstCase", forceMasked=1>; + +class VSSSched<int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVSTS" # eew # "_" # emul)], + [!cast<SchedRead>("ReadVSTS" # eew # "V_" # emul), ReadVSTX, ReadVSTSX], + emul, eew, forceMasked +>; +class VSSSchedMC<int eew> : VSSSched<eew, "WorstCase", forceMasked=1>; + +// Vector Indexed Loads and Stores +class VLXSched<int dataEEW, bit isOrdered, string 
dataEMUL, string idxEMUL, + bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLD" # !if(isOrdered, "O", "U") # "X" # dataEEW # "_" # dataEMUL)], + [ReadVLDX, !cast<SchedRead>("ReadVLD" # !if(isOrdered, "O", "U") # "XV_" # idxEMUL)], + dataEMUL, dataEEW, forceMasked +>; +class VLXSchedMC<int dataEEW, bit isOrdered>: + VLXSched<dataEEW, isOrdered, "WorstCase", "WorstCase", forceMasked=1>; + +class VSXSched<int dataEEW, bit isOrdered, string dataEMUL, string idxEMUL, + bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVST" # !if(isOrdered, "O", "U") # "X" # dataEEW # "_" # dataEMUL)], + [!cast<SchedRead>("ReadVST" # !if(isOrdered, "O", "U") #"X" # dataEEW # "_" # dataEMUL), + ReadVSTX, !cast<SchedRead>("ReadVST" # !if(isOrdered, "O", "U") # "XV_" # idxEMUL)], + dataEMUL, dataEEW, forceMasked +>; +class VSXSchedMC<int dataEEW, bit isOrdered>: + VSXSched<dataEEW, isOrdered, "WorstCase", "WorstCase", forceMasked=1>; + +// Unit-stride Fault-Only-First Loads +class VLFSched<string lmul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLDFF_" # lmul)], + [ReadVLDX], mx=lmul, forceMasked=forceMasked +>; +class VLFSchedMC: VLFSched<"WorstCase", forceMasked=1>; // Unit-Stride Segment Loads and Stores -class VLSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul), - ReadVLDX, ReadVMask -]>; -class VSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew #"_" #emul), - !cast<SchedReadWrite>("ReadVSTEV_" #emul), - ReadVSTX, ReadVMask -]>; -class VLSEGFFSched<int nf, int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew #"_" #emul), - ReadVLDX, ReadVMask -]>; +class VLSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul)], + [ReadVLDX], emul, eew, forceMasked +>; +class VLSEGSchedMC<int nf, int eew> : VLSEGSched<nf, eew, "WorstCase", + forceMasked=1>; + +class VSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVSSEG" # nf # "e" # eew # "_" # emul)], + [!cast<SchedRead>("ReadVSTEV_" #emul), ReadVSTX], emul, eew, forceMasked +>; +class VSSEGSchedMC<int nf, int eew> : VSSEGSched<nf, eew, "WorstCase", + forceMasked=1>; + +class VLSEGFFSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLSEGFF" # nf # "e" # eew # "_" # emul)], + [ReadVLDX], emul, eew, forceMasked +>; +class VLSEGFFSchedMC<int nf, int eew> : VLSEGFFSched<nf, eew, "WorstCase", + forceMasked=1>; + // Strided Segment Loads and Stores -class VLSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul), - ReadVLDX, ReadVLDSX, ReadVMask -]>; -class VSSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #emul), - !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul), - ReadVSTX, ReadVSTSX, ReadVMask -]>; +class VLSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul)], + [ReadVLDX, ReadVLDSX], emul, eew, forceMasked +>; +class VLSSEGSchedMC<int nf, int eew> : VLSSEGSched<nf, eew, "WorstCase", + forceMasked=1>; + +class VSSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVSSSEG" #nf #"e" #eew 
#"_" #emul)], + [!cast<SchedRead>("ReadVSTS" #eew #"V_" #emul), + ReadVSTX, ReadVSTSX], emul, eew, forceMasked +>; +class VSSSEGSchedMC<int nf, int eew> : VSSSEGSched<nf, eew, "WorstCase", + forceMasked=1>; + // Indexed Segment Loads and Stores -class VLXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVL" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul), - ReadVLDX, !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #emul), ReadVMask -]>; -class VSXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[ - !cast<SchedReadWrite>("WriteVS" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul), - !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #eew #"_" #emul), - ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #emul), ReadVMask -]>; +class VLXSEGSched<int nf, int eew, bit isOrdered, string emul, + bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVL" #!if(isOrdered, "O", "U") #"XSEG" #nf #"e" #eew #"_" #emul)], + [ReadVLDX, !cast<SchedRead>("ReadVLD" #!if(isOrdered, "O", "U") #"XV_" #emul)], + emul, eew, forceMasked +>; +class VLXSEGSchedMC<int nf, int eew, bit isOrdered>: + VLXSEGSched<nf, eew, isOrdered, "WorstCase", forceMasked=1>; + +// Passes sew=0 instead of eew=0 since this pseudo does not follow MX_E form. +class VSXSEGSched<int nf, int eew, bit isOrdered, string emul, + bit forceMasked = 0> : SchedCommon< + [!cast<SchedWrite>("WriteVS" #!if(isOrdered, "O", "U") #"XSEG" #nf #"e" #eew #"_" #emul)], + [!cast<SchedRead>("ReadVST" #!if(isOrdered, "O", "U") #"X" #eew #"_" #emul), + ReadVSTX, !cast<SchedRead>("ReadVST" #!if(isOrdered, "O", "U") #"XV_" #emul)], + emul, sew=0, forceMasked=forceMasked +>; +class VSXSEGSchedMC<int nf, int eew, bit isOrdered>: + VSXSEGSched<nf, eew, isOrdered, "WorstCase", forceMasked=1>; //===----------------------------------------------------------------------===// // Instruction class templates @@ -327,10 +441,14 @@ class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> } // op vd, vs1, vs2, vm (reverse the order of vs1 and vs2) -class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVV<funct6, opv, (outs VR:$vd), - (ins VR:$vs1, VR:$vs2, VMaskOp:$vm), - opcodestr, "$vd, $vs1, $vs2$vm">; +class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber = 0> + : RVInstVV<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm), + opcodestr, "$vd, $vs1, $vs2$vm"> { + let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb", + "$vd = $vd_wb"); +} // op vd, vs2, vs1 class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr> @@ -355,10 +473,14 @@ class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> } // op vd, rs1, vs2, vm (reverse the order of rs1 and vs2) -class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VR:$vd), - (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), - opcodestr, "$vd, $rs1, $vs2$vm">; +class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber = 0> + : RVInstVX<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm), + opcodestr, "$vd, $rs1, $vs2$vm"> { + let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb", + "$vd = $vd_wb"); +} // op vd, vs1, vs2 class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr> @@ -397,10 +519,14 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr> opcodestr, "$vd, $vs2, $rs1$vm">; // 
op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2) -class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VR:$vd), - (ins FPR32:$rs1, VR:$vs2, VMaskOp:$vm), - opcodestr, "$vd, $rs1, $vs2$vm">; +class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber = 0> + : RVInstVX<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, FPR32:$rs1, VR:$vs2, VMaskOp:$vm), + opcodestr, "$vd, $rs1, $vs2$vm"> { + let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb", + "$vd = $vd_wb"); +} // op vd, vs2, vm (use vs1 as instruction encoding) class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr> @@ -422,42 +548,37 @@ class VALUVs2NoVm<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodest // Use these multiclasses to define instructions more easily. //===----------------------------------------------------------------------===// -multiclass VIndexLoadStore<list<int> EEWList> { - foreach n = EEWList in { - defvar w = !cast<RISCVWidth>("LSWidth" # n); - - def VLUXEI # n # _V : - VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">, - VLXSched<n, "U">; - def VLOXEI # n # _V : - VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">, - VLXSched<n, "O">; - - def VSUXEI # n # _V : - VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">, - VSXSched<n, "U">; - def VSOXEI # n # _V : - VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">, - VSXSched<n, "O">; - } +multiclass VIndexLoadStore<int eew> { + defvar w = !cast<RISCVWidth>("LSWidth" # eew); + + def VLUXEI # eew # _V : + VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # eew # ".v">, + VLXSchedMC<eew, isOrdered=0>; + def VLOXEI # eew # _V : + VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # eew # ".v">, + VLXSchedMC<eew, isOrdered=1>; + + def VSUXEI # eew # _V : + VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # eew # ".v">, + VSXSchedMC<eew, isOrdered=0>; + def VSOXEI # eew # _V : + VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # eew # ".v">, + VSXSchedMC<eew, isOrdered=1>; } multiclass VALU_IV_V<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVIALUV_WorstCase, ReadVIALUV_WorstCase, - ReadVIALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">; } multiclass VALU_IV_X<string opcodestr, bits<6> funct6> { def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVIALUX_WorstCase, ReadVIALUV_WorstCase, - ReadVIALUX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX">; } multiclass VALU_IV_I<string opcodestr, bits<6> funct6> { def I : VALUVI<funct6, opcodestr # ".vi", simm5>, - Sched<[WriteVIALUI_WorstCase, ReadVIALUV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">; } multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6> @@ -475,364 +596,314 @@ multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6> multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> { def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, - Sched<[WriteVIWALUV_WorstCase, ReadVIWALUV_WorstCase, - ReadVIWALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV">; def X : VALUVX<funct6, OPMVX, opcodestr # "." 
# vw # "x">, - Sched<[WriteVIWALUX_WorstCase, ReadVIWALUV_WorstCase, - ReadVIWALUX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX">; } multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> { def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">, - Sched<[WriteVIMulAddV_WorstCase, ReadVIMulAddV_WorstCase, - ReadVIMulAddV_WorstCase, ReadVMask]>; + SchedTernaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV", + "ReadVIMulAddV">; def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVIMulAddX_WorstCase, ReadVIMulAddV_WorstCase, - ReadVIMulAddX_WorstCase, ReadVMask]>; + SchedTernaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX", + "ReadVIMulAddV">; } multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> { + let RVVConstraint = WidenV in def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVIWMulAddX_WorstCase, ReadVIWMulAddV_WorstCase, - ReadVIWMulAddX_WorstCase, ReadVMask]>; + SchedTernaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX", + "ReadVIWMulAddV">; } multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6> : VWMAC_MV_X<opcodestr, funct6> { - def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">, - Sched<[WriteVIWMulAddV_WorstCase, ReadVIWMulAddV_WorstCase, - ReadVIWMulAddV_WorstCase, ReadVMask]>; + let RVVConstraint = WidenV in + def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>, + SchedTernaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", + "ReadVIWMulAddV">; } multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, - Sched<[WriteVExtV_WorstCase, ReadVExtV_WorstCase, ReadVMask]>; + SchedUnaryMC<"WriteVExtV", "ReadVExtV">; } multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> { def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, - Sched<[WriteVIMergeV_WorstCase, ReadVIMergeV_WorstCase, - ReadVIMergeV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV">; def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, - Sched<[WriteVIMergeX_WorstCase, ReadVIMergeV_WorstCase, - ReadVIMergeX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX">; def IM : VALUmVI<funct6, opcodestr # ".vim">, - Sched<[WriteVIMergeI_WorstCase, ReadVIMergeV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVIMergeI", "ReadVIMergeV">; } multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> { def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, - Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase, - ReadVICALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV">; def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, - Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase, - ReadVICALUX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX">; } multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> : VALUm_IV_V_X<opcodestr, funct6> { def IM : VALUmVI<funct6, opcodestr # ".vim">, - Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVICALUI", "ReadVICALUV">; } multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase, - ReadVICALUV_WorstCase]>; + SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", + forceMasked=0>; def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase, - ReadVICALUX_WorstCase]>; + 
SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", + forceMasked=0>; } multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6> : VALUNoVm_IV_V_X<opcodestr, funct6> { def I : VALUVINoVm<funct6, opcodestr # ".vi", simm5>, - Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase]>; + SchedUnaryMC<"WriteVICALUI", "ReadVICALUV", forceMasked=0>; } multiclass VALU_FV_F<string opcodestr, bits<6> funct6> { def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFALUF_WorstCase, ReadVFALUV_WorstCase, - ReadVFALUF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF">; } multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6> : VALU_FV_F<opcodestr, funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFALUV_WorstCase, ReadVFALUV_WorstCase, - ReadVFALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV">; } multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw> { def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, - Sched<[WriteVFWALUV_WorstCase, ReadVFWALUV_WorstCase, - ReadVFWALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV">; def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, - Sched<[WriteVFWALUF_WorstCase, ReadVFWALUV_WorstCase, - ReadVFWALUF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF">; } multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFMulV_WorstCase, ReadVFMulV_WorstCase, - ReadVFMulV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV">; def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFMulF_WorstCase, ReadVFMulV_WorstCase, - ReadVFMulF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF">; } multiclass VDIV_FV_F<string opcodestr, bits<6> funct6> { def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFDivF_WorstCase, ReadVFDivV_WorstCase, - ReadVFDivF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF">; } multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6> : VDIV_FV_F<opcodestr, funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFDivV_WorstCase, ReadVFDivV_WorstCase, - ReadVFDivV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV">; } multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFWMulV_WorstCase, ReadVFWMulV_WorstCase, - ReadVFWMulV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV">; def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFWMulF_WorstCase, ReadVFWMulV_WorstCase, - ReadVFWMulF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF">; } multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> { def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFMulAddV_WorstCase, ReadVFMulAddV_WorstCase, - ReadVFMulAddV_WorstCase, ReadVMask]>; + SchedTernaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV", + "ReadVFMulAddV">; def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFMulAddF_WorstCase, ReadVFMulAddV_WorstCase, - ReadVFMulAddF_WorstCase, ReadVMask]>; + SchedTernaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF", + "ReadVFMulAddV">; } multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> { - def V : VALUrVV<funct6, OPFVV, opcodestr # 
".vv">, - Sched<[WriteVFWMulAddV_WorstCase, ReadVFWMulAddV_WorstCase, - ReadVFWMulAddV_WorstCase, ReadVMask]>; - def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFWMulAddF_WorstCase, ReadVFWMulAddV_WorstCase, - ReadVFWMulAddF_WorstCase, ReadVMask]>; + let RVVConstraint = WidenV in { + def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>, + SchedTernaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV", + "ReadVFWMulAddV">; + def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>, + SchedTernaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF", + "ReadVFWMulAddV">; + } } multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFSqrtV_WorstCase, ReadVFSqrtV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFSqrtV", "ReadVFSqrtV">; } multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFRecpV_WorstCase, ReadVFRecpV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFRecpV", "ReadVFRecpV">; } multiclass VMINMAX_FV_V_F<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFMinMaxV_WorstCase, ReadVFMinMaxV_WorstCase, - ReadVFMinMaxV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV">; def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFMinMaxF_WorstCase, ReadVFMinMaxV_WorstCase, - ReadVFMinMaxF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF">; } multiclass VCMP_FV_F<string opcodestr, bits<6> funct6> { def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFCmpF_WorstCase, ReadVFCmpV_WorstCase, - ReadVFCmpF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF">; } multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6> : VCMP_FV_F<opcodestr, funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFCmpV_WorstCase, ReadVFCmpV_WorstCase, - ReadVFCmpV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFCmpV", "ReadVFCmpV", "ReadVFCmpV">; } multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">, - Sched<[WriteVFSgnjV_WorstCase, ReadVFSgnjV_WorstCase, - ReadVFSgnjV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV">; def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFSgnjF_WorstCase, ReadVFSgnjV_WorstCase, - ReadVFSgnjF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF">; } multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFClassV_WorstCase, ReadVFClassV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFClassV", "ReadVFClassV">; } multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFCvtIToFV_WorstCase, ReadVFCvtIToFV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFCvtIToFV", "ReadVFCvtIToFV">; } multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFCvtFToIV_WorstCase, ReadVFCvtFToIV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFCvtFToIV", "ReadVFCvtFToIV">; } multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - 
Sched<[WriteVFWCvtIToFV_WorstCase, ReadVFWCvtIToFV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV">; } multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFWCvtFToIV_WorstCase, ReadVFWCvtFToIV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV">; } multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFWCvtFToFV_WorstCase, ReadVFWCvtFToFV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV">; } multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFNCvtIToFV_WorstCase, ReadVFNCvtIToFV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV">; } multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFNCvtFToIV_WorstCase, ReadVFNCvtFToIV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV">; } multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, - Sched<[WriteVFNCvtFToFV_WorstCase, ReadVFNCvtFToFV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV">; } multiclass VRED_MV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">, - Sched<[WriteVIRedV_From_WorstCase, ReadVIRedV, ReadVIRedV0, - ReadVMask]>; + SchedReductionMC<"WriteVIRedV_From", "ReadVIRedV", "ReadVIRedV0">; } multiclass VREDMINMAX_MV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">, - Sched<[WriteVIRedMinMaxV_From_WorstCase, ReadVIRedV, ReadVIRedV0, - ReadVMask]>; + SchedReductionMC<"WriteVIRedMinMaxV_From", "ReadVIRedV", "ReadVIRedV0">; } multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">, - Sched<[WriteVIWRedV_From_WorstCase, ReadVIWRedV, ReadVIWRedV0, - ReadVMask]>; + SchedReductionMC<"WriteVIWRedV_From", "ReadVIWRedV", "ReadVIWRedV0">; } multiclass VRED_FV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, - Sched<[WriteVFRedV_From_WorstCase, ReadVFRedV, ReadVFRedV0, - ReadVMask]>; + SchedReductionMC<"WriteVFRedV_From", "ReadVFRedV", "ReadVFRedV0">; } multiclass VREDMINMAX_FV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, - Sched<[WriteVFRedMinMaxV_From_WorstCase, ReadVFRedV, ReadVFRedV0, - ReadVMask]>; + SchedReductionMC<"WriteVFRedMinMaxV_From", "ReadVFRedV", "ReadVFRedV0">; } multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, - Sched<[WriteVFRedOV_From_WorstCase, ReadVFRedOV, ReadVFRedOV0, - ReadVMask]>; + SchedReductionMC<"WriteVFRedOV_From", "ReadVFRedOV", "ReadVFRedOV0">; } multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, - Sched<[WriteVFWRedV_From_WorstCase, ReadVFWRedV, ReadVFWRedV0, - ReadVMask]>; + SchedReductionMC<"WriteVFWRedV_From", "ReadVFWRedV", "ReadVFWRedV0">; } multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> { def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, - Sched<[WriteVFWRedOV_From_WorstCase, ReadVFWRedOV, ReadVFWRedOV0, - ReadVMask]>; + SchedReductionMC<"WriteVFWRedOV_From", "ReadVFWRedOV", "ReadVFWRedOV0">; 
} multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { def M : VALUVVNoVm<funct6, OPMVV, opcodestr #"." #vm #"m">, - Sched<[WriteVMALUV_WorstCase, ReadVMALUV_WorstCase, - ReadVMALUV_WorstCase]>; + SchedBinaryMC<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", + forceMasked=0>; } multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, - Sched<[WriteVMSFSV_WorstCase, ReadVMSFSV_WorstCase, ReadVMask]>; + SchedUnaryMC<"WriteVMSFSV", "ReadVMSFSV">; } multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, - Sched<[WriteVMIotV_WorstCase, ReadVMIotV_WorstCase, ReadVMask]>; + SchedUnaryMC<"WriteVMIotV", "ReadVMIotV">; } multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVShiftV_WorstCase, ReadVShiftV_WorstCase, - ReadVShiftV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVShiftV", "ReadVShiftV", "ReadVShiftV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVShiftX_WorstCase, ReadVShiftV_WorstCase, - ReadVShiftX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX">; def I : VALUVI<funct6, opcodestr # ".vi", uimm5>, - Sched<[WriteVShiftI_WorstCase, ReadVShiftV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVShiftI", "ReadVShiftV">; } multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">, - Sched<[WriteVNShiftV_WorstCase, ReadVNShiftV_WorstCase, - ReadVNShiftV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVNShiftV", "ReadVNShiftV", "ReadVNShiftV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">, - Sched<[WriteVNShiftX_WorstCase, ReadVNShiftV_WorstCase, - ReadVNShiftX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVNShiftX", "ReadVNShiftV", "ReadVNShiftX">; def I : VALUVI<funct6, opcodestr # ".wi", uimm5>, - Sched<[WriteVNShiftI_WorstCase, ReadVNShiftV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVNShiftI", "ReadVNShiftV">; } multiclass VMINMAX_IV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVIMinMaxV_WorstCase, ReadVIMinMaxV_WorstCase, - ReadVIMinMaxV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVIMinMaxX_WorstCase, ReadVIMinMaxV_WorstCase, - ReadVIMinMaxX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX">; } multiclass VCMP_IV_V<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVICmpV_WorstCase, ReadVICmpV_WorstCase, - ReadVICmpV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV">; } multiclass VCMP_IV_X<string opcodestr, bits<6> funct6> { def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVICmpX_WorstCase, ReadVICmpV_WorstCase, - ReadVICmpX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX">; } multiclass VCMP_IV_I<string opcodestr, bits<6> funct6> { def I : VALUVI<funct6, opcodestr # ".vi", simm5>, - Sched<[WriteVICmpI_WorstCase, ReadVICmpV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVICmpI", "ReadVICmpV">; } multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6> @@ -850,140 +921,109 @@ multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6> multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPMVV, 
opcodestr # ".vv">, - Sched<[WriteVIMulV_WorstCase, ReadVIMulV_WorstCase, - ReadVIMulV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV">; def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVIMulX_WorstCase, ReadVIMulV_WorstCase, - ReadVIMulX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX">; } multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">, - Sched<[WriteVIWMulV_WorstCase, ReadVIWMulV_WorstCase, - ReadVIWMulV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV">; def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVIWMulX_WorstCase, ReadVIWMulV_WorstCase, - ReadVIWMulX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIWMulX", "ReadVIWMulV", "ReadVIWMulX">; } multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">, - Sched<[WriteVIDivV_WorstCase, ReadVIDivV_WorstCase, - ReadVIDivV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIDivV", "ReadVIDivV", "ReadVIDivV">; def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVIDivX_WorstCase, ReadVIDivV_WorstCase, - ReadVIDivX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVIDivX", "ReadVIDivV", "ReadVIDivX">; } multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVSALUV_WorstCase, ReadVSALUV_WorstCase, - ReadVSALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVSALUV", "ReadVSALUV", "ReadVSALUV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVSALUX_WorstCase, ReadVSALUV_WorstCase, - ReadVSALUX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX">; } multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6> : VSALU_IV_V_X<opcodestr, funct6> { def I : VALUVI<funct6, opcodestr # ".vi", simm5>, - Sched<[WriteVSALUI_WorstCase, ReadVSALUV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVSALUI", "ReadVSALUV">; } multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">, - Sched<[WriteVAALUV_WorstCase, ReadVAALUV_WorstCase, - ReadVAALUV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVAALUV", "ReadVAALUV", "ReadVAALUV">; def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVAALUX_WorstCase, ReadVAALUV_WorstCase, - ReadVAALUX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVAALUX", "ReadVAALUV", "ReadVAALUX">; } multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVSMulV_WorstCase, ReadVSMulV_WorstCase, - ReadVSMulV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVSMulV", "ReadVSMulV", "ReadVSMulV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVSMulX_WorstCase, ReadVSMulV_WorstCase, - ReadVSMulX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVSMulX", "ReadVSMulV", "ReadVSMulX">; } multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVSShiftV_WorstCase, ReadVSShiftV_WorstCase, - ReadVSShiftV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVSShiftV", "ReadVSShiftV", "ReadVSShiftV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVSShiftX_WorstCase, ReadVSShiftV_WorstCase, - ReadVSShiftX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX">; def I : VALUVI<funct6, opcodestr # ".vi", uimm5>, - Sched<[WriteVSShiftI_WorstCase, 
ReadVSShiftV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVSShiftI", "ReadVSShiftV">; } multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">, - Sched<[WriteVNClipV_WorstCase, ReadVNClipV_WorstCase, - ReadVNClipV_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVNClipV", "ReadVNClipV", "ReadVNClipV">; def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">, - Sched<[WriteVNClipX_WorstCase, ReadVNClipV_WorstCase, - ReadVNClipX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVNClipX", "ReadVNClipV", "ReadVNClipX">; def I : VALUVI<funct6, opcodestr # ".wi", uimm5>, - Sched<[WriteVNClipI_WorstCase, ReadVNClipV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVNClipI", "ReadVNClipV">; } multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6> { def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVISlideX_WorstCase, ReadVISlideV_WorstCase, - ReadVISlideX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVISlideX", "ReadVISlideV", "ReadVISlideX">; def I : VALUVI<funct6, opcodestr # ".vi", uimm5>, - Sched<[WriteVISlideI_WorstCase, ReadVISlideV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVISlideI", "ReadVISlideV">; } multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6> { def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">, - Sched<[WriteVISlide1X_WorstCase, ReadVISlideV_WorstCase, - ReadVISlideX_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVISlide1X", "ReadVISlideV", "ReadVISlideX">; } multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6> { def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">, - Sched<[WriteVFSlide1F_WorstCase, ReadVFSlideV_WorstCase, - ReadVFSlideF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFSlide1F", "ReadVFSlideV", "ReadVFSlideF">; } multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6> { def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">, - Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase, - ReadVRGatherVV_index_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVRGatherVV", "ReadVRGatherVV_data", + "ReadVRGatherVV_index">; def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">, - Sched<[WriteVRGatherVX_WorstCase, ReadVRGatherVX_data_WorstCase, - ReadVRGatherVX_index_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVRGatherVX", "ReadVRGatherVX_data", + "ReadVRGatherVX_index">; def I : VALUVI<funct6, opcodestr # ".vi", uimm5>, - Sched<[WriteVRGatherVI_WorstCase, ReadVRGatherVI_data_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVRGatherVI", "ReadVRGatherVI_data">; } multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." 
# vm # "m">, - Sched<[WriteVCompressV_WorstCase, ReadVCompressV_WorstCase, - ReadVCompressV_WorstCase]>; + SchedBinaryMC<"WriteVCompressV", "ReadVCompressV", "ReadVCompressV">; } -multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> { - foreach l = [8, 16, 32] in { - defvar w = !cast<RISCVWidth>("LSWidth" # l); - defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R"); +multiclass VWholeLoadN<int l, bits<3> nf, string opcodestr, RegisterClass VRC> { + defvar w = !cast<RISCVWidth>("LSWidth" # l); + defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R"); - def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>, - Sched<[s, ReadVLDX]>; - } -} -multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> { - def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>, - Sched<[schedrw, ReadVLDX]>; + def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>, + Sched<[s, ReadVLDX]>; } //===----------------------------------------------------------------------===// @@ -1003,23 +1043,34 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), "vsetvl", "$rd, $rs1, $rs2">, Sched<[WriteVSETVL, ReadVSETVL, ReadVSETVL]>; } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 -foreach eew = [8, 16, 32] in { +} // Predicates = [HasVInstructions] + +foreach eew = [8, 16, 32, 64] in { defvar w = !cast<RISCVWidth>("LSWidth" # eew); - // Vector Unit-Stride Instructions - def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched; - def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched; + let Predicates = !if(!eq(eew, 64), [HasVInstructionsI64], + [HasVInstructions]) in { + // Vector Unit-Stride Instructions + def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESchedMC; + def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESchedMC; - // Vector Unit-Stride Fault-only-First Loads - def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched; + // Vector Unit-Stride Fault-only-First Loads + def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSchedMC; - // Vector Strided Instructions - def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>; - def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>; -} + // Vector Strided Instructions + def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSchedMC<eew>; + def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSchedMC<eew>; -defm "" : VIndexLoadStore<[8, 16, 32]>; -} // Predicates = [HasVInstructions] + defm VL1R : VWholeLoadN<eew, 0, "vl1r", VR>; + defm VL2R : VWholeLoadN<eew, 1, "vl2r", VRM2>; + defm VL4R : VWholeLoadN<eew, 3, "vl4r", VRM4>; + defm VL8R : VWholeLoadN<eew, 7, "vl8r", VRM8>; + } + + let Predicates = !if(!eq(eew, 64), [IsRV64, HasVInstructionsI64], + [HasVInstructions]) in + defm "" : VIndexLoadStore<eew>; +} let Predicates = [HasVInstructions] in { def VLM_V : VUnitStrideLoadMask<"vlm.v">, @@ -1031,11 +1082,6 @@ def : InstAlias<"vle1.v $vd, (${rs1})", def : InstAlias<"vse1.v $vs3, (${rs1})", (VSM_V VR:$vs3, GPR:$rs1), 0>; -defm VL1R : VWholeLoadN<0, "vl1r", VR>; -defm VL2R : VWholeLoadN<1, "vl2r", VRM2>; -defm VL4R : VWholeLoadN<3, "vl4r", VRM4>; -defm VL8R : VWholeLoadN<7, "vl8r", VRM8>; - def VS1R_V : VWholeStore<0, "vs1r.v", VR>, Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>; def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>, @@ -1051,33 +1097,6 @@ def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>; 
} // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsI64] in { -// Vector Unit-Stride Instructions -def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">, - VLESched; - -def VLE64FF_V : VUnitStrideLoadFF<LSWidth64, "vle64ff.v">, - VLFSched; - -def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">, - VSESched; -// Vector Strided Instructions -def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">, - VLSSched<32>; - -def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">, - VSSSched<64>; - -defm VL1R: VWholeLoadEEW64<0, "vl1r", VR, WriteVLD1R>; -defm VL2R: VWholeLoadEEW64<1, "vl2r", VRM2, WriteVLD2R>; -defm VL4R: VWholeLoadEEW64<3, "vl4r", VRM4, WriteVLD4R>; -defm VL8R: VWholeLoadEEW64<7, "vl8r", VRM8, WriteVLD8R>; -} // Predicates = [HasVInstructionsI64] -let Predicates = [IsRV64, HasVInstructionsI64] in { - // Vector Indexed Instructions - defm "" : VIndexLoadStore<[64]>; -} // [IsRV64, HasVInstructionsI64] - let Predicates = [HasVInstructions] in { // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; @@ -1268,12 +1287,10 @@ defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>; defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>; // Vector Widening Integer Multiply-Add Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>; defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>; defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>; defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Integer Merge Instructions defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>; @@ -1284,15 +1301,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, // op vd, vs1 def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">, - Sched<[WriteVIMovV_WorstCase, ReadVIMovV_WorstCase]>; + SchedUnaryMC<"WriteVIMovV", "ReadVIMovV", forceMasked=0>; // op vd, rs1 def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">, - Sched<[WriteVIMovX_WorstCase, ReadVIMovX_WorstCase]>; + SchedUnaryMC<"WriteVIMovX", "ReadVIMovX", forceMasked=0>; // op vd, imm def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), (ins simm5:$imm), "vmv.v.i", "$vd, $imm">, - Sched<[WriteVIMovI_WorstCase]>; + SchedNullaryMC<"WriteVIMovI", forceMasked=0>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Vector Fixed-Point Arithmetic Instructions @@ -1373,8 +1390,7 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; } // Vector Widening Floating-Point Fused Multiply-Add Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, - Uses = [FRM], mayRaiseFPException = true in { +let Uses = [FRM], mayRaiseFPException = true in { defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; @@ -1435,15 +1451,14 @@ let vm = 0 in def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd), (ins VR:$vs2, FPR32:$rs1, VMV0:$v0), "vfmerge.vfm", "$vd, $vs2, $rs1, v0">, - Sched<[WriteVFMergeV_WorstCase, ReadVFMergeV_WorstCase, - ReadVFMergeF_WorstCase, ReadVMask]>; + SchedBinaryMC<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF">; // Vector Floating-Point Move Instruction let RVVConstraint = NoConstraint in let vm = 1, vs2 = 0 in def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">, - Sched<[WriteVFMovV_WorstCase, ReadVFMovF_WorstCase]>; + 
SchedUnaryMC<"WriteVFMovV", "ReadVFMovF", forceMasked=0>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 @@ -1584,15 +1599,13 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), (ins VR:$vs2, VMaskOp:$vm), "vcpop.m", "$vd, $vs2$vm">, - Sched<[WriteVMPopV_WorstCase, ReadVMPopV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVMPopV", "ReadVMPopV">; // vfirst find-first-set mask bit def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), (ins VR:$vs2, VMaskOp:$vm), "vfirst.m", "$vd, $vs2$vm">, - Sched<[WriteVMFFSV_WorstCase, ReadVMFFSV_WorstCase, - ReadVMask]>; + SchedUnaryMC<"WriteVMFFSV", "ReadVMFFSV">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 @@ -1618,7 +1631,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { let vs2 = 0 in def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd), (ins VMaskOp:$vm), "vid.v", "$vd$vm">, - Sched<[WriteVMIdxV_WorstCase, ReadVMask]>; + SchedNullaryMC<"WriteVMIdxV">; // Integer Scalar Move Instructions let vm = 1, RVVConstraint = NoConstraint in { @@ -1674,8 +1687,8 @@ let Predicates = [HasVInstructions] in { let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>; def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">, - Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase, - ReadVRGatherVV_index_WorstCase]>; + SchedBinaryMC<"WriteVRGatherVV", "ReadVRGatherVV_data", + "ReadVRGatherVV_index">; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather // Vector Compress Instruction @@ -1705,38 +1718,38 @@ let Predicates = [HasVInstructions] in { def VLSEG#nf#E#eew#_V : VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">, - VLSEGSched<nf, eew>; + VLSEGSchedMC<nf, eew>; def VLSEG#nf#E#eew#FF_V : VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">, - VLSEGFFSched<nf, eew>; + VLSEGFFSchedMC<nf, eew>; def VSSEG#nf#E#eew#_V : VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">, - VSSEGSched<nf, eew>; + VSSEGSchedMC<nf, eew>; // Vector Strided Instructions def VLSSEG#nf#E#eew#_V : VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">, - VLSSEGSched<nf, eew>; + VLSSEGSchedMC<nf, eew>; def VSSSEG#nf#E#eew#_V : VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">, - VSSSEGSched<nf, eew>; + VSSSEGSchedMC<nf, eew>; // Vector Indexed Instructions def VLUXSEG#nf#EI#eew#_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w, "vluxseg"#nf#"ei"#eew#".v">, - VLXSEGSched<nf, eew, "U">; + VLXSEGSchedMC<nf, eew, isOrdered=0>; def VLOXSEG#nf#EI#eew#_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w, "vloxseg"#nf#"ei"#eew#".v">, - VLXSEGSched<nf, eew, "O">; + VLXSEGSchedMC<nf, eew, isOrdered=1>; def VSUXSEG#nf#EI#eew#_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w, "vsuxseg"#nf#"ei"#eew#".v">, - VSXSEGSched<nf, eew, "U">; + VSXSEGSchedMC<nf, eew, isOrdered=0>; def VSOXSEG#nf#EI#eew#_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w, "vsoxseg"#nf#"ei"#eew#".v">, - VSXSEGSched<nf, eew, "O">; + VSXSEGSchedMC<nf, eew, isOrdered=1>; } } } // Predicates = [HasVInstructions] @@ -1746,21 +1759,21 @@ let Predicates = [HasVInstructionsI64] in { // Vector Unit-strided Segment Instructions def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">, - VLSEGSched<nf, 64>; + VLSEGSchedMC<nf, 64>; def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, 
"vlseg"#nf#"e64ff.v">, - VLSEGFFSched<nf, 64>; + VLSEGFFSchedMC<nf, 64>; def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">, - VSSEGSched<nf, 64>; + VSSEGSchedMC<nf, 64>; // Vector Strided Segment Instructions def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">, - VLSSEGSched<nf, 64>; + VLSSEGSchedMC<nf, 64>; def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">, - VSSSEGSched<nf, 64>; + VSSSEGSchedMC<nf, 64>; } } // Predicates = [HasVInstructionsI64] let Predicates = [HasVInstructionsI64, IsRV64] in { @@ -1769,20 +1782,21 @@ let Predicates = [HasVInstructionsI64, IsRV64] in { def VLUXSEG #nf #EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64, "vluxseg" #nf #"ei64.v">, - VLXSEGSched<nf, 64, "U">; + VLXSEGSchedMC<nf, 64, isOrdered=0>; def VLOXSEG #nf #EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64, "vloxseg" #nf #"ei64.v">, - VLXSEGSched<nf, 64, "O">; + VLXSEGSchedMC<nf, 64, isOrdered=1>; def VSUXSEG #nf #EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64, "vsuxseg" #nf #"ei64.v">, - VSXSEGSched<nf, 64, "U">; + VSXSEGSchedMC<nf, 64, isOrdered=0>; def VSOXSEG #nf #EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64, "vsoxseg" #nf #"ei64.v">, - VSXSEGSched<nf, 64, "O">; + VSXSEGSchedMC<nf, 64, isOrdered=1>; } } // Predicates = [HasVInstructionsI64, IsRV64] +include "RISCVInstrInfoZvfbf.td" include "RISCVInstrInfoVPseudos.td" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index f8b7e32fe34c..5e06422cf9ad 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -31,7 +31,7 @@ /// the exact bit pattern of inactive lanes, or produce the bit pattern -1 for /// those lanes. Note that each lane can make this choice independently. /// Instructions which produce masks (and only those instructions) also have the -/// option of producing a result as-if VL had been VLMAX. +/// option of producing a result as-if VL had been VLMAX. /// * "Undefined" - The bit pattern of the inactive lanes is unspecified, and /// can be changed without impacting the semantics of the program. Note that /// this concept does not exist in the specification, and requires source @@ -52,26 +52,26 @@ /// /// Currently, the policy is represented via the following instrinsic families: /// * _MASK - Can represent all three policy states for both tail and mask. If -/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise, -/// policy operand and tablegen flags drive the interpretation. (If policy -/// operand is not present - there are a couple, thought we're rapidly -/// removing them - a non-undefined policy defaults to "tail agnostic", and -/// "mask undisturbed". Since this is the only variant with a mask, all -/// other variants are "mask undefined". +/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". +/// Otherwise, policy operand and tablegen flags drive the interpretation. +/// (If policy operand is not present - there are a couple, though we're +/// rapidly removing them - a non-undefined policy defaults to "tail +/// agnostic", and "mask undisturbed". Since this is the only variant with +/// a mask, all other variants are "mask undefined". 
/// * Unsuffixed w/ both passthrough and policy operand. Can represent all -/// three policy states. If passthrough is IMPLICIT_DEF, then represents -/// "undefined". Otherwise, policy operand and tablegen flags drive the -/// interpretation. +/// three policy states. If passthrough is IMPLICIT_DEF (or NoReg), then +/// represents "undefined". Otherwise, policy operand and tablegen flags +/// drive the interpretation. /// * Unsuffixed w/o passthrough or policy operand -- Does not have a /// passthrough operand, and thus represents the "undefined" state. Note /// that terminology in code frequently refers to these as "TA" which is /// confusing. We're in the process of migrating away from this /// representation. /// * _TU w/o policy operand -- Has a passthrough operand, and always -/// represents the tail undisturbed state. +/// represents the tail undisturbed state. /// * _TU w/policy operand - Can represent all three policy states. If -/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise, -/// policy operand and tablegen flags drive the interpretation. +/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". +/// Otherwise, policy operand and tablegen flags drive the interpretation. /// //===----------------------------------------------------------------------===// @@ -81,9 +81,9 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S", def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB", SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>; -// Operand that is allowed to be a register or a 5 bit immediate. -// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same -// pseudo instructions. +// Operand that is allowed to be a register other than X0, a 5 bit unsigned +// immediate, or -1. -1 means VLMAX. This allows us to pick between VSETIVLI and +// VSETVLI opcodes using the same pseudo instructions. def AVL : RegisterOperand<GPRNoX0> { let OperandNamespace = "RISCVOp"; let OperandType = "OPERAND_AVL"; @@ -115,16 +115,9 @@ class PseudoToVInst<string PseudoInst> { ["_E32", ""], ["_E16", ""], ["_E8", ""], - ["_F64", "_F"], - ["_F32", "_F"], - ["_F16", "_F"], - ["_VF64", "_VF"], - ["_VF32", "_VF"], - ["_VF16", "_VF"], - ["_WF64", "_WF"], - ["_WF32", "_WF"], - ["_WF16", "_WF"], - ["_TU", ""], + ["FPR64", "F"], + ["FPR32", "F"], + ["FPR16", "F"], ["_TIED", ""], ["_MASK", ""], ["_B64", ""], @@ -141,7 +134,8 @@ class PseudoToVInst<string PseudoInst> { ["_M2", ""], ["_M4", ""], ["_M8", ""], - ["_SE", ""] + ["_SE", ""], + ["_RM", ""] ]; string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst, !subst(AffixSubst[0], AffixSubst[1], Acc)); @@ -189,7 +183,7 @@ defvar MxListFWRed = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; // Use for zext/sext.vf2 defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; -// Use for zext/sext.vf4 +// Use for zext/sext.vf4 and vector crypto instructions defvar MxListVF4 = [V_MF2, V_M1, V_M2, V_M4, V_M8]; // Use for zext/sext.vf8 @@ -204,7 +198,7 @@ class MxSet<int eew> { class FPR_Info<int sew> { RegisterClass fprclass = !cast<RegisterClass>("FPR" # sew); - string FX = "F" # sew; + string FX = "FPR" # sew; int SEW = sew; list<LMULInfo> MxList = MxSet<sew>.m; list<LMULInfo> MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8])); @@ -214,16 +208,20 @@ def SCALAR_F16 : FPR_Info<16>; def SCALAR_F32 : FPR_Info<32>; def SCALAR_F64 : FPR_Info<64>; +// BF16 uses the same register class as F16. +def SCALAR_BF16 : FPR_Info<16>; + defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64]; // Used for widening instructions. 
It excludes F64. defvar FPListW = [SCALAR_F16, SCALAR_F32]; +// Used for widening bf16 instructions. +defvar BFPListW = [SCALAR_BF16]; + class NFSet<LMULInfo m> { - list<int> L = !cond(!eq(m.value, V_M8.value): [], - !eq(m.value, V_M4.value): [2], - !eq(m.value, V_M2.value): [2, 3, 4], - true: [2, 3, 4, 5, 6, 7, 8]); + defvar lmul = !shl(1, m.value); + list<int> L = NFList<lmul>.L; } class octuple_to_str<int octuple> { @@ -243,6 +241,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>; // This must be kept in sync with RISCV::VLMaxSentinel. def VLMax : OutPatFrag<(ops), (XLenVT -1)>; +def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>; + // List of EEW. defvar EEWList = [8, 16, 32, 64]; @@ -272,9 +272,10 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M, OutPatFrag AVL = VLMax; string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X", - !eq(Scal, f16) : "F16", - !eq(Scal, f32) : "F32", - !eq(Scal, f64) : "F64"); + !eq(Scal, f16) : "FPR16", + !eq(Scal, bf16) : "FPR16", + !eq(Scal, f32) : "FPR32", + !eq(Scal, f64) : "FPR64"); } class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew, @@ -356,6 +357,25 @@ defset list<VTypeInfo> AllVectors = { } } +defset list<VTypeInfo> AllBFloatVectors = { + defset list<VTypeInfo> NoGroupBFloatVectors = { + defset list<VTypeInfo> FractionalGroupBFloatVectors = { + def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>; + def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>; + } + def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>; + } + + defset list<GroupVTypeInfo> GroupBFloatVectors = { + def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16, + VRM2, V_M2, bf16, FPR16>; + def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16, + VRM4, V_M4, bf16, FPR16>; + def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16, + VRM8, V_M8, bf16, FPR16>; + } +} + // This functor is used to obtain the int vector type that has the same SEW and // multiplier as the input parameter type class GetIntVTypeInfo<VTypeInfo vti> { @@ -491,6 +511,14 @@ defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = { def : VTypeInfoToWide<VI32M4, VF64M8>; } +defset list<VTypeInfoToWide> AllWidenableBFloatToFloatVectors = { + def : VTypeInfoToWide<VBF16MF4, VF32MF2>; + def : VTypeInfoToWide<VBF16MF2, VF32M1>; + def : VTypeInfoToWide<VBF16M1, VF32M2>; + def : VTypeInfoToWide<VBF16M2, VF32M4>; + def : VTypeInfoToWide<VBF16M4, VF32M8>; +} + // This class holds the record of the RISCVVPseudoTable below. // This represents the information we need in codegen for each pseudo. // The definition should be consistent with `struct PseudoInfo` in @@ -500,11 +528,21 @@ class RISCVVPseudo { Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); // SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown). bits<8> SEW = 0; + bit NeedBeInPseudoTable = 1; + // TargetOverlapConstraintType indicates that these instructions can + // overlap between source operands and destination operands. + // 1 -> default value, remain current constraint + // 2 -> narrow case + // 3 -> widen case + // TODO: Add TargetOverlapConstraintType into PseudosTable for further + // query. + bits<2> TargetOverlapConstraintType = 1; } // The actual table. 
def RISCVVPseudosTable : GenericTable { let FilterClass = "RISCVVPseudo"; + let FilterClassField = "NeedBeInPseudoTable"; let CppTypeName = "PseudoInfo"; let Fields = [ "Pseudo", "BaseInstr" ]; let PrimaryKey = [ "Pseudo" ]; @@ -534,16 +572,17 @@ def RISCVVIntrinsicsTable : GenericTable { // unmasked variant. For all but compares, both the masked and // unmasked variant have a passthru and policy operand. For compares, // neither has a policy op, and only the masked version has a passthru. -class RISCVMaskedPseudo<bits<4> MaskIdx> { +class RISCVMaskedPseudo<bits<4> MaskIdx, bit MaskAffectsRes=false> { Pseudo MaskedPseudo = !cast<Pseudo>(NAME); Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME)); bits<4> MaskOpIdx = MaskIdx; + bit MaskAffectsResult = MaskAffectsRes; } def RISCVMaskedPseudosTable : GenericTable { let FilterClass = "RISCVMaskedPseudo"; let CppTypeName = "RISCVMaskedPseudoInfo"; - let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx"]; + let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "MaskAffectsResult"]; let PrimaryKey = ["MaskedPseudo"]; let PrimaryKeyName = "getMaskedPseudoInfo"; } @@ -723,16 +762,18 @@ class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins, int sew = 0> : class GetVTypePredicates<VTypeInfo vti> { list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16], + !eq(vti.Scalar, bf16) : [HasVInstructionsBF16], !eq(vti.Scalar, f32) : [HasVInstructionsAnyF], !eq(vti.Scalar, f64) : [HasVInstructionsF64], !eq(vti.SEW, 64) : [HasVInstructionsI64], true : [HasVInstructions]); } -class VPseudoUSLoadNoMask<VReg RetClass, int EEW> : +class VPseudoUSLoadNoMask<VReg RetClass, + int EEW> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy),[]>, + ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -744,11 +785,12 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW> : let Constraints = "$rd = $dest"; } -class VPseudoUSLoadMask<VReg RetClass, int EEW> : +class VPseudoUSLoadMask<VReg RetClass, + int EEW> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, + (ins GetVRegNoV0<RetClass>.R:$merge, + GPRMem:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -761,10 +803,11 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW> : let UsesMaskPolicy = 1; } -class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> : +class VPseudoUSLoadFFNoMask<VReg RetClass, + int EEW> : Pseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -776,11 +819,12 @@ class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> : let Constraints = "$rd = $dest"; } -class VPseudoUSLoadFFMask<VReg RetClass, int EEW> : +class VPseudoUSLoadFFMask<VReg RetClass, + int EEW> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl), - (ins GetVRegNoV0<RetClass>.R:$merge, - GPRMem:$rs1, - VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>, + (ins GetVRegNoV0<RetClass>.R:$merge, + GPRMem:$rs1, + VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, 
!logtwo(EEW), VLMul> { let mayLoad = 1; @@ -793,10 +837,11 @@ class VPseudoUSLoadFFMask<VReg RetClass, int EEW> : let UsesMaskPolicy = 1; } -class VPseudoSLoadNoMask<VReg RetClass, int EEW>: +class VPseudoSLoadNoMask<VReg RetClass, + int EEW> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -808,11 +853,12 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>: let Constraints = "$rd = $dest"; } -class VPseudoSLoadMask<VReg RetClass, int EEW>: +class VPseudoSLoadMask<VReg RetClass, + int EEW> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - GPRMem:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, + (ins GetVRegNoV0<RetClass>.R:$merge, + GPRMem:$rs1, GPR:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -825,11 +871,16 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>: let UsesMaskPolicy = 1; } -class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, - bit Ordered, bit EarlyClobber>: +class VPseudoILoadNoMask<VReg RetClass, + VReg IdxClass, + int EEW, + bits<3> LMUL, + bit Ordered, + bit EarlyClobber, + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; @@ -839,29 +890,37 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, let HasSEWOp = 1; let HasVecPolicyOp = 1; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest"); + let TargetOverlapConstraintType = TargetConstraintType; } -class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, - bit Ordered, bit EarlyClobber>: +class VPseudoILoadMask<VReg RetClass, + VReg IdxClass, + int EEW, + bits<3> LMUL, + bit Ordered, + bit EarlyClobber, + int TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - GPRMem:$rs1, IdxClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, + (ins GetVRegNoV0<RetClass>.R:$merge, + GPRMem:$rs1, IdxClass:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge"); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; let UsesMaskPolicy = 1; } -class VPseudoUSStoreNoMask<VReg StClass, int EEW>: +class VPseudoUSStoreNoMask<VReg StClass, + int EEW> : Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>, + (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -871,9 +930,11 @@ class VPseudoUSStoreNoMask<VReg StClass, int EEW>: let HasSEWOp = 1; } -class VPseudoUSStoreMask<VReg StClass, int EEW>: +class VPseudoUSStoreMask<VReg StClass, + int EEW> : Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, 
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + (ins StClass:$rd, GPRMem:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -883,9 +944,11 @@ class VPseudoUSStoreMask<VReg StClass, int EEW>: let HasSEWOp = 1; } -class VPseudoSStoreNoMask<VReg StClass, int EEW>: +class VPseudoSStoreNoMask<VReg StClass, + int EEW> : Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>, + (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, + AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -895,9 +958,11 @@ class VPseudoSStoreNoMask<VReg StClass, int EEW>: let HasSEWOp = 1; } -class VPseudoSStoreMask<VReg StClass, int EEW>: +class VPseudoSStoreMask<VReg StClass, + int EEW> : Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -907,10 +972,11 @@ class VPseudoSStoreMask<VReg StClass, int EEW>: let HasSEWOp = 1; } -class VPseudoNullaryNoMask<VReg RegClass>: +class VPseudoNullaryNoMask<VReg RegClass> : Pseudo<(outs RegClass:$rd), - (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, RISCVVPseudo { + (ins RegClass:$merge, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -920,10 +986,11 @@ class VPseudoNullaryNoMask<VReg RegClass>: let HasVecPolicyOp = 1; } -class VPseudoNullaryMask<VReg RegClass>: +class VPseudoNullaryMask<VReg RegClass> : Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd), - (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo { + (ins GetVRegNoV0<RegClass>.R:$merge, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -936,9 +1003,9 @@ class VPseudoNullaryMask<VReg RegClass>: // Nullary for pseudo instructions. They are expanded in // RISCVExpandPseudoInsts pass. -class VPseudoNullaryPseudoM<string BaseInst> - : Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { +class VPseudoNullaryPseudoM<string BaseInst> : + Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -947,33 +1014,41 @@ class VPseudoNullaryPseudoM<string BaseInst> // BaseInstr is not used in RISCVExpandPseudoInsts pass. // Just fill a corresponding real v-inst to pass tablegen check. let BaseInstr = !cast<Instruction>(BaseInst); + // We exclude them from RISCVVPseudoTable. 
+ let NeedBeInPseudoTable = 0; } -class VPseudoUnaryNoMask<DAGOperand RetClass, DAGOperand OpClass, - string Constraint = ""> : +class VPseudoUnaryNoMask<DAGOperand RetClass, + DAGOperand OpClass, + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, - RISCVVPseudo { + (ins RetClass:$merge, OpClass:$rs2, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; } -class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass, - string Constraint = ""> : +class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, + DAGOperand OpClass, + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, - RISCVVPseudo { + (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -981,27 +1056,33 @@ class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass, let UsesVXRM = 0; } -class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { +class VPseudoUnaryMask<VReg RetClass, + VReg OpClass, + string Constraint = "", + int TargetConstraintType = 1> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; let UsesMaskPolicy = 1; } -class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constraint = ""> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, - VMaskOp:$vm, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { +class VPseudoUnaryMaskRoundingMode<VReg RetClass, + VReg OpClass, + string Constraint = ""> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, + VMaskOp:$vm, ixlenimm:$rm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1014,10 +1095,12 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constrain let UsesVXRM = 0; } -class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint = ""> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { +class VPseudoUnaryMask_NoExcept<VReg RetClass, + VReg OpClass, + string Constraint = ""> : + Pseudo<(outs 
GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1029,10 +1112,13 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint = let usesCustomInserter = 1; } -class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []> { +class VPseudoUnaryNoMask_FRM<VReg RetClass, + VReg OpClass, + string Constraint = ""> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1040,13 +1126,17 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = "" let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; - let usesCustomInserter = 1; + let HasRoundModeOp = 1; } -class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, - VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { +class VPseudoUnaryMask_FRM<VReg RetClass, + VReg OpClass, + string Constraint = ""> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, + VMaskOp:$vm, ixlenimm:$frm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1055,13 +1145,13 @@ class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> let HasSEWOp = 1; let HasVecPolicyOp = 1; let UsesMaskPolicy = 1; - let usesCustomInserter = 1; + let HasRoundModeOp = 1; } class VPseudoUnaryNoMaskGPROut : - Pseudo<(outs GPR:$rd), - (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { + Pseudo<(outs GPR:$rd), + (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1069,10 +1159,10 @@ class VPseudoUnaryNoMaskGPROut : let HasSEWOp = 1; } -class VPseudoUnaryMaskGPROut: - Pseudo<(outs GPR:$rd), - (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { +class VPseudoUnaryMaskGPROut : + Pseudo<(outs GPR:$rd), + (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1084,10 +1174,8 @@ class VPseudoUnaryMaskGPROut: class VPseudoUnaryAnyMask<VReg RetClass, VReg Op1Class> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, - Op1Class:$rs2, - VR:$vm, AVL:$vl, ixlenimm:$sew), - []>, + (ins RetClass:$merge, Op1Class:$rs2, + VR:$vm, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1100,14 +1188,16 @@ class VPseudoUnaryAnyMask<VReg RetClass, class VPseudoBinaryNoMask<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = Constraint; + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp 
= 1; } @@ -1115,15 +1205,17 @@ class VPseudoBinaryNoMask<VReg RetClass, class VPseudoBinaryNoMaskTU<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, + ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1133,14 +1225,16 @@ class VPseudoBinaryNoMaskRoundingMode<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, string Constraint, - int UsesVXRM_ = 1> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + int UsesVXRM_ = 1, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1152,16 +1246,18 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, string Constraint, - int UsesVXRM_> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, ixlenimm:$rm, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + int UsesVXRM_, + int TargetConstraintType = 1> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, ixlenimm:$rm, AVL:$vl, + ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1175,15 +1271,17 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass, // This allows maskedoff and rs2 to be the same register. 
class VPseudoTiedBinaryNoMask<VReg RetClass, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew, + ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $rs2"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1193,17 +1291,19 @@ class VPseudoTiedBinaryNoMask<VReg RetClass, class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs2, Op2Class:$rs1, - ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs2, Op2Class:$rs1, + ixlenimm:$rm, + AVL:$vl, ixlenimm:$sew, + ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $rs2"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1216,7 +1316,8 @@ class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass, class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL, bit Ordered>: Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>, + (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, + ixlenimm:$sew),[]>, RISCVVPseudo, RISCVVSX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 0; @@ -1229,7 +1330,8 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL, class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL, bit Ordered>: Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, RISCVVSX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 0; @@ -1243,11 +1345,11 @@ class VPseudoBinaryMask<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, string Constraint> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1259,16 +1361,18 @@ class VPseudoBinaryMask<VReg RetClass, class VPseudoBinaryMaskPolicy<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 
0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1279,11 +1383,11 @@ class VPseudoTernaryMaskPolicy<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, string Constraint> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1297,13 +1401,13 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, string Constraint> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, - ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, + ixlenimm:$rm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1319,14 +1423,16 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass, class VPseudoBinaryMOutNoMask<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = Constraint; + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; } @@ -1335,16 +1441,18 @@ class VPseudoBinaryMOutNoMask<VReg RetClass, class VPseudoBinaryMOutMask<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, - Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let UsesMaskPolicy = 1; @@ -1355,16 +1463,18 @@ class VPseudoBinaryMOutMask<VReg RetClass, // This allows maskedoff and rs2 to be the same register. 
class VPseudoTiedBinaryMask<VReg RetClass, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op2Class:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1374,18 +1484,20 @@ class VPseudoTiedBinaryMask<VReg RetClass, class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), - (ins GetVRegNoV0<RetClass>.R:$merge, - Op2Class:$rs1, - VMaskOp:$vm, - ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + Op2Class:$rs1, + VMaskOp:$vm, + ixlenimm:$rm, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1400,17 +1512,20 @@ class VPseudoBinaryCarryIn<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, bit CarryIn, - string Constraint> : - Pseudo<(outs RetClass:$rd), - !if(CarryIn, - (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, - ixlenimm:$sew), - (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + !if(CarryIn, + (ins Op1Class:$rs2, Op2Class:$rs1, + VMV0:$carry, AVL:$vl, ixlenimm:$sew), + (ins Op1Class:$rs2, Op2Class:$rs1, + AVL:$vl, ixlenimm:$sew)), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = Constraint; + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let VLMul = MInfo.value; @@ -1422,12 +1537,13 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass, LMULInfo MInfo, bit CarryIn, string Constraint> : - Pseudo<(outs RetClass:$rd), - !if(CarryIn, - (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, - ixlenimm:$sew), - (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>, - RISCVVPseudo { + Pseudo<(outs RetClass:$rd), + !if(CarryIn, + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, + VMV0:$carry, AVL:$vl, ixlenimm:$sew), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, + AVL:$vl, ixlenimm:$sew)), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1442,11 +1558,10 @@ class VPseudoTernaryNoMask<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, ixlenimm:$sew), - []>, - RISCVVPseudo { + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, + AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1458,16 
+1573,17 @@ class VPseudoTernaryNoMask<VReg RetClass, class VPseudoTernaryNoMaskWithPolicy<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), - []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $rs3"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVecPolicyOp = 1; let HasVLOp = 1; let HasSEWOp = 1; @@ -1476,16 +1592,17 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass, class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass, RegisterClass Op1Class, DAGOperand Op2Class, - string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), - []>, - RISCVVPseudo { + string Constraint, + int TargetConstraintType = 1> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, + ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $rs3"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVecPolicyOp = 1; let HasVLOp = 1; let HasSEWOp = 1; @@ -1493,10 +1610,12 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass, let UsesVXRM = 0; } -class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>: +class VPseudoUSSegLoadNoMask<VReg RetClass, + int EEW, + bits<4> NF> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -1508,10 +1627,12 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>: let Constraints = "$rd = $dest"; } -class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>: +class VPseudoUSSegLoadMask<VReg RetClass, + int EEW, + bits<4> NF> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -1524,10 +1645,12 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>: let UsesMaskPolicy = 1; } -class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>: +class VPseudoUSSegLoadFFNoMask<VReg RetClass, + int EEW, + bits<4> NF> : Pseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -1539,10 +1662,12 @@ class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>: let Constraints = "$rd = $dest"; } -class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>: +class VPseudoUSSegLoadFFMask<VReg RetClass, + int EEW, + bits<4> NF> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl), (ins 
GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1, - VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>, + VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -1555,10 +1680,12 @@ class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>: let UsesMaskPolicy = 1; } -class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>: +class VPseudoSSegLoadNoMask<VReg RetClass, + int EEW, + bits<4> NF> : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -1570,11 +1697,13 @@ class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>: let Constraints = "$rd = $merge"; } -class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>: +class VPseudoSSegLoadMask<VReg RetClass, + int EEW, + bits<4> NF> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1, GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy),[]>, + ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; @@ -1587,11 +1716,15 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>: let UsesMaskPolicy = 1; } -class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, - bits<4> NF, bit Ordered>: +class VPseudoISegLoadNoMask<VReg RetClass, + VReg IdxClass, + int EEW, + bits<3> LMUL, + bits<4> NF, + bit Ordered> : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, GPRMem:$rs1, IdxClass:$offset, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy),[]>, + ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; @@ -1605,12 +1738,16 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, let HasVecPolicyOp = 1; } -class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, - bits<4> NF, bit Ordered>: +class VPseudoISegLoadMask<VReg RetClass, + VReg IdxClass, + int EEW, + bits<3> LMUL, + bits<4> NF, + bit Ordered> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1, IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy),[]>, + ixlenimm:$policy), []>, RISCVVPseudo, RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; @@ -1625,9 +1762,11 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL, let UsesMaskPolicy = 1; } -class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>: +class VPseudoUSSegStoreNoMask<VReg ValClass, + int EEW, + bits<4> NF> : Pseudo<(outs), - (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>, + (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -1637,10 +1776,12 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>: let HasSEWOp = 1; } -class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>: +class VPseudoUSSegStoreMask<VReg ValClass, + int EEW, + bits<4> NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSSEG<NF, /*Masked*/1, 
/*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -1650,9 +1791,12 @@ class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>: let HasSEWOp = 1; } -class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>: +class VPseudoSSegStoreNoMask<VReg ValClass, + int EEW, + bits<4> NF> : Pseudo<(outs), - (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, AVL:$vl, ixlenimm:$sew),[]>, + (ins ValClass:$rd, GPRMem:$rs1, GPR:$offset, + AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -1662,10 +1806,12 @@ class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>: let HasSEWOp = 1; } -class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>: +class VPseudoSSegStoreMask<VReg ValClass, + int EEW, + bits<4> NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; @@ -1675,11 +1821,15 @@ class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>: let HasSEWOp = 1; } -class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL, - bits<4> NF, bit Ordered>: +class VPseudoISegStoreNoMask<VReg ValClass, + VReg IdxClass, + int EEW, + bits<3> LMUL, + bits<4> NF, + bit Ordered> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index, - AVL:$vl, ixlenimm:$sew),[]>, + AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 0; @@ -1689,11 +1839,15 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL let HasSEWOp = 1; } -class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL, - bits<4> NF, bit Ordered>: +class VPseudoISegStoreMask<VReg ValClass, + VReg IdxClass, + int EEW, + bits<3> LMUL, + bits<4> NF, + bit Ordered> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, RISCVVPseudo, RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 0; @@ -1782,15 +1936,16 @@ multiclass VPseudoILoad<bit Ordered> { defvar Vreg = dataEMUL.vrclass; defvar IdxVreg = idxEMUL.vrclass; defvar HasConstraint = !ne(dataEEW, idxEEW); - defvar Order = !if(Ordered, "O", "U"); + defvar TypeConstraints = + !if(!eq(dataEEW, idxEEW), 1, !if(!gt(dataEEW, idxEEW), !if(!ge(idxEMULOctuple, 8), 3, 1), 2)); let VLMul = dataEMUL.value in { def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo : - VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>, - VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>; + VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint, TypeConstraints>, + VLXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>; def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" : - VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>, + VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint, TypeConstraints>, RISCVMaskedPseudo<MaskIdx=3>, - VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>; + VLXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>; } } } @@ -1853,14 +2008,13 @@ multiclass VPseudoIStore<bit Ordered> { defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo); defvar Vreg = dataEMUL.vrclass; defvar IdxVreg = idxEMUL.vrclass; - defvar Order = !if(Ordered, "O", "U"); let 
VLMul = dataEMUL.value in { def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo : VPseudoIStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>, - VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>; + VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>; def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" : VPseudoIStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>, - VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>; + VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>; } } } @@ -1871,13 +2025,11 @@ multiclass VPseudoIStore<bit Ordered> { multiclass VPseudoVPOP_M { foreach mti = AllMasks in { defvar mx = mti.LMul.MX; - defvar WriteVMPopV_MX = !cast<SchedWrite>("WriteVMPopV_" # mx); - defvar ReadVMPopV_MX = !cast<SchedRead>("ReadVMPopV_" # mx); let VLMul = mti.LMul.value in { def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut, - Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>; + SchedBinary<"WriteVMPopV", "ReadVMPopV", "ReadVMPopV", mx>; def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut, - Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>; + SchedBinary<"WriteVMPopV", "ReadVMPopV", "ReadVMPopV", mx>; } } } @@ -1885,13 +2037,11 @@ multiclass VPseudoVPOP_M { multiclass VPseudoV1ST_M { foreach mti = AllMasks in { defvar mx = mti.LMul.MX; - defvar WriteVMFFSV_MX = !cast<SchedWrite>("WriteVMFFSV_" # mx); - defvar ReadVMFFSV_MX = !cast<SchedRead>("ReadVMFFSV_" # mx); let VLMul = mti.LMul.value in { - def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut, - Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>; + def "_M_" #mti.BX : VPseudoUnaryNoMaskGPROut, + SchedBinary<"WriteVMFFSV", "ReadVMFFSV", "ReadVMFFSV", mx>; def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut, - Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>; + SchedBinary<"WriteVMFFSV", "ReadVMFFSV", "ReadVMFFSV", mx>; } } } @@ -1900,13 +2050,13 @@ multiclass VPseudoVSFS_M { defvar constraint = "@earlyclobber $rd"; foreach mti = AllMasks in { defvar mx = mti.LMul.MX; - defvar WriteVMSFSV_MX = !cast<SchedWrite>("WriteVMSFSV_" # mx); - defvar ReadVMSFSV_MX = !cast<SchedRead>("ReadVMSFSV_" # mx); let VLMul = mti.LMul.value in { def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>, - Sched<[WriteVMSFSV_MX, ReadVMSFSV_MX, ReadVMask]>; + SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx, + forceMergeOpRead=true>; def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>, - Sched<[WriteVMSFSV_MX, ReadVMSFSV_MX, ReadVMask]>; + SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx, + forceMergeOpRead=true>; } } } @@ -1914,28 +2064,22 @@ multiclass VPseudoVSFS_M { multiclass VPseudoVID_V { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVMIdxV_MX = !cast<SchedWrite>("WriteVMIdxV_" # mx); - defvar ReadVMIdxV_MX = !cast<SchedRead>("ReadVMIdxV_" # mx); - let VLMul = m.value in { - def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>, - Sched<[WriteVMIdxV_MX, ReadVMask]>; - def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>, + def "_V_" # mx : VPseudoNullaryNoMask<m.vrclass>, + SchedNullary<"WriteVMIdxV", mx, forceMergeOpRead=true>; + def "_V_" # mx # "_MASK" : VPseudoNullaryMask<m.vrclass>, RISCVMaskedPseudo<MaskIdx=1>, - Sched<[WriteVMIdxV_MX, ReadVMask]>; + SchedNullary<"WriteVMIdxV", mx, + forceMergeOpRead=true>; } } } multiclass VPseudoNullaryPseudoM <string BaseInst> { foreach mti = AllMasks in { - defvar mx = mti.LMul.MX; - defvar WriteVMALUV_MX = !cast<SchedWrite>("WriteVMALUV_" # mx); - defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx); - let VLMul = mti.LMul.value in { def "_M_" # mti.BX : 
VPseudoNullaryPseudoM<BaseInst # "_MM">, - Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>; + SchedBinary<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", mti.LMul.MX>; } } } @@ -1944,14 +2088,14 @@ multiclass VPseudoVIOT_M { defvar constraint = "@earlyclobber $rd"; foreach m = MxList in { defvar mx = m.MX; - defvar WriteVMIotV_MX = !cast<SchedWrite>("WriteVMIotV_" # mx); - defvar ReadVMIotV_MX = !cast<SchedRead>("ReadVMIotV_" # mx); let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>, - Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>; - def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>, - RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>; + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, VR, constraint>, + SchedUnary<"WriteVMIotV", "ReadVMIotV", mx, + forceMergeOpRead=true>; + def "_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>, + RISCVMaskedPseudo<MaskIdx=2, MaskAffectsRes=true>, + SchedUnary<"WriteVMIotV", "ReadVMIotV", mx, + forceMergeOpRead=true>; } } } @@ -1963,12 +2107,11 @@ multiclass VPseudoVCPR_V { let VLMul = m.value in foreach e = sews in { defvar suffix = "_" # m.MX # "_E" # e; - defvar WriteVCompressV_MX_E = !cast<SchedWrite>("WriteVCompressV" # suffix); - defvar ReadVCompressV_MX_E = !cast<SchedRead>("ReadVCompressV" # suffix); - let SEW = e in - def _VM # suffix : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>, - Sched<[WriteVCompressV_MX_E, ReadVCompressV_MX_E, ReadVCompressV_MX_E]>; + def _VM # suffix + : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>, + SchedBinary<"WriteVCompressV", "ReadVCompressV", "ReadVCompressV", + mx, e>; } } } @@ -1978,33 +2121,50 @@ multiclass VPseudoBinary<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, string Constraint = "", - int sew = 0> { + int sew = 0, + int TargetConstraintType = 1> { let VLMul = MInfo.value, SEW=sew in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class, - Constraint>; + Constraint, TargetConstraintType>; def suffix # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, - Constraint>, + Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=3>; } } +multiclass VPseudoBinaryNoMask<VReg RetClass, + VReg Op1Class, + DAGOperand Op2Class, + LMULInfo MInfo, + string Constraint = "", + int sew = 0> { + let VLMul = MInfo.value, SEW=sew in { + defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); + def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class, + Constraint>; + } +} + multiclass VPseudoBinaryRoundingMode<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, LMULInfo MInfo, string Constraint = "", int sew = 0, - int UsesVXRM = 1> { + int UsesVXRM = 1, + int TargetConstraintType = 1> { let VLMul = MInfo.value, SEW=sew in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoBinaryNoMaskRoundingMode<RetClass, Op1Class, Op2Class, - Constraint, UsesVXRM>; + Constraint, UsesVXRM, + TargetConstraintType>; def suffix # "_MASK" : VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class, Op2Class, Constraint, - UsesVXRM>, + UsesVXRM, + TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=3>; } } @@ -2014,13 +2174,14 @@ multiclass VPseudoBinaryM<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX : 
VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class, - Constraint>; + Constraint, TargetConstraintType>; let ForceTailAgnostic = true in def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask<RetClass, Op1Class, - Op2Class, Constraint>, + Op2Class, Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=3>; } } @@ -2045,24 +2206,26 @@ multiclass VPseudoBinaryEmul<VReg RetClass, multiclass VPseudoTiedBinary<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask<RetClass, Op2Class, - Constraint>; + Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask<RetClass, Op2Class, - Constraint>; + Constraint, TargetConstraintType>; } } multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": - VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint>; + VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK_TIED" : - VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint>; + VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>; } } @@ -2098,11 +2261,11 @@ multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> { defvar emul = !cast<LMULInfo>("V_" # emulMX); defvar sews = SchedSEWSet<mx>.val; foreach e = sews in { - defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e); - defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e); - defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e); - defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint, e>, - Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E, ReadVRGatherVV_index_MX_E]>; + defm _VV + : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, + Constraint, e>, + SchedBinary<"WriteVRGatherVV", "ReadVRGatherVV_data", + "ReadVRGatherVV_index", mx, e, forceMergeOpRead=true>; } } } @@ -2119,13 +2282,9 @@ multiclass VPseudoBinaryV_VX_RM<LMULInfo m, string Constraint = ""> { multiclass VPseudoVSLD1_VX<string Constraint = ""> { foreach m = MxList in { - defvar mx = m.MX; - defvar WriteVISlide1X_MX = !cast<SchedWrite>("WriteVISlide1X_" # mx); - defvar ReadVISlideV_MX = !cast<SchedRead>("ReadVISlideV_" # mx); - defvar ReadVISlideX_MX = !cast<SchedRead>("ReadVISlideX_" # mx); - defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>, - Sched<[WriteVISlide1X_MX, ReadVISlideV_MX, ReadVISlideX_MX, ReadVMask]>; + SchedBinary<"WriteVISlide1X", "ReadVISlideV", "ReadVISlideX", + m.MX, forceMergeOpRead=true>; } } @@ -2143,14 +2302,10 @@ multiclass VPseudoBinaryV_VF_RM<LMULInfo m, FPR_Info f, string Constraint = "", multiclass VPseudoVSLD1_VF<string Constraint = ""> { foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFSlide1F_MX = !cast<SchedWrite>("WriteVFSlide1F_" # mx); - defvar ReadVFSlideV_MX = !cast<SchedRead>("ReadVFSlideV_" # mx); - defvar ReadVFSlideF_MX = !cast<SchedRead>("ReadVFSlideF_" # mx); - - defm "_V" # f.FX : - VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>, - Sched<[WriteVFSlide1F_MX, ReadVFSlideV_MX, ReadVFSlideF_MX, ReadVMask]>; + 
defm "_V" #f.FX + : VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>, + SchedBinary<"WriteVFSlide1F", "ReadVFSlideV", "ReadVFSlideF", m.MX, + forceMergeOpRead=true>; } } } @@ -2163,15 +2318,12 @@ multiclass VPseudoBinaryV_VI_RM<Operand ImmType = simm5, LMULInfo m, string Cons defm _VI : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, ImmType, m, Constraint>; } -multiclass VPseudoVALU_MM { +multiclass VPseudoVALU_MM<bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVMALUV_MX = !cast<SchedWrite>("WriteVMALUV_" # mx); - defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx); - - let VLMul = m.value in { + let VLMul = m.value, isCommutable = Commutable in { def "_MM_" # mx : VPseudoBinaryNoMask<VR, VR, VR, "">, - Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>; + SchedBinary<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", mx>; } } } @@ -2185,17 +2337,23 @@ multiclass VPseudoVALU_MM { // destination register group is legal. Otherwise, it is illegal. multiclass VPseudoBinaryW_VV<LMULInfo m> { defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m, - "@earlyclobber $rd">; + "@earlyclobber $rd", TargetConstraintType=3>; } multiclass VPseudoBinaryW_VV_RM<LMULInfo m> { defm _VV : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m, - "@earlyclobber $rd", UsesVXRM=0>; + "@earlyclobber $rd", UsesVXRM=0, + TargetConstraintType=3>; } multiclass VPseudoBinaryW_VX<LMULInfo m> { defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m, - "@earlyclobber $rd">; + "@earlyclobber $rd", TargetConstraintType=3>; +} + +multiclass VPseudoBinaryW_VI<Operand ImmType, LMULInfo m> { + defm "_VI" : VPseudoBinary<m.wvrclass, m.vrclass, ImmType, m, + "@earlyclobber $rd", TargetConstraintType=3>; } multiclass VPseudoBinaryW_VF<LMULInfo m, FPR_Info f> { @@ -2208,36 +2366,40 @@ multiclass VPseudoBinaryW_VF_RM<LMULInfo m, FPR_Info f> { defm "_V" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass, f.fprclass, m, "@earlyclobber $rd", - UsesVXRM=0>; + UsesVXRM=0, + TargetConstraintType=3>; } multiclass VPseudoBinaryW_WV<LMULInfo m> { defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m, - "@earlyclobber $rd">; + "@earlyclobber $rd", TargetConstraintType=3>; defm _WV : VPseudoTiedBinary<m.wvrclass, m.vrclass, m, - "@earlyclobber $rd">; + "@earlyclobber $rd", TargetConstraintType=3>; } multiclass VPseudoBinaryW_WV_RM<LMULInfo m> { defm _WV : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass, m.vrclass, m, - "@earlyclobber $rd", UsesVXRM=0>; + "@earlyclobber $rd", UsesVXRM=0, TargetConstraintType=3>; defm _WV : VPseudoTiedBinaryRoundingMode<m.wvrclass, m.vrclass, m, - "@earlyclobber $rd">; + "@earlyclobber $rd", TargetConstraintType=3>; } multiclass VPseudoBinaryW_WX<LMULInfo m> { - defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m>; + defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m, /*Constraint*/ "", TargetConstraintType=3>; } -multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f> { +multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> { defm "_W" # f.FX : VPseudoBinary<m.wvrclass, m.wvrclass, - f.fprclass, m>; + f.fprclass, m, /*Constraint*/ "", TargetConstraintType=TargetConstraintType>; } multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> { defm "_W" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass, f.fprclass, m, - UsesVXRM=0>; + Constraint="", + sew=0, + UsesVXRM=0, + TargetConstraintType=3>; } // Narrowing instructions like vnsrl/vnsra/vnclip(u) don't need @earlyclobber @@ -2245,9 
+2407,9 @@ multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> { // exception from the spec. // "The destination EEW is smaller than the source EEW and the overlap is in the // lowest-numbered part of the source register group." -multiclass VPseudoBinaryV_WV<LMULInfo m> { +multiclass VPseudoBinaryV_WV<LMULInfo m, int TargetConstraintType = 1> { defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m, - !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>; } multiclass VPseudoBinaryV_WV_RM<LMULInfo m> { @@ -2256,9 +2418,9 @@ multiclass VPseudoBinaryV_WV_RM<LMULInfo m> { "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryV_WX<LMULInfo m> { +multiclass VPseudoBinaryV_WX<LMULInfo m, int TargetConstraintType = 1> { defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m, - !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>; } multiclass VPseudoBinaryV_WX_RM<LMULInfo m> { @@ -2267,9 +2429,9 @@ multiclass VPseudoBinaryV_WX_RM<LMULInfo m> { "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryV_WI<LMULInfo m> { +multiclass VPseudoBinaryV_WI<LMULInfo m, int TargetConstraintType = 1> { defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m, - !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>; } multiclass VPseudoBinaryV_WI_RM<LMULInfo m> { @@ -2282,12 +2444,15 @@ multiclass VPseudoBinaryV_WI_RM<LMULInfo m> { // vector register is v0. // For vadc and vsbc, CarryIn == 1 and CarryOut == 0 multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, - string Constraint = ""> { + string Constraint = "", + bit Commutable = 0, + int TargetConstraintType = 1> { + let isCommutable = Commutable in def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn<!if(CarryOut, VR, !if(!and(CarryIn, !not(CarryOut)), GetVRegNoV0<m.vrclass>.R, m.vrclass)), - m.vrclass, m.vrclass, m, CarryIn, Constraint>; + m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>; } multiclass VPseudoTiedBinaryV_VM<LMULInfo m> { @@ -2297,12 +2462,12 @@ multiclass VPseudoTiedBinaryV_VM<LMULInfo m> { } multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, - string Constraint = ""> { + string Constraint = "", int TargetConstraintType = 1> { def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn<!if(CarryOut, VR, !if(!and(CarryIn, !not(CarryOut)), GetVRegNoV0<m.vrclass>.R, m.vrclass)), - m.vrclass, GPR, m, CarryIn, Constraint>; + m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>; } multiclass VPseudoTiedBinaryV_XM<LMULInfo m> { @@ -2315,25 +2480,23 @@ multiclass VPseudoVMRG_FM { foreach f = FPList in { foreach m = f.MxList in { defvar mx = m.MX; - defvar WriteVFMergeV_MX = !cast<SchedWrite>("WriteVFMergeV_" # mx); - defvar ReadVFMergeV_MX = !cast<SchedRead>("ReadVFMergeV_" # mx); - defvar ReadVFMergeF_MX = !cast<SchedRead>("ReadVFMergeF_" # mx); - - def "_V" # f.FX # "M_" # mx: - VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, - m.vrclass, f.fprclass, m, CarryIn=1, Constraint="">, - Sched<[WriteVFMergeV_MX, ReadVFMergeV_MX, ReadVFMergeF_MX, ReadVMask]>; + def "_V" # f.FX # "M_" # mx + : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass, + f.fprclass, m, CarryIn=1, + Constraint = "">, + SchedBinary<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF", mx, + 
forceMasked=1, forceMergeOpRead=true>; } } } multiclass VPseudoBinaryV_IM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, - string Constraint = ""> { + string Constraint = "", int TargetConstraintType = 1> { def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarryIn<!if(CarryOut, VR, !if(!and(CarryIn, !not(CarryOut)), GetVRegNoV0<m.vrclass>.R, m.vrclass)), - m.vrclass, simm5, m, CarryIn, Constraint>; + m.vrclass, simm5, m, CarryIn, Constraint, TargetConstraintType>; } multiclass VPseudoTiedBinaryV_IM<LMULInfo m> { @@ -2346,19 +2509,16 @@ multiclass VPseudoUnaryVMV_V_X_I { foreach m = MxList in { let VLMul = m.value in { defvar mx = m.MX; - defvar WriteVIMovV_MX = !cast<SchedWrite>("WriteVIMovV_" # mx); - defvar WriteVIMovX_MX = !cast<SchedWrite>("WriteVIMovX_" # mx); - defvar WriteVIMovI_MX = !cast<SchedWrite>("WriteVIMovI_" # mx); - defvar ReadVIMovV_MX = !cast<SchedRead>("ReadVIMovV_" # mx); - defvar ReadVIMovX_MX = !cast<SchedRead>("ReadVIMovX_" # mx); - let VLMul = m.value in { def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, - Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>; + SchedUnary<"WriteVIMovV", "ReadVIMovV", mx, + forceMergeOpRead=true>; def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>, - Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; + SchedUnary<"WriteVIMovX", "ReadVIMovX", mx, + forceMergeOpRead=true>; def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>, - Sched<[WriteVIMovI_MX]>; + SchedNullary<"WriteVIMovI", mx, + forceMergeOpRead=true>; } } } @@ -2368,13 +2528,10 @@ multiclass VPseudoVMV_F { foreach f = FPList in { foreach m = f.MxList in { defvar mx = m.MX; - defvar WriteVFMovV_MX = !cast<SchedWrite>("WriteVFMovV_" # mx); - defvar ReadVFMovF_MX = !cast<SchedRead>("ReadVFMovF_" # mx); - let VLMul = m.value in { def "_" # f.FX # "_" # mx : VPseudoUnaryNoMask<m.vrclass, f.fprclass>, - Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>; + SchedUnary<"WriteVFMovV", "ReadVFMovF", mx, forceMergeOpRead=true>; } } } @@ -2383,15 +2540,14 @@ multiclass VPseudoVMV_F { multiclass VPseudoVCLS_V { foreach m = MxListF in { defvar mx = m.MX; - defvar WriteVFClassV_MX = !cast<SchedWrite>("WriteVFClassV_" # mx); - defvar ReadVFClassV_MX = !cast<SchedRead>("ReadVFClassV_" # mx); - let VLMul = m.value in { def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, - Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>; + SchedUnary<"WriteVFClassV", "ReadVFClassV", mx, + forceMergeOpRead=true>; def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>, RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>; + SchedUnary<"WriteVFClassV", "ReadVFClassV", mx, + forceMergeOpRead=true>; } } } @@ -2404,17 +2560,15 @@ multiclass VPseudoVSQR_V_RM { let VLMul = m.value in foreach e = sews in { defvar suffix = "_" # mx # "_E" # e; - defvar WriteVFSqrtV_MX_E = !cast<SchedWrite>("WriteVFSqrtV" # suffix); - defvar ReadVFSqrtV_MX_E = !cast<SchedRead>("ReadVFSqrtV" # suffix); - let SEW = e in { def "_V" # suffix : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>, - Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E, - ReadVMask]>; - def "_V" # suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>, - RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E, - ReadVMask]>; + SchedUnary<"WriteVFSqrtV", "ReadVFSqrtV", mx, e, + forceMergeOpRead=true>; + def "_V" #suffix # "_MASK" + : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteVFSqrtV", "ReadVFSqrtV", mx, e, + forceMergeOpRead=true>; } 
} } @@ -2423,15 +2577,14 @@ multiclass VPseudoVSQR_V_RM { multiclass VPseudoVRCP_V { foreach m = MxListF in { defvar mx = m.MX; - defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx); - defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx); - let VLMul = m.value in { - def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, - Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; - def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>, - RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; + def "_V_" # mx + : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>; + def "_V_" # mx # "_MASK" + : VPseudoUnaryMask<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>; } } } @@ -2439,69 +2592,59 @@ multiclass VPseudoVRCP_V { multiclass VPseudoVRCP_V_RM { foreach m = MxListF in { defvar mx = m.MX; - defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx); - defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx); - let VLMul = m.value in { - def "_V_" # mx : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>, - Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; - def "_V_" # mx # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>, - RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>; + def "_V_" # mx + : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>; + def "_V_" # mx # "_MASK" + : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx = 2>, + SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF2 { +multiclass PseudoVEXT_VF2<int TargetConstraintType = 1> { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF2 in { defvar mx = m.MX; - defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx); - defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx); - let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>, + SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints>, + VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF4 { +multiclass PseudoVEXT_VF4<int TargetConstraintType = 1> { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF4 in { defvar mx = m.MX; - defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx); - defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx); - let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>, + SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints>, + VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>, - 
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF8 { +multiclass PseudoVEXT_VF8<int TargetConstraintType = 1> { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF8 in { defvar mx = m.MX; - defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx); - defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx); - let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>, + SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints>, + VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>, - Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } @@ -2517,51 +2660,43 @@ multiclass PseudoVEXT_VF8 { // lowest-numbered part of the source register group". // With LMUL<=1 the source and dest occupy a single register so any overlap // is in the lowest-numbered part. -multiclass VPseudoBinaryM_VV<LMULInfo m> { +multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> { defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; } -multiclass VPseudoBinaryM_VX<LMULInfo m> { +multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> { defm "_VX" : VPseudoBinaryM<VR, m.vrclass, GPR, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; } -multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f> { +multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> { defm "_V" # f.FX : VPseudoBinaryM<VR, m.vrclass, f.fprclass, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; } -multiclass VPseudoBinaryM_VI<LMULInfo m> { +multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> { defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; + !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; } multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVRGatherVX_MX = !cast<SchedWrite>("WriteVRGatherVX_" # mx); - defvar WriteVRGatherVI_MX = !cast<SchedWrite>("WriteVRGatherVI_" # mx); - defvar ReadVRGatherVX_data_MX = !cast<SchedRead>("ReadVRGatherVX_data_" # mx); - defvar ReadVRGatherVX_index_MX = !cast<SchedRead>("ReadVRGatherVX_index_" # mx); - defvar ReadVRGatherVI_data_MX = !cast<SchedRead>("ReadVRGatherVI_data_" # mx); - defm "" : VPseudoBinaryV_VX<m, Constraint>, - Sched<[WriteVRGatherVX_MX, ReadVRGatherVX_data_MX, - ReadVRGatherVX_index_MX, ReadVMask]>; + SchedBinary<"WriteVRGatherVX", "ReadVRGatherVX_data", + "ReadVRGatherVX_index", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>, - Sched<[WriteVRGatherVI_MX, ReadVRGatherVI_data_MX, ReadVMask]>; + SchedUnary<"WriteVRGatherVI", "ReadVRGatherVI_data", mx, + forceMergeOpRead=true>; defvar sews = SchedSEWSet<mx>.val; foreach e = sews in { - defvar 
WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e); - defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e); - defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e); defm "" : VPseudoBinaryV_VV<m, Constraint, e>, - Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E, - ReadVRGatherVV_index_MX_E, ReadVMask]>; + SchedBinary<"WriteVRGatherVV", "ReadVRGatherVV_data", + "ReadVRGatherVV_index", mx, e, forceMergeOpRead=true>; } } } @@ -2569,18 +2704,14 @@ multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVSALUV_MX = !cast<SchedWrite>("WriteVSALUV_" # mx); - defvar WriteVSALUX_MX = !cast<SchedWrite>("WriteVSALUX_" # mx); - defvar WriteVSALUI_MX = !cast<SchedWrite>("WriteVSALUI_" # mx); - defvar ReadVSALUV_MX = !cast<SchedRead>("ReadVSALUV_" # mx); - defvar ReadVSALUX_MX = !cast<SchedRead>("ReadVSALUX_" # mx); - defm "" : VPseudoBinaryV_VV<m, Constraint>, - Sched<[WriteVSALUV_MX, ReadVSALUV_MX, ReadVSALUV_MX, ReadVMask]>; + SchedBinary<"WriteVSALUV", "ReadVSALUV", "ReadVSALUX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m, Constraint>, - Sched<[WriteVSALUX_MX, ReadVSALUV_MX, ReadVSALUX_MX, ReadVMask]>; + SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>, - Sched<[WriteVSALUI_MX, ReadVSALUV_MX, ReadVMask]>; + SchedUnary<"WriteVSALUI", "ReadVSALUV", mx, forceMergeOpRead=true>; } } @@ -2588,129 +2719,98 @@ multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "" multiclass VPseudoVSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVShiftV_MX = !cast<SchedWrite>("WriteVShiftV_" # mx); - defvar WriteVShiftX_MX = !cast<SchedWrite>("WriteVShiftX_" # mx); - defvar WriteVShiftI_MX = !cast<SchedWrite>("WriteVShiftI_" # mx); - defvar ReadVShiftV_MX = !cast<SchedRead>("ReadVShiftV_" # mx); - defvar ReadVShiftX_MX = !cast<SchedRead>("ReadVShiftX_" # mx); - defm "" : VPseudoBinaryV_VV<m, Constraint>, - Sched<[WriteVShiftV_MX, ReadVShiftV_MX, ReadVShiftV_MX, ReadVMask]>; + SchedBinary<"WriteVShiftV", "ReadVShiftV", "ReadVShiftV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m, Constraint>, - Sched<[WriteVShiftX_MX, ReadVShiftV_MX, ReadVShiftX_MX, ReadVMask]>; + SchedBinary<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>, - Sched<[WriteVShiftI_MX, ReadVShiftV_MX, ReadVMask]>; + SchedUnary<"WriteVShiftI", "ReadVShiftV", mx, forceMergeOpRead=true>; } } multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVSShiftV_MX = !cast<SchedWrite>("WriteVSShiftV_" # mx); - defvar WriteVSShiftX_MX = !cast<SchedWrite>("WriteVSShiftX_" # mx); - defvar WriteVSShiftI_MX = !cast<SchedWrite>("WriteVSShiftI_" # mx); - defvar ReadVSShiftV_MX = !cast<SchedRead>("ReadVSShiftV_" # mx); - defvar ReadVSShiftX_MX = !cast<SchedRead>("ReadVSShiftX_" # mx); - defm "" : VPseudoBinaryV_VV_RM<m, Constraint>, - Sched<[WriteVSShiftV_MX, ReadVSShiftV_MX, ReadVSShiftV_MX, ReadVMask]>; + SchedBinary<"WriteVSShiftV", "ReadVSShiftV", "ReadVSShiftV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX_RM<m, 
Constraint>, - Sched<[WriteVSShiftX_MX, ReadVSShiftV_MX, ReadVSShiftX_MX, ReadVMask]>; + SchedBinary<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VI_RM<ImmType, m, Constraint>, - Sched<[WriteVSShiftI_MX, ReadVSShiftV_MX, ReadVMask]>; + SchedUnary<"WriteVSShiftI", "ReadVSShiftV", mx, forceMergeOpRead=true>; } } multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); - defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx); - defvar WriteVIALUI_MX = !cast<SchedWrite>("WriteVIALUI_" # mx); - defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); - defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx); - defm "" : VPseudoBinaryV_VV<m, Constraint>, - Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m, Constraint>, - Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>, - Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>; + SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>; } } multiclass VPseudoVSALU_VV_VX { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVSALUV_MX = !cast<SchedWrite>("WriteVSALUV_" # mx); - defvar WriteVSALUX_MX = !cast<SchedWrite>("WriteVSALUX_" # mx); - defvar ReadVSALUV_MX = !cast<SchedRead>("ReadVSALUV_" # mx); - defvar ReadVSALUX_MX = !cast<SchedRead>("ReadVSALUX_" # mx); - defm "" : VPseudoBinaryV_VV<m>, - Sched<[WriteVSALUV_MX, ReadVSALUV_MX, ReadVSALUV_MX, ReadVMask]>; + SchedBinary<"WriteVSALUV", "ReadVSALUV", "ReadVSALUV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m>, - Sched<[WriteVSALUX_MX, ReadVSALUV_MX, ReadVSALUX_MX, ReadVMask]>; + SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVSMUL_VV_VX_RM { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVSMulV_MX = !cast<SchedWrite>("WriteVSMulV_" # mx); - defvar WriteVSMulX_MX = !cast<SchedWrite>("WriteVSMulX_" # mx); - defvar ReadVSMulV_MX = !cast<SchedRead>("ReadVSMulV_" # mx); - defvar ReadVSMulX_MX = !cast<SchedRead>("ReadVSMulX_" # mx); - defm "" : VPseudoBinaryV_VV_RM<m>, - Sched<[WriteVSMulV_MX, ReadVSMulV_MX, ReadVSMulV_MX, ReadVMask]>; + SchedBinary<"WriteVSMulV", "ReadVSMulV", "ReadVSMulV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX_RM<m>, - Sched<[WriteVSMulX_MX, ReadVSMulV_MX, ReadVSMulX_MX, ReadVMask]>; + SchedBinary<"WriteVSMulX", "ReadVSMulV", "ReadVSMulX", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVAALU_VV_VX_RM { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVAALUV_MX = !cast<SchedWrite>("WriteVAALUV_" # mx); - defvar WriteVAALUX_MX = !cast<SchedWrite>("WriteVAALUX_" # mx); - defvar ReadVAALUV_MX = !cast<SchedRead>("ReadVAALUV_" # mx); - defvar ReadVAALUX_MX = !cast<SchedRead>("ReadVAALUX_" # mx); - defm "" : VPseudoBinaryV_VV_RM<m>, - Sched<[WriteVAALUV_MX, ReadVAALUV_MX, ReadVAALUV_MX, ReadVMask]>; + SchedBinary<"WriteVAALUV", "ReadVAALUV", "ReadVAALUV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX_RM<m>, - Sched<[WriteVAALUX_MX, ReadVAALUV_MX, ReadVAALUX_MX, ReadVMask]>; + SchedBinary<"WriteVAALUX", "ReadVAALUV", "ReadVAALUX", mx, + forceMergeOpRead=true>; } } 
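[Note on the recurring pattern in the hunks above and below: the per-LMUL scheduling boilerplate — a block of defvar lookups like `defvar WriteFoo_MX = !cast<SchedWrite>("WriteFoo_" # mx)` followed by an explicit `Sched<[...]>` list — is folded into shared SchedUnary/SchedBinary/SchedTernary/SchedReduction helper multiclasses that take the resource base names, the LMUL string `mx`, optionally the SEW `e`, and flags such as `forceMergeOpRead` or `forceMasked`, which, judging by the removed lines, stand in for the merge-operand and ReadVMask reads the old `Sched<[...]>` lists wrote out by hand. A minimal, self-contained sketch of the underlying lookup-by-name idea follows; class and record names are hypothetical, not the real RISCVScheduleV.td helpers.

    // Hypothetical sketch: resolve per-LMUL scheduling records by name once,
    // inside a wrapper multiclass, instead of repeating defvar/!cast at each
    // use site. Processable standalone with llvm-tblgen.
    class SchedResource<string n> { string Name = n; }

    // Per-LMUL resources, e.g. "WriteVIALUV_M1", "ReadVIALUV_M2", ...
    foreach mx = ["M1", "M2", "M4"] in {
      def "WriteVIALUV_" # mx : SchedResource<"WriteVIALUV_" # mx>;
      def "ReadVIALUV_"  # mx : SchedResource<"ReadVIALUV_"  # mx>;
    }

    multiclass MySchedBinary<string wr, string rd0, string rd1, string mx> {
      defvar W  = !cast<SchedResource>(wr  # "_" # mx);
      defvar R0 = !cast<SchedResource>(rd0 # "_" # mx);
      defvar R1 = !cast<SchedResource>(rd1 # "_" # mx);
      // One record per instantiation, carrying the resolved resources.
      def NAME # "_SCHED" { list<SchedResource> Resources = [W, R0, R1]; }
    }

    // Usage mirroring the pattern in this diff: each defm pulls in the right
    // per-LMUL write/read resources from the base names and mx alone.
    foreach mx = ["M1", "M2", "M4"] in
      defm "VADD_VV_" # mx : MySchedBinary<"WriteVIALUV", "ReadVIALUV",
                                           "ReadVIALUV", mx>;

The refactor concerns only how the scheduling information is attached; the instruction definitions the Sched classes are mixed into are unchanged except where the hunks also pass the new TargetConstraintType parameter.]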
multiclass VPseudoVMINMAX_VV_VX { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVIMinMaxV_MX = !cast<SchedWrite>("WriteVIMinMaxV_" # mx); - defvar WriteVIMinMaxX_MX = !cast<SchedWrite>("WriteVIMinMaxX_" # mx); - defvar ReadVIMinMaxV_MX = !cast<SchedRead>("ReadVIMinMaxV_" # mx); - defvar ReadVIMinMaxX_MX = !cast<SchedRead>("ReadVIMinMaxX_" # mx); - defm "" : VPseudoBinaryV_VV<m>, - Sched<[WriteVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVMask]>; + SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>; defm "" : VPseudoBinaryV_VX<m>, - Sched<[WriteVIMinMaxX_MX, ReadVIMinMaxV_MX, ReadVIMinMaxX_MX, ReadVMask]>; + SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>; } } multiclass VPseudoVMUL_VV_VX { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVIMulV_MX = !cast<SchedWrite>("WriteVIMulV_" # mx); - defvar WriteVIMulX_MX = !cast<SchedWrite>("WriteVIMulX_" # mx); - defvar ReadVIMulV_MX = !cast<SchedRead>("ReadVIMulV_" # mx); - defvar ReadVIMulX_MX = !cast<SchedRead>("ReadVIMulX_" # mx); - defm "" : VPseudoBinaryV_VV<m>, - Sched<[WriteVIMulV_MX, ReadVIMulV_MX, ReadVIMulV_MX, ReadVMask]>; + SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>; defm "" : VPseudoBinaryV_VX<m>, - Sched<[WriteVIMulX_MX, ReadVIMulV_MX, ReadVIMulX_MX, ReadVMask]>; + SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>; } } @@ -2719,38 +2819,26 @@ multiclass VPseudoVDIV_VV_VX { defvar mx = m.MX; defvar sews = SchedSEWSet<mx>.val; foreach e = sews in { - defvar WriteVIDivV_MX_E = !cast<SchedWrite>("WriteVIDivV_" # mx # "_E" # e); - defvar WriteVIDivX_MX_E = !cast<SchedWrite>("WriteVIDivX_" # mx # "_E" # e); - defvar ReadVIDivV_MX_E = !cast<SchedRead>("ReadVIDivV_" # mx # "_E" # e); - defvar ReadVIDivX_MX_E = !cast<SchedRead>("ReadVIDivX_" # mx # "_E" # e); - defm "" : VPseudoBinaryV_VV<m, "", e>, - Sched<[WriteVIDivV_MX_E, ReadVIDivV_MX_E, ReadVIDivV_MX_E, ReadVMask]>; + SchedBinary<"WriteVIDivV", "ReadVIDivV", "ReadVIDivV", mx, e>; defm "" : VPseudoBinaryV_VX<m, "", e>, - Sched<[WriteVIDivX_MX_E, ReadVIDivV_MX_E, ReadVIDivX_MX_E, ReadVMask]>; + SchedBinary<"WriteVIDivX", "ReadVIDivV", "ReadVIDivX", mx, e>; } } } multiclass VPseudoVFMUL_VV_VF_RM { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFMulV_MX = !cast<SchedWrite>("WriteVFMulV_" # mx); - defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx); - defm "" : VPseudoBinaryFV_VV_RM<m>, - Sched<[WriteVFMulV_MX, ReadVFMulV_MX, ReadVFMulV_MX, ReadVMask]>; + SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX, + forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFMulF_MX = !cast<SchedWrite>("WriteVFMulF_" # mx); - defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx); - defvar ReadVFMulF_MX = !cast<SchedRead>("ReadVFMulF_" # mx); - defm "" : VPseudoBinaryV_VF_RM<m, f>, - Sched<[WriteVFMulF_MX, ReadVFMulV_MX, ReadVFMulF_MX, ReadVMask]>; + SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX, + forceMergeOpRead=true>; } } } @@ -2760,23 +2848,17 @@ multiclass VPseudoVFDIV_VV_VF_RM { defvar mx = m.MX; defvar sews = SchedSEWSet<mx, isF=1>.val; foreach e = sews in { - defvar WriteVFDivV_MX_E = !cast<SchedWrite>("WriteVFDivV_" # mx # "_E" # e); - defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # e); - defm "" : VPseudoBinaryFV_VV_RM<m, "", e>, - Sched<[WriteVFDivV_MX_E, ReadVFDivV_MX_E, ReadVFDivV_MX_E, ReadVMask]>; + SchedBinary<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV", mx, e, + 
forceMergeOpRead=true>; } } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW); - defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW); - defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW); - defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>, - Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>; + SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW, + forceMergeOpRead=true>; } } } @@ -2784,118 +2866,84 @@ multiclass VPseudoVFDIV_VV_VF_RM { multiclass VPseudoVFRDIV_VF_RM { foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW); - defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW); - defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW); - defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>, - Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>; + SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW, + forceMergeOpRead=true>; } } } multiclass VPseudoVALU_VV_VX { foreach m = MxList in { - defvar mx = m.MX; - defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); - defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); - defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); - defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx); - defm "" : VPseudoBinaryV_VV<m>, - Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m>, - Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVSGNJ_VV_VF { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFSgnjV_MX = !cast<SchedWrite>("WriteVFSgnjV_" # mx); - defvar ReadVFSgnjV_MX = !cast<SchedRead>("ReadVFSgnjV_" # mx); - defm "" : VPseudoBinaryFV_VV<m>, - Sched<[WriteVFSgnjV_MX, ReadVFSgnjV_MX, ReadVFSgnjV_MX, ReadVMask]>; + SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX, + forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFSgnjF_MX = !cast<SchedWrite>("WriteVFSgnjF_" # mx); - defvar ReadVFSgnjV_MX = !cast<SchedRead>("ReadVFSgnjV_" # mx); - defvar ReadVFSgnjF_MX = !cast<SchedRead>("ReadVFSgnjF_" # mx); - defm "" : VPseudoBinaryV_VF<m, f>, - Sched<[WriteVFSgnjF_MX, ReadVFSgnjV_MX, ReadVFSgnjF_MX, ReadVMask]>; + SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX, + forceMergeOpRead=true>; } } } multiclass VPseudoVMAX_VV_VF { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFMinMaxV_MX = !cast<SchedWrite>("WriteVFMinMaxV_" # mx); - defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx); - defm "" : VPseudoBinaryFV_VV<m>, - Sched<[WriteVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVMask]>; + SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", m.MX, + forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFMinMaxF_MX = !cast<SchedWrite>("WriteVFMinMaxF_" # mx); - defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx); - defvar ReadVFMinMaxF_MX = !cast<SchedRead>("ReadVFMinMaxF_" # mx); - defm "" : VPseudoBinaryV_VF<m, f>, - 
Sched<[WriteVFMinMaxF_MX, ReadVFMinMaxV_MX, ReadVFMinMaxF_MX, ReadVMask]>; + SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", m.MX, + forceMergeOpRead=true>; } } } multiclass VPseudoVALU_VV_VF { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx); - defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx); - defm "" : VPseudoBinaryFV_VV<m>, - Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>; + SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX, + forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx); - defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx); - defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx); defm "" : VPseudoBinaryV_VF<m, f>, - Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>; + SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, + forceMergeOpRead=true>; } } } multiclass VPseudoVALU_VV_VF_RM { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx); - defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx); - defm "" : VPseudoBinaryFV_VV_RM<m>, - Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>; + SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX, + forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx); - defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx); - defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx); defm "" : VPseudoBinaryV_VF_RM<m, f>, - Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>; + SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, + forceMergeOpRead=true>; } } } @@ -2903,13 +2951,9 @@ multiclass VPseudoVALU_VV_VF_RM { multiclass VPseudoVALU_VF { foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx); - defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx); - defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx); - defm "" : VPseudoBinaryV_VF<m, f>, - Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>; + SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, + forceMergeOpRead=true>; } } } @@ -2917,13 +2961,9 @@ multiclass VPseudoVALU_VF { multiclass VPseudoVALU_VF_RM { foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx); - defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx); - defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx); - defm "" : VPseudoBinaryV_VF_RM<m, f>, - Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>; + SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX, + forceMergeOpRead=true>; } } } @@ -2931,67 +2971,56 @@ multiclass VPseudoVALU_VF_RM { multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx); - defvar WriteVIALUI_MX = !cast<SchedWrite>("WriteVIALUI_" # mx); - defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); - defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx); - defm "" : VPseudoBinaryV_VX<m>, - Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + SchedBinary<"WriteVIALUX", 
"ReadVIALUV", "ReadVIALUX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VI<ImmType, m>, - Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>; + SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>; } } multiclass VPseudoVWALU_VV_VX { foreach m = MxListW in { defvar mx = m.MX; - defvar WriteVIWALUV_MX = !cast<SchedWrite>("WriteVIWALUV_" # mx); - defvar WriteVIWALUX_MX = !cast<SchedWrite>("WriteVIWALUX_" # mx); - defvar ReadVIWALUV_MX = !cast<SchedRead>("ReadVIWALUV_" # mx); - defvar ReadVIWALUX_MX = !cast<SchedRead>("ReadVIWALUX_" # mx); - defm "" : VPseudoBinaryW_VV<m>, - Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>; - defm "" : VPseudoBinaryW_VX<m>, - Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>; + SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryW_VX<m>, + SchedBinary<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX", mx, + forceMergeOpRead=true>; + } +} + +multiclass VPseudoVWALU_VV_VX_VI<Operand ImmType> : VPseudoVWALU_VV_VX { + foreach m = MxListW in { + defm "" : VPseudoBinaryW_VI<ImmType, m>; } } multiclass VPseudoVWMUL_VV_VX { foreach m = MxListW in { defvar mx = m.MX; - defvar WriteVIWMulV_MX = !cast<SchedWrite>("WriteVIWMulV_" # mx); - defvar WriteVIWMulX_MX = !cast<SchedWrite>("WriteVIWMulX_" # mx); - defvar ReadVIWMulV_MX = !cast<SchedRead>("ReadVIWMulV_" # mx); - defvar ReadVIWMulX_MX = !cast<SchedRead>("ReadVIWMulX_" # mx); - defm "" : VPseudoBinaryW_VV<m>, - Sched<[WriteVIWMulV_MX, ReadVIWMulV_MX, ReadVIWMulV_MX, ReadVMask]>; + SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryW_VX<m>, - Sched<[WriteVIWMulX_MX, ReadVIWMulV_MX, ReadVIWMulX_MX, ReadVMask]>; + SchedBinary<"WriteVIWMulX", "ReadVIWMulV", "ReadVIWMulX", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVWMUL_VV_VF_RM { foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWMulV_MX = !cast<SchedWrite>("WriteVFWMulV_" # mx); - defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx); - defm "" : VPseudoBinaryW_VV_RM<m>, - Sched<[WriteVFWMulV_MX, ReadVFWMulV_MX, ReadVFWMulV_MX, ReadVMask]>; + SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX, + forceMergeOpRead=true>; } foreach f = FPListW in { foreach m = f.MxListFW in { - defvar mx = m.MX; - defvar WriteVFWMulF_MX = !cast<SchedWrite>("WriteVFWMulF_" # mx); - defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx); - defvar ReadVFWMulF_MX = !cast<SchedRead>("ReadVFWMulF_" # mx); - defm "" : VPseudoBinaryW_VF_RM<m, f>, - Sched<[WriteVFWMulF_MX, ReadVFWMulV_MX, ReadVFWMulF_MX, ReadVMask]>; + SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX, + forceMergeOpRead=true>; } } } @@ -2999,59 +3028,42 @@ multiclass VPseudoVWMUL_VV_VF_RM { multiclass VPseudoVWALU_WV_WX { foreach m = MxListW in { defvar mx = m.MX; - defvar WriteVIWALUV_MX = !cast<SchedWrite>("WriteVIWALUV_" # mx); - defvar WriteVIWALUX_MX = !cast<SchedWrite>("WriteVIWALUX_" # mx); - defvar ReadVIWALUV_MX = !cast<SchedRead>("ReadVIWALUV_" # mx); - defvar ReadVIWALUX_MX = !cast<SchedRead>("ReadVIWALUX_" # mx); - defm "" : VPseudoBinaryW_WV<m>, - Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>; + SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryW_WX<m>, - Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>; + SchedBinary<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX", mx, + 
forceMergeOpRead=true>; } } multiclass VPseudoVFWALU_VV_VF_RM { foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx); - defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx); - defm "" : VPseudoBinaryW_VV_RM<m>, - Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>; + SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX, + forceMergeOpRead=true>; } foreach f = FPListW in { foreach m = f.MxListFW in { - defvar mx = m.MX; - defvar WriteVFWALUF_MX = !cast<SchedWrite>("WriteVFWALUF_" # mx); - defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx); - defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx); - defm "" : VPseudoBinaryW_VF_RM<m, f>, - Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>; + SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX, + forceMergeOpRead=true>; } } } multiclass VPseudoVFWALU_WV_WF_RM { foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx); - defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx); - defm "" : VPseudoBinaryW_WV_RM<m>, - Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>; + SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX, + forceMergeOpRead=true>; } foreach f = FPListW in { foreach m = f.MxListFW in { - defvar mx = m.MX; - defvar WriteVFWALUF_MX = !cast<SchedWrite>("WriteVFWALUF_" # mx); - defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx); - defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx); - defm "" : VPseudoBinaryW_WF_RM<m, f>, - Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>; + SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX, + forceMergeOpRead=true>; } } } @@ -3059,159 +3071,134 @@ multiclass VPseudoVFWALU_WV_WF_RM { multiclass VPseudoVMRG_VM_XM_IM { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVIMergeV_MX = !cast<SchedWrite>("WriteVIMergeV_" # mx); - defvar WriteVIMergeX_MX = !cast<SchedWrite>("WriteVIMergeX_" # mx); - defvar WriteVIMergeI_MX = !cast<SchedWrite>("WriteVIMergeI_" # mx); - defvar ReadVIMergeV_MX = !cast<SchedRead>("ReadVIMergeV_" # mx); - defvar ReadVIMergeX_MX = !cast<SchedRead>("ReadVIMergeX_" # mx); - def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass, m.vrclass, m, 1, "">, - Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>; + SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx, + forceMergeOpRead=true>; def "_VXM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass, GPR, m, 1, "">, - Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>; + SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx, + forceMergeOpRead=true>; def "_VIM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass, simm5, m, 1, "">, - Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>; + SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVCALU_VM_XM_IM { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx); - defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx); - defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx); - defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx); - defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx); - defm "" : VPseudoTiedBinaryV_VM<m>, - 
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, + forceMergeOpRead=true>; defm "" : VPseudoTiedBinaryV_XM<m>, - Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, + forceMergeOpRead=true>; defm "" : VPseudoTiedBinaryV_IM<m>, - Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVCALU_VM_XM { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx); - defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx); - defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx); - defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx); - defm "" : VPseudoTiedBinaryV_VM<m>, - Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, + forceMergeOpRead=true>; defm "" : VPseudoTiedBinaryV_XM<m>, - Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx); - defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx); - defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx); - defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx); - defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx); - - defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>, - Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; - defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>, - Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>, - Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, + Commutable=1, TargetConstraintType=2>, + SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>, + SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>, + SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, forceMasked=1, + forceMergeOpRead=true>; } } multiclass VPseudoVCALUM_VM_XM<string Constraint> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx); - defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx); - defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx); - defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx); - - defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>, - Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; - defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>, - Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>, + 
SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>, + SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1, + forceMergeOpRead=true>; } } multiclass VPseudoVCALUM_V_X_I<string Constraint> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx); - defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx); - defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx); - defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx); - defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx); - - defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>, - Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>; - defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>, - Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>; + defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, + Commutable=1, TargetConstraintType=2>, + SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>, + SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>, - Sched<[WriteVICALUI_MX, ReadVICALUV_MX]>; + SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVCALUM_V_X<string Constraint> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx); - defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx); - defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx); - defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx); - - defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>, - Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>; - defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>, - Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>; + defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>, + SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>, + SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVNCLP_WV_WX_WI_RM { foreach m = MxListW in { defvar mx = m.MX; - defvar WriteVNClipV_MX = !cast<SchedWrite>("WriteVNClipV_" # mx); - defvar WriteVNClipX_MX = !cast<SchedWrite>("WriteVNClipX_" # mx); - defvar WriteVNClipI_MX = !cast<SchedWrite>("WriteVNClipI_" # mx); - defvar ReadVNClipV_MX = !cast<SchedRead>("ReadVNClipV_" # mx); - defvar ReadVNClipX_MX = !cast<SchedRead>("ReadVNClipX_" # mx); - defm "" : VPseudoBinaryV_WV_RM<m>, - Sched<[WriteVNClipV_MX, ReadVNClipV_MX, ReadVNClipV_MX, ReadVMask]>; + SchedBinary<"WriteVNClipV", "ReadVNClipV", "ReadVNClipV", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_WX_RM<m>, - Sched<[WriteVNClipX_MX, ReadVNClipV_MX, ReadVNClipX_MX, ReadVMask]>; + SchedBinary<"WriteVNClipX", "ReadVNClipV", "ReadVNClipX", mx, + forceMergeOpRead=true>; defm "" : VPseudoBinaryV_WI_RM<m>, - Sched<[WriteVNClipI_MX, ReadVNClipV_MX, 
ReadVMask]>; + SchedUnary<"WriteVNClipI", "ReadVNClipV", mx, + forceMergeOpRead=true>; } } multiclass VPseudoVNSHT_WV_WX_WI { foreach m = MxListW in { defvar mx = m.MX; - defvar WriteVNShiftV_MX = !cast<SchedWrite>("WriteVNShiftV_" # mx); - defvar WriteVNShiftX_MX = !cast<SchedWrite>("WriteVNShiftX_" # mx); - defvar WriteVNShiftI_MX = !cast<SchedWrite>("WriteVNShiftI_" # mx); - defvar ReadVNShiftV_MX = !cast<SchedRead>("ReadVNShiftV_" # mx); - defvar ReadVNShiftX_MX = !cast<SchedRead>("ReadVNShiftX_" # mx); - - defm "" : VPseudoBinaryV_WV<m>, - Sched<[WriteVNShiftV_MX, ReadVNShiftV_MX, ReadVNShiftV_MX, ReadVMask]>; - defm "" : VPseudoBinaryV_WX<m>, - Sched<[WriteVNShiftX_MX, ReadVNShiftV_MX, ReadVNShiftX_MX, ReadVMask]>; - defm "" : VPseudoBinaryV_WI<m>, - Sched<[WriteVNShiftI_MX, ReadVNShiftV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_WV<m, TargetConstraintType=2>, + SchedBinary<"WriteVNShiftV", "ReadVNShiftV", "ReadVNShiftV", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_WX<m, TargetConstraintType=2>, + SchedBinary<"WriteVNShiftX", "ReadVNShiftV", "ReadVNShiftX", mx, + forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_WI<m, TargetConstraintType=2>, + SchedUnary<"WriteVNShiftI", "ReadVNShiftV", mx, + forceMergeOpRead=true>; } } @@ -3222,11 +3209,12 @@ multiclass VPseudoTernaryWithTailPolicy<VReg RetClass, int sew, string Constraint = "", bit Commutable = 0> { - let VLMul = MInfo.value in { + let VLMul = MInfo.value, SEW=sew in { defvar mx = MInfo.MX; let isCommutable = Commutable in def "_" # mx # "_E" # sew : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>; - def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>; + def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>, + RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>; } } @@ -3237,15 +3225,16 @@ multiclass VPseudoTernaryWithTailPolicyRoundingMode<VReg RetClass, int sew, string Constraint = "", bit Commutable = 0> { - let VLMul = MInfo.value in { + let VLMul = MInfo.value, SEW=sew in { defvar mx = MInfo.MX; let isCommutable = Commutable in def "_" # mx # "_E" # sew - : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class, + : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class, Op2Class, Constraint>; def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicyRoundingMode<RetClass, Op1Class, - Op2Class, Constraint>; + Op2Class, Constraint>, + RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>; } } @@ -3254,11 +3243,12 @@ multiclass VPseudoTernaryWithPolicy<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, string Constraint = "", - bit Commutable = 0> { + bit Commutable = 0, + int TargetConstraintType = 1> { let VLMul = MInfo.value in { let isCommutable = Commutable in - def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>; - def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>, + def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=3>; } } @@ -3268,16 +3258,19 @@ multiclass VPseudoTernaryWithPolicyRoundingMode<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, string Constraint = "", - bit Commutable = 0> { + bit Commutable = 0, + int TargetConstraintType = 1> { let VLMul = 
MInfo.value in { let isCommutable = Commutable in def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class, - Op2Class, Constraint>; + Op2Class, Constraint, + TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class, Op2Class, Constraint, - UsesVXRM_=0>, + UsesVXRM_=0, + TargetConstraintType=TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=3>; } } @@ -3312,31 +3305,34 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, string Constraint multiclass VPseudoTernaryW_VV<LMULInfo m> { defvar constraint = "@earlyclobber $rd"; defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m, - constraint>; + constraint, /*Commutable*/ 0, TargetConstraintType=3>; } multiclass VPseudoTernaryW_VV_RM<LMULInfo m> { defvar constraint = "@earlyclobber $rd"; defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m, - constraint>; + constraint, /* Commutable */ 0, + TargetConstraintType=3>; } multiclass VPseudoTernaryW_VX<LMULInfo m> { defvar constraint = "@earlyclobber $rd"; defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m, - constraint>; + constraint, /*Commutable*/ 0, TargetConstraintType=3>; } -multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f> { +multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> { defvar constraint = "@earlyclobber $rd"; defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.wvrclass, f.fprclass, - m.vrclass, m, constraint>; + m.vrclass, m, constraint, /*Commutable*/ 0, TargetConstraintType>; } multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f> { defvar constraint = "@earlyclobber $rd"; defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, f.fprclass, - m.vrclass, m, constraint>; + m.vrclass, m, constraint, + /* Commutable */ 0, + TargetConstraintType=3>; } multiclass VPseudoVSLDVWithPolicy<VReg RetClass, @@ -3362,62 +3358,43 @@ multiclass VPseudoVSLDV_VI<Operand ImmType = simm5, LMULInfo m, string Constrain multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVIMulAddV_MX = !cast<SchedWrite>("WriteVIMulAddV_" # mx); - defvar WriteVIMulAddX_MX = !cast<SchedWrite>("WriteVIMulAddX_" # mx); - defvar ReadVIMulAddV_MX = !cast<SchedRead>("ReadVIMulAddV_" # mx); - defvar ReadVIMulAddX_MX = !cast<SchedRead>("ReadVIMulAddX_" # mx); - defm "" : VPseudoTernaryV_VV_AAXA<m, Constraint>, - Sched<[WriteVIMulAddV_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX, - ReadVIMulAddV_MX, ReadVMask]>; + SchedTernary<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV", + "ReadVIMulAddV", mx>; defm "" : VPseudoTernaryV_VX_AAXA<m, Constraint>, - Sched<[WriteVIMulAddX_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX, - ReadVIMulAddX_MX, ReadVMask]>; + SchedTernary<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX", + "ReadVIMulAddV", mx>; } } multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx); - defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx); - defm "" : VPseudoTernaryV_VV_AAXA<m, Constraint>, - Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>; + SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV", + "ReadVFMulAddV", m.MX>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx); - defvar 
ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx); - defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx); - defm "" : VPseudoTernaryV_VF_AAXA<m, f, Constraint>, - Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>; + SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF", + "ReadVFMulAddV", m.MX>; } } } multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx); - defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx); - defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint>, - Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>; + SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV", + "ReadVFMulAddV", m.MX>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx); - defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx); - defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx); - defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>, - Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>; + SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF", + "ReadVFMulAddV", m.MX>; } } } @@ -3425,70 +3402,64 @@ multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> { multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVISlideX_MX = !cast<SchedWrite>("WriteVISlideX_" # mx); - defvar WriteVISlideI_MX = !cast<SchedWrite>("WriteVISlideI_" # mx); - defvar ReadVISlideV_MX = !cast<SchedRead>("ReadVISlideV_" # mx); - defvar ReadVISlideX_MX = !cast<SchedRead>("ReadVISlideX_" # mx); - defm "" : VPseudoVSLDV_VX<m, Constraint>, - Sched<[WriteVISlideX_MX, ReadVISlideV_MX, ReadVISlideV_MX, - ReadVISlideX_MX, ReadVMask]>; + SchedTernary<"WriteVISlideX", "ReadVISlideV", "ReadVISlideV", + "ReadVISlideX", mx>; defm "" : VPseudoVSLDV_VI<ImmType, m, Constraint>, - Sched<[WriteVISlideI_MX, ReadVISlideV_MX, ReadVISlideV_MX, ReadVMask]>; + SchedBinary<"WriteVISlideI", "ReadVISlideV", "ReadVISlideV", mx>; } } multiclass VPseudoVWMAC_VV_VX { foreach m = MxListW in { defvar mx = m.MX; - defvar WriteVIWMulAddV_MX = !cast<SchedWrite>("WriteVIWMulAddV_" # mx); - defvar WriteVIWMulAddX_MX = !cast<SchedWrite>("WriteVIWMulAddX_" # mx); - defvar ReadVIWMulAddV_MX = !cast<SchedRead>("ReadVIWMulAddV_" # mx); - defvar ReadVIWMulAddX_MX = !cast<SchedRead>("ReadVIWMulAddX_" # mx); - defm "" : VPseudoTernaryW_VV<m>, - Sched<[WriteVIWMulAddV_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, - ReadVIWMulAddV_MX, ReadVMask]>; + SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", + "ReadVIWMulAddV", mx>; defm "" : VPseudoTernaryW_VX<m>, - Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, - ReadVIWMulAddX_MX, ReadVMask]>; + SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX", + "ReadVIWMulAddV", mx>; } } multiclass VPseudoVWMAC_VX { foreach m = MxListW in { - defvar mx = m.MX; - defvar WriteVIWMulAddX_MX = !cast<SchedWrite>("WriteVIWMulAddX_" # mx); - defvar ReadVIWMulAddV_MX= !cast<SchedRead>("ReadVIWMulAddV_" # mx); - defvar ReadVIWMulAddX_MX = !cast<SchedRead>("ReadVIWMulAddX_" # mx); - defm "" : VPseudoTernaryW_VX<m>, - Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, - ReadVIWMulAddX_MX, 
ReadVMask]>; + SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX", + "ReadVIWMulAddV", m.MX>; } } multiclass VPseudoVWMAC_VV_VF_RM { foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWMulAddV_MX = !cast<SchedWrite>("WriteVFWMulAddV_" # mx); - defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx); - defm "" : VPseudoTernaryW_VV_RM<m>, - Sched<[WriteVFWMulAddV_MX, ReadVFWMulAddV_MX, - ReadVFWMulAddV_MX, ReadVFWMulAddV_MX, ReadVMask]>; + SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV", + "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX>; } foreach f = FPListW in { foreach m = f.MxListFW in { - defvar mx = m.MX; - defvar WriteVFWMulAddF_MX = !cast<SchedWrite>("WriteVFWMulAddF_" # mx); - defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx); - defvar ReadVFWMulAddF_MX = !cast<SchedRead>("ReadVFWMulAddF_" # mx); + defm "" : VPseudoTernaryW_VF_RM<m, f>, + SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV", + "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX>; + } + } +} +multiclass VPseudoVWMAC_VV_VF_BF_RM { + foreach m = MxListFW in { + defvar mx = m.MX; + defm "" : VPseudoTernaryW_VV_RM<m>, + SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV", + "ReadVFWMulAddV", "ReadVFWMulAddV", mx>; + } + + foreach f = BFPListW in { + foreach m = f.MxListFW in { + defvar mx = m.MX; defm "" : VPseudoTernaryW_VF_RM<m, f>, - Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX, - ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>; + SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV", + "ReadVFWMulAddF", "ReadVFWMulAddV", mx>; } } } @@ -3496,55 +3467,35 @@ multiclass VPseudoVWMAC_VV_VF_RM { multiclass VPseudoVCMPM_VV_VX_VI { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx); - defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx); - defvar WriteVICmpI_MX = !cast<SchedWrite>("WriteVICmpI_" # mx); - defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx); - defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx); - - defm "" : VPseudoBinaryM_VV<m>, - Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; - defm "" : VPseudoBinaryM_VX<m>, - Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; - defm "" : VPseudoBinaryM_VI<m>, - Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>, + SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>; + defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>, + SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>; + defm "" : VPseudoBinaryM_VI<m, TargetConstraintType=2>, + SchedUnary<"WriteVICmpI", "ReadVICmpV", mx>; } } multiclass VPseudoVCMPM_VV_VX { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx); - defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx); - defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx); - defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx); - - defm "" : VPseudoBinaryM_VV<m>, - Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; - defm "" : VPseudoBinaryM_VX<m>, - Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>, + SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>; + defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>, + SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>; } } multiclass VPseudoVCMPM_VV_VF { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCmpV_MX = 
!cast<SchedWrite>("WriteVFCmpV_" # mx); - defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx); - - defm "" : VPseudoBinaryM_VV<m>, - Sched<[WriteVFCmpV_MX, ReadVFCmpV_MX, ReadVFCmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>, + SchedBinary<"WriteVFCmpV", "ReadVFCmpV", "ReadVFCmpV", m.MX>; } foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx); - defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx); - defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx); - - defm "" : VPseudoBinaryM_VF<m, f>, - Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VF<m, f, TargetConstraintType=2>, + SchedBinary<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF", m.MX>; } } } @@ -3552,13 +3503,8 @@ multiclass VPseudoVCMPM_VV_VF { multiclass VPseudoVCMPM_VF { foreach f = FPList in { foreach m = f.MxList in { - defvar mx = m.MX; - defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx); - defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx); - defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx); - - defm "" : VPseudoBinaryM_VF<m, f>, - Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VF<m, f, TargetConstraintType=2>, + SchedBinary<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF", m.MX>; } } } @@ -3566,15 +3512,10 @@ multiclass VPseudoVCMPM_VF { multiclass VPseudoVCMPM_VX_VI { foreach m = MxList in { defvar mx = m.MX; - defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx); - defvar WriteVICmpI_MX = !cast<SchedWrite>("WriteVICmpI_" # mx); - defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx); - defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx); - - defm "" : VPseudoBinaryM_VX<m>, - Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; - defm "" : VPseudoBinaryM_VI<m>, - Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>, + SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>; + defm "" : VPseudoBinaryM_VI<m, TargetConstraintType=2>, + SchedUnary<"WriteVICmpI", "ReadVICmpV", mx>; } } @@ -3582,10 +3523,8 @@ multiclass VPseudoVRED_VS { foreach m = MxList in { defvar mx = m.MX; foreach e = SchedSEWSet<mx>.val in { - defvar WriteVIRedV_From_MX_E = !cast<SchedWrite>("WriteVIRedV_From_" # mx # "_E" # e); defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVIRedV_From_MX_E, ReadVIRedV, ReadVIRedV, ReadVIRedV, - ReadVMask]>; + SchedReduction<"WriteVIRedV_From", "ReadVIRedV", mx, e>; } } } @@ -3594,10 +3533,8 @@ multiclass VPseudoVREDMINMAX_VS { foreach m = MxList in { defvar mx = m.MX; foreach e = SchedSEWSet<mx>.val in { - defvar WriteVIRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVIRedMinMaxV_From_" # mx # "_E" # e); defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVIRedMinMaxV_From_MX_E, ReadVIRedV, ReadVIRedV, - ReadVIRedV, ReadVMask]>; + SchedReduction<"WriteVIRedMinMaxV_From", "ReadVIRedV", mx, e>; } } } @@ -3606,10 +3543,8 @@ multiclass VPseudoVWRED_VS { foreach m = MxListWRed in { defvar mx = m.MX; foreach e = SchedSEWSet<mx, isWidening=1>.val in { - defvar WriteVIWRedV_From_MX_E = !cast<SchedWrite>("WriteVIWRedV_From_" # mx # "_E" # e); defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVIWRedV_From_MX_E, ReadVIWRedV, ReadVIWRedV, - 
ReadVIWRedV, ReadVMask]>; + SchedReduction<"WriteVIWRedV_From", "ReadVIWRedV", mx, e>; } } } @@ -3618,12 +3553,10 @@ multiclass VPseudoVFRED_VS_RM { foreach m = MxListF in { defvar mx = m.MX; foreach e = SchedSEWSet<mx, isF=1>.val in { - defvar WriteVFRedV_From_MX_E = !cast<SchedWrite>("WriteVFRedV_From_" # mx # "_E" # e); defm _VS - : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass, + : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVFRedV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV, - ReadVMask]>; + SchedReduction<"WriteVFRedV_From", "ReadVFRedV", mx, e>; } } } @@ -3632,10 +3565,8 @@ multiclass VPseudoVFREDMINMAX_VS { foreach m = MxListF in { defvar mx = m.MX; foreach e = SchedSEWSet<mx, isF=1>.val in { - defvar WriteVFRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVFRedMinMaxV_From_" # mx # "_E" # e); defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVFRedMinMaxV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV, - ReadVMask]>; + SchedReduction<"WriteVFRedMinMaxV_From", "ReadVFRedV", mx, e>; } } } @@ -3644,11 +3575,9 @@ multiclass VPseudoVFREDO_VS_RM { foreach m = MxListF in { defvar mx = m.MX; foreach e = SchedSEWSet<mx, isF=1>.val in { - defvar WriteVFRedOV_From_MX_E = !cast<SchedWrite>("WriteVFRedOV_From_" # mx # "_E" # e); defm _VS : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVFRedOV_From_MX_E, ReadVFRedOV, ReadVFRedOV, - ReadVFRedOV, ReadVMask]>; + SchedReduction<"WriteVFRedOV_From", "ReadVFRedOV", mx, e>; } } } @@ -3657,12 +3586,22 @@ multiclass VPseudoVFWRED_VS_RM { foreach m = MxListFWRed in { defvar mx = m.MX; foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in { - defvar WriteVFWRedV_From_MX_E = !cast<SchedWrite>("WriteVFWRedV_From_" # mx # "_E" # e); defm _VS : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>, - Sched<[WriteVFWRedV_From_MX_E, ReadVFWRedV, ReadVFWRedV, - ReadVFWRedV, ReadVMask]>; + SchedReduction<"WriteVFWRedV_From", "ReadVFWRedV", mx, e>; + } + } +} + +multiclass VPseudoVFWREDO_VS_RM { + foreach m = MxListFWRed in { + defvar mx = m.MX; + foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in { + defm _VS + : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass, + V_M1.vrclass, m, e>, + SchedReduction<"WriteVFWRedOV_From", "ReadVFWRedV", mx, e>; } } } @@ -3670,11 +3609,12 @@ multiclass VPseudoVFWRED_VS_RM { multiclass VPseudoConversion<VReg RetClass, VReg Op1Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { - def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>; + def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class, - Constraint>, + Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3682,9 +3622,10 @@ multiclass VPseudoConversion<VReg RetClass, multiclass VPseudoConversionRoundingMode<VReg RetClass, VReg Op1Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { - def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint>; + def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : 
VPseudoUnaryMaskRoundingMode<RetClass, Op1Class, Constraint>, RISCVMaskedPseudo<MaskIdx=2>; @@ -3716,211 +3657,157 @@ multiclass VPseudoConversionNoExcept<VReg RetClass, multiclass VPseudoVCVTI_V { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx); - defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx); - defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>, - Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>; + SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVCVTI_V_RM { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx); - defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx); - defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>, - Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>; + SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVCVTI_RM_V { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx); - defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx); - defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>, - Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>; + SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVFROUND_NOEXCEPT_V { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx); - defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx); - defm _V : VPseudoConversionNoExcept<m.vrclass, m.vrclass, m>, - Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>; + SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVCVTF_V_RM { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx); - defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx); - defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>, - Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>; + SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVCVTF_RM_V { foreach m = MxListF in { - defvar mx = m.MX; - defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx); - defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx); - defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>, - Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>; + SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVWCVTI_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx); - defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx); - - defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>, - Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>; + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVWCVTI_V_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx); - defvar 
ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx); - - defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint>, - Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>; + defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVWCVTI_RM_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx); - defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx); - defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>, - Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>; + SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVWCVTF_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defvar mx = m.MX; - defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx); - defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx); - - defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>, - Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>; + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVWCVTD_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFWCvtFToFV_MX = !cast<SchedWrite>("WriteVFWCvtFToFV_" # mx); - defvar ReadVFWCvtFToFV_MX = !cast<SchedRead>("ReadVFWCvtFToFV_" # mx); - - defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>, - Sched<[WriteVFWCvtFToFV_MX, ReadVFWCvtFToFV_MX, ReadVMask]>; + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTI_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defvar mx = m.MX; - defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx); - defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx); - - defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>; + defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTI_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defvar mx = m.MX; - defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx); - defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx); - - defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>; + defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTI_RM_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defvar mx = m.MX; - defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx); - defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx); - defm _W : VPseudoConversionRM<m.vrclass, 
m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>; + SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTF_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx); - defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx); - - defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>; + defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTF_RM_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx); - defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx); - defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>; + SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTD_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx); - defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx); - - defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>; + defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTD_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defvar mx = m.MX; - defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx); - defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx); - - defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>, - Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>; + defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, + forceMergeOpRead=true>; } } @@ -3988,18 +3875,17 @@ multiclass VPseudoISegLoad<bit Ordered> { defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo); defvar DataVreg = dataEMUL.vrclass; defvar IdxVreg = idxEMUL.vrclass; - defvar Order = !if(Ordered, "O", "U"); let VLMul = dataEMUL.value in { foreach nf = NFSet<dataEMUL>.L in { defvar Vreg = SegRegClass<dataEMUL, nf>.RC; def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo : VPseudoISegLoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, nf, Ordered>, - VLXSEGSched<nf, dataEEW, Order, DataLInfo>; + VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>; def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" : VPseudoISegLoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, nf, Ordered>, - VLXSEGSched<nf, dataEEW, Order, DataLInfo>; + VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>; } } } @@ -4055,18 +3941,17 @@ multiclass VPseudoISegStore<bit Ordered> { defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo); defvar DataVreg = dataEMUL.vrclass; defvar IdxVreg = idxEMUL.vrclass; - defvar Order = !if(Ordered, "O", "U"); let VLMul = dataEMUL.value in { foreach nf = 
NFSet<dataEMUL>.L in { defvar Vreg = SegRegClass<dataEMUL, nf>.RC; def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo : VPseudoISegStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, nf, Ordered>, - VSXSEGSched<nf, idxEEW, Order, DataLInfo>; + VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>; def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" : VPseudoISegStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, nf, Ordered>, - VSXSEGSched<nf, idxEEW, Order, DataLInfo>; + VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>; } } } @@ -4087,16 +3972,12 @@ class VPatUnaryNoMask<string intrinsic_name, int log2sew, LMULInfo vlmul, VReg result_reg_class, - VReg op2_reg_class, - bit isSEWAware = 0> : + VReg op2_reg_class> : Pat<(result_type (!cast<Intrinsic>(intrinsic_name) (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), VLOpFrag)), - (!cast<Instruction>( - !if(isSEWAware, - inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew), - inst#"_"#kind#"_"#vlmul.MX)) + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), GPR:$vl, log2sew, TU_MU)>; @@ -4135,17 +4016,13 @@ class VPatUnaryMask<string intrinsic_name, int log2sew, LMULInfo vlmul, VReg result_reg_class, - VReg op2_reg_class, - bit isSEWAware = 0> : + VReg op2_reg_class> : Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask") (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), (mask_type V0), VLOpFrag, (XLenVT timm:$policy))), - (!cast<Instruction>( - !if(isSEWAware, - inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK", - inst#"_"#kind#"_"#vlmul.MX#"_MASK")) + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK") (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), (mask_type V0), GPR:$vl, log2sew, (XLenVT timm:$policy))>; @@ -4187,7 +4064,7 @@ class VPatMaskUnaryNoMask<string intrinsic_name, (!cast<Instruction>(inst#"_M_"#mti.BX) (mti.Mask (IMPLICIT_DEF)), (mti.Mask VR:$rs2), - GPR:$vl, mti.Log2SEW, TU_MU)>; + GPR:$vl, mti.Log2SEW, TA_MA)>; class VPatMaskUnaryMask<string intrinsic_name, string inst, @@ -4831,15 +4708,15 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix, } multiclass VPatUnaryV_V<string intrinsic, string instruction, - list<VTypeInfo> vtilist, bit isSEWAware = 0> { + list<VTypeInfo> vtilist> { foreach vti = vtilist in { let Predicates = GetVTypePredicates<vti>.Predicates in { def : VPatUnaryNoMask<intrinsic, instruction, "V", vti.Vector, vti.Vector, vti.Log2SEW, - vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>; + vti.LMul, vti.RegClass, vti.RegClass>; def : VPatUnaryMask<intrinsic, instruction, "V", vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW, - vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>; + vti.LMul, vti.RegClass, vti.RegClass>; } } } @@ -6080,6 +5957,21 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction> { foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; + // Define vfwcvt.f.f.v for f16 when Zvfhmin is enable. 
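// Zvfhmin supplies only the f16<->f32 convert instructions, so when the
// source element type is f16 the pattern below needs just
// HasVInstructionsF16Minimal rather than the full f16 vector predicates.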
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal], + !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates)) in + defm : VPatConversionTA<intrinsic, instruction, "V", + fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW, + fvti.LMul, fwti.RegClass, fvti.RegClass>; + } +} + +multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> { + foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in + { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, GetVTypePredicates<fwti>.Predicates) in defm : VPatConversionTA<intrinsic, instruction, "V", @@ -6136,8 +6028,21 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> { } } -multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction> { - foreach fvtiToFWti = AllWidenableFloatVectors in { +multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction, + list<VTypeInfoToWide> wlist = AllWidenableFloatVectors> { + foreach fvtiToFWti = wlist in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates) in + defm : VPatConversionTARoundingMode<intrinsic, instruction, "W", + fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW, + fvti.LMul, fvti.RegClass, fwti.RegClass>; + } +} + +multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> { + foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, @@ -6336,7 +6241,7 @@ foreach vti = AllIntegerVectors in { GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - + // Match VSUB with a small immediate to vadd.vi by negating the immediate. 
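// An immediate accepted by simm5_plus1 lies in [-15, 16], so its negation
// still fits in simm5: v - imm == v + (-imm), and the pattern below emits
// PseudoVADD_VI with the NegImm of the original operand.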
def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)), (vti.Vector vti.RegClass:$rs1), @@ -6346,7 +6251,7 @@ foreach vti = AllIntegerVectors in { vti.RegClass:$rs1, (NegImm simm5_plus1:$rs2), GPR:$vl, - vti.Log2SEW, TU_MU)>; + vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), @@ -6593,6 +6498,8 @@ defm PseudoVFWMACC : VPseudoVWMAC_VV_VF_RM; defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF_RM; defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF_RM; defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF_RM; +let Predicates = [HasStdExtZvfbfwma] in +defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM; } //===----------------------------------------------------------------------===// @@ -6697,6 +6604,7 @@ defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V; defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V; defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V; +defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V; } // mayRaiseFPException = true //===----------------------------------------------------------------------===// @@ -6722,6 +6630,7 @@ defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W; let hasSideEffects = 0, hasPostISelHook = 1 in defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; +defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true @@ -6774,7 +6683,7 @@ let IsRVVWideningReduction = 1, hasSideEffects = 0, mayRaiseFPException = true in { defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM; -defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM; +defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM; } } // Predicates = [HasVInstructionsAnyF] @@ -6787,14 +6696,14 @@ defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM; // 15.1 Vector Mask-Register Logical Instructions //===----------------------------------------------------------------------===// -defm PseudoVMAND: VPseudoVALU_MM; -defm PseudoVMNAND: VPseudoVALU_MM; +defm PseudoVMAND: VPseudoVALU_MM<Commutable=1>; +defm PseudoVMNAND: VPseudoVALU_MM<Commutable=1>; defm PseudoVMANDN: VPseudoVALU_MM; -defm PseudoVMXOR: VPseudoVALU_MM; -defm PseudoVMOR: VPseudoVALU_MM; -defm PseudoVMNOR: VPseudoVALU_MM; +defm PseudoVMXOR: VPseudoVALU_MM<Commutable=1>; +defm PseudoVMOR: VPseudoVALU_MM<Commutable=1>; +defm PseudoVMNOR: VPseudoVALU_MM<Commutable=1>; defm PseudoVMORN: VPseudoVALU_MM; -defm PseudoVMXNOR: VPseudoVALU_MM; +defm PseudoVMXNOR: VPseudoVALU_MM<Commutable=1>; // Pseudo instructions defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">; @@ -7005,7 +6914,7 @@ foreach vti = AllIntegerVectors in { (XLenVT 1), VLOpFrag)), (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, - vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (XLenVT 1), @@ -7139,7 +7048,7 @@ foreach vti = AllVectors in { VLOpFrag)), (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX) $passthru, $rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; - + // vmv.v.x/vmv.v.i are handled in RISCInstrVInstrInfoVVLPatterns.td } } @@ -7222,7 +7131,7 @@ defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB", //===----------------------------------------------------------------------===// // 13.4. 
Vector Single-Width Floating-Point Multiply/Divide Instructions //===----------------------------------------------------------------------===// -defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL", +defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL", AllFloatVectors>; defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfdiv", "PseudoVFDIV", AllFloatVectors, isSEWAware=1>; @@ -7258,6 +7167,9 @@ defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>; defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>; +let Predicates = [HasStdExtZvfbfwma] in +defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16", + AllWidenableBFloatToFloatVectors>; //===----------------------------------------------------------------------===// // 13.8. Vector Floating-Point Square-Root Instruction @@ -7362,6 +7274,8 @@ defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F"> defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">; defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">; defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">; +defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v", + "PseudoVFWCVTBF16_F_F">; //===----------------------------------------------------------------------===// // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions @@ -7372,7 +7286,18 @@ defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">; defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">; defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">; -defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">; +defvar WidenableFloatVectorsExceptF16 = !filter(fvtiToFWti, AllWidenableFloatVectors, + !ne(fvtiToFWti.Vti.Scalar, f16)); +defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F", + WidenableFloatVectorsExceptF16>; +// Define vfncvt.f.f.w for f16 when Zvfhmin is enable. +defvar F16WidenableFloatVectors = !filter(fvtiToFWti, AllWidenableFloatVectors, + !eq(fvtiToFWti.Vti.Scalar, f16)); +let Predicates = [HasVInstructionsF16Minimal] in +defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F", + F16WidenableFloatVectors>; +defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w", + "PseudoVFNCVTBF16_F_F">; defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">; //===----------------------------------------------------------------------===// @@ -7500,6 +7425,11 @@ foreach fvti = AllFloatVectors in { (fvti.Scalar (fpimm0)), VLOpFrag)), (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX) (fvti.Vector $rs1), (XLenVT X0), GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1), + (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)), + (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX) + (fvti.Vector $rs1), GPR:$imm, GPR:$vl, fvti.Log2SEW)>; } } @@ -7532,10 +7462,6 @@ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16", // 16.5. 
Vector Compress Instruction //===----------------------------------------------------------------------===// defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>; -defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>; -defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>; -defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>; -defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>; defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>; // Include the non-intrinsic ISel patterns diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 4141c7698bb4..b7c845703794 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -35,7 +35,7 @@ multiclass VPatUSLoadStoreSDNode<ValueType type, // Load def : Pat<(type (load GPR:$rs1)), (load_instr (type (IMPLICIT_DEF)), GPR:$rs1, avl, - log2sew, TU_MU)>; + log2sew, TA_MA)>; // Store def : Pat<(store type:$rs2, GPR:$rs1), (store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>; @@ -399,7 +399,7 @@ multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix, def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))), (!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TU_MU)>; + fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TA_MA)>; } } @@ -416,7 +416,7 @@ multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop, // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - fvti.AVL, fvti.Log2SEW, TU_MU)>; + fvti.AVL, fvti.Log2SEW, TA_MA)>; } } @@ -429,7 +429,7 @@ multiclass VPatConvertFP2ISDNode_V<SDPatternOperator vop, def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))), (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX) (ivti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TU_MU)>; + fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TA_MA)>; } } @@ -444,7 +444,7 @@ multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop, (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX) (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - ivti.AVL, ivti.Log2SEW, TU_MU)>; + ivti.AVL, ivti.Log2SEW, TA_MA)>; } } @@ -458,7 +458,7 @@ multiclass VPatWConvertFP2ISDNode_V<SDPatternOperator vop, def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))), (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX) (iwti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>; + fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TA_MA)>; } } @@ -476,7 +476,7 @@ multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop, // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - fvti.AVL, fvti.Log2SEW, TU_MU)>; + fvti.AVL, fvti.Log2SEW, TA_MA)>; } } @@ -490,7 +490,7 @@ multiclass VPatNConvertFP2ISDNode_W<SDPatternOperator vop, def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))), (!cast<Instruction>(instruction_name#"_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; } } @@ -505,12 +505,12 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, 
PatFrags exto (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))), (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))), (wti.Vector (extop2 (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))), (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - GPR:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; } } } @@ -531,7 +531,7 @@ multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop, (wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))), (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, GPR:$rs1, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; } } } @@ -588,7 +588,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> { (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), (XLenVT srcvalue))), @@ -597,14 +597,14 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> { (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), (XLenVT srcvalue))), (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; } } } @@ -627,7 +627,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name> // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), (XLenVT srcvalue))), @@ -640,7 +640,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name> // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), (XLenVT srcvalue))), @@ -651,7 +651,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name> // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; } } } @@ -683,7 +683,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name> // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(op (wti.Vector 
wti.RegClass:$rs2), (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))), (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) @@ -692,7 +692,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name> // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; } } } @@ -883,16 +883,20 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> { // 7.4. Vector Unit-Stride Instructions foreach vti = !listconcat(FractionalGroupIntegerVectors, - FractionalGroupFloatVectors) in - let Predicates = GetVTypePredicates<vti>.Predicates in + FractionalGroupFloatVectors, + FractionalGroupBFloatVectors) in + let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal], + GetVTypePredicates<vti>.Predicates) in defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass>; -foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VF16M1, VF32M1, VF64M1] in - let Predicates = GetVTypePredicates<vti>.Predicates in +foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in + let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal], + GetVTypePredicates<vti>.Predicates) in defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul, vti.RegClass>; -foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors) in - let Predicates = GetVTypePredicates<vti>.Predicates in +foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in + let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal], + GetVTypePredicates<vti>.Predicates) in defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul, vti.RegClass>; foreach mti = AllMasks in @@ -916,12 +920,12 @@ foreach vti = AllIntegerVectors in { (vti.Vector vti.RegClass:$rs1)), (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1)), (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, - simm5:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; + simm5:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; } } @@ -944,17 +948,17 @@ foreach vtiToWti = AllWidenableIntVectors in { (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(shl (wti.Vector (anyext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; } } @@ -989,7 +993,7 @@ foreach vti = AllIntegerVectors in { (vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))), 
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, - vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; } @@ -1051,6 +1055,23 @@ defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV", isSEWAware=1>; defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU", isSEWAware=1>; defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM", isSEWAware=1>; +foreach vtiTowti = AllWidenableIntVectors in { + defvar vti = vtiTowti.Vti; + defvar wti = vtiTowti.Wti; + let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<wti>.Predicates) in { + def : Pat< + (vti.Vector + (riscv_trunc_vector_vl + (srem (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))), + (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs2)))), + (vti.Mask true_mask), (XLenVT srcvalue))), + (!cast<Instruction>("PseudoVREM_VV_"#vti.LMul.MX#"_E"#!shl(1, vti.Log2SEW)) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; + } +} + // 11.12. Vector Widening Integer Multiply Instructions defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse, "PseudoVWMUL">; @@ -1145,7 +1166,7 @@ foreach mti = AllMasks in { // Handle rvv_vnot the same as the vmnot.m pseudoinstruction. def : Pat<(mti.Mask (rvv_vnot VR:$rs)), (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX) - VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>; + VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>; } } @@ -1279,40 +1300,40 @@ foreach vti = AllFloatVectors in { // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - vti.AVL, vti.Log2SEW, TU_MU)>; + vti.AVL, vti.Log2SEW, TA_MA)>; // 13.12. Vector Floating-Point Sign-Injection Instructions def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; // Handle fneg with VFSGNJN using the same input for both operands. 
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2))), (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; - + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg vti.RegClass:$rs2)))), (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; } } @@ -1337,7 +1358,8 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">; // 11.15. Vector Integer Merge Instructions // 13.15. Vector Floating-Point Merge Instruction foreach fvti = AllFloatVectors in { - let Predicates = GetVTypePredicates<fvti>.Predicates in { + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + let Predicates = GetVTypePredicates<ivti>.Predicates in { def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1, fvti.RegClass:$rs2)), (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) @@ -1346,6 +1368,15 @@ foreach fvti = AllFloatVectors in { fvti.AVL, fvti.Log2SEW)>; def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + + } + let Predicates = GetVTypePredicates<fvti>.Predicates in + def : Pat<(fvti.Vector (vselect (fvti.Mask V0), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2)), (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) @@ -1353,14 +1384,6 @@ foreach fvti = AllFloatVectors in { fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; - - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), - (SplatFPOp (fvti.Scalar fpimm0)), - fvti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; - } } // 13.17. 
Vector Single-Width Floating-Point/Integer Type-Convert Instructions @@ -1383,8 +1406,9 @@ defm : VPatNConvertI2FPSDNode_W_RM<any_uint_to_fp, "PseudoVFNCVT_F_XU_W">; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, - GetVTypePredicates<fwti>.Predicates) in + let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal], + !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates)) in def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), @@ -1392,7 +1416,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, - fvti.AVL, fvti.Log2SEW, TU_MU)>; + fvti.AVL, fvti.Log2SEW, TA_MA)>; } //===----------------------------------------------------------------------===// @@ -1400,18 +1424,18 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { //===----------------------------------------------------------------------===// foreach fvti = AllFloatVectors in { - let Predicates = GetVTypePredicates<fvti>.Predicates in { - def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)), + let Predicates = GetVTypePredicates<fvti>.Predicates in + def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)), (!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), (fvti.Scalar fvti.ScalarRegClass:$rs1), - fvti.AVL, fvti.Log2SEW, TU_MU)>; - + fvti.AVL, fvti.Log2SEW, TA_MA)>; + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + let Predicates = GetVTypePredicates<ivti>.Predicates in def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))), (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), - 0, fvti.AVL, fvti.Log2SEW, TU_MU)>; - } + 0, fvti.AVL, fvti.Log2SEW, TA_MA)>; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 900f9dd1be05..dc6b57fad321 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -98,6 +98,8 @@ def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>; def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>; def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>; def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>; +def riscv_rotl_vl : SDNode<"RISCVISD::ROTL_VL", SDT_RISCVIntBinOp_VL>; +def riscv_rotr_vl : SDNode<"RISCVISD::ROTR_VL", SDT_RISCVIntBinOp_VL>; def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; @@ -122,8 +124,8 @@ def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>; def riscv_fabs_vl : SDNode<"RISCVISD::FABS_VL", SDT_RISCVFPUnOp_VL>; def riscv_fsqrt_vl : SDNode<"RISCVISD::FSQRT_VL", SDT_RISCVFPUnOp_VL>; def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVCopySign_VL>; -def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL, 
[SDNPCommutative]>; -def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>; +def riscv_vfmin_vl : SDNode<"RISCVISD::VFMIN_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>; +def riscv_vfmax_vl : SDNode<"RISCVISD::VFMAX_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>; def riscv_strict_fadd_vl : SDNode<"RISCVISD::STRICT_FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative, SDNPHasChain]>; def riscv_strict_fsub_vl : SDNode<"RISCVISD::STRICT_FSUB_VL", SDT_RISCVFPBinOp_VL, [SDNPHasChain]>; @@ -407,6 +409,7 @@ def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWIntBinOp_VL, [S def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>; def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWIntBinOp_VL, []>; def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWIntBinOp_VL, []>; +def riscv_vwsll_vl : SDNode<"RISCVISD::VWSLL_VL", SDT_RISCVVWIntBinOp_VL, []>; def SDT_RISCVVWIntTernOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>, SDTCisInt<1>, @@ -577,14 +580,15 @@ def SplatPat_simm5_plus1 def SplatPat_simm5_plus1_nonzero : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>; -def ext_oneuse_SplatPat - : ComplexPattern<vAny, 1, "selectExtOneUseVSplat", [], [], 2>; +// Selects extends or truncates of splats where we only care about the lowest 8 +// bits of each element. +def Low8BitsSplatPat + : ComplexPattern<vAny, 1, "selectLow8BitsVSplat", [], [], 2>; -def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>; - -// Ignore the vl operand. -def SplatFPOp : PatFrag<(ops node:$op), - (riscv_vfmv_v_f_vl undef, node:$op, srcvalue)>; +// Ignore the vl operand on vmv_v_f, and vmv_s_f. +def SplatFPOp : PatFrags<(ops node:$op), + [(riscv_vfmv_v_f_vl undef, node:$op, srcvalue), + (riscv_vfmv_s_f_vl undef, node:$op, srcvalue)]>; def sew8simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<8>", []>; def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>; @@ -1377,16 +1381,6 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> { let Predicates = GetVTypePredicates<vti>.Predicates in { def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2, - (vti.Mask true_mask), VLOpFrag, - (XLenVT timm:$policy))), - (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW) - (vti_m1.Vector VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti_m1.Vector VR:$rs2), - GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - - def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), - (vti.Vector vti.RegClass:$rs1), VR:$rs2, (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") @@ -1404,19 +1398,6 @@ multiclass VPatReductionVL_RM<SDNode vop, string instruction_name, bit is_float> let Predicates = GetVTypePredicates<vti>.Predicates in { def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2, - (vti.Mask true_mask), VLOpFrag, - (XLenVT timm:$policy))), - (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW) - (vti_m1.Vector VR:$merge), - (vti.Vector vti.RegClass:$rs1), - (vti_m1.Vector VR:$rs2), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - - def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), - (vti.Vector vti.RegClass:$rs1), VR:$rs2, (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), 
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") @@ -1447,18 +1428,18 @@ multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> { VLOpFrag)), (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; + wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat< (vti.Vector (riscv_trunc_vector_vl (op (wti.Vector wti.RegClass:$rs2), - (wti.Vector (ext_oneuse_SplatPat (XLenVT GPR:$rs1)))), + (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1)))), (vti.Mask true_mask), VLOpFrag)), (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat< (vti.Vector @@ -1468,7 +1449,7 @@ multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> { VLOpFrag)), (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; + wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>; } } } @@ -1482,14 +1463,6 @@ multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_n GetVTypePredicates<wti>.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), - VR:$rs2, (vti.Mask true_mask), VLOpFrag, - (XLenVT timm:$policy))), - (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW) - (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW, - (XLenVT timm:$policy))>; - def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), - (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), VR:$rs2, (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") @@ -1509,18 +1482,6 @@ multiclass VPatWidenReductionVL_RM<SDNode vop, PatFrags extop, string instructio GetVTypePredicates<wti>.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), - VR:$rs2, (vti.Mask true_mask), VLOpFrag, - (XLenVT timm:$policy))), - (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW) - (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, - (XLenVT timm:$policy))>; - def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), - (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), VR:$rs2, (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") @@ -1544,14 +1505,6 @@ multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instru GetVTypePredicates<wti>.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), - VR:$rs2, (vti.Mask true_mask), VLOpFrag, - (XLenVT timm:$policy))), - (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW) - (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW, - (XLenVT timm:$policy))>; - def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), - (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), 
VLOpFrag)), VR:$rs2, (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") @@ -1571,18 +1524,6 @@ multiclass VPatWidenReductionVL_Ext_VL_RM<SDNode vop, PatFrags extop, string ins GetVTypePredicates<wti>.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), - VR:$rs2, (vti.Mask true_mask), VLOpFrag, - (XLenVT timm:$policy))), - (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW) - (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, - (XLenVT timm:$policy))>; - def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), - (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), VR:$rs2, (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") @@ -1693,7 +1634,7 @@ multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruct (vti.Mask true_mask), VLOpFrag)), (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1713,7 +1654,7 @@ multiclass VPatNarrowShiftExtVL_WV<SDNode op, PatFrags extop, string instruction (vti.Mask V0), VLOpFrag)), (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1832,13 +1773,13 @@ multiclass VPatNarrowShiftSplat_WX_WI<SDNode op, string instruction_name> { srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)), (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; + wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2), srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)), (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; + wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; } } } @@ -2039,6 +1980,56 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> { } } +multiclass VPatSlideVL_VX_VI<SDNode vop, string instruction_name> { + foreach vti = AllVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), + uimm5:$rs2, (vti.Mask V0), + VLOpFrag, (XLenVT timm:$policy))), + (!cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, uimm5:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (XLenVT timm:$policy))>; + + def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask V0), + VLOpFrag, (XLenVT timm:$policy))), + (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (XLenVT timm:$policy))>; + } + } +} + +multiclass VPatSlide1VL_VX<SDNode 
vop, string instruction_name> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask V0), VLOpFrag)), + (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK") + vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + } + } +} + +multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> { + foreach vti = AllFloatVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3), + (vti.Vector vti.RegClass:$rs1), + vti.Scalar:$rs2, (vti.Mask V0), VLOpFrag)), + (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_MASK") + vti.RegClass:$rs3, vti.RegClass:$rs1, vti.Scalar:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -2132,7 +2123,7 @@ foreach vti = AllIntegerVectors in { srcvalue, (vti.Mask true_mask), VLOpFrag), (!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>; } // 11.7. Vector Narrowing Integer Right Shift Instructions @@ -2216,7 +2207,7 @@ defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", IntegerVectorsExceptI6 // vsmul.vv and vsmul.vx are not included in EEW=64 in Zve64*. let Predicates = [HasVInstructionsFullMultiply] in { defm : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH", I64IntegerVectors>; - defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>; + defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>; } // 11.11. Vector Integer Divide Instructions @@ -2373,8 +2364,8 @@ defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">; defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">; // 13.11. Vector Floating-Point MIN/MAX Instructions -defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">; -defm : VPatBinaryFPVL_VV_VF<riscv_fmaxnum_vl, "PseudoVFMAX">; +defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN">; +defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX">; // 13.13. Vector Floating-Point Compare Instructions defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ, @@ -2441,7 +2432,7 @@ foreach vti = AllFloatVectors in { VLOpFrag), (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; + vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (SplatFPOp vti.ScalarRegClass:$rs2), @@ -2459,12 +2450,13 @@ foreach vti = AllFloatVectors in { (!cast<Instruction>("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; - + // 14.14. 
Vector Floating-Point Classify Instruction - def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2), - (vti.Mask true_mask), VLOpFrag), - (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX #"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -2472,7 +2464,8 @@ foreach fvti = AllFloatVectors in { // Floating-point vselects: // 11.15. Vector Integer Merge Instructions // 13.15. Vector Floating-Point Merge Instruction - let Predicates = GetVTypePredicates<fvti>.Predicates in { + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + let Predicates = GetVTypePredicates<ivti>.Predicates in { def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), fvti.RegClass:$rs1, fvti.RegClass:$rs2, @@ -2483,16 +2476,6 @@ foreach fvti = AllFloatVectors in { GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), - (SplatFPOp fvti.ScalarRegClass:$rs1), - fvti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, - (fvti.Scalar fvti.ScalarRegClass:$rs1), - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; - - def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), fvti.RegClass:$rs2, VLOpFrag)), @@ -2519,21 +2502,33 @@ foreach fvti = AllFloatVectors in { GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), - (SplatFPOp fvti.ScalarRegClass:$rs1), + (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2, VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) + fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + } + + let Predicates = GetVTypePredicates<fvti>.Predicates in { + def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2, + VLOpFrag)), (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) - fvti.RegClass:$rs2, fvti.RegClass:$rs2, + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), - (SplatFPOp (fvti.Scalar fpimm0)), + (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2, VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) - fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0), - GPR:$vl, fvti.Log2SEW)>; + (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) + fvti.RegClass:$rs2, fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; // 13.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. 
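As the comment above observes, splatting the floating-point constant +0.0 does not need the floating-point merge/move form: +0.0 is the all-zeros bit pattern in every IEEE encoding, so the integer splat vmv.v.x with the zero register produces the same vector value. A minimal assembly sketch of the idea (the vsetvli configuration and register numbers are arbitrary and chosen only for illustration):

    vsetvli t0, a0, e32, m1, ta, ma   # assumed SEW=32, LMUL=1 for the example
    vmv.v.x v8, x0                    # each active element of v8 becomes 0x00000000 == +0.0f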
@@ -2585,8 +2580,9 @@ defm : VPatWConvertI2FPVL_V<any_riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, - GetVTypePredicates<fwti>.Predicates) in + let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal], + !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates)) in def : Pat<(fwti.Vector (any_riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1), (fvti.Mask V0), @@ -2615,8 +2611,10 @@ defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, - GetVTypePredicates<fwti>.Predicates) in { + // Define vfwcvt.f.f.v for f16 when Zvfhmin is enable. + let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal], + !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates)) in { def : Pat<(fvti.Vector (any_riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), (fwti.Mask V0), VLOpFrag)), @@ -2628,6 +2626,8 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { FRM_DYN, GPR:$vl, fvti.Log2SEW, TA_MA)>; + let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, + GetVTypePredicates<fwti>.Predicates) in def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1), (fwti.Mask V0), VLOpFrag)), @@ -2766,7 +2766,7 @@ foreach vti = AllIntegerVectors in { (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX) vti.RegClass:$merge, (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>; - + def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, vti.RegClass:$rs1, vti.RegClass:$merge, @@ -2922,70 +2922,12 @@ foreach vti = AllIntegerVectors in { (!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd), - (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX) - vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd), - (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX) - vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; } } -foreach vti = AllFloatVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd), - (vti.Vector vti.RegClass:$rs1), - vti.Scalar:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd), - (vti.Vector vti.RegClass:$rs1), - vti.Scalar:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; - } -} - -foreach vti = AllVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : 
Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3), - (vti.Vector vti.RegClass:$rs1), - uimm5:$rs2, (vti.Mask true_mask), - VLOpFrag, (XLenVT timm:$policy))), - (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX) - vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2, - GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - - def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3), - (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask true_mask), - VLOpFrag, (XLenVT timm:$policy))), - (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX) - vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, - GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - - def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3), - (vti.Vector vti.RegClass:$rs1), - uimm5:$rs2, (vti.Mask true_mask), - VLOpFrag, (XLenVT timm:$policy))), - (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX) - vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2, - GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - - def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3), - (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask true_mask), - VLOpFrag, (XLenVT timm:$policy))), - (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX) - vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, - GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; - } -} +defm : VPatSlideVL_VX_VI<riscv_slideup_vl, "PseudoVSLIDEUP">; +defm : VPatSlideVL_VX_VI<riscv_slidedown_vl, "PseudoVSLIDEDOWN">; +defm : VPatSlide1VL_VX<riscv_slide1up_vl, "PseudoVSLIDE1UP">; +defm : VPatSlide1VL_VF<riscv_fslide1up_vl, "PseudoVFSLIDE1UP">; +defm : VPatSlide1VL_VX<riscv_slide1down_vl, "PseudoVSLIDE1DOWN">; +defm : VPatSlide1VL_VF<riscv_fslide1down_vl, "PseudoVFSLIDE1DOWN">; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index 4ba052b25e42..924e91e15c34 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -13,10 +13,12 @@ let DecoderNamespace = "XCVbitmanip" in { class CVInstBitManipRII<bits<2> funct2, bits<3> funct3, dag outs, dag ins, string opcodestr, string argstr> - : RVInstI<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> { + : RVInstIBase<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> { bits<5> is3; bits<5> is2; - let imm12 = {funct2, is3, is2}; + let Inst{31-30} = funct2; + let Inst{29-25} = is3; + let Inst{24-20} = is2; } class CVBitManipRII<bits<2> funct2, bits<3> funct3, string opcodestr, @@ -31,7 +33,7 @@ let DecoderNamespace = "XCVbitmanip" in { class CVBitManipR<bits<7> funct7, string opcodestr> : RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1"> { + (ins GPR:$rs1), opcodestr, "$rd, $rs1"> { let rs2 = 0b00000; } } @@ -67,125 +69,80 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32], def CV_CNT : CVBitManipR<0b0100100, "cv.cnt">; } -class CVInstMac<bits<7> funct7, bits<3> funct3, dag outs, dag ins, - string opcodestr, string argstr, list<dag> pattern> - : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> { - bits<5> rs2; - bits<5> rs1; - bits<5> rd; - - let Inst{31-25} = funct7; - let Inst{24-20} = rs2; - let Inst{19-15} = rs1; - let Inst{14-12} = funct3; - let Inst{11-7} = rd; - let Inst{6-0} = OPC_CUSTOM_1.Value; +class CVInstMac<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_CUSTOM_1, + (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, 
GPR:$rs2), + opcodestr, "$rd, $rs1, $rs2"> { let DecoderNamespace = "XCVmac"; } -class CVInstMac16I<bits<2> funct2, bits<3> funct3, dag outs, dag ins, - string opcodestr, string argstr, list<dag> pattern> - : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> { +class CVInstMacMulN<bits<2> funct2, bits<3> funct3, dag outs, dag ins, + string opcodestr> + : RVInstRBase<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, + "$rd, $rs1, $rs2, $imm5"> { bits<5> imm5; - bits<5> rs2; - bits<5> rs1; - bits<5> rd; let Inst{31-30} = funct2; let Inst{29-25} = imm5; - let Inst{24-20} = rs2; - let Inst{19-15} = rs1; - let Inst{14-12} = funct3; - let Inst{11-7} = rd; - let Inst{6-0} = OPC_CUSTOM_2.Value; let DecoderNamespace = "XCVmac"; } +class CVInstMacN<bits<2> funct2, bits<3> funct3, string opcodestr> + : CVInstMacMulN<funct2, funct3, (outs GPR:$rd_wb), + (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr>; + +class CVInstMulN<bits<2> funct2, bits<3> funct3, string opcodestr> + : CVInstMacMulN<funct2, funct3, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr>; + let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0, mayStore = 0, Constraints = "$rd = $rd_wb" in { // 32x32 bit macs - def CV_MAC : CVInstMac<0b1001000, 0b011, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2), - "cv.mac", "$rd, $rs1, $rs2", []>, + def CV_MAC : CVInstMac<0b1001000, 0b011, "cv.mac">, Sched<[]>; - def CV_MSU : CVInstMac<0b1001001, 0b011, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2), - "cv.msu", "$rd, $rs1, $rs2", []>, + def CV_MSU : CVInstMac<0b1001001, 0b011, "cv.msu">, Sched<[]>; // Signed 16x16 bit macs with imm - def CV_MACSN : CVInstMac16I<0b00, 0b110, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.macsn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACSN : CVInstMacN<0b00, 0b110, "cv.macsn">, Sched<[]>; - def CV_MACHHSN : CVInstMac16I<0b01, 0b110, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.machhsn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACHHSN : CVInstMacN<0b01, 0b110, "cv.machhsn">, Sched<[]>; - def CV_MACSRN : CVInstMac16I<0b10, 0b110, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.macsrn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACSRN : CVInstMacN<0b10, 0b110, "cv.macsrn">, Sched<[]>; - def CV_MACHHSRN : CVInstMac16I<0b11, 0b110, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.machhsrn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACHHSRN : CVInstMacN<0b11, 0b110, "cv.machhsrn">, Sched<[]>; // Unsigned 16x16 bit macs with imm - def CV_MACUN : CVInstMac16I<0b00, 0b111, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.macun", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACUN : CVInstMacN<0b00, 0b111, "cv.macun">, Sched<[]>; - def CV_MACHHUN : CVInstMac16I<0b01, 0b111, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.machhun", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACHHUN : CVInstMacN<0b01, 0b111, "cv.machhun">, Sched<[]>; - def CV_MACURN : CVInstMac16I<0b10, 0b111, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.macurn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACURN : CVInstMacN<0b10, 0b111, "cv.macurn">, Sched<[]>; - def CV_MACHHURN : CVInstMac16I<0b11, 0b111, (outs GPR:$rd_wb), - (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.machhurn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MACHHURN : CVInstMacN<0b11, 0b111, "cv.machhurn">, Sched<[]>; } // Predicates = 
[HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0... let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { // Signed 16x16 bit muls with imm - def CV_MULSN : CVInstMac16I<0b00, 0b100, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulsn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULSN : CVInstMulN<0b00, 0b100, "cv.mulsn">, Sched<[]>; - def CV_MULHHSN : CVInstMac16I<0b01, 0b100, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulhhsn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULHHSN : CVInstMulN<0b01, 0b100, "cv.mulhhsn">, Sched<[]>; - def CV_MULSRN : CVInstMac16I<0b10, 0b100, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulsrn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULSRN : CVInstMulN<0b10, 0b100, "cv.mulsrn">, Sched<[]>; - def CV_MULHHSRN : CVInstMac16I<0b11, 0b100, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulhhsrn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULHHSRN : CVInstMulN<0b11, 0b100, "cv.mulhhsrn">, Sched<[]>; - // Unsigned 16x16 bit muls with imm - def CV_MULUN : CVInstMac16I<0b00, 0b101, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulun", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULUN : CVInstMulN<0b00, 0b101, "cv.mulun">, Sched<[]>; - def CV_MULHHUN : CVInstMac16I<0b01, 0b101, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulhhun", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULHHUN : CVInstMulN<0b01, 0b101, "cv.mulhhun">, Sched<[]>; - def CV_MULURN : CVInstMac16I<0b10, 0b101, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulurn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULURN : CVInstMulN<0b10, 0b101, "cv.mulurn">, Sched<[]>; - def CV_MULHHURN : CVInstMac16I<0b11, 0b101, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), - "cv.mulhhurn", "$rd, $rs1, $rs2, $imm5", []>, + def CV_MULHHURN : CVInstMulN<0b11, 0b101, "cv.mulhhurn">, Sched<[]>; } // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0... 
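The opcodestr/argstr strings above fix the assembly syntax accepted for the XCVmac forms: the accumulating definitions (with Constraints = "$rd = $rd_wb") read and write rd, while the plain multiply definitions only write it. A hedged sketch of the syntax, with arbitrary registers and immediates; the arithmetic itself is specified by the CORE-V extension and is not restated here:

    cv.mac    a0, a1, a2        # rd, rs1, rs2      -- rd is also a source (accumulator)
    cv.macsn  a0, a1, a2, 3     # rd, rs1, rs2, uimm5 -- accumulating 16x16 form
    cv.mulsn  a0, a1, a2, 3     # rd, rs1, rs2, uimm5 -- non-accumulating multiply form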
@@ -203,3 +160,547 @@ let Predicates = [HasVendorXCVmac, IsRV32] in { def : InstAlias<"cv.mulhhu $rd1, $rs1, $rs2", (CV_MULHHUN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>; } // Predicates = [HasVendorXCVmac, IsRV32] + +let DecoderNamespace = "XCValu" in { + class CVInstAluRRI<bits<2> funct2, bits<3> funct3, string opcodestr> + : RVInstRBase<funct3, OPC_CUSTOM_2, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr, + "$rd, $rs1, $rs2, $imm5"> { + bits<5> imm5; + + let Inst{31-30} = funct2; + let Inst{29-25} = imm5; + } + + class CVInstAluRR<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">; + + class CVInstAluRRNR<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd_wb), + (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">; + + class CVInstAluRI<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstIBase<funct3, OPC_CUSTOM_1, (outs GPR:$rd), + (ins GPR:$rs1, uimm5:$imm5), opcodestr, + "$rd, $rs1, $imm5"> { + bits<5> imm5; + + let Inst{31-25} = funct7; + let Inst{24-20} = imm5; + } + + class CVInstAluR<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), (ins GPR:$rs1), + opcodestr, "$rd, $rs1"> { + let rs2 = 0b00000; + } + +} // DecoderNamespace = "XCValu" + +let Predicates = [HasVendorXCValu], + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + // General ALU Operations + def CV_ABS : CVInstAluR<0b0101000, 0b011, "cv.abs">, + Sched<[]>; + def CV_SLET : CVInstAluRR<0b0101001, 0b011, "cv.slet">, + Sched<[]>; + def CV_SLETU : CVInstAluRR<0b0101010, 0b011, "cv.sletu">, + Sched<[]>; + def CV_MIN : CVInstAluRR<0b0101011, 0b011, "cv.min">, + Sched<[]>; + def CV_MINU : CVInstAluRR<0b0101100, 0b011, "cv.minu">, + Sched<[]>; + def CV_MAX : CVInstAluRR<0b0101101, 0b011, "cv.max">, + Sched<[]>; + def CV_MAXU : CVInstAluRR<0b0101110, 0b011, "cv.maxu">, + Sched<[]>; + def CV_EXTHS : CVInstAluR<0b0110000, 0b011, "cv.exths">, + Sched<[]>; + def CV_EXTHZ : CVInstAluR<0b0110001, 0b011, "cv.exthz">, + Sched<[]>; + def CV_EXTBS : CVInstAluR<0b0110010, 0b011, "cv.extbs">, + Sched<[]>; + def CV_EXTBZ : CVInstAluR<0b0110011, 0b011, "cv.extbz">, + Sched<[]>; + + def CV_CLIP : CVInstAluRI<0b0111000, 0b011, "cv.clip">, + Sched<[]>; + def CV_CLIPU : CVInstAluRI<0b0111001, 0b011, "cv.clipu">, + Sched<[]>; + def CV_CLIPR : CVInstAluRR<0b0111010, 0b011, "cv.clipr">, + Sched<[]>; + def CV_CLIPUR : CVInstAluRR<0b0111011, 0b011, "cv.clipur">, + Sched<[]>; + + def CV_ADDN : CVInstAluRRI<0b00, 0b010, "cv.addn">, + Sched<[]>; + def CV_ADDUN : CVInstAluRRI<0b01, 0b010, "cv.addun">, + Sched<[]>; + def CV_ADDRN : CVInstAluRRI<0b10, 0b010, "cv.addrn">, + Sched<[]>; + def CV_ADDURN : CVInstAluRRI<0b11, 0b010, "cv.addurn">, + Sched<[]>; + def CV_SUBN : CVInstAluRRI<0b00, 0b011, "cv.subn">, + Sched<[]>; + def CV_SUBUN : CVInstAluRRI<0b01, 0b011, "cv.subun">, + Sched<[]>; + def CV_SUBRN : CVInstAluRRI<0b10, 0b011, "cv.subrn">, + Sched<[]>; + def CV_SUBURN : CVInstAluRRI<0b11, 0b011, "cv.suburn">, + Sched<[]>; +} // Predicates = [HasVendorXCValu], + // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + +let Predicates = [HasVendorXCValu], + hasSideEffects = 0, mayLoad = 0, mayStore = 0, + Constraints = "$rd = $rd_wb" in { + def CV_ADDNR : CVInstAluRRNR<0b1000000, 0b011, "cv.addnr">, + Sched<[]>; + def CV_ADDUNR : CVInstAluRRNR<0b1000001, 0b011, "cv.addunr">, + Sched<[]>; + def CV_ADDRNR : 
CVInstAluRRNR<0b1000010, 0b011, "cv.addrnr">, + Sched<[]>; + def CV_ADDURNR : CVInstAluRRNR<0b1000011, 0b011, "cv.addurnr">, + Sched<[]>; + def CV_SUBNR : CVInstAluRRNR<0b1000100, 0b011, "cv.subnr">, + Sched<[]>; + def CV_SUBUNR : CVInstAluRRNR<0b1000101, 0b011, "cv.subunr">, + Sched<[]>; + def CV_SUBRNR : CVInstAluRRNR<0b1000110, 0b011, "cv.subrnr">, + Sched<[]>; + def CV_SUBURNR : CVInstAluRRNR<0b1000111, 0b011, "cv.suburnr">, + Sched<[]>; + +} // Predicates = [HasVendorXCValu], + // hasSideEffects = 0, mayLoad = 0, mayStore = 0, + // Constraints = "$rd = $rd_wb" + + +class CVInstSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3, + RISCVOpcode opcode, dag outs, + dag ins, string opcodestr, string argstr> + : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31-27} = funct5; + let Inst{26} = F; + let Inst{25} = funct1; + let DecoderNamespace = "XCVsimd"; +} + +class CVInstSIMDRI<bits<5> funct5, bit F, bits<3> funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { + bits<6> imm6; + + let Inst{31-27} = funct5; + let Inst{26} = F; + let Inst{25} = imm6{0}; // funct1 unused + let Inst{24-20} = imm6{5-1}; + let DecoderNamespace = "XCVsimd"; +} + +class CVSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3, + string opcodestr> + : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">; + +class CVSIMDRRWb<bits<5> funct5, bit F, bit funct1, bits<3> funct3, + string opcodestr> + : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd_wb), + (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> { + let Constraints = "$rd = $rd_wb"; +} + +class CVSIMDRI<bits<5> funct5, bit F, bits<3> funct3, string opcodestr> + : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3, (outs GPR:$rd), + (ins GPR:$rs1, simm6:$imm6), opcodestr, "$rd, $rs1, $imm6">; + +class CVSIMDRIWb<bits<5> funct5, bit F, bits<3> funct3, string opcodestr> + : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3, + (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, simm6:$imm6), + opcodestr, "$rd, $rs1, $imm6"> { + let Constraints = "$rd = $rd_wb"; +} + +class CVSIMDRU<bits<5> funct5, bit F, bits<3> funct3, string opcodestr, + Operand immtype = uimm6> + : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3, + (outs GPR:$rd), (ins GPR:$rs1, immtype:$imm6), + opcodestr, "$rd, $rs1, $imm6">; + +class CVSIMDRUWb<bits<5> funct5, bit F, bits<3> funct3, string opcodestr> + : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3, + (outs GPR:$rd_wb), + (ins GPR:$rd, GPR:$rs1, uimm6:$imm6), + opcodestr, "$rd, $rs1, $imm6"> { + let Constraints = "$rd = $rd_wb"; +} + +class CVSIMDR<bits<5> funct5, bit F, bit funct1, bits<3> funct3, + string opcodestr> + : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd), + (ins GPR:$rs1), opcodestr, "$rd, $rs1"> { + let rs2 = 0b00000; +} + +multiclass CVSIMDBinarySigned<bits<5> funct5, bit F, bit funct1, string mnemonic> { + def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">; + def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">; + def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">; + def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">; + def CV_ # NAME # _SCI_H : CVSIMDRI<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">; + def CV_ # NAME # _SCI_B : CVSIMDRI<funct5, F, 0b111, "cv." 
# mnemonic # ".sci.b">; +} + +multiclass CVSIMDBinaryUnsigned<bits<5> funct5, bit F, bit funct1, string mnemonic> { + def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">; + def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">; + def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">; + def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">; + def CV_ # NAME # _SCI_H : CVSIMDRU<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">; + def CV_ # NAME # _SCI_B : CVSIMDRU<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">; +} + +multiclass CVSIMDShift<bits<5> funct5, bit F, bit funct1, string mnemonic> { + def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">; + def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">; + def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">; + def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">; + def CV_ # NAME # _SCI_H : CVSIMDRU<funct5, F, 0b110, "cv." # mnemonic # ".sci.h", uimm4>; + def CV_ # NAME # _SCI_B : CVSIMDRU<funct5, F, 0b111, "cv." # mnemonic # ".sci.b", uimm3>; +} + +multiclass CVSIMDBinarySignedWb<bits<5> funct5, bit F, bit funct1, string mnemonic> { + def CV_ # NAME # _H : CVSIMDRRWb<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">; + def CV_ # NAME # _B : CVSIMDRRWb<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">; + def CV_ # NAME # _SC_H : CVSIMDRRWb<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">; + def CV_ # NAME # _SC_B : CVSIMDRRWb<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">; + def CV_ # NAME # _SCI_H : CVSIMDRIWb<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">; + def CV_ # NAME # _SCI_B : CVSIMDRIWb<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">; +} + +multiclass CVSIMDBinaryUnsignedWb<bits<5> funct5, bit F, bit funct1, string mnemonic> { + def CV_ # NAME # _H : CVSIMDRRWb<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">; + def CV_ # NAME # _B : CVSIMDRRWb<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">; + def CV_ # NAME # _SC_H : CVSIMDRRWb<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">; + def CV_ # NAME # _SC_B : CVSIMDRRWb<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">; + def CV_ # NAME # _SCI_H : CVSIMDRUWb<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">; + def CV_ # NAME # _SCI_B : CVSIMDRUWb<funct5, F, 0b111, "cv." 
# mnemonic # ".sci.b">; +} + + +let Predicates = [HasVendorXCVsimd, IsRV32], + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + defm ADD : CVSIMDBinarySigned<0b00000, 0, 0, "add">; + defm SUB : CVSIMDBinarySigned<0b00001, 0, 0, "sub">; + defm AVG : CVSIMDBinarySigned<0b00010, 0, 0, "avg">; + defm AVGU : CVSIMDBinaryUnsigned<0b00011, 0, 0, "avgu">; + defm MIN : CVSIMDBinarySigned<0b00100, 0, 0, "min">; + defm MINU : CVSIMDBinaryUnsigned<0b00101, 0, 0, "minu">; + defm MAX : CVSIMDBinarySigned<0b00110, 0, 0, "max">; + defm MAXU : CVSIMDBinaryUnsigned<0b00111, 0, 0, "maxu">; + defm SRL : CVSIMDShift<0b01000, 0, 0, "srl">; + defm SRA : CVSIMDShift<0b01001, 0, 0, "sra">; + defm SLL : CVSIMDShift<0b01010, 0, 0, "sll">; + defm OR : CVSIMDBinarySigned<0b01011, 0, 0, "or">; + defm XOR : CVSIMDBinarySigned<0b01100, 0, 0, "xor">; + defm AND : CVSIMDBinarySigned<0b01101, 0, 0, "and">; + + def CV_ABS_H : CVSIMDR<0b01110, 0, 0, 0b000, "cv.abs.h">; + def CV_ABS_B : CVSIMDR<0b01110, 0, 0, 0b001, "cv.abs.b">; + + // 0b01111xx: UNDEF + + defm DOTUP : CVSIMDBinaryUnsigned<0b10000, 0, 0, "dotup">; + defm DOTUSP : CVSIMDBinarySigned<0b10001, 0, 0, "dotusp">; + defm DOTSP : CVSIMDBinarySigned<0b10010, 0, 0, "dotsp">; + defm SDOTUP : CVSIMDBinaryUnsignedWb<0b10011, 0, 0, "sdotup">; + defm SDOTUSP : CVSIMDBinarySignedWb<0b10100, 0, 0, "sdotusp">; + defm SDOTSP : CVSIMDBinarySignedWb<0b10101, 0, 0, "sdotsp">; + + // 0b10110xx: UNDEF + + def CV_EXTRACT_H : CVSIMDRU<0b10111, 0, 0b000, "cv.extract.h">; + def CV_EXTRACT_B : CVSIMDRU<0b10111, 0, 0b001, "cv.extract.b">; + def CV_EXTRACTU_H : CVSIMDRU<0b10111, 0, 0b010, "cv.extractu.h">; + def CV_EXTRACTU_B : CVSIMDRU<0b10111, 0, 0b011, "cv.extractu.b">; + def CV_INSERT_H : CVSIMDRUWb<0b10111, 0, 0b100, "cv.insert.h">; + def CV_INSERT_B : CVSIMDRUWb<0b10111, 0, 0b101, "cv.insert.b">; + + def CV_SHUFFLE_H : CVSIMDRR<0b11000, 0, 0, 0b000, "cv.shuffle.h">; + def CV_SHUFFLE_B : CVSIMDRR<0b11000, 0, 0, 0b001, "cv.shuffle.b">; + def CV_SHUFFLE_SCI_H : CVSIMDRU<0b11000, 0, 0b110, "cv.shuffle.sci.h">; + def CV_SHUFFLEI0_SCI_B : CVSIMDRU<0b11000, 0, 0b111, "cv.shufflei0.sci.b">; + + def CV_SHUFFLEI1_SCI_B : CVSIMDRU<0b11001, 0, 0b111, "cv.shufflei1.sci.b">; + + def CV_SHUFFLEI2_SCI_B : CVSIMDRU<0b11010, 0, 0b111, "cv.shufflei2.sci.b">; + + def CV_SHUFFLEI3_SCI_B : CVSIMDRU<0b11011, 0, 0b111, "cv.shufflei3.sci.b">; + + def CV_SHUFFLE2_H : CVSIMDRRWb<0b11100, 0, 0, 0b000, "cv.shuffle2.h">; + def CV_SHUFFLE2_B : CVSIMDRRWb<0b11100, 0, 0, 0b001, "cv.shuffle2.b">; + + // 0b11101xx: UNDEF + + def CV_PACK : CVSIMDRR<0b11110, 0, 0, 0b000, "cv.pack">; + def CV_PACK_H : CVSIMDRR<0b11110, 0, 1, 0b000, "cv.pack.h">; + + def CV_PACKHI_B : CVSIMDRRWb<0b11111, 0, 1, 0b001, "cv.packhi.b">; + def CV_PACKLO_B : CVSIMDRRWb<0b11111, 0, 0, 0b001, "cv.packlo.b">; + + defm CMPEQ : CVSIMDBinarySigned<0b00000, 1, 0, "cmpeq">; + defm CMPNE : CVSIMDBinarySigned<0b00001, 1, 0, "cmpne">; + defm CMPGT : CVSIMDBinarySigned<0b00010, 1, 0, "cmpgt">; + defm CMPGE : CVSIMDBinarySigned<0b00011, 1, 0, "cmpge">; + defm CMPLT : CVSIMDBinarySigned<0b00100, 1, 0, "cmplt">; + defm CMPLE : CVSIMDBinarySigned<0b00101, 1, 0, "cmple">; + defm CMPGTU : CVSIMDBinaryUnsigned<0b00110, 1, 0, "cmpgtu">; + defm CMPGEU : CVSIMDBinaryUnsigned<0b00111, 1, 0, "cmpgeu">; + defm CMPLTU : CVSIMDBinaryUnsigned<0b01000, 1, 0, "cmpltu">; + defm CMPLEU : CVSIMDBinaryUnsigned<0b01001, 1, 0, "cmpleu">; + + def CV_CPLXMUL_R : CVSIMDRRWb<0b01010, 1, 0, 0b000, "cv.cplxmul.r">; + def CV_CPLXMUL_I : CVSIMDRRWb<0b01010, 1, 1, 0b000, 
"cv.cplxmul.i">; + def CV_CPLXMUL_R_DIV2 : CVSIMDRRWb<0b01010, 1, 0, 0b010, "cv.cplxmul.r.div2">; + def CV_CPLXMUL_I_DIV2 : CVSIMDRRWb<0b01010, 1, 1, 0b010, "cv.cplxmul.i.div2">; + def CV_CPLXMUL_R_DIV4 : CVSIMDRRWb<0b01010, 1, 0, 0b100, "cv.cplxmul.r.div4">; + def CV_CPLXMUL_I_DIV4 : CVSIMDRRWb<0b01010, 1, 1, 0b100, "cv.cplxmul.i.div4">; + def CV_CPLXMUL_R_DIV8 : CVSIMDRRWb<0b01010, 1, 0, 0b110, "cv.cplxmul.r.div8">; + def CV_CPLXMUL_I_DIV8 : CVSIMDRRWb<0b01010, 1, 1, 0b110, "cv.cplxmul.i.div8">; + + def CV_CPLXCONJ : CVSIMDR<0b01011, 1, 0, 0b000, "cv.cplxconj">; + + // 0b01011xx: UNDEF + + def CV_SUBROTMJ : CVSIMDRR<0b01100, 1, 0, 0b000, "cv.subrotmj">; + def CV_SUBROTMJ_DIV2 : CVSIMDRR<0b01100, 1, 0, 0b010, "cv.subrotmj.div2">; + def CV_SUBROTMJ_DIV4 : CVSIMDRR<0b01100, 1, 0, 0b100, "cv.subrotmj.div4">; + def CV_SUBROTMJ_DIV8 : CVSIMDRR<0b01100, 1, 0, 0b110, "cv.subrotmj.div8">; + + def CV_ADD_DIV2 : CVSIMDRR<0b01101, 1, 0, 0b010, "cv.add.div2">; + def CV_ADD_DIV4 : CVSIMDRR<0b01101, 1, 0, 0b100, "cv.add.div4">; + def CV_ADD_DIV8 : CVSIMDRR<0b01101, 1, 0, 0b110, "cv.add.div8">; + + def CV_SUB_DIV2 : CVSIMDRR<0b01110, 1, 0, 0b010, "cv.sub.div2">; + def CV_SUB_DIV4 : CVSIMDRR<0b01110, 1, 0, 0b100, "cv.sub.div4">; + def CV_SUB_DIV8 : CVSIMDRR<0b01110, 1, 0, 0b110, "cv.sub.div8">; +} + +class CVInstImmBranch<bits<3> funct3, dag outs, dag ins, + string opcodestr, string argstr> + : RVInstB<funct3, OPC_CUSTOM_0, outs, ins, opcodestr, argstr> { + bits<5> imm5; + let rs2 = imm5; + let DecoderNamespace = "XCVbi"; +} + +let Predicates = [HasVendorXCVbi, IsRV32], hasSideEffects = 0, mayLoad = 0, + mayStore = 0, isBranch = 1, isTerminator = 1 in { + // Immediate branching operations + def CV_BEQIMM : CVInstImmBranch<0b110, (outs), + (ins GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12), + "cv.beqimm", "$rs1, $imm5, $imm12">, Sched<[]>; + def CV_BNEIMM : CVInstImmBranch<0b111, (outs), + (ins GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12), + "cv.bneimm", "$rs1, $imm5, $imm12">, Sched<[]>; +} + +def CVrrAsmOperand : AsmOperandClass { + let Name = "RegReg"; + let ParserMethod = "parseRegReg"; + let DiagnosticType = "InvalidRegReg"; +} + +def CVrr : Operand<OtherVT> { + let ParserMatchClass = CVrrAsmOperand; + let EncoderMethod = "getRegReg"; + let DecoderMethod = "decodeRegReg"; + let PrintMethod = "printRegReg"; +} + +class CVLoad_ri_inc<bits<3> funct3, string opcodestr> + : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb), (ins GPRMem:$rs1, simm12:$imm12), + opcodestr, "$rd, (${rs1}), ${imm12}"> { + let Constraints = "$rs1_wb = $rs1"; + let DecoderNamespace = "XCVmem"; +} + +class CVLoad_rr_inc<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd, GPR:$rs1_wb), (ins GPRMem:$rs1, GPR:$rs2), + opcodestr, "$rd, (${rs1}), ${rs2}"> { + let Constraints = "$rs1_wb = $rs1"; + let DecoderNamespace = "XCVmem"; +} + +class CVLoad_rr<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), (ins CVrr:$cvrr), + opcodestr, "$rd, $cvrr"> { + bits<5> rd; + bits<10> cvrr; + + let Inst{31-25} = funct7; + let Inst{24-20} = cvrr{4-0}; + let Inst{19-15} = cvrr{9-5}; + let Inst{14-12} = funct3; + let Inst{11-7} = rd; + let DecoderNamespace = "XCVmem"; +} + +let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0, + mayLoad = 1, mayStore = 0, Constraints = "$rs1_wb = $rs1" in { + // Register-Immediate load with post-increment + def CV_LB_ri_inc : CVLoad_ri_inc<0b000, "cv.lb">; + def CV_LBU_ri_inc : 
CVLoad_ri_inc<0b100, "cv.lbu">; + def CV_LH_ri_inc : CVLoad_ri_inc<0b001, "cv.lh">; + def CV_LHU_ri_inc : CVLoad_ri_inc<0b101, "cv.lhu">; + def CV_LW_ri_inc : CVLoad_ri_inc<0b010, "cv.lw">; + + // Register-Register load with post-increment + def CV_LB_rr_inc : CVLoad_rr_inc<0b0000000, 0b011, "cv.lb">; + def CV_LBU_rr_inc : CVLoad_rr_inc<0b0001000, 0b011, "cv.lbu">; + def CV_LH_rr_inc : CVLoad_rr_inc<0b0000001, 0b011, "cv.lh">; + def CV_LHU_rr_inc : CVLoad_rr_inc<0b0001001, 0b011, "cv.lhu">; + def CV_LW_rr_inc : CVLoad_rr_inc<0b0000010, 0b011, "cv.lw">; +} + +let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0, + mayLoad = 1, mayStore = 0 in { + // Register-Register load + def CV_LB_rr : CVLoad_rr<0b0000100, 0b011, "cv.lb">; + def CV_LBU_rr : CVLoad_rr<0b0001100, 0b011, "cv.lbu">; + def CV_LH_rr : CVLoad_rr<0b0000101, 0b011, "cv.lh">; + def CV_LHU_rr : CVLoad_rr<0b0001101, 0b011, "cv.lhu">; + def CV_LW_rr : CVLoad_rr<0b0000110, 0b011, "cv.lw">; +} + +class CVStore_ri_inc<bits<3> funct3, string opcodestr> + : RVInstS<funct3, OPC_CUSTOM_1, (outs GPR:$rs1_wb), + (ins GPR:$rs2, GPR:$rs1, simm12:$imm12), + opcodestr, "$rs2, (${rs1}), ${imm12}"> { + let Constraints = "$rs1_wb = $rs1"; + let DecoderNamespace = "XCVmem"; +} + +class CVStore_rr_inc<bits<3> funct3, bits<7> funct7, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> { + bits<5> rs3; + bits<5> rs2; + bits<5> rs1; + + let Inst{31-25} = funct7; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = funct3; + let Inst{11-7} = rs3; + let Inst{6-0} = OPC_CUSTOM_1.Value; + let DecoderNamespace = "XCVmem"; +} + + +class CVStore_rr<bits<3> funct3, bits<7> funct7, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> { + bits<5> rs2; + bits<10> cvrr; + + let Inst{31-25} = funct7; + let Inst{24-20} = rs2; + let Inst{19-15} = cvrr{9-5}; + let Inst{14-12} = funct3; + let Inst{11-7} = cvrr{4-0}; + let Inst{6-0} = OPC_CUSTOM_1.Value; + let DecoderNamespace = "XCVmem"; +} + +let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0, + mayLoad = 0, mayStore = 1, Constraints = "$rs1_wb = $rs1" in { + // Register-Immediate store with post-increment + def CV_SB_ri_inc : CVStore_ri_inc<0b000, "cv.sb">; + def CV_SH_ri_inc : CVStore_ri_inc<0b001, "cv.sh">; + def CV_SW_ri_inc : CVStore_ri_inc<0b010, "cv.sw">; + + // Register-Register store with post-increment + def CV_SB_rr_inc : CVStore_rr_inc<0b011, 0b0010000, + (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3), + "cv.sb", "$rs2, (${rs1}), ${rs3}">; + def CV_SH_rr_inc : CVStore_rr_inc<0b011, 0b0010001, + (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3), + "cv.sh", "$rs2, (${rs1}), ${rs3}">; + def CV_SW_rr_inc : CVStore_rr_inc<0b011, 0b0010010, + (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3), + "cv.sw", "$rs2, (${rs1}), ${rs3}">; +} + + +let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0, + mayLoad = 0, mayStore = 1 in { + // Register-Register store + def CV_SB_rr : CVStore_rr<0b011, 0b0010100, + (outs), (ins GPR:$rs2, CVrr:$cvrr), + "cv.sb", "$rs2, $cvrr">; + def CV_SH_rr : CVStore_rr<0b011, 0b0010101, + (outs), (ins GPR:$rs2, CVrr:$cvrr), + "cv.sh", "$rs2, $cvrr">; + def CV_SW_rr : CVStore_rr<0b011, 0b0010110, + (outs), (ins GPR:$rs2, CVrr:$cvrr), + "cv.sw", "$rs2, $cvrr">; +} + +let DecoderNamespace = "XCVelw" in +class CVLoad_ri<bits<3> funct3, string opcodestr> + : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd), + (ins 
GPRMem:$rs1, simm12:$imm12), opcodestr, "$rd, ${imm12}(${rs1})">; + +let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0, + mayLoad = 1, mayStore = 0 in { + // Event load + def CV_ELW : CVLoad_ri<0b011, "cv.elw">; +} + +def cv_tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>; +def cv_tuimm5 : TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>; +def cv_uimm10 : ImmLeaf<XLenVT, [{return isUInt<10>(Imm);}]>; + +def CV_LO5: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0x1f, SDLoc(N), + N->getValueType(0)); +}]>; + +def CV_HI5: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() >> 5, SDLoc(N), + N->getValueType(0)); +}]>; + +multiclass PatCoreVBitManip<Intrinsic intr> { + def : PatGprGpr<intr, !cast<RVInst>("CV_" # NAME # "R")>; + def : Pat<(intr GPR:$rs1, cv_uimm10:$imm), + (!cast<RVInst>("CV_" # NAME) + GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>; +} + +let Predicates = [HasVendorXCVbitmanip, IsRV32] in { + defm EXTRACT : PatCoreVBitManip<int_riscv_cv_bitmanip_extract>; + defm EXTRACTU : PatCoreVBitManip<int_riscv_cv_bitmanip_extractu>; + defm BCLR : PatCoreVBitManip<int_riscv_cv_bitmanip_bclr>; + defm BSET : PatCoreVBitManip<int_riscv_cv_bitmanip_bset>; + + def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, GPR:$rs2, GPR:$rd), + (CV_INSERTR GPR:$rd, GPR:$rs1, GPR:$rs2)>; + def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, cv_uimm10:$imm, GPR:$rd), + (CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm), + (CV_LO5 cv_uimm10:$imm))>; + + def : PatGpr<cttz, CV_FF1>; + def : PatGpr<ctlz, CV_FL1>; + def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>; + def : PatGpr<ctpop, CV_CNT>; + + def : PatGprGpr<rotr, CV_ROR>; + + def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts, + cv_tuimm2:$radix), + (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>; + def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 03ed501ba6a3..fa618b437ce7 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -27,30 +27,19 @@ def VCIX_XV : VCIXType<0b0010>; def VCIX_XVV : VCIXType<0b1010>; def VCIX_XVW : VCIXType<0b1111>; -// The payload and timm5 operands are all marked as ImmArg in the IR +// The payload and tsimm5 operands are all marked as ImmArg in the IR // intrinsic and will be target constant, so use TImmLeaf rather than ImmLeaf. 
-def payload1 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<1>; - let DecoderMethod = "decodeUImmOperand<1>"; - let OperandType = "OPERAND_UIMM1"; - let OperandNamespace = "RISCVOp"; +class PayloadOp<int bitsNum> : RISCVOp, TImmLeaf<XLenVT, "return isUInt<" # bitsNum # ">(Imm);"> { + let ParserMatchClass = UImmAsmOperand<bitsNum>; + let DecoderMethod = "decodeUImmOperand<"# bitsNum # ">"; + let OperandType = "OPERAND_UIMM" # bitsNum; } -def payload2 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<2>; - let DecoderMethod = "decodeUImmOperand<2>"; - let OperandType = "OPERAND_UIMM2"; - let OperandNamespace = "RISCVOp"; -} - -def payload5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<5>; - let DecoderMethod = "decodeUImmOperand<5>"; - let OperandType = "OPERAND_UIMM5"; - let OperandNamespace = "RISCVOp"; -} +def payload1 : PayloadOp<1>; +def payload2 : PayloadOp<2>; +def payload5 : PayloadOp<5>; -def timm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> { +def tsimm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<5>; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeSImmOperand<5>"; @@ -111,15 +100,6 @@ class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins, let RVVConstraint = NoConstraint; } -class GetFTypeInfo<int sew> { - ValueType Scalar = !cond(!eq(sew, 16): f16, - !eq(sew, 32): f32, - !eq(sew, 64): f64); - RegisterClass ScalarRegClass = !cond(!eq(sew, 16): FPR16, - !eq(sew, 32): FPR32, - !eq(sew, 64): FPR64); -} - class VCIXInfo<string suffix, VCIXType type, DAGOperand TyRd, DAGOperand TyRs2, DAGOperand TyRs1, bit HaveOutputDst> { string OpcodeStr = !if(HaveOutputDst, "sf.vc.v." 
# suffix, @@ -187,6 +167,20 @@ multiclass CustomSiFiveVCIX<string suffix, VCIXType type, InTyRs1, 1>; } +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +class CustomSiFiveVMACC<bits<6> funct6, RISCVVFormat opv, string opcodestr> + : RVInstVCCustom2<funct6{5-2}, opv.Value, (outs VR:$rd), (ins VR:$rs1, VR:$rs2), + opcodestr, "$rd, $rs1, $rs2"> { + let vm = 1; + let funct6_lo2 = funct6{1-0}; +} +} + +class CustomSiFiveVFNRCLIP<bits<6> funct6, RISCVVFormat opv, string opcodestr> + : VALUVF<funct6, opv, opcodestr> { + let Inst{6-0} = OPC_CUSTOM_2.Value; +} + let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0, hasSideEffects = 1, hasNoSchedulingInfo = 1, DecoderNamespace = "XSfvcp" in { defm X : CustomSiFiveVCIX<"x", VCIX_X, uimm5, uimm5, GPR>, Sched<[]>; @@ -205,6 +199,28 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0, defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>; } +let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod" in { + def VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">; + def VQMACC_2x8x2 : CustomSiFiveVMACC<0b101101, OPMVV, "sf.vqmacc.2x8x2">; + def VQMACCUS_2x8x2 : CustomSiFiveVMACC<0b101110, OPMVV, "sf.vqmaccus.2x8x2">; + def VQMACCSU_2x8x2 : CustomSiFiveVMACC<0b101111, OPMVV, "sf.vqmaccsu.2x8x2">; +} + +let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in { + def VQMACCU_4x8x4 : CustomSiFiveVMACC<0b111100, OPMVV, "sf.vqmaccu.4x8x4">; + def VQMACC_4x8x4 : CustomSiFiveVMACC<0b111101, OPMVV, "sf.vqmacc.4x8x4">; + def VQMACCUS_4x8x4 : CustomSiFiveVMACC<0b111110, OPMVV, "sf.vqmaccus.4x8x4">; + def VQMACCSU_4x8x4 : CustomSiFiveVMACC<0b111111, OPMVV, "sf.vqmaccsu.4x8x4">; +} + +let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in { + def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">; +} + +let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf" in { + def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">; + def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">; +} class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class, bit HasSideEffect = 1> : Pseudo<(outs), @@ -327,15 +343,41 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class, } } +multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type, + string Constraint = ""> { + def "Pseudo" # NAME # "_" # mx + : VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>; +} + +multiclass VPseudoSiFiveVQMACC<string Constraint = ""> { + foreach m = MxListVF8 in + defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>; +} + +multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> { + foreach m = MxListFW in + defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>; +} + +multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> { + foreach m = MxListVF4 in + let hasSideEffects = 0 in + defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<!if(!eq(m.vrclass, VRM8), + VRM2, VR), + m.vrclass, FPR32, m, + Constraint, /*sew*/0, + UsesVXRM=0>; +} + let Predicates = [HasVendorXSfvcp] in { foreach m = MxList in { defm X : VPseudoVC_X<m, GPR>; - defm I : VPseudoVC_X<m, timm5>; + defm I : VPseudoVC_X<m, tsimm5>; defm XV : VPseudoVC_XV<m, GPR>; - defm IV : VPseudoVC_XV<m, timm5>; + defm IV : VPseudoVC_XV<m, tsimm5>; defm VV : VPseudoVC_XV<m, m.vrclass>; defm XVV : VPseudoVC_XVV<m, GPR>; - defm IVV : VPseudoVC_XVV<m, 
timm5>; + defm IVV : VPseudoVC_XVV<m, tsimm5>; defm VVV : VPseudoVC_XVV<m, m.vrclass>; } foreach f = FPList in { @@ -346,7 +388,7 @@ let Predicates = [HasVendorXSfvcp] in { } foreach m = MxListW in { defm XVW : VPseudoVC_XVW<m, GPR>; - defm IVW : VPseudoVC_XVW<m, timm5>; + defm IVW : VPseudoVC_XVW<m, tsimm5>; defm VVW : VPseudoVC_XVW<m, m.vrclass>; } foreach f = FPListW in { @@ -355,6 +397,29 @@ let Predicates = [HasVendorXSfvcp] in { } } +let Predicates = [HasVendorXSfvqmaccdod] in { + defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACC; + defm VQMACC_2x8x2 : VPseudoSiFiveVQMACC; + defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACC; + defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACC; +} + +let Predicates = [HasVendorXSfvqmaccqoq] in { + defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACC; + defm VQMACC_4x8x4 : VPseudoSiFiveVQMACC; + defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACC; + defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC; +} + +let Predicates = [HasVendorXSfvfwmaccqqq] in { + defm VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC; +} + +let Predicates = [HasVendorXSfvfnrclipxfqf] in { + defm VFNRCLIP_XU_F_QF : VPseudoSiFiveVFNRCLIP; + defm VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP; +} + class VPatVC_OP4<string intrinsic_name, string inst, ValueType op2_type, @@ -472,39 +537,126 @@ multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix, wti.RegClass, vti.RegClass, kind, op1_kind>; } +class GetFTypeInfo<int Sew> { + ValueType Scalar = !cond(!eq(Sew, 16) : f16, + !eq(Sew, 32) : f32, + !eq(Sew, 64) : f64); + RegisterClass ScalarRegClass = !cond(!eq(Sew, 16) : FPR16, + !eq(Sew, 32) : FPR32, + !eq(Sew, 64) : FPR64); + + string ScalarSuffix = !cond(!eq(Scalar, f16) : "FPR16", + !eq(Scalar, f32) : "FPR32", + !eq(Scalar, f64) : "FPR64"); +} + +multiclass VPatVMACC<string intrinsic, string instruction, string kind, + list<VTypeInfoToWide> info_pairs, ValueType vec_m1> { + foreach pair = info_pairs in { + defvar VdInfo = pair.Wti; + defvar Vs2Info = pair.Vti; + let Predicates = [HasVInstructions] in + def : VPatTernaryNoMaskWithPolicy<"int_riscv_sf_" # intrinsic, + "Pseudo" # instruction, kind, VdInfo.Vector, + vec_m1, Vs2Info.Vector, + Vs2Info.Log2SEW, Vs2Info.LMul, + VdInfo.RegClass, VR, Vs2Info.RegClass>; + } +} + +defset list<VTypeInfoToWide> VQMACCInfoPairs = { + def : VTypeInfoToWide<VI8M1, VI32M1>; + def : VTypeInfoToWide<VI8M2, VI32M2>; + def : VTypeInfoToWide<VI8M4, VI32M4>; + def : VTypeInfoToWide<VI8M8, VI32M8>; +} + +multiclass VPatVQMACC<string intrinsic, string instruction, string kind> + : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>; + + +multiclass VPatVFWMACC<string intrinsic, string instruction, string kind> + : VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors, + vbfloat16m1_t>; + +defset list<VTypeInfoToWide> VFNRCLIPInfoPairs = { + def : VTypeInfoToWide<VI8MF8, VF32MF2>; + def : VTypeInfoToWide<VI8MF4, VF32M1>; + def : VTypeInfoToWide<VI8MF2, VF32M2>; + def : VTypeInfoToWide<VI8M1, VF32M4>; + def : VTypeInfoToWide<VI8M2, VF32M8>; +} + +multiclass VPatVFNRCLIP<string intrinsic, string instruction> { + foreach pair = VFNRCLIPInfoPairs in { + defvar Vti = pair.Vti; + defvar Wti = pair.Wti; + defm : VPatBinaryRoundingMode<"int_riscv_sf_" # intrinsic, + "Pseudo" # instruction # "_" # Wti.LMul.MX, + Vti.Vector, Wti.Vector, Wti.Scalar, Vti.Mask, + Vti.Log2SEW, Vti.RegClass, + Wti.RegClass, Wti.ScalarRegClass>; + } +} + let Predicates = [HasVendorXSfvcp] in { foreach vti = AllIntegerVectors in { - defm : VPatVC_X<"x", "X", vti, vti.Scalar, vti.ScalarRegClass>; - defm : 
VPatVC_X<"i", "I", vti, XLenVT, timm5>; - defm : VPatVC_XV<"xv", "XV", vti, vti.Scalar, vti.ScalarRegClass>; - defm : VPatVC_XV<"iv", "IV", vti, XLenVT, timm5>; + defm : VPatVC_X<"x", "X", vti, XLenVT, GPR>; + defm : VPatVC_X<"i", "I", vti, XLenVT, tsimm5>; + defm : VPatVC_XV<"xv", "XV", vti, XLenVT, GPR>; + defm : VPatVC_XV<"iv", "IV", vti, XLenVT, tsimm5>; defm : VPatVC_XV<"vv", "VV", vti, vti.Vector, vti.RegClass>; - defm : VPatVC_XVV<"xvv", "XVV", vti, vti, vti.Scalar, vti.ScalarRegClass>; - defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, timm5>; + defm : VPatVC_XVV<"xvv", "XVV", vti, vti, XLenVT, GPR>; + defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, tsimm5>; defm : VPatVC_XVV<"vvv", "VVV", vti, vti, vti.Vector, vti.RegClass>; - if !ge(vti.SEW, 16) then { - defm : VPatVC_XV<"fv", "F" # vti.SEW # "V", vti, - GetFTypeInfo<vti.SEW>.Scalar, - GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>; - defm : VPatVC_XVV<"fvv", "F" # vti.SEW # "VV", vti, vti, - GetFTypeInfo<vti.SEW>.Scalar, - GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>; + + if !ne(vti.SEW, 8) then { + defvar finfo = GetFTypeInfo<vti.SEW>; + defm : VPatVC_XV<"fv", finfo.ScalarSuffix # "V", vti, finfo.Scalar, + finfo.ScalarRegClass, payload1>; + defm : VPatVC_XVV<"fvv", finfo.ScalarSuffix # "VV", vti, vti, finfo.Scalar, + finfo.ScalarRegClass, payload1>; } } foreach VtiToWti = AllWidenableIntVectors in { defvar vti = VtiToWti.Vti; defvar wti = VtiToWti.Wti; - defm : VPatVC_XVV<"xvw", "XVW", wti, vti, vti.Scalar, vti.ScalarRegClass>; - defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, timm5>; + defvar iinfo = GetIntVTypeInfo<vti>.Vti; + defm : VPatVC_XVV<"xvw", "XVW", wti, vti, iinfo.Scalar, iinfo.ScalarRegClass>; + defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, tsimm5>; defm : VPatVC_XVV<"vvw", "VVW", wti, vti, vti.Vector, vti.RegClass>; - if !ge(vti.SEW, 16) then { - defm : VPatVC_XVV<"fvw", "F" # vti.SEW # "VW", wti, vti, - GetFTypeInfo<vti.SEW>.Scalar, - GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>; + + if !ne(vti.SEW, 8) then { + defvar finfo = GetFTypeInfo<vti.SEW>; + defm : VPatVC_XVV<"fvw", finfo.ScalarSuffix # "VW", wti, vti, finfo.Scalar, + finfo.ScalarRegClass, payload1>; } } } +let Predicates = [HasVendorXSfvqmaccdod] in { + defm : VPatVQMACC<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">; + defm : VPatVQMACC<"vqmacc_2x8x2", "VQMACC", "2x8x2">; + defm : VPatVQMACC<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">; + defm : VPatVQMACC<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">; +} + +let Predicates = [HasVendorXSfvqmaccqoq] in { + defm : VPatVQMACC<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">; + defm : VPatVQMACC<"vqmacc_4x8x4", "VQMACC", "4x8x4">; + defm : VPatVQMACC<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">; + defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">; +} + +let Predicates = [HasVendorXSfvfwmaccqqq] in { + defm : VPatVFWMACC<"vfwmacc_4x4x4", "VFWMACC", "4x4x4">; +} + +let Predicates = [HasVendorXSfvfnrclipxfqf] in { + defm : VPatVFNRCLIP<"vfnrclip_xu_f_qf", "VFNRCLIP_XU_F_QF">; + defm : VPatVFNRCLIP<"vfnrclip_x_f_qf", "VFNRCLIP_X_F_QF">; +} + let Predicates = [HasVendorXSfcie] in { let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in { def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td index e840dfddd8d9..1d44b1ad2636 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ 
b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -14,31 +14,36 @@ // T-HEAD specific DAG Nodes. //===----------------------------------------------------------------------===// -def SDT_LoadPair : SDTypeProfile<2, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>; -def SDT_StorePair : SDTypeProfile<0, 4, - [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>; +def SDT_LoadPair : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; +def SDT_StorePair : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; def th_lwud : SDNode<"RISCVISD::TH_LWUD", SDT_LoadPair, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def th_lwd : SDNode<"RISCVISD::TH_LWD", SDT_LoadPair, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def th_ldd : SDNode<"RISCVISD::TH_LDD", SDT_LoadPair, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def th_swd : SDNode<"RISCVISD::TH_SWD", SDT_StorePair, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def th_sdd : SDNode<"RISCVISD::TH_SDD", SDT_StorePair, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// + class THInstVdotVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, string opcodestr, string argstr> : RVInstVV<funct6, opv, outs, ins, opcodestr, argstr> { let Inst{26} = 0; let Inst{6-0} = OPC_CUSTOM_0.Value; - let DecoderNamespace = "THeadV"; + let DecoderNamespace = "XTHeadVdot"; } class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, @@ -46,45 +51,53 @@ class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, : RVInstVX<funct6, opv, outs, ins, opcodestr, argstr> { let Inst{26} = 1; let Inst{6-0} = OPC_CUSTOM_0.Value; - let DecoderNamespace = "THeadV"; + let DecoderNamespace = "XTHeadVdot"; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { // op vd, vs1, vs2, vm (reverse the order of vs1 and vs2) -class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : THInstVdotVV<funct6, opv, (outs VR:$vd), - (ins VR:$vs1, VR:$vs2, VMaskOp:$vm), - opcodestr, "$vd, $vs1, $vs2$vm">; +class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber> + : THInstVdotVV<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm), + opcodestr, "$vd, $vs1, $vs2$vm"> { + let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb", + "$vd = $vd_wb"); +} // op vd, rs1, vs2, vm (reverse the order of rs1 and vs2) -class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : THInstVdotVX<funct6, opv, (outs VR:$vd), - (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), - opcodestr, "$vd, $rs1, $vs2$vm">; +class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber> + : THInstVdotVX<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm), + opcodestr, "$vd, $rs1, $vs2$vm"> { + let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb", + "$vd = $vd_wb"); +} } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 -let 
Predicates = [HasVendorXTHeadBa], DecoderNamespace = "THeadBa", - hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "XTHeadBa", + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class THShiftALU_rri<bits<3> funct3, string opcodestr> - : RVInstR<0, funct3, OPC_CUSTOM_0, (outs GPR:$rd), - (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2), - opcodestr, "$rd, $rs1, $rs2, $uimm2"> { + : RVInstRBase<funct3, OPC_CUSTOM_0, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2), + opcodestr, "$rd, $rs1, $rs2, $uimm2"> { bits<2> uimm2; let Inst{31-27} = 0; let Inst{26-25} = uimm2; } -let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb", - hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "XTHeadBb", + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr> : RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1, uimmlog2xlen:$shamt), opcodestr, "$rd, $rs1, $shamt">; class THBitfieldExtract_rii<bits<3> funct3, string opcodestr> - : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd), - (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb), - opcodestr, "$rd, $rs1, $msb, $lsb"> { + : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPR:$rd), + (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb), + opcodestr, "$rd, $rs1, $msb, $lsb"> { bits<6> msb; bits<6> lsb; let Inst{31-26} = msb; @@ -92,21 +105,18 @@ class THBitfieldExtract_rii<bits<3> funct3, string opcodestr> } class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr> - : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1), - opcodestr, "$rd, $rs1"> { - let rs3 = funct5; - let rs2 = 0; -} + : RVInstIUnary<{funct5, funct2, 0b00000}, 0b001, OPC_CUSTOM_0, + (outs GPR:$rd), (ins GPR:$rs1), opcodestr, "$rd, $rs1">; } -let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb", - hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "XTHeadBb", + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr> : RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1, uimm5:$shamt), opcodestr, "$rd, $rs1, $shamt">; -let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "THeadCondMov", +let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "XTHeadCondMov", hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in class THCondMov_rr<bits<7> funct7, string opcodestr> : RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb), @@ -115,7 +125,7 @@ class THCondMov_rr<bits<7> funct7, string opcodestr> let Constraints = "$rd_wb = $rd"; } -let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "THeadMac", +let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "XTHeadMac", hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in class THMulAccumulate_rr<bits<7> funct7, string opcodestr> : RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb), @@ -124,27 +134,28 @@ class THMulAccumulate_rr<bits<7> funct7, string opcodestr> let Constraints = "$rd_wb = $rd"; } -let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair", - hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "XTHeadMemPair", + hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class THLoadPair<bits<5> funct5, string opcodestr> - : 
RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0, - (outs GPR:$rd, GPR:$rs2), - (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4), - opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> { + : RVInstRBase<0b100, OPC_CUSTOM_0, + (outs GPR:$rd, GPR:$rs2), + (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4), + opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> { bits<2> uimm2; + let Inst{31-27} = funct5; let Inst{26-25} = uimm2; let DecoderMethod = "decodeXTHeadMemPair"; let Constraints = "@earlyclobber $rd,@earlyclobber $rs2"; } -let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair", - hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "XTHeadMemPair", + hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class THStorePair<bits<5> funct5, string opcodestr> - : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0, - (outs), - (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4), - opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> { + : RVInstRBase<0b101, OPC_CUSTOM_0, (outs), + (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4), + opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> { bits<2> uimm2; + let Inst{31-27} = funct5; let Inst{26-25} = uimm2; let DecoderMethod = "decodeXTHeadMemPair"; } @@ -174,44 +185,46 @@ class THCacheInst_void<bits<5> funct5, string opcodestr> let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { class THLoadIndexed<RegisterClass Ty, bits<5> funct5, string opcodestr> - : RVInstR<!shl(funct5, 2), !if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0, - (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2), - opcodestr, "$rd, $rs1, $rs2, $uimm2"> { + : RVInstRBase<!if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0, + (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2), + opcodestr, "$rd, $rs1, $rs2, $uimm2"> { bits<2> uimm2; + let Inst{31-27} = funct5; let Inst{26-25} = uimm2; } class THLoadUpdate<bits<5> funct5, string opcodestr> - : RVInstI<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb), - (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2), - opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> { + : RVInstIBase<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb), + (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2), + opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> { bits<5> simm5; bits<2> uimm2; - let imm12{11-7} = funct5; - let imm12{6-5} = uimm2; - let imm12{4-0} = simm5; + let Inst{31-27} = funct5; + let Inst{26-25} = uimm2; + let Inst{24-20} = simm5; let Constraints = "@earlyclobber $rd, $rs1_wb = $rs1"; } } let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { class THStoreIndexed<RegisterClass StTy, bits<5> funct5, string opcodestr> - : RVInstR<!shl(funct5, 2), !if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0, - (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2), - opcodestr, "$rd, $rs1, $rs2, $uimm2"> { + : RVInstRBase<!if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0, + (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2), + opcodestr, "$rd, $rs1, $rs2, $uimm2"> { bits<2> uimm2; + let Inst{31-27} = funct5; let Inst{26-25} = uimm2; } class THStoreUpdate<bits<5> funct5, string opcodestr> - : RVInstI<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up), - (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2), - opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> { + : RVInstIBase<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up), + (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2), + opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> { bits<5> simm5; bits<2> uimm2; - let imm12{11-7} = funct5; - let imm12{6-5} = uimm2; - let 
imm12{4-0} = simm5; + let Inst{31-27} = funct5; + let Inst{26-25} = uimm2; + let Inst{24-20} = simm5; let Constraints = "$rs1_up = $rs1"; } } @@ -220,22 +233,25 @@ class THStoreUpdate<bits<5> funct5, string opcodestr> // Combination of instruction classes. // Use these multiclasses to define instructions more easily. //===----------------------------------------------------------------------===// + multiclass THVdotVMAQA_VX<string opcodestr, bits<6> funct6> { - def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx">; + let RVVConstraint = WidenV in + def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx", EarlyClobber=1>; } -multiclass THVdotVMAQA<string opcodestr, bits<6> funct6> { - def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv">; - defm "" : THVdotVMAQA_VX<opcodestr, funct6>; +multiclass THVdotVMAQA<string opcodestr, bits<6> funct6> + : THVdotVMAQA_VX<opcodestr, funct6> { + let RVVConstraint = WidenV in + def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv", EarlyClobber=1>; } //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVendorXTHeadBa] in { + +let Predicates = [HasVendorXTHeadBa] in def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">, Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>; -} // Predicates = [HasVendorXTHeadBa] let Predicates = [HasVendorXTHeadBb] in { def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">; @@ -252,20 +268,19 @@ def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">; def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">; } // Predicates = [HasVendorXTHeadBb, IsRV64] -let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in { -let IsSignExtendingOpW = 1 in +let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "XTHeadBs", + IsSignExtendingOpW = 1 in def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">, Sched<[WriteSingleBitImm, ReadSingleBitImm]>; -} // Predicates = [HasVendorXTHeadBs] let Predicates = [HasVendorXTHeadCondMov] in { -def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">; -def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">; +def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">; +def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">; } // Predicates = [HasVendorXTHeadCondMov] let Predicates = [HasVendorXTHeadMac] in { -def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">; -def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">; +def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">; +def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">; } // Predicates = [HasVendorXTHeadMac] let Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1 in { @@ -289,130 +304,130 @@ def TH_LWD : THLoadPair<0b11100, "th.lwd">, } let Predicates = [HasVendorXTHeadMemPair, IsRV64] in { -def TH_LDD : THLoadPair<0b11111, "th.ldd">, - Sched<[WriteLDD, WriteLDD, ReadMemBase]>; -def TH_SDD : THStorePair<0b11111, "th.sdd">, - Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>; +def TH_LDD : THLoadPair<0b11111, "th.ldd">, + Sched<[WriteLDD, WriteLDD, ReadMemBase]>; +def TH_SDD : THStorePair<0b11111, "th.sdd">, + Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>; } -let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "THeadMemIdx" in { +let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "XTHeadMemIdx" in { // T-Head Load/Store + Update instructions. 
def TH_LBIA : THLoadUpdate<0b00011, "th.lbia">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBIB : THLoadUpdate<0b00001, "th.lbib">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBUIA : THLoadUpdate<0b10011, "th.lbuia">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBUIB : THLoadUpdate<0b10001, "th.lbuib">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LHIA : THLoadUpdate<0b00111, "th.lhia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHIB : THLoadUpdate<0b00101, "th.lhib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHUIA : THLoadUpdate<0b10111, "th.lhuia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHUIB : THLoadUpdate<0b10101, "th.lhuib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LWIA : THLoadUpdate<0b01011, "th.lwia">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LWIB : THLoadUpdate<0b01001, "th.lwib">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_SBIA : THStoreUpdate<0b00011, "th.sbia">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SBIB : THStoreUpdate<0b00001, "th.sbib">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SHIA : THStoreUpdate<0b00111, "th.shia">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SHIB : THStoreUpdate<0b00101, "th.shib">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SWIA : THStoreUpdate<0b01011, "th.swia">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SWIB : THStoreUpdate<0b01001, "th.swib">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
// T-Head Load/Store Indexed instructions.
def TH_LRB : THLoadIndexed<GPR, 0b00000, "th.lrb">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LRBU : THLoadIndexed<GPR, 0b10000, "th.lrbu">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LURB : THLoadIndexed<GPR, 0b00010, "th.lurb">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LURBU : THLoadIndexed<GPR, 0b10010, "th.lurbu">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LRH : THLoadIndexed<GPR, 0b00100, "th.lrh">, - Sched<[WriteLDH, ReadMemBase]>; + Sched<[WriteLDH, ReadMemBase]>; def TH_LRHU : THLoadIndexed<GPR, 0b10100, "th.lrhu">, - Sched<[WriteLDH, ReadMemBase]>; + Sched<[WriteLDH, ReadMemBase]>; def TH_LURH : THLoadIndexed<GPR, 0b00110, "th.lurh">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LURHU : THLoadIndexed<GPR, 0b10110, "th.lurhu">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LRW : THLoadIndexed<GPR, 0b01000, "th.lrw">, - Sched<[WriteLDW, ReadMemBase]>; + Sched<[WriteLDW, ReadMemBase]>; def TH_LURW : THLoadIndexed<GPR, 0b01010, "th.lurw">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_SRB : THStoreIndexed<GPR, 0b00000, "th.srb">, - Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; + Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; def TH_SURB : THStoreIndexed<GPR, 0b00010, "th.surb">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_SRH : THStoreIndexed<GPR, 0b00100, "th.srh">, - Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; + Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; def TH_SURH : THStoreIndexed<GPR, 0b00110, "th.surh">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_SRW : THStoreIndexed<GPR, 0b01000, "th.srw">, - Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; def TH_SURW : THStoreIndexed<GPR, 0b01010, "th.surw">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; } -let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "THeadMemIdx" in { +let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "XTHeadMemIdx" in { // T-Head Load/Store + Update instructions. def TH_LWUIA : THLoadUpdate<0b11011, "th.lwuia">, - Sched<[WriteLDH, ReadMemBase]>; + Sched<[WriteLDH, ReadMemBase]>; def TH_LWUIB : THLoadUpdate<0b11001, "th.lwuib">, - Sched<[WriteLDH, ReadMemBase]>; + Sched<[WriteLDH, ReadMemBase]>; def TH_LDIA : THLoadUpdate<0b01111, "th.ldia">, - Sched<[WriteLDW, ReadMemBase]>; + Sched<[WriteLDW, ReadMemBase]>; def TH_LDIB : THLoadUpdate<0b01101, "th.ldib">, - Sched<[WriteLDW, ReadMemBase]>; + Sched<[WriteLDW, ReadMemBase]>; def TH_SDIA : THStoreUpdate<0b01111, "th.sdia">, - Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; def TH_SDIB : THStoreUpdate<0b01101, "th.sdib">, - Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; // T-Head Load/Store Indexed instructions. 
def TH_LRWU : THLoadIndexed<GPR, 0b11000, "th.lrwu">, - Sched<[WriteLDW, ReadMemBase]>; + Sched<[WriteLDW, ReadMemBase]>; def TH_LURWU : THLoadIndexed<GPR, 0b11010, "th.lurwu">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_LRD : THLoadIndexed<GPR, 0b01100, "th.lrd">, - Sched<[WriteLDW, ReadMemBase]>; + Sched<[WriteLDW, ReadMemBase]>; def TH_LURD : THLoadIndexed<GPR, 0b01110, "th.lurd">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; def TH_SRD : THStoreIndexed<GPR, 0b01100, "th.srd">, - Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; + Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; def TH_SURD : THStoreIndexed<GPR, 0b01110, "th.surd">, - Sched<[WriteLDB, ReadMemBase]>; + Sched<[WriteLDB, ReadMemBase]>; } // T-Head Load/Store Indexed instructions for floating point registers. let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF], - DecoderNamespace = "THeadFMemIdx" in { + DecoderNamespace = "XTHeadFMemIdx" in { def TH_FLRW : THLoadIndexed<FPR32, 0b01000, "th.flrw">, Sched<[WriteFLD32, ReadFMemBase]>; def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">, @@ -420,7 +435,7 @@ def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">, } let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD], - DecoderNamespace = "THeadFMemIdx" in { + DecoderNamespace = "XTHeadFMemIdx" in { def TH_FLRD : THLoadIndexed<FPR64, 0b01100, "th.flrd">, Sched<[WriteFLD64, ReadFMemBase]>; def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">, @@ -428,7 +443,7 @@ def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">, } let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF, IsRV64], - DecoderNamespace = "THeadFMemIdx" in { + DecoderNamespace = "XTHeadFMemIdx" in { def TH_FLURW : THLoadIndexed<FPR32, 0b01010, "th.flurw">, Sched<[WriteFLD32, ReadFMemBase]>; def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">, @@ -436,16 +451,14 @@ def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">, } let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD, IsRV64], - DecoderNamespace = "THeadFMemIdx" in { + DecoderNamespace = "XTHeadFMemIdx" in { def TH_FLURD : THLoadIndexed<FPR64, 0b01110, "th.flurd">, Sched<[WriteFLD64, ReadFMemBase]>; def TH_FSURD : THStoreIndexed<FPR64, 0b01110, "th.fsurd">, Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]>; } -let Predicates = [HasVendorXTHeadVdot], - Constraints = "@earlyclobber $vd", - RVVConstraint = WidenV in { +let Predicates = [HasVendorXTHeadVdot] in { defm THVdotVMAQA : THVdotVMAQA<"th.vmaqa", 0b100000>; defm THVdotVMAQAU : THVdotVMAQA<"th.vmaqau", 0b100010>; defm THVdotVMAQASU : THVdotVMAQA<"th.vmaqasu", 0b100100>; @@ -472,8 +485,10 @@ defset list<VTypeInfoToWide> AllQuadWidenableInt8NoVLMulVectors = { // Combination of instruction classes. // Use these multiclasses to define instructions more easily. 
//===----------------------------------------------------------------------===// + multiclass VPseudoVMAQA_VV_VX { foreach m = MxListTHVdot in { + // TODO: Add Sched defm "" : VPseudoTernaryW_VV<m>; defm "" : VPseudoTernaryW_VX<m>; } @@ -481,6 +496,7 @@ multiclass VPseudoVMAQA_VV_VX { multiclass VPseudoVMAQA_VX { foreach m = MxListTHVdot in { + // TODO: Add Sched defm "" : VPseudoTernaryW_VX<m>; } } @@ -518,16 +534,17 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction, //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// + let Predicates = [HasVendorXTHeadBa] in { def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)), (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; // Reuse complex patterns from StdExtZba -def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2), +def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)), (TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>; -def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2), +def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)), (TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>; -def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2), +def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)), (TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>; def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), @@ -687,10 +704,14 @@ defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX; defm PseudoTHVdotVMAQAUS : VPseudoVMAQA_VX; let Predicates = [HasVendorXTHeadVdot] in { -defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa", "PseudoTHVdotVMAQA", AllQuadWidenableInt8NoVLMulVectors>; -defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU", AllQuadWidenableInt8NoVLMulVectors>; -defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>; -defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>; +defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa", "PseudoTHVdotVMAQA", + AllQuadWidenableInt8NoVLMulVectors>; +defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU", + AllQuadWidenableInt8NoVLMulVectors>; +defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU", + AllQuadWidenableInt8NoVLMulVectors>; +defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS", + AllQuadWidenableInt8NoVLMulVectors>; } def uimm2_3_XFORM : SDNodeXForm<imm, [{ @@ -725,7 +746,7 @@ let Predicates = [HasVendorXTHeadMemPair] in { (TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>; } -let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "THeadCmo" in { +let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "XTHeadCmo" in { def TH_DCACHE_CSW : THCacheInst_r<0b00001, "th.dcache.csw">; def TH_DCACHE_ISW : THCacheInst_r<0b00010, "th.dcache.isw">; def TH_DCACHE_CISW : THCacheInst_r<0b00011, "th.dcache.cisw">; @@ -750,7 +771,7 @@ def TH_L2CACHE_IALL : THCacheInst_void<0b10110, "th.l2cache.iall">; def TH_L2CACHE_CIALL : THCacheInst_void<0b10111, "th.l2cache.ciall">; } -let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "THeadSync" in { +let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "XTHeadSync" in { def TH_SFENCE_VMAS : THCacheInst_rr<0b0000010, "th.sfence.vmas">; def TH_SYNC : THCacheInst_void<0b11000, "th.sync">; def TH_SYNC_S : THCacheInst_void<0b11001, "th.sync.s">; @@ -865,9 +886,7 @@ defm : StoreUpdatePat<post_truncsti8, TH_SBIA>; defm : 
StoreUpdatePat<pre_truncsti8, TH_SBIB>; defm : StoreUpdatePat<post_truncsti16, TH_SHIA>; defm : StoreUpdatePat<pre_truncsti16, TH_SHIB>; -} -let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in { defm : StoreUpdatePat<post_store, TH_SWIA, i32>; defm : StoreUpdatePat<pre_store, TH_SWIB, i32>; } @@ -878,3 +897,15 @@ defm : StoreUpdatePat<pre_truncsti32, TH_SWIB, i64>; defm : StoreUpdatePat<post_store, TH_SDIA, i64>; defm : StoreUpdatePat<pre_store, TH_SDIB, i64>; } + +//===----------------------------------------------------------------------===// +// Experimental RV64 i32 legalization patterns. +//===----------------------------------------------------------------------===// + +let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in { +defm : StoreUpdatePat<post_truncsti8, TH_SBIA, i32>; +defm : StoreUpdatePat<pre_truncsti8, TH_SBIB, i32>; +defm : StoreUpdatePat<post_truncsti16, TH_SHIA, i32>; +defm : StoreUpdatePat<pre_truncsti16, TH_SHIB, i32>; +} + diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td index f6b0feaf7628..d0a798ef475c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// let Predicates = [IsRV64, HasVendorXVentanaCondOps], hasSideEffects = 0, - mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, DecoderNamespace = "Ventana" in + mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, DecoderNamespace = "XVentana" in class VTMaskedMove<bits<3> funct3, string opcodestr> : RVInstR<0b0000000, funct3, OPC_CUSTOM_3, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), opcodestr, @@ -29,8 +29,17 @@ def VT_MASKCN : VTMaskedMove<0b111, "vt.maskcn">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; let Predicates = [IsRV64, HasVendorXVentanaCondOps] in { -def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)), +def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, GPR:$rc)), (VT_MASKC GPR:$rs1, GPR:$rc)>; -def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)), +def : Pat<(i64 (riscv_czero_nez GPR:$rs1, GPR:$rc)), (VT_MASKCN GPR:$rs1, GPR:$rc)>; + +def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, (riscv_setne (i64 GPR:$rc)))), + (VT_MASKC GPR:$rs1, GPR:$rc)>; +def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, (riscv_seteq (i64 GPR:$rc)))), + (VT_MASKCN GPR:$rs1, GPR:$rc)>; +def : Pat<(i64 (riscv_czero_nez GPR:$rs1, (riscv_setne (i64 GPR:$rc)))), + (VT_MASKCN GPR:$rs1, GPR:$rc)>; +def : Pat<(i64 (riscv_czero_nez GPR:$rs1, (riscv_seteq (i64 GPR:$rc)))), + (VT_MASKC GPR:$rs1, GPR:$rc)>; } // Predicates = [IsRV64, HasVendorXVentanaCondOps] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index caeedfa652e4..8055473a37c3 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -13,9 +13,6 @@ // Zbc - 1.0 // Zbs - 1.0 // -// The experimental extensions appeared in an earlier draft of the Bitmanip -// extensions. They are not ratified and subject to change. 
-// // This file also describes RISC-V instructions from the Zbk* extensions in // Cryptography Extensions Volume I: Scalar & Entropy Source Instructions, // versions: @@ -48,7 +45,7 @@ def UImmLog2XLenHalfAsmOperand : AsmOperandClass { let DiagnosticType = "InvalidUImmLog2XLenHalf"; } -def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{ +def shfl_uimm : RISCVOp, ImmLeaf<XLenVT, [{ if (Subtarget->is64Bit()) return isUInt<5>(Imm); return isUInt<4>(Imm); @@ -56,7 +53,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{ let ParserMatchClass = UImmLog2XLenHalfAsmOperand; let DecoderMethod = "decodeUImmOperand<5>"; let OperandType = "OPERAND_UIMM_SHFL"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -183,7 +179,7 @@ def C5LeftShift : PatLeaf<(imm), [{ def C9LeftShift : PatLeaf<(imm), [{ uint64_t C = N->getZExtValue(); - return C > 5 && (C >> llvm::countr_zero(C)) == 9; + return C > 9 && (C >> llvm::countr_zero(C)) == 9; }]>; // Constant of the form (3 << C) where C is less than 32. @@ -234,18 +230,38 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{ N->getValueType(0)); }]>; -// Pattern to exclude simm12 immediates from matching. -def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ - auto *C = dyn_cast<ConstantSDNode>(N); +// Pattern to exclude simm12 immediates from matching, namely `non_imm12`. +// GISel currently doesn't support PatFrag for leaf nodes, so `non_imm12` +// cannot be implemented in that way. To reuse patterns between the two +// ISels, we instead create PatFrag on operators that use `non_imm12`. +class binop_with_non_imm12<SDPatternOperator binop> + : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{ + auto *C = dyn_cast<ConstantSDNode>(Operands[1]); return !C || !isInt<12>(C->getSExtValue()); -}]>; +}]> { + let PredicateCodeUsesOperands = 1; + let GISelPredicateCode = [{ + const MachineOperand &ImmOp = *Operands[1]; + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (ImmOp.isReg() && ImmOp.getReg()) + if (auto Val = getIConstantVRegValWithLookThrough(ImmOp.getReg(), MRI)) { + // We do NOT want immediates that fit in 12 bits. + return !isInt<12>(Val->Value.getSExtValue()); + } + + return true; + }]; +} +def add_non_imm12 : binop_with_non_imm12<add>; +def or_is_add_non_imm12 : binop_with_non_imm12<or_is_add>; -def Shifted32OnesMask : PatLeaf<(imm), [{ - uint64_t Imm = N->getZExtValue(); - if (!isShiftedMask_64(Imm)) +def Shifted32OnesMask : IntImmLeaf<XLenVT, [{ + if (!Imm.isShiftedMask()) return false; - unsigned TrailingZeros = llvm::countr_zero(Imm); + unsigned TrailingZeros = Imm.countr_zero(); return TrailingZeros > 0 && TrailingZeros < 32 && Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros; }], TrailingZeros>; @@ -262,16 +278,11 @@ def sh3add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<3>", [], [], 6>; // Instruction class templates //===----------------------------------------------------------------------===// -// Some of these templates should be moved to RISCVInstrFormats.td once the B -// extension has been ratified. 
- let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVBUnary<bits<7> funct7, bits<5> funct5, bits<3> funct3, +class RVBUnary<bits<12> imm12, bits<3> funct3, RISCVOpcode opcode, string opcodestr> - : RVInstR<funct7, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1), - opcodestr, "$rd, $rs1"> { - let rs2 = funct5; -} + : RVInstIUnary<imm12, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1), + opcodestr, "$rd, $rs1">; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVBShift_ri<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode, @@ -375,27 +386,27 @@ def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">, } // Predicates = [HasStdExtZbkx] let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in { -def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">, +def CLZ : RVBUnary<0b011000000000, 0b001, OPC_OP_IMM, "clz">, Sched<[WriteCLZ, ReadCLZ]>; -def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM, "ctz">, +def CTZ : RVBUnary<0b011000000001, 0b001, OPC_OP_IMM, "ctz">, Sched<[WriteCTZ, ReadCTZ]>; -def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">, +def CPOP : RVBUnary<0b011000000010, 0b001, OPC_OP_IMM, "cpop">, Sched<[WriteCPOP, ReadCPOP]>; } // Predicates = [HasStdExtZbb] let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in { -def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">, +def CLZW : RVBUnary<0b011000000000, 0b001, OPC_OP_IMM_32, "clzw">, Sched<[WriteCLZ32, ReadCLZ32]>; -def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">, +def CTZW : RVBUnary<0b011000000001, 0b001, OPC_OP_IMM_32, "ctzw">, Sched<[WriteCTZ32, ReadCTZ32]>; -def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">, +def CPOPW : RVBUnary<0b011000000010, 0b001, OPC_OP_IMM_32, "cpopw">, Sched<[WriteCPOP32, ReadCPOP32]>; } // Predicates = [HasStdExtZbb, IsRV64] let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in { -def SEXT_B : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">, +def SEXT_B : RVBUnary<0b011000000100, 0b001, OPC_OP_IMM, "sext.b">, Sched<[WriteIALU, ReadIALU]>; -def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">, +def SEXT_H : RVBUnary<0b011000000101, 0b001, OPC_OP_IMM, "sext.h">, Sched<[WriteIALU, ReadIALU]>; } // Predicates = [HasStdExtZbb] @@ -435,38 +446,38 @@ def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[WritePACK32, ReadPACK32, ReadPACK32]>; let Predicates = [HasStdExtZbb, IsRV32] in { -def ZEXT_H_RV32 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP, "zext.h">, +def ZEXT_H_RV32 : RVBUnary<0b000010000000, 0b100, OPC_OP, "zext.h">, Sched<[WriteIALU, ReadIALU]>; } // Predicates = [HasStdExtZbb, IsRV32] let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in { -def ZEXT_H_RV64 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP_32, "zext.h">, +def ZEXT_H_RV64 : RVBUnary<0b000010000000, 0b100, OPC_OP_32, "zext.h">, Sched<[WriteIALU, ReadIALU]>; } // Predicates = [HasStdExtZbb, IsRV64] let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in { -def REV8_RV32 : RVBUnary<0b0110100, 0b11000, 0b101, OPC_OP_IMM, "rev8">, +def REV8_RV32 : RVBUnary<0b011010011000, 0b101, OPC_OP_IMM, "rev8">, Sched<[WriteREV8, ReadREV8]>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV32] let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { -def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">, +def REV8_RV64 : RVBUnary<0b011010111000, 0b101, OPC_OP_IMM, "rev8">, Sched<[WriteREV8, ReadREV8]>; } // Predicates = [HasStdExtZbbOrZbkb, IsRV64] let Predicates = [HasStdExtZbb] in { -def ORC_B 
: RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">, +def ORC_B : RVBUnary<0b001010000111, 0b101, OPC_OP_IMM, "orc.b">, Sched<[WriteORCB, ReadORCB]>; } // Predicates = [HasStdExtZbb] let Predicates = [HasStdExtZbkb] in -def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">, +def BREV8 : RVBUnary<0b011010000111, 0b101, OPC_OP_IMM, "brev8">, Sched<[WriteBREV8, ReadBREV8]>; let Predicates = [HasStdExtZbkb, IsRV32] in { -def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">, +def ZIP_RV32 : RVBUnary<0b000010001111, 0b001, OPC_OP_IMM, "zip">, Sched<[WriteZIP, ReadZIP]>; -def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">, +def UNZIP_RV32 : RVBUnary<0b000010001111, 0b101, OPC_OP_IMM, "unzip">, Sched<[WriteZIP, ReadZIP]>; } // Predicates = [HasStdExtZbkb, IsRV32] @@ -543,6 +554,8 @@ def : Pat<(XLenVT (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)), def : Pat<(XLenVT (shiftop<shl> 1, (XLenVT GPR:$rs2))), (BSET (XLenVT X0), GPR:$rs2)>; +def : Pat<(XLenVT (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))), + (ADDI (BSET (XLenVT X0), GPR:$rs2), -1)>; def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)), (BCLRI GPR:$rs1, BCLRMask:$mask)>; @@ -632,6 +645,10 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)), def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)), (zexti8 (XLenVT GPR:$rs1))), 0xFFFF), (PACKH GPR:$rs1, GPR:$rs2)>; + +def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)), + (zexti8 (XLenVT GPR:$rs1))), + (PACKH GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbkb] let Predicates = [HasStdExtZbkb, IsRV32] in @@ -656,20 +673,17 @@ let Predicates = [HasStdExtZbb, IsRV64] in def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>; let Predicates = [HasStdExtZba] in { -def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), non_imm12:$rs2), - (SH1ADD GPR:$rs1, GPR:$rs2)>; -def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2), - (SH2ADD GPR:$rs1, GPR:$rs2)>; -def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2), - (SH3ADD GPR:$rs1, GPR:$rs2)>; -// More complex cases use a ComplexPattern. -def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2), - (SH1ADD sh1add_op:$rs1, GPR:$rs2)>; -def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2), - (SH2ADD sh2add_op:$rs1, GPR:$rs2)>; -def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2), - (SH3ADD sh3add_op:$rs1, GPR:$rs2)>; +foreach i = {1,2,3} in { + defvar shxadd = !cast<Instruction>("SH"#i#"ADD"); + def : Pat<(XLenVT (add_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)), + (shxadd GPR:$rs1, GPR:$rs2)>; + + defvar pat = !cast<ComplexPattern>("sh"#i#"add_op"); + // More complex cases use a ComplexPattern. 
+ def : Pat<(XLenVT (add_non_imm12 pat:$rs1, GPR:$rs2)), + (shxadd pat:$rs1, GPR:$rs2)>; +} def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), (SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>; @@ -739,46 +753,46 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)), def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)), (SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask), Shifted32OnesMask:$mask)>; - -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)), (ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)), - (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)), - (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)), - (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; +def : Pat<(i64 (or_is_add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)), + (ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)), +foreach i = {1,2,3} in { + defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW"); + def : Pat<(i64 (add_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))), + (shxadd_uw GPR:$rs1, GPR:$rs2)>; +} + +def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))), (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))), (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))), (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; // More complex cases use a ComplexPattern. -def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)), - (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)), - (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)), - (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>; - -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)), +foreach i = {1,2,3} in { + defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op"); + def : Pat<(i64 (add_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))), + (!cast<Instruction>("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>; +} + +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))), (SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))), (SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
-def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))), (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))), (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)), @@ -804,3 +818,99 @@ let Predicates = [HasStdExtZbkx] in { def : PatGprGpr<int_riscv_xperm4, XPERM4>; def : PatGprGpr<int_riscv_xperm8, XPERM8>; } // Predicates = [HasStdExtZbkx] + +//===----------------------------------------------------------------------===// +// Experimental RV64 i32 legalization patterns. +//===----------------------------------------------------------------------===// + +def BCLRMaski32 : ImmLeaf<i32, [{ + return !isInt<12>(Imm) && isPowerOf2_32(~Imm); +}]>; +def SingleBitSetMaski32 : ImmLeaf<i32, [{ + return !isInt<12>(Imm) && isPowerOf2_32(Imm); +}]>; + +let Predicates = [HasStdExtZbb, IsRV64] in { +def : PatGpr<ctlz, CLZW, i32>; +def : PatGpr<cttz, CTZW, i32>; +def : PatGpr<ctpop, CPOPW, i32>; + +def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>; +def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>; +} // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { +def : Pat<(i32 (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>; + +def : PatGprGpr<shiftopw<rotl>, ROLW, i32, i64>; +def : PatGprGpr<shiftopw<rotr>, RORW, i32, i64>; +def : PatGprImm<rotr, RORIW, uimm5, i32>; + +def : Pat<(i32 (rotl GPR:$rs1, uimm5:$rs2)), + (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; +} // Predicates = [HasStdExtZbbOrZbkb, IsRV64] + +let Predicates = [HasStdExtZbkb, IsRV64] in { +def : Pat<(or (and (shl GPR:$rs2, (i64 8)), 0xFFFF), + (zexti8i32 (i32 GPR:$rs1))), + (PACKH GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (shl (zexti8i32 (i32 GPR:$rs2)), (i64 8)), + (zexti8i32 (i32 GPR:$rs1))), + (PACKH GPR:$rs1, GPR:$rs2)>; +def : Pat<(and (anyext (or (shl GPR:$rs2, (XLenVT 8)), + (zexti8i32 (i32 GPR:$rs1)))), 0xFFFF), + (PACKH GPR:$rs1, GPR:$rs2)>; + +def : Pat<(i32 (or (shl GPR:$rs2, (i64 16)), (zexti16i32 (i32 GPR:$rs1)))), + (PACKW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbkb, IsRV64] + +let Predicates = [HasStdExtZba, IsRV64] in { +def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt), + (SLLI_UW GPR:$rs1, uimm5:$shamt)>; + +def : Pat<(i64 (add_non_imm12 (zext GPR:$rs1), GPR:$rs2)), + (ADD_UW GPR:$rs1, GPR:$rs2)>; +def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>; + +def : Pat<(i64 (or_is_add_non_imm12 (zext GPR:$rs1), GPR:$rs2)), + (ADD_UW GPR:$rs1, GPR:$rs2)>; + +foreach i = {1,2,3} in { + defvar shxadd = !cast<Instruction>("SH"#i#"ADD"); + def : Pat<(i32 (add_non_imm12 (shl GPR:$rs1, (i64 i)), GPR:$rs2)), + (shxadd GPR:$rs1, GPR:$rs2)>; +} +} + +let Predicates = [HasStdExtZbs, IsRV64] in { +def : Pat<(i32 (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)), + (BCLR GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)), + (BCLR GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (or (shiftop<shl> 1, 
(XLenVT GPR:$rs2)), GPR:$rs1)), + (BSET GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (xor (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)), + (BINV GPR:$rs1, GPR:$rs2)>; +def : Pat<(i32 (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)), + (BEXT GPR:$rs1, GPR:$rs2)>; +def : Pat<(i64 (and (anyext (i32 (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)))), 1)), + (BEXT GPR:$rs1, GPR:$rs2)>; + +def : Pat<(i32 (shiftop<shl> 1, (XLenVT GPR:$rs2))), + (BSET (XLenVT X0), GPR:$rs2)>; +def : Pat<(i32 (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))), + (ADDI (BSET (XLenVT X0), GPR:$rs2), -1)>; + +def : Pat<(i32 (and (srl GPR:$rs1, uimm5:$shamt), (i32 1))), + (BEXTI GPR:$rs1, uimm5:$shamt)>; + +def : Pat<(i32 (and GPR:$rs1, BCLRMaski32:$mask)), + (BCLRI GPR:$rs1, (i64 (BCLRXForm $mask)))>; +def : Pat<(i32 (or GPR:$rs1, SingleBitSetMaski32:$mask)), + (BSETI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>; +def : Pat<(i32 (xor GPR:$rs1, SingleBitSetMaski32:$mask)), + (BINVI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>; +} // Predicates = [HasStdExtZbs, IsRV64] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index 6687343086da..a78f36244468 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -8,8 +8,6 @@ /// /// This file describes the RISC-V instructions from the 'Zc*' compressed /// instruction extensions, version 1.0.3. -/// This version is still experimental as the 'Zc*' extensions haven't been -/// ratified yet. /// //===----------------------------------------------------------------------===// @@ -17,13 +15,12 @@ // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// -def uimm2_lsb0 : Operand<XLenVT>, +def uimm2_lsb0 : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedUInt<1, 1>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<2, "Lsb0">; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<2>"; let OperandType = "OPERAND_UIMM2_LSB0"; - let OperandNamespace = "RISCVOp"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -32,11 +29,10 @@ def uimm2_lsb0 : Operand<XLenVT>, }]; } -def uimm8ge32 : Operand<XLenVT> { +def uimm8ge32 : RISCVOp { let ParserMatchClass = UImmAsmOperand<8, "GE32">; let DecoderMethod = "decodeUImmOperand<8>"; let OperandType = "OPERAND_UIMM8_GE32"; - let OperandNamespace = "RISCVOp"; } def RlistAsmOperand : AsmOperandClass { @@ -131,7 +127,7 @@ class RVZcArith_r<bits<5> funct5, string OpcodeStr> : class RVInstZcCPPP<bits<5> funct5, string opcodestr> : RVInst16<(outs), (ins rlist:$rlist, spimm:$spimm), - opcodestr, "{$rlist}, $spimm", [], InstFormatOther> { + opcodestr, "$rlist, $spimm", [], InstFormatOther> { bits<4> rlist; bits<16> spimm; @@ -285,9 +281,9 @@ def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm), //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZcb] in { -def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0)>; -def : InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0)>; -def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0)>; -def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0)>; -def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0)>; +def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0), 0>; +def : 
InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0), 0>; +def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0), 0>; +def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0), 0>; +def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0), 0>; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index f36882f9a968..6f88ff7f7ac1 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// // // This file describes the RISC-V instructions from the standard 'Zfa' -// additional floating-point extension, version 0.1. -// This version is still experimental as the 'Zfa' extension hasn't been -// ratified yet. +// additional floating-point extension, version 1.0. // //===----------------------------------------------------------------------===// @@ -54,18 +52,14 @@ class FPBinaryOp_rr<bits<7> funct7, bits<3> funct3, DAGOperand rdty, (ins rsty:$rs1, rsty:$rs2), opcodestr, "$rd, $rs1, $rs2">; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPUnaryOp_imm<bits<7> funct7, bits<5> rs2val, bits<3> funct3, - dag outs, dag ins, string opcodestr, string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { +class FPFLI_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3, + DAGOperand rdty, string opcodestr> + : RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd), + (ins loadfpimm:$imm), opcodestr, "$rd, $imm"> { bits<5> imm; - bits<5> rd; - - let Inst{31-25} = funct7; - let Inst{24-20} = rs2val; - let Inst{19-15} = imm; - let Inst{14-12} = funct3; - let Inst{11-7} = rd; - let Inst{6-0} = OPC_OP_FP.Value; + + let rs2 = rs2val; + let rs1 = imm; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, @@ -84,8 +78,7 @@ class FPUnaryOp_r_rtz<bits<7> funct7, bits<5> rs2val, DAGOperand rdty, let Predicates = [HasStdExtZfa] in { let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def FLI_S : FPUnaryOp_imm<0b1111000, 0b00001, 0b000, (outs FPR32:$rd), - (ins loadfpimm:$imm), "fli.s", "$rd, $imm">, +def FLI_S : FPFLI_r<0b1111000, 0b00001, 0b000, FPR32, "fli.s">, Sched<[WriteFLI32]>; let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in { @@ -106,8 +99,7 @@ def FLEQ_S : FPCmp_rr<0b1010000, 0b100, "fleq.s", FPR32>; let Predicates = [HasStdExtZfa, HasStdExtD] in { let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def FLI_D : FPUnaryOp_imm<0b1111001, 0b00001, 0b000, (outs FPR64:$rd), - (ins loadfpimm:$imm), "fli.d", "$rd, $imm">, +def FLI_D : FPFLI_r<0b1111001, 0b00001, 0b000, FPR64, "fli.d">, Sched<[WriteFLI64]>; let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in { @@ -120,6 +112,7 @@ def FROUND_D : FPUnaryOp_r_frm<0b0100001, 0b00100, FPR64, FPR64, "fround.d">, def FROUNDNX_D : FPUnaryOp_r_frm<0b0100001, 0b00101, FPR64, FPR64, "froundnx.d">, Sched<[WriteFRoundF64, ReadFRoundF64]>; +let IsSignExtendingOpW = 1 in def FCVTMOD_W_D : FPUnaryOp_r_rtz<0b1100001, 0b01000, GPR, FPR64, "fcvtmod.w.d">, Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; @@ -146,8 +139,7 @@ def FMV_X_W_FPR64 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR64, let Predicates = [HasStdExtZfa, HasStdExtZfhOrZvfh] in let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def FLI_H : FPUnaryOp_imm<0b1111010, 0b00001, 0b000, (outs FPR16:$rd), - (ins loadfpimm:$imm), "fli.h", 
"$rd, $imm">, +def FLI_H : FPFLI_r<0b1111010, 0b00001, 0b000, FPR16, "fli.h">, Sched<[WriteFLI16]>; let Predicates = [HasStdExtZfa, HasStdExtZfh] in { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td index 35f9f03f61a1..d819033eea68 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td @@ -16,12 +16,12 @@ //===----------------------------------------------------------------------===// // RISC-V specific DAG Nodes. //===----------------------------------------------------------------------===// - + def SDT_RISCVFP_ROUND_BF16 : SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, f32>]>; def SDT_RISCVFP_EXTEND_BF16 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, bf16>]>; - + def riscv_fpround_bf16 : SDNode<"RISCVISD::FP_ROUND_BF16", SDT_RISCVFP_ROUND_BF16>; def riscv_fpextend_bf16 @@ -41,7 +41,7 @@ def FCVT_S_BF16 : FPUnaryOp_r_frm<0b0100000, 0b00110, FPR32, FPR16, "fcvt.s.bf16 //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// - + let Predicates = [HasStdExtZfbfmin] in { /// Loads def : LdPat<load, FLH, bf16>; @@ -51,9 +51,9 @@ def : StPat<store, FSH, FPR16, bf16>; /// Float conversion operations // f32 -> bf16, bf16 -> f32 -def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)), +def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)), (FCVT_BF16_S FPR32:$rs1, FRM_DYN)>; -def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)), +def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)), (FCVT_S_BF16 FPR16:$rs1, FRM_DYN)>; // Moves (no conversion) @@ -61,3 +61,25 @@ def : Pat<(bf16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>; def : Pat<(riscv_fmv_x_anyexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>; def : Pat<(riscv_fmv_x_signexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>; } // Predicates = [HasStdExtZfbfmin] + +let Predicates = [HasStdExtZfbfmin] in { +// bf16->[u]int. Round-to-zero must be used for the f32->int step, the +// rounding mode has no effect for bf16->f32. +def : Pat<(i32 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>; +def : Pat<(i32 (any_fp_to_uint (bf16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>; + +// [u]int->bf16. Match GCC and default to using dynamic rounding mode. +def : Pat<(bf16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>; +def : Pat<(bf16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>; +} + +let Predicates = [HasStdExtZfbfmin, IsRV64] in { +// bf16->[u]int64. Round-to-zero must be used for the f32->int step, the +// rounding mode has no effect for bf16->f32. +def : Pat<(i64 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>; +def : Pat<(i64 (any_fp_to_uint (bf16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>; + +// [u]int->bf16. Match GCC and default to using dynamic rounding mode. 
+def : Pat<(bf16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>; +def : Pat<(bf16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_LU $rs1, FRM_DYN), FRM_DYN)>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 810775a78241..055f13032788 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -85,7 +85,7 @@ def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; } // Predicates = [HasHalfFPLoadStoreMove] foreach Ext = ZfhExts in { - let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in { + let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in { defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", Ext>; defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", Ext>; defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", Ext>; @@ -142,8 +142,8 @@ foreach Ext = ZfhminExts in { Ext.F32Ty, "fcvt.h.s">, Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>; - defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, Ext, Ext.F32Ty, - Ext.PrimaryTy, "fcvt.s.h">, + defm FCVT_S_H : FPUnaryOp_r_frmlegacy_m<0b0100000, 0b00010,Ext, Ext.F32Ty, + Ext.PrimaryTy, "fcvt.s.h">, Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>; } // foreach Ext = ZfhminExts @@ -191,8 +191,8 @@ foreach Ext = ZfhminDExts in { Ext.F64Ty, "fcvt.h.d">, Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>; - defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, Ext, Ext.F64Ty, - Ext.F16Ty, "fcvt.d.h">, + defm FCVT_D_H : FPUnaryOp_r_frmlegacy_m<0b0100001, 0b00010, Ext, Ext.F64Ty, + Ext.F16Ty, "fcvt.d.h">, Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>; } // foreach Ext = ZfhminDExts @@ -248,7 +248,6 @@ def PseudoQuietFLT_H_INX : PseudoQuietFCMP<FPR16INX>; // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZfh] in { /// Float conversion operations @@ -257,17 +256,20 @@ let Predicates = [HasStdExtZfh] in { /// Float arithmetic operations -def : PatFprFprDynFrm<any_fadd, FADD_H, FPR16, f16>; -def : PatFprFprDynFrm<any_fsub, FSUB_H, FPR16, f16>; -def : PatFprFprDynFrm<any_fmul, FMUL_H, FPR16, f16>; -def : PatFprFprDynFrm<any_fdiv, FDIV_H, FPR16, f16>; +foreach Ext = ZfhExts in { + defm : PatFprFprDynFrm_m<any_fadd, FADD_H, Ext>; + defm : PatFprFprDynFrm_m<any_fsub, FSUB_H, Ext>; + defm : PatFprFprDynFrm_m<any_fmul, FMUL_H, Ext>; + defm : PatFprFprDynFrm_m<any_fdiv, FDIV_H, Ext>; +} +let Predicates = [HasStdExtZfh] in { def : Pat<(f16 (any_fsqrt FPR16:$rs1)), (FSQRT_H FPR16:$rs1, FRM_DYN)>; def : Pat<(f16 (fneg FPR16:$rs1)), (FSGNJN_H $rs1, $rs1)>; def : Pat<(f16 (fabs FPR16:$rs1)), (FSGNJX_H $rs1, $rs1)>; -def : Pat<(riscv_fpclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>; +def : Pat<(riscv_fclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_H, FPR16, f16>; def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), (FSGNJN_H $rs1, $rs2)>; @@ -304,17 +306,12 @@ let Predicates = [HasStdExtZhinx] in { /// Float arithmetic operations -def : PatFprFprDynFrm<any_fadd, FADD_H_INX, FPR16INX, f16>; -def : PatFprFprDynFrm<any_fsub, FSUB_H_INX, FPR16INX, f16>; -def : PatFprFprDynFrm<any_fmul, FMUL_H_INX, FPR16INX, f16>; -def : PatFprFprDynFrm<any_fdiv, FDIV_H_INX, FPR16INX, f16>; - def : Pat<(any_fsqrt FPR16INX:$rs1), (FSQRT_H_INX FPR16INX:$rs1, FRM_DYN)>; def : Pat<(fneg 
FPR16INX:$rs1), (FSGNJN_H_INX $rs1, $rs1)>; def : Pat<(fabs FPR16INX:$rs1), (FSGNJX_H_INX $rs1, $rs1)>; -def : Pat<(riscv_fpclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>; +def : Pat<(riscv_fclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_H_INX, FPR16INX, f16>; def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), (FSGNJN_H_INX $rs1, $rs2)>; @@ -358,12 +355,12 @@ foreach Ext = ZfhExts in { // Match non-signaling FEQ_D foreach Ext = ZfhExts in { - defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext, f16>; - defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext, f16>; - defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext, f16>; - defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext, f16>; - defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext, f16>; - defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext, f16>; + defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext>; + defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext>; + defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext>; } let Predicates = [HasStdExtZfh] in { @@ -397,10 +394,10 @@ def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs1, SETOEQ)), } // Predicates = [HasStdExtZhinx] foreach Ext = ZfhExts in { - defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext, f16>; - defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext, f16>; - defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext, f16>; - defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext, f16>; + defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext>; + defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext>; + defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext>; + defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext>; } let Predicates = [HasStdExtZfh] in { @@ -425,11 +422,13 @@ def : StPat<store, FSH, FPR16, f16>; let Predicates = [HasStdExtZhinxOrZhinxmin] in { /// Loads -def : Pat<(f16 (load GPR:$rs1)), (COPY_TO_REGCLASS (LH GPR:$rs1, 0), GPRF16)>; +def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))), + (COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>; /// Stores -def : Pat<(store (f16 FPR16INX:$rs2), GPR:$rs1), - (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, 0)>; +def : Pat<(store (f16 FPR16INX:$rs2), + (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)), + (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>; } // Predicates = [HasStdExtZhinxOrZhinxmin] let Predicates = [HasStdExtZfhOrZfhmin] in { @@ -437,14 +436,14 @@ let Predicates = [HasStdExtZfhOrZfhmin] in { // f32 -> f16, f16 -> f32 def : Pat<(f16 (any_fpround FPR32:$rs1)), (FCVT_H_S FPR32:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1)>; +def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1, FRM_RNE)>; // Moves (no conversion) def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>; def : Pat<(riscv_fmv_x_anyexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>; def : Pat<(riscv_fmv_x_signexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>; -def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>; +def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZhinxOrZhinxmin] in { @@ -452,17 +451,17 @@ let Predicates = [HasStdExtZhinxOrZhinxmin] in { // f32 -> f16, f16 -> f32 
def : Pat<(any_fpround FPR32INX:$rs1), (FCVT_H_S_INX FPR32INX:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1)>; +def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>; // Moves (no conversion) def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>; def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>; def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>; -def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2))>; +def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZhinxOrZhinxmin] -let Predicates = [HasStdExtZfh, IsRV32] in { +let Predicates = [HasStdExtZfh] in { // half->[u]int. Round-to-zero must be used. def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, 0b001)>; def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_H $rs1, 0b001)>; @@ -480,9 +479,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_RMM)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_W $rs1, FRM_DYN)>; def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_WU $rs1, FRM_DYN)>; -} // Predicates = [HasStdExtZfh, IsRV32] +} // Predicates = [HasStdExtZfh] -let Predicates = [HasStdExtZhinx, IsRV32] in { +let Predicates = [HasStdExtZhinx] in { // half->[u]int. Round-to-zero must be used. def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, 0b001)>; def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_H_INX $rs1, 0b001)>; @@ -500,7 +499,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_RMM)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. 
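// As an illustrative example of why the mode matters here: 2049 has no exact f16
// representation (integers are only exact up to 2048 in half precision), so
// fcvt.h.w returns 2048 or 2050 depending on the dynamic rounding mode.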
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W_INX $rs1, FRM_DYN)>; def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU_INX $rs1, FRM_DYN)>; -} // Predicates = [HasStdExtZhinx, IsRV32] +} // Predicates = [HasStdExtZhinx] let Predicates = [HasStdExtZfh, IsRV64] in { // Use target specific isd nodes to help us remember the result is sign @@ -566,82 +565,82 @@ let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in { /// Float conversion operations // f64 -> f16, f16 -> f64 def : Pat<(f16 (any_fpround FPR64:$rs1)), (FCVT_H_D FPR64:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1)>; +def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1, FRM_RNE)>; /// Float arithmetic operations def : Pat<(f16 (fcopysign FPR16:$rs1, FPR64:$rs2)), (FSGNJ_H $rs1, (FCVT_H_D $rs2, FRM_DYN))>; -def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>; +def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32] in { /// Float conversion operations // f64 -> f16, f16 -> f64 def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1)>; +def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>; /// Float arithmetic operations def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2), (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>; -def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2))>; +def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64] in { /// Float conversion operations // f64 -> f16, f16 -> f64 def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_H_D_INX FPR64INX:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1)>; +def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1, FRM_RNE)>; /// Float arithmetic operations def : Pat<(fcopysign FPR16INX:$rs1, FPR64INX:$rs2), (FSGNJ_H_INX $rs1, (FCVT_H_D_INX $rs2, 0b111))>; -def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2))>; +def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64] -let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in { +let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in { // half->[u]int. Round-to-zero must be used. -def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RTZ)>; -def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1), FRM_RTZ)>; +def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>; +def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>; // half->int32 with current rounding mode. -def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_DYN)>; +def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>; // half->int32 rounded to nearest with ties rounded away from zero. 
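// (FRM_RMM is what gives C lround() semantics, e.g. lround(2.5) == 3 and
// lround(-2.5) == -3; the FCVT_S_H widening step used below is exact, so only the
// final FCVT_W_S rounding can affect the result.)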
-def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RMM)>; +def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>; def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>; -} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] +} // Predicates = [HasStdExtZfhmin, NoStdExtZfh] -let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] in { +let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] in { // half->[u]int. Round-to-zero must be used. -def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>; -def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>; +def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>; +def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>; // half->int32 with current rounding mode. -def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>; +def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>; // half->int32 rounded to nearest with ties rounded away from zero. -def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>; +def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>; def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>; -} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] +} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in { // half->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RTZ)>; -def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1), FRM_RTZ)>; +def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>; +def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>; // half->int64 with current rounding mode. -def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>; -def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>; +def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>; +def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>; // half->int64 rounded to nearest with ties rounded away from zero. -def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>; -def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>; +def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>; +def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
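// Zfhmin only provides FCVT.S.H/FCVT.H.S plus loads, stores and moves -- there are no
// direct half<->integer converts -- so i64 -> f16 is expanded through single precision,
// roughly (illustrative register choices):
//   fcvt.s.l ft0, a0
//   fcvt.h.s fa0, ft0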
def : Pat<(f16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>; @@ -650,16 +649,16 @@ def : Pat<(f16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_LU $rs1, FRM_ let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV64] in { // half->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>; -def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>; +def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>; +def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>; // half->int64 with current rounding mode. -def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>; -def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>; +def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>; +def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>; // half->int64 rounded to nearest with ties rounded away from zero. -def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>; -def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>; +def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>; +def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_L_INX $rs1, FRM_DYN), FRM_DYN)>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td index 509d1cfcd874..56b68e324de2 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td @@ -16,7 +16,7 @@ //===----------------------------------------------------------------------===// // A 12-bit signed immediate where the least significant five bits are zero. 
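// In other words, a multiple of 32 in the range [-2048, 2016]; the isShiftedInt<7, 5>
// predicate below checks exactly that. For example, prefetch.r 64(a0) is encodable,
// while an offset of 48 would first have to be folded into the base register.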
-def simm12_lsb00000 : Operand<XLenVT>, +def simm12_lsb00000 : RISCVOp, ImmLeaf<XLenVT, [{return isShiftedInt<7, 5>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12, "Lsb00000">; let EncoderMethod = "getImmOpValue"; @@ -28,7 +28,6 @@ def simm12_lsb00000 : Operand<XLenVT>, return MCOp.isBareSymbolRef(); }]; let OperandType = "OPERAND_SIMM12_LSB00000"; - let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// @@ -74,12 +73,16 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>; // Patterns //===----------------------------------------------------------------------===// +def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">; + let Predicates = [HasStdExtZicbop] in { - // FIXME: Match address with offset - def : Pat<(prefetch GPR:$rs1, imm, imm, (XLenVT 0)), - (PREFETCH_I GPR:$rs1, 0)>; - def : Pat<(prefetch GPR:$rs1, (XLenVT 0), imm, (XLenVT 1)), - (PREFETCH_R GPR:$rs1, 0)>; - def : Pat<(prefetch GPR:$rs1, (XLenVT 1), imm, (XLenVT 1)), - (PREFETCH_W GPR:$rs1, 0)>; + def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12), + timm, timm, (i32 0)), + (PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>; + def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12), + (i32 0), timm, (i32 1)), + (PREFETCH_R GPR:$rs1, simm12_lsb00000:$imm12)>; + def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12), + (i32 1), timm, (i32 1)), + (PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td index ab0b93d62af5..0790a941823b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td @@ -40,4 +40,13 @@ def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)), (CZERO_EQZ GPR:$rs1, GPR:$rc)>; def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)), (CZERO_NEZ GPR:$rs1, GPR:$rc)>; + +def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, (riscv_setne (XLenVT GPR:$rc)))), + (CZERO_EQZ GPR:$rs1, GPR:$rc)>; +def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, (riscv_seteq (XLenVT GPR:$rc)))), + (CZERO_NEZ GPR:$rs1, GPR:$rc)>; +def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, (riscv_setne (XLenVT GPR:$rc)))), + (CZERO_NEZ GPR:$rs1, GPR:$rc)>; +def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, (riscv_seteq (XLenVT GPR:$rc)))), + (CZERO_EQZ GPR:$rs1, GPR:$rc)>; } // Predicates = [HasStdExtZicond] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td index b8c0606034c5..3ec63b1b6adb 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td @@ -36,30 +36,26 @@ def RnumArg : AsmOperandClass { let DiagnosticType = "InvalidRnumArg"; } -def rnum : Operand<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> { +def rnum : RISCVOp<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> { let ParserMatchClass = RnumArg; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<4>"; let OperandType = "OPERAND_RVKRNUM"; - let OperandNamespace = "RISCVOp"; } -def byteselect : Operand<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> { +def byteselect : RISCVOp<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<2>; let 
DecoderMethod = "decodeUImmOperand<2>"; let OperandType = "OPERAND_UIMM2"; - let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVKUnary<bits<12> imm12_in, bits<3> funct3, string opcodestr> - : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), - opcodestr, "$rd, $rs1">{ - let imm12 = imm12_in; -} +class RVKUnary<bits<12> imm12, bits<3> funct3, string opcodestr> + : RVInstIUnary<imm12, funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), + opcodestr, "$rd, $rs1">; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVKByteSelect<bits<5> funct5, string opcodestr> @@ -72,12 +68,12 @@ class RVKByteSelect<bits<5> funct5, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVKUnary_rnum<bits<7> funct7, bits<3> funct3, string opcodestr> - : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, rnum:$rnum), - opcodestr, "$rd, $rs1, $rnum">{ - bits<4> rnum; - let Inst{31-25} = funct7; - let Inst{24} = 1; - let Inst{23-20} = rnum; + : RVInstIBase<funct3, OPC_OP_IMM, (outs GPR:$rd), + (ins GPR:$rs1, rnum:$rnum), opcodestr, "$rd, $rs1, $rnum"> { + bits<4> rnum; + let Inst{31-25} = funct7; + let Inst{24} = 0b1; + let Inst{23-20} = rnum; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index 046074d848f5..1b1f3b9b16e4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -25,7 +25,8 @@ let Uses = [FRM] in defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>; } -let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd", +let Predicates = [HasStdExtZvfbfwma], + Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in { defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 13c98ce92d14..1ffa78a28d09 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -15,46 +15,16 @@ // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// -def RnumArg_0_7 : AsmOperandClass { - let Name = "RnumArg_0_7"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "InvalidRnumArg_0_7"; -} - -def RnumArg_1_10 : AsmOperandClass { - let Name = "RnumArg_1_10"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "InvalidRnumArg_1_10"; -} - -def RnumArg_2_14 : AsmOperandClass { - let Name = "RnumArg_2_14"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "InvalidRnumArg_2_14"; -} - -def rnum_0_7 : Operand<XLenVT>, ImmLeaf<XLenVT, - [{return (0 <= Imm && Imm <= 7);}]> { - let ParserMatchClass = RnumArg_0_7; +def tuimm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<5>; + let EncoderMethod = "getUImmOpValue"; let DecoderMethod = "decodeUImmOperand<5>"; - let OperandType = "OPERAND_RVKRNUM_0_7"; - let OperandNamespace = "RISCVOp"; -} - -def rnum_1_10 : Operand<XLenVT>, ImmLeaf<XLenVT, - [{return (1 <= Imm && Imm <= 10);}]> { - let ParserMatchClass = RnumArg_1_10; - let DecoderMethod = "decodeUImmOperand<5>"; - let OperandType = "OPERAND_RVKRNUM_1_10"; - let OperandNamespace = "RISCVOp"; -} - -def rnum_2_14 : Operand<XLenVT>, ImmLeaf<XLenVT, - [{return (2 <= Imm && Imm <= 14);}]> { - let ParserMatchClass = RnumArg_2_14; - let DecoderMethod = "decodeUImmOperand<5>"; - let OperandType = "OPERAND_RVKRNUM_2_14"; - let OperandNamespace = "RISCVOp"; + let MCOperandPredicate = [{ + int64_t UImm; + if (MCOp.evaluateAsConstantImm(UImm)) + return isUInt<5>(UImm); + return MCOp.isBareSymbolRef(); + }]; } //===----------------------------------------------------------------------===// @@ -140,15 +110,10 @@ class VAESKF_MV_I<bits<6> funct6, string opcodestr, Operand optype> //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZvbb] in { - defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>; - def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">; def VBREV_V : VALUVs2<0b010010, 0b01010, OPMVV, "vbrev.v">; def VCLZ_V : VALUVs2<0b010010, 0b01100, OPMVV, "vclz.v">; def VCPOP_V : VALUVs2<0b010010, 0b01110, OPMVV, "vcpop.v">; def VCTZ_V : VALUVs2<0b010010, 0b01101, OPMVV, "vctz.v">; - def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">; - defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>; - defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>; let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in defm VWSLL_V : VSHT_IV_V_X_I<"vwsll", 0b110101>; } // Predicates = [HasStdExtZvbb] @@ -158,16 +123,24 @@ let Predicates = [HasStdExtZvbc] in { defm VCLMULH_V : VCLMUL_MV_V_X<"vclmulh", 0b001101>; } // Predicates = [HasStdExtZvbc] +let Predicates = [HasStdExtZvkb] in { + defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>; + def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">; + def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">; + defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>; + defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>; +} // Predicates = [HasStdExtZvkb] + let Predicates = [HasStdExtZvkg], RVVConstraint = NoConstraint in { def VGHSH_VV : PALUVVNoVm<0b101100, OPMVV, "vghsh.vv">; def VGMUL_VV : PALUVs2NoVm<0b101000, 0b10001, OPMVV, "vgmul.vv">; } // Predicates = [HasStdExtZvkg] -let Predicates = [HasStdExtZvknha], RVVConstraint = NoConstraint in { +let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = NoConstraint in { def VSHA2CH_VV : PALUVVNoVm<0b101110, OPMVV, "vsha2ch.vv">; def VSHA2CL_VV : PALUVVNoVm<0b101111, OPMVV, 
"vsha2cl.vv">; def VSHA2MS_VV : PALUVVNoVm<0b101101, OPMVV, "vsha2ms.vv">; -} // Predicates = [HasStdExtZvknha] +} // Predicates = [HasStdExtZvknhaOrZvknhb] let Predicates = [HasStdExtZvkned], RVVConstraint = NoConstraint in { defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">; @@ -193,34 +166,254 @@ let Predicates = [HasStdExtZvksh], RVVConstraint = NoConstraint in { // Pseudo instructions //===----------------------------------------------------------------------===// -defm PseudoVANDN : VPseudoVALU_VV_VX; +defvar I32IntegerVectors = !filter(vti, AllIntegerVectors, !eq(vti.SEW, 32)); +defvar I32I64IntegerVectors = !filter(vti, AllIntegerVectors, + !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64))); + +class ZvkI32IntegerVectors<string vd_lmul> { + list<VTypeInfo> vs2_types = !cond(!eq(vd_lmul, "M8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)), + !eq(vd_lmul, "M4") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)), + !eq(vd_lmul, "M2") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 16)), + !eq(vd_lmul, "M1") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 8)), + !eq(vd_lmul, "MF2") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 4)), + !eq(vd_lmul, "MF4") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 2)), + !eq(vd_lmul, "MF8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 1))); +} + +class ZvkMxSet<string vd_lmul> { + list<LMULInfo> vs2_lmuls = !cond(!eq(vd_lmul, "M8") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4], + !eq(vd_lmul, "M4") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4], + !eq(vd_lmul, "M2") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2], + !eq(vd_lmul, "M1") : [V_MF8, V_MF4, V_MF2, V_M1], + !eq(vd_lmul, "MF2") : [V_MF8, V_MF4, V_MF2], + !eq(vd_lmul, "MF4") : [V_MF8, V_MF4], + !eq(vd_lmul, "MF8") : [V_MF8]); +} + +class VPseudoUnaryNoMask_Zvk<DAGOperand RetClass, VReg OpClass, string Constraint = ""> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let HasVLOp = 1; + let HasSEWOp = 1; + let HasVecPolicyOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoBinaryNoMask_Zvk<VReg RetClass, + VReg Op1Class, + DAGOperand Op2Class, + string Constraint> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, + AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let HasVLOp = 1; + let HasSEWOp = 1; + let HasVecPolicyOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +multiclass VPseudoBinaryNoMask_Zvk<VReg RetClass, + VReg Op1Class, + DAGOperand Op2Class, + LMULInfo MInfo, + string Constraint = ""> { + let VLMul = MInfo.value in + def "_" # MInfo.MX : VPseudoBinaryNoMask_Zvk<RetClass, Op1Class, Op2Class, + Constraint>; +} + +multiclass VPseudoUnaryV_V_NoMask_Zvk<LMULInfo m, string Constraint = ""> { + let VLMul = m.value in { + def "_VV_" # m.MX : VPseudoUnaryNoMask_Zvk<m.vrclass, m.vrclass, Constraint>; + } +} + +multiclass VPseudoUnaryV_S_NoMask_Zvk<LMULInfo m, string Constraint = ""> { + let VLMul = m.value in + foreach vs2_lmul = ZvkMxSet<m.MX>.vs2_lmuls in + def "_VS_" # m.MX # "_" # vs2_lmul.MX : VPseudoUnaryNoMask_Zvk<m.vrclass, vs2_lmul.vrclass, Constraint>; 
+} + +multiclass VPseudoVALU_V_NoMask_Zvk<string Constraint = ""> { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm "" : VPseudoUnaryV_V_NoMask_Zvk<m, Constraint>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + } +} + +multiclass VPseudoVALU_S_NoMask_Zvk<string Constraint = ""> { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm "" : VPseudoUnaryV_S_NoMask_Zvk<m, Constraint>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + } +} + +multiclass VPseudoVALU_V_S_NoMask_Zvk<string Constraint = ""> { + defm "" : VPseudoVALU_V_NoMask_Zvk<Constraint>; + defm "" : VPseudoVALU_S_NoMask_Zvk<Constraint>; +} + +multiclass VPseudoVALU_VV_NoMask_Zvk<string Constraint = ""> { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm _VV : VPseudoBinaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m, + Constraint>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + } +} -multiclass VPseudoUnaryV_V { +multiclass VPseudoVALU_VI_NoMask_Zvk<Operand ImmType = simm5, string Constraint = ""> { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm _VI : VPseudoBinaryNoMask_Zvk<m.vrclass, m.vrclass, ImmType, m, + Constraint>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + } +} + +multiclass VPseudoVALU_VI_NoMaskTU_Zvk<Operand ImmType = uimm5, string Constraint = ""> { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm _VI : VPseudoBinaryNoMask<m.vrclass, m.vrclass, ImmType, m, + Constraint>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + } +} + +multiclass VPseudoVALU_VV_NoMaskTU_Zvk<string Constraint = ""> { + foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm _VV : VPseudoBinaryNoMask<m.vrclass, m.vrclass, m.vrclass, m, + Constraint>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + } +} + +multiclass VPseudoVCLMUL_VV_VX { foreach m = MxList in { - let VLMul = m.value in { - def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>; - def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>, - RISCVMaskedPseudo<MaskIdx=2>; - } + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VV<m>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX<m>, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + } +} + +multiclass VPseudoUnaryV_V<LMULInfo m> { + let VLMul = m.value in { + defvar suffix = "_V_" # m.MX; + def suffix : VPseudoUnaryNoMask<m.vrclass, m.vrclass>; + def suffix # 
"_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>, + RISCVMaskedPseudo<MaskIdx=2>; + } +} + +multiclass VPseudoVALU_V { + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx); + + defm "" : VPseudoUnaryV_V<m>, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; } } -defm PseudoVBREV : VPseudoUnaryV_V; -defm PseudoVREV8 : VPseudoUnaryV_V; -defm PseudoVCLZ : VPseudoUnaryV_V; -defm PseudoVCTZ : VPseudoUnaryV_V; -defm PseudoVCPOP : VPseudoUnaryV_V; +let Predicates = [HasStdExtZvbb] in { + defm PseudoVBREV : VPseudoVALU_V; + defm PseudoVCLZ : VPseudoVALU_V; + defm PseudoVCTZ : VPseudoVALU_V; + defm PseudoVCPOP : VPseudoVALU_V; + defm PseudoVWSLL : VPseudoVWALU_VV_VX_VI<uimm5>; +} // Predicates = [HasStdExtZvbb] + +let Predicates = [HasStdExtZvbc] in { + defm PseudoVCLMUL : VPseudoVCLMUL_VV_VX; + defm PseudoVCLMULH : VPseudoVCLMUL_VV_VX; +} // Predicates = [HasStdExtZvbc] + +let Predicates = [HasStdExtZvkb] in { + defm PseudoVANDN : VPseudoVALU_VV_VX; + defm PseudoVBREV8 : VPseudoVALU_V; + defm PseudoVREV8 : VPseudoVALU_V; + defm PseudoVROL : VPseudoVALU_VV_VX; + defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>; +} // Predicates = [HasStdExtZvkb] + +let Predicates = [HasStdExtZvkg] in { + defm PseudoVGHSH : VPseudoVALU_VV_NoMask_Zvk; + defm PseudoVGMUL : VPseudoVALU_V_NoMask_Zvk; +} // Predicates = [HasStdExtZvkg] + +let Predicates = [HasStdExtZvkned] in { + defm PseudoVAESDF : VPseudoVALU_V_S_NoMask_Zvk; + defm PseudoVAESDM : VPseudoVALU_V_S_NoMask_Zvk; + defm PseudoVAESEF : VPseudoVALU_V_S_NoMask_Zvk; + defm PseudoVAESEM : VPseudoVALU_V_S_NoMask_Zvk; + defm PseudoVAESKF1 : VPseudoVALU_VI_NoMaskTU_Zvk; + defm PseudoVAESKF2 : VPseudoVALU_VI_NoMask_Zvk<uimm5>; + defm PseudoVAESZ : VPseudoVALU_S_NoMask_Zvk; +} // Predicates = [HasStdExtZvkned] + +let Predicates = [HasStdExtZvknhaOrZvknhb] in { + defm PseudoVSHA2CH : VPseudoVALU_VV_NoMask_Zvk; + defm PseudoVSHA2CL : VPseudoVALU_VV_NoMask_Zvk; + defm PseudoVSHA2MS : VPseudoVALU_VV_NoMask_Zvk; +} // Predicates = [HasStdExtZvknhaOrZvknhb] + +let Predicates = [HasStdExtZvksed] in { + defm PseudoVSM4K : VPseudoVALU_VI_NoMaskTU_Zvk; + defm PseudoVSM4R : VPseudoVALU_V_S_NoMask_Zvk; +} // Predicates = [HasStdExtZvksed] -defm PseudoVROL : VPseudoVALU_VV_VX; -defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>; +let Predicates = [HasStdExtZvksh] in { + defm PseudoVSM3C : VPseudoVALU_VI_NoMask_Zvk<uimm5>; + defm PseudoVSM3ME : VPseudoVALU_VV_NoMaskTU_Zvk; +} // Predicates = [HasStdExtZvksh] //===----------------------------------------------------------------------===// // SDNode patterns //===----------------------------------------------------------------------===// -multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name> { +multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name, + Predicate predicate = HasStdExtZvbb> { foreach vti = AllIntegerVectors in { - let Predicates = !listconcat([HasStdExtZvbb], + let Predicates = !listconcat([predicate], GetVTypePredicates<vti>.Predicates) in { def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1))), (!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX) @@ -239,7 +432,7 @@ def riscv_vnot : PatFrag<(ops node:$rs1), (xor node:$rs1, (riscv_splat_vector -1))>; foreach vti = AllIntegerVectors in { - let Predicates = !listconcat([HasStdExtZvbb], + let Predicates = !listconcat([HasStdExtZvkb], GetVTypePredicates<vti>.Predicates) in { def : Pat<(vti.Vector 
(and (riscv_vnot vti.RegClass:$rs1), vti.RegClass:$rs2)), @@ -260,14 +453,27 @@ foreach vti = AllIntegerVectors in { } defm : VPatUnarySDNode_V<bitreverse, "PseudoVBREV">; -defm : VPatUnarySDNode_V<bswap, "PseudoVREV8">; +defm : VPatUnarySDNode_V<bswap, "PseudoVREV8", HasStdExtZvkb>; defm : VPatUnarySDNode_V<ctlz, "PseudoVCLZ">; defm : VPatUnarySDNode_V<cttz, "PseudoVCTZ">; defm : VPatUnarySDNode_V<ctpop, "PseudoVCPOP">; defm : VPatBinarySDNode_VV_VX<rotl, "PseudoVROL">; -def NegImm64 : SDNodeXForm<imm, [{ +// Invert the immediate and mask it to SEW for readability. +def InvRot8Imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(0x7 & (64 - N->getZExtValue()), SDLoc(N), + N->getValueType(0)); +}]>; +def InvRot16Imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(0xf & (64 - N->getZExtValue()), SDLoc(N), + N->getValueType(0)); +}]>; +def InvRot32Imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(0x1f & (64 - N->getZExtValue()), SDLoc(N), + N->getValueType(0)); +}]>; +def InvRot64Imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(0x3f & (64 - N->getZExtValue()), SDLoc(N), N->getValueType(0)); }]>; @@ -275,26 +481,56 @@ def NegImm64 : SDNodeXForm<imm, [{ // Although there is no vrol.vi, an immediate rotate left can be achieved by // negating the immediate in vror.vi foreach vti = AllIntegerVectors in { - let Predicates = !listconcat([HasStdExtZvbb], + let Predicates = !listconcat([HasStdExtZvkb], GetVTypePredicates<vti>.Predicates) in { def : Pat<(vti.Vector (rotl vti.RegClass:$rs2, (vti.Vector (SplatPat_uimm6 uimm6:$rs1)))), (!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - (NegImm64 uimm6:$rs1), + (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1), vti.AVL, vti.Log2SEW, TA_MA)>; } } defm : VPatBinarySDNode_VV_VX_VI<rotr, "PseudoVROR", uimm6>; +foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + let Predicates = !listconcat([HasStdExtZvbb], + GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<wti>.Predicates) in { + def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1)))), + (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX) + (wti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs2, vti.RegClass:$rs1, + vti.AVL, vti.Log2SEW, TA_MA)>; + + def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1)))), + (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX) + (wti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs2, GPR:$rs1, + vti.AVL, vti.Log2SEW, TA_MA)>; + + def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (SplatPat_uimm5 uimm5:$rs1))), + (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX) + (wti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs2, uimm5:$rs1, + vti.AVL, vti.Log2SEW, TA_MA)>; + } +} + //===----------------------------------------------------------------------===// // VL patterns //===----------------------------------------------------------------------===// -multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> { +multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name, + Predicate predicate = HasStdExtZvbb> { foreach vti = AllIntegerVectors in { - let Predicates = !listconcat([HasStdExtZvbb], + let Predicates = !listconcat([predicate], GetVTypePredicates<vti>.Predicates) in { def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1), 
(vti.Vector vti.RegClass:$merge), @@ -312,7 +548,7 @@ multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> { } foreach vti = AllIntegerVectors in { - let Predicates = !listconcat([HasStdExtZvbb], + let Predicates = !listconcat([HasStdExtZvkb], GetVTypePredicates<vti>.Predicates) in { def : Pat<(vti.Vector (riscv_and_vl (riscv_xor_vl (vti.Vector vti.RegClass:$rs1), @@ -351,7 +587,339 @@ foreach vti = AllIntegerVectors in { } defm : VPatUnaryVL_V<riscv_bitreverse_vl, "PseudoVBREV">; -defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8">; +defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8", HasStdExtZvkb>; defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">; defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">; defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">; + +defm : VPatBinaryVL_VV_VX<riscv_rotl_vl, "PseudoVROL">; +// Although there is no vrol.vi, an immediate rotate left can be achieved by +// negating the immediate in vror.vi +foreach vti = AllIntegerVectors in { + let Predicates = !listconcat([HasStdExtZvkb], + GetVTypePredicates<vti>.Predicates) in { + def : Pat<(riscv_rotl_vl vti.RegClass:$rs2, + (vti.Vector (SplatPat_uimm6 uimm6:$rs1)), + (vti.Vector vti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, + vti.RegClass:$rs2, + (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1), + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} +defm : VPatBinaryVL_VV_VX_VI<riscv_rotr_vl, "PseudoVROR", uimm6>; + +foreach vtiToWti = AllWidenableIntVectors in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + let Predicates = !listconcat([HasStdExtZvbb], + GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<wti>.Predicates) in { + def : Pat<(riscv_shl_vl + (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1))), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_shl_vl + (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_shl_vl + (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), + (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_vwsll_vl + (vti.Vector vti.RegClass:$rs2), + (vti.Vector vti.RegClass:$rs1), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_vwsll_vl + (vti.Vector vti.RegClass:$rs2), + (vti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask 
V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_vwsll_vl + (vti.Vector vti.RegClass:$rs2), + (vti.Vector (SplatPat_uimm5 uimm5:$rs1)), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + +//===----------------------------------------------------------------------===// +// Codegen patterns +//===----------------------------------------------------------------------===// + +class VPatUnaryNoMask_Zvk<string intrinsic_name, + string inst, + string kind, + ValueType result_type, + ValueType op2_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + VReg op2_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + VLOpFrag, (XLenVT timm:$policy))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + GPR:$vl, sew, (XLenVT timm:$policy))>; + +class VPatUnaryNoMask_VS_Zvk<string intrinsic_name, + string inst, + string kind, + ValueType result_type, + ValueType op2_type, + int sew, + LMULInfo vlmul, + LMULInfo vs2_lmul, + VReg result_reg_class, + VReg op2_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + VLOpFrag, (XLenVT timm:$policy))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_"#vs2_lmul.MX) + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + GPR:$vl, sew, (XLenVT timm:$policy))>; + +multiclass VPatUnaryV_V_NoMask_Zvk<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + def : VPatUnaryNoMask_Zvk<intrinsic # "_vv", instruction, "VV", + vti.Vector, vti.Vector, vti.Log2SEW, + vti.LMul, vti.RegClass, vti.RegClass>; +} + +multiclass VPatUnaryV_S_NoMaskVectorCrypto<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + foreach vti_vs2 = ZvkI32IntegerVectors<vti.LMul.MX>.vs2_types in + def : VPatUnaryNoMask_VS_Zvk<intrinsic # "_vs", instruction, "VS", + vti.Vector, vti_vs2.Vector, vti.Log2SEW, + vti.LMul, vti_vs2.LMul, vti.RegClass, vti_vs2.RegClass>; +} + +multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + defm : VPatUnaryV_V_NoMask_Zvk<intrinsic, instruction, vtilist>; + defm : VPatUnaryV_S_NoMaskVectorCrypto<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV", + vti.Vector, vti.Vector, vti.Vector, + vti.Log2SEW, vti.LMul, vti.RegClass, + vti.RegClass, vti.RegClass>; +} + +multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand imm_type = tuimm5> { + foreach vti = vtilist in + def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VI", + vti.Vector, vti.Vector, XLenVT, + vti.Log2SEW, vti.LMul, vti.RegClass, + vti.RegClass, imm_type>; +} + +multiclass VPatBinaryV_VI_NoMaskTU<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand imm_type = tuimm5> { + foreach vti = vtilist in + def : VPatBinaryNoMaskTU<intrinsic, instruction # "_VI_" # vti.LMul.MX, + vti.Vector, vti.Vector, XLenVT, vti.Log2SEW, + vti.RegClass, vti.RegClass, 
imm_type>; +} + +multiclass VPatBinaryV_VV_NoMaskTU<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + def : VPatBinaryNoMaskTU<intrinsic, instruction # "_VV_" # vti.LMul.MX, + vti.Vector, vti.Vector, vti.Vector, vti.Log2SEW, + vti.RegClass, vti.RegClass, vti.RegClass>; +} + +multiclass VPatBinaryV_VX_VROTATE<string intrinsic, string instruction, + list<VTypeInfo> vtilist, bit isSEWAware = 0> { + foreach vti = vtilist in { + defvar kind = "V"#vti.ScalarSuffix; + let Predicates = GetVTypePredicates<vti>.Predicates in + defm : VPatBinary<intrinsic, + !if(isSEWAware, + instruction#"_"#kind#"_"#vti.LMul.MX#"_E"#vti.SEW, + instruction#"_"#kind#"_"#vti.LMul.MX), + vti.Vector, vti.Vector, XLenVT, vti.Mask, + vti.Log2SEW, vti.RegClass, + vti.RegClass, vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryV_VI_VROL<string intrinsic, string instruction, + list<VTypeInfo> vtilist, bit isSEWAware = 0> { + foreach vti = vtilist in { + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>( + !if(isSEWAware, instruction#"_VI_"#vti.LMul.MX#"_E"#vti.SEW, + instruction#"_VI_"#vti.LMul.MX)); + let Predicates = GetVTypePredicates<vti>.Predicates in + def : Pat<(vti.Vector (Intr (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (XLenVT uimm6:$rs1), + VLOpFrag)), + (Pseudo (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (InvRot64Imm uimm6:$rs1), + GPR:$vl, vti.Log2SEW, TU_MU)>; + + defvar IntrMask = !cast<Intrinsic>(intrinsic#"_mask"); + defvar PseudoMask = !cast<Instruction>( + !if(isSEWAware, instruction#"_VI_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK", + instruction#"_VI_"#vti.LMul.MX#"_MASK")); + let Predicates = GetVTypePredicates<vti>.Predicates in + def : Pat<(vti.Vector (IntrMask (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (XLenVT uimm6:$rs1), + (vti.Mask V0), + VLOpFrag, (XLenVT timm:$policy))), + (PseudoMask (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (InvRot64Imm uimm6:$rs1), + (vti.Mask V0), + GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; + } +} + +multiclass VPatBinaryV_VV_VX_VROL<string intrinsic, string instruction, + string instruction2, list<VTypeInfo> vtilist> + : VPatBinaryV_VV<intrinsic, instruction, vtilist>, + VPatBinaryV_VX_VROTATE<intrinsic, instruction, vtilist>, + VPatBinaryV_VI_VROL<intrinsic, instruction2, vtilist>; + +multiclass VPatBinaryV_VV_VX_VI_VROR<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand ImmType = uimm6> + : VPatBinaryV_VV<intrinsic, instruction, vtilist>, + VPatBinaryV_VX_VROTATE<intrinsic, instruction, vtilist>, + VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>; + +multiclass VPatBinaryW_VI_VWSLL<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defm : VPatBinary<intrinsic, instruction # "_VI_" # Vti.LMul.MX, + Wti.Vector, Vti.Vector, XLenVT, Vti.Mask, + Vti.Log2SEW, Wti.RegClass, + Vti.RegClass, uimm5>; + } +} + +multiclass VPatBinaryW_VX_VWSLL<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defvar kind = "V"#Vti.ScalarSuffix; + let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates, + GetVTypePredicates<Wti>.Predicates) in + defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX, + Wti.Vector, Vti.Vector, XLenVT, Vti.Mask, + 
Vti.Log2SEW, Wti.RegClass, + Vti.RegClass, Vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryW_VV_VX_VI_VWSLL<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> + : VPatBinaryW_VV<intrinsic, instruction, vtilist>, + VPatBinaryW_VX_VWSLL<intrinsic, instruction, vtilist>, + VPatBinaryW_VI_VWSLL<intrinsic, instruction, vtilist>; + +let Predicates = [HasStdExtZvbb] in { + defm : VPatUnaryV_V<"int_riscv_vbrev", "PseudoVBREV", AllIntegerVectors>; + defm : VPatUnaryV_V<"int_riscv_vclz", "PseudoVCLZ", AllIntegerVectors>; + defm : VPatUnaryV_V<"int_riscv_vctz", "PseudoVCTZ", AllIntegerVectors>; + defm : VPatUnaryV_V<"int_riscv_vcpopv", "PseudoVCPOP", AllIntegerVectors>; + defm : VPatBinaryW_VV_VX_VI_VWSLL<"int_riscv_vwsll", "PseudoVWSLL", AllWidenableIntVectors>; +} // Predicates = [HasStdExtZvbb] + +let Predicates = [HasStdExtZvbc] in { + defm : VPatBinaryV_VV_VX<"int_riscv_vclmul", "PseudoVCLMUL", I64IntegerVectors>; + defm : VPatBinaryV_VV_VX<"int_riscv_vclmulh", "PseudoVCLMULH", I64IntegerVectors>; +} // Predicates = [HasStdExtZvbc] + +let Predicates = [HasStdExtZvkb] in { + defm : VPatBinaryV_VV_VX<"int_riscv_vandn", "PseudoVANDN", AllIntegerVectors>; + defm : VPatUnaryV_V<"int_riscv_vbrev8", "PseudoVBREV8", AllIntegerVectors>; + defm : VPatUnaryV_V<"int_riscv_vrev8", "PseudoVREV8", AllIntegerVectors>; + defm : VPatBinaryV_VV_VX_VROL<"int_riscv_vrol", "PseudoVROL", "PseudoVROR", AllIntegerVectors>; + defm : VPatBinaryV_VV_VX_VI_VROR<"int_riscv_vror", "PseudoVROR", AllIntegerVectors>; +} // Predicates = [HasStdExtZvkb] + +let Predicates = [HasStdExtZvkg] in { + defm : VPatBinaryV_VV_NoMask<"int_riscv_vghsh", "PseudoVGHSH", I32IntegerVectors>; + defm : VPatUnaryV_V_NoMask_Zvk<"int_riscv_vgmul", "PseudoVGMUL", I32IntegerVectors>; +} // Predicates = [HasStdExtZvkg] + +let Predicates = [HasStdExtZvkned] in { + defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesdf", "PseudoVAESDF", I32IntegerVectors>; + defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesdm", "PseudoVAESDM", I32IntegerVectors>; + defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesef", "PseudoVAESEF", I32IntegerVectors>; + defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesem", "PseudoVAESEM", I32IntegerVectors>; + defm : VPatBinaryV_VI_NoMaskTU<"int_riscv_vaeskf1", "PseudoVAESKF1", I32IntegerVectors>; + defm : VPatBinaryV_VI_NoMask<"int_riscv_vaeskf2", "PseudoVAESKF2", I32IntegerVectors>; + defm : VPatUnaryV_S_NoMaskVectorCrypto<"int_riscv_vaesz", "PseudoVAESZ", I32IntegerVectors>; +} // Predicates = [HasStdExtZvkned] + +let Predicates = [HasStdExtZvknha] in { + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>; +} // Predicates = [HasStdExtZvknha] + +let Predicates = [HasStdExtZvknhb] in { + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>; +} // Predicates = [HasStdExtZvknhb] + +let Predicates = [HasStdExtZvksed] in { + defm : VPatBinaryV_VI_NoMaskTU<"int_riscv_vsm4k", "PseudoVSM4K", I32IntegerVectors>; + defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vsm4r", "PseudoVSM4R", I32IntegerVectors>; +} // Predicates = [HasStdExtZvksed] + +let Predicates = [HasStdExtZvksh] in { + defm : 
VPatBinaryV_VI_NoMask<"int_riscv_vsm3c", "PseudoVSM3C", I32IntegerVectors>; + defm : VPatBinaryV_VV_NoMaskTU<"int_riscv_vsm3me", "PseudoVSM3ME", I32IntegerVectors>; +} // Predicates = [HasStdExtZvksh] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index 5dfd47a687e9..fcc20c17c6b4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -104,13 +104,18 @@ public: BranchRelaxationScratchFrameIndex = Index; } + unsigned getReservedSpillsSize() const { + return LibCallStackSize + RVPushStackSize; + } + unsigned getLibCallStackSize() const { return LibCallStackSize; } void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; } bool useSaveRestoreLibCalls(const MachineFunction &MF) const { // We cannot use fixed locations for the callee saved spill slots if the // function uses a varargs save area, or is an interrupt handler. - return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() && + return !isPushable(MF) && + MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() && VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() && !MF.getFunction().hasFnAttribute("interrupt"); } @@ -127,10 +132,13 @@ public: unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } - uint64_t isPushable(const MachineFunction &MF) const { - return (!useSaveRestoreLibCalls(MF) && - MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() && - !MF.getTarget().Options.DisableFramePointerElim(MF)); + bool isPushable(const MachineFunction &MF) const { + // We cannot use fixed locations for the callee saved spill slots if the + // function uses a varargs save area. + // TODO: Use a seperate placement for vararg registers to enable Zcmp. + return MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() && + !MF.getTarget().Options.DisableFramePointerElim(MF) && + VarArgsSaveSize == 0; } int getRVPushRlist() const { return RVPushRlist; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp index da104657680a..02ea5270823d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp @@ -18,6 +18,101 @@ using namespace llvm; +static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) { + if (!SecondMI.getOperand(1).isReg()) + return false; + + if (SecondMI.getOperand(1).getReg() != FirstDest) + return false; + + // If the input is virtual make sure this is the only user. + if (FirstDest.isVirtual()) { + auto &MRI = SecondMI.getMF()->getRegInfo(); + return MRI.hasOneNonDBGUse(FirstDest); + } + + return SecondMI.getOperand(0).getReg() == FirstDest; +} + +// Fuse load with add: +// add rd, rs1, rs2 +// ld rd, 0(rd) +static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::LD) + return false; + + if (!SecondMI.getOperand(2).isImm()) + return false; + + if (SecondMI.getOperand(2).getImm() != 0) + return false; + + // Given SecondMI, when FirstMI is unspecified, we must return + // if SecondMI may be part of a fused pair at all. 
+ if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::ADD) + return true; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + +// Fuse these patterns: +// +// slli rd, rs1, 32 +// srli rd, rd, x +// where 0 <= x <= 32 +// +// and +// +// slli rd, rs1, 48 +// srli rd, rd, x +static bool isShiftedZExt(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::SRLI) + return false; + + if (!SecondMI.getOperand(2).isImm()) + return false; + + unsigned SRLIImm = SecondMI.getOperand(2).getImm(); + bool IsShiftBy48 = SRLIImm == 48; + if (SRLIImm > 32 && !IsShiftBy48) + return false; + + // Given SecondMI, when FirstMI is unspecified, we must return + // if SecondMI may be part of a fused pair at all. + if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::SLLI) + return false; + + unsigned SLLIImm = FirstMI->getOperand(2).getImm(); + if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32)) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + +// Fuse AUIPC followed by ADDI +// auipc rd, imm20 +// addi rd, rd, imm12 +static bool isAUIPCADDI(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != RISCV::ADDI) + return false; + // Assume the 1st instr to be a wildcard if it is unspecified. + if (!FirstMI) + return true; + + if (FirstMI->getOpcode() != RISCV::AUIPC) + return false; + + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); +} + // Fuse LUI followed by ADDI or ADDIW. // rd = imm[31:0] which decomposes to // lui rd, imm[31:12] @@ -27,7 +122,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI, if (SecondMI.getOpcode() != RISCV::ADDI && SecondMI.getOpcode() != RISCV::ADDIW) return false; - // Assume the 1st instr to be a wildcard if it is unspecified. if (!FirstMI) return true; @@ -35,25 +129,7 @@ static bool isLUIADDI(const MachineInstr *FirstMI, if (FirstMI->getOpcode() != RISCV::LUI) return false; - // The first operand of ADDI might be a frame index. - if (!SecondMI.getOperand(1).isReg()) - return false; - - Register FirstDest = FirstMI->getOperand(0).getReg(); - - // Destination of LUI should be the ADDI(W) source register. - if (SecondMI.getOperand(1).getReg() != FirstDest) - return false; - - // If the input is virtual make sure this is the only user. - if (FirstDest.isVirtual()) { - auto &MRI = SecondMI.getMF()->getRegInfo(); - return MRI.hasOneNonDBGUse(FirstDest); - } - - // If the FirstMI destination is non-virtual, it should match the SecondMI - // destination. 
- return SecondMI.getOperand(0).getReg() == FirstDest; + return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI); } static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, @@ -65,6 +141,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI)) return true; + if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI)) + return true; + + if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI)) + return true; + + if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI)) + return true; + return false; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp index 841439bb732e..ff21fe1d4064 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -84,9 +84,7 @@ struct RISCVMakeCompressibleOpt : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &Fn) override; - RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) { - initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry()); - } + RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; } }; @@ -271,7 +269,7 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI, RegScavenger RS; RS.enterBasicBlockEnd(MBB); - RS.backward(MIs.back()->getIterator()); + RS.backward(std::next(MIs.back()->getIterator())); return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(), /*RestoreAfter=*/false, /*SPAdj=*/0, /*AllowSpill=*/false); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 855322b981fb..ae46d5554d35 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetOptions.h" #include <optional> -#include <set> using namespace llvm; #define DEBUG_TYPE "riscv-merge-base-offset" @@ -94,7 +93,8 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, if (HiOp1.getTargetFlags() != ExpectedFlags) return false; - if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0) + if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) || + HiOp1.getOffset() != 0) return false; Register HiDestReg = Hi.getOperand(0).getReg(); @@ -108,7 +108,8 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, const MachineOperand &LoOp2 = Lo->getOperand(2); if (Hi.getOpcode() == RISCV::LUI) { if (LoOp2.getTargetFlags() != RISCVII::MO_LO || - !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0) + !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) || + LoOp2.getOffset() != 0) return false; } else { assert(Hi.getOpcode() == RISCV::AUIPC); @@ -120,8 +121,10 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, if (HiOp1.isGlobal()) { LLVM_DEBUG(dbgs() << " Found lowered global address: " << *HiOp1.getGlobal() << "\n"); - } else { - assert(HiOp1.isCPI()); + } else if (HiOp1.isBlockAddress()) { + LLVM_DEBUG(dbgs() << " Found lowered basic address: " + << *HiOp1.getBlockAddress() << "\n"); + } else if (HiOp1.isCPI()) { LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex() << "\n"); } diff --git 
a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp index 6c1b0cf5ca7f..3c5462057b28 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp @@ -1,4 +1,4 @@ -//===-- RISCVMoveMerger.cpp - RISCV move merge pass -----------------------===// +//===-- RISCVMoveMerger.cpp - RISC-V move merge pass ----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,9 +22,7 @@ namespace { struct RISCVMoveMerge : public MachineFunctionPass { static char ID; - RISCVMoveMerge() : MachineFunctionPass(ID) { - initializeRISCVMoveMergePass(*PassRegistry::getPassRegistry()); - } + RISCVMoveMerge() : MachineFunctionPass(ID) {} const RISCVInstrInfo *TII; const TargetRegisterInfo *TRI; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 7014755b6706..2c2b34bb5b77 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -12,16 +12,21 @@ // extended bits aren't consumed or because the input was already sign extended // by an earlier instruction. // -// Then it removes the -w suffix from each addiw and slliw instructions -// whenever all users are dependent only on the lower word of the result of the -// instruction. We do this only for addiw, slliw, and mulw because the -w forms -// are less compressible. +// Then it removes the -w suffix from opw instructions whenever all users are +// dependent only on the lower word of the result of the instruction. +// The cases handled are: +// * addw because c.add has a larger register encoding than c.addw. +// * addiw because it helps reduce test differences between RV32 and RV64 +// w/o being a pessimization. 
+// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb) +// * slliw because c.slliw doesn't exist and c.slli does // //===---------------------------------------------------------------------===// #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -48,9 +53,7 @@ class RISCVOptWInstrs : public MachineFunctionPass { public: static char ID; - RISCVOptWInstrs() : MachineFunctionPass(ID) { - initializeRISCVOptWInstrsPass(*PassRegistry::getPassRegistry()); - } + RISCVOptWInstrs() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII, @@ -76,6 +79,29 @@ FunctionPass *llvm::createRISCVOptWInstrsPass() { return new RISCVOptWInstrs(); } +static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp, + unsigned Bits) { + const MachineInstr &MI = *UserOp.getParent(); + unsigned MCOpcode = RISCV::getRVVMCOpcode(MI.getOpcode()); + + if (!MCOpcode) + return false; + + const MCInstrDesc &MCID = MI.getDesc(); + const uint64_t TSFlags = MCID.TSFlags; + if (!RISCVII::hasSEWOp(TSFlags)) + return false; + assert(RISCVII::hasVLOp(TSFlags)); + const unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MCID)).getImm(); + + if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID)) + return false; + + auto NumDemandedBits = + RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW); + return NumDemandedBits && Bits >= *NumDemandedBits; +} + // Checks if all users only demand the lower \p OrigBits of the original // instruction's result. // TODO: handle multiple interdependent transformations @@ -100,12 +126,14 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, if (MI->getNumExplicitDefs() != 1) return false; - for (auto &UserOp : MRI.use_operands(MI->getOperand(0).getReg())) { + for (auto &UserOp : MRI.use_nodbg_operands(MI->getOperand(0).getReg())) { const MachineInstr *UserMI = UserOp.getParent(); unsigned OpIdx = UserOp.getOperandNo(); switch (UserMI->getOpcode()) { default: + if (vectorPseudoHasAllNBitUsers(UserOp, Bits)) + break; return false; case RISCV::ADDIW: @@ -283,6 +311,8 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, Worklist.push_back(std::make_pair(UserMI, Bits)); break; + case RISCV::CZERO_EQZ: + case RISCV::CZERO_NEZ: case RISCV::VT_MASKC: case RISCV::VT_MASKCN: if (OpIdx != 1) @@ -327,9 +357,27 @@ static bool isSignExtendingOpW(const MachineInstr &MI, // An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11. case RISCV::ORI: return !isUInt<11>(MI.getOperand(2).getImm()); + // A bseti with X0 is sign extended if the immediate is less than 31. + case RISCV::BSETI: + return MI.getOperand(2).getImm() < 31 && + MI.getOperand(1).getReg() == RISCV::X0; // Copying from X0 produces zero. case RISCV::COPY: return MI.getOperand(1).getReg() == RISCV::X0; + case RISCV::PseudoAtomicLoadNand32: + return true; + case RISCV::PseudoVMV_X_S_MF8: + case RISCV::PseudoVMV_X_S_MF4: + case RISCV::PseudoVMV_X_S_MF2: + case RISCV::PseudoVMV_X_S_M1: + case RISCV::PseudoVMV_X_S_M2: + case RISCV::PseudoVMV_X_S_M4: + case RISCV::PseudoVMV_X_S_M8: { + // vmv.x.s has at least 33 sign bits if log2(sew) <= 5. 
+ int64_t Log2SEW = MI.getOperand(2).getImm(); + assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW"); + return Log2SEW <= 5; + } } return false; @@ -348,6 +396,11 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, MachineInstr *SrcMI = MRI.getVRegDef(SrcReg); if (!SrcMI) return false; + // Code assumes the register is operand 0. + // TODO: Maybe the worklist should store register? + if (!SrcMI->getOperand(0).isReg() || + SrcMI->getOperand(0).getReg() != SrcReg) + return false; // Add SrcMI to the worklist. Worklist.push_back(SrcMI); return true; @@ -446,9 +499,16 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, break; case RISCV::PseudoCCADDW: + case RISCV::PseudoCCADDIW: case RISCV::PseudoCCSUBW: - // Returns operand 4 or an ADDW/SUBW of operands 5 and 6. We only need to - // check if operand 4 is sign extended. + case RISCV::PseudoCCSLLW: + case RISCV::PseudoCCSRLW: + case RISCV::PseudoCCSRAW: + case RISCV::PseudoCCSLLIW: + case RISCV::PseudoCCSRLIW: + case RISCV::PseudoCCSRAIW: + // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only + // need to check if operand 4 is sign extended. if (!AddRegDefToWorkList(MI->getOperand(4).getReg())) return false; break; @@ -504,6 +564,8 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, break; } + case RISCV::CZERO_EQZ: + case RISCV::CZERO_NEZ: case RISCV::VT_MASKC: case RISCV::VT_MASKCN: // Instructions return zero or operand 1. Result is sign extended if @@ -567,25 +629,23 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF, bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { - for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) { - MachineInstr *MI = &*I++; - + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // We're looking for the sext.w pattern ADDIW rd, rs1, 0. - if (!RISCV::isSEXT_W(*MI)) + if (!RISCV::isSEXT_W(MI)) continue; - Register SrcReg = MI->getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); SmallPtrSet<MachineInstr *, 4> FixableDefs; // If all users only use the lower bits, this sext.w is redundant. // Or if all definitions reaching MI sign-extend their output, // then sext.w is redundant. 
- if (!hasAllWUsers(*MI, ST, MRI) && + if (!hasAllWUsers(MI, ST, MRI) && !isSignExtendedW(SrcReg, ST, MRI, FixableDefs)) continue; - Register DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg))) continue; @@ -603,7 +663,7 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); MRI.replaceRegWith(DstReg, SrcReg); MRI.clearKillFlags(SrcReg); - MI->eraseFromParent(); + MI.eraseFromParent(); ++NumRemovedSExtW; MadeChange = true; } @@ -621,14 +681,13 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { - for (auto I = MBB.begin(), IE = MBB.end(); I != IE; ++I) { - MachineInstr &MI = *I; - + for (MachineInstr &MI : MBB) { unsigned Opc; switch (MI.getOpcode()) { default: continue; case RISCV::ADDW: Opc = RISCV::ADD; break; + case RISCV::ADDIW: Opc = RISCV::ADDI; break; case RISCV::MULW: Opc = RISCV::MUL; break; case RISCV::SLLIW: Opc = RISCV::SLLI; break; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp new file mode 100644 index 000000000000..57b473645ae7 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp @@ -0,0 +1,116 @@ +//===-- RISCVPostRAExpandPseudoInsts.cpp - Expand pseudo instrs ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands the pseudo instruction pseudolisimm32 +// into target instructions. This pass should be run during the post-regalloc +// passes, before post RA scheduling. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/RISCVMatInt.h" +#include "RISCV.h" +#include "RISCVInstrInfo.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#define RISCV_POST_RA_EXPAND_PSEUDO_NAME \ + "RISC-V post-regalloc pseudo instruction expansion pass" + +namespace { + +class RISCVPostRAExpandPseudo : public MachineFunctionPass { +public: + const RISCVInstrInfo *TII; + static char ID; + + RISCVPostRAExpandPseudo() : MachineFunctionPass(ID) { + initializeRISCVPostRAExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return RISCV_POST_RA_EXPAND_PSEUDO_NAME; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); +}; + +char RISCVPostRAExpandPseudo::ID = 0; + +bool RISCVPostRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo()); + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + +bool RISCVPostRAExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case RISCV::PseudoMovImm: + return expandMovImm(MBB, MBBI); + default: + return false; + } +} + +bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + + int64_t Val = MBBI->getOperand(1).getImm(); + + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(Val, MBB.getParent()->getSubtarget()); + assert(!Seq.empty()); + + Register DstReg = MBBI->getOperand(0).getReg(); + bool DstIsDead = MBBI->getOperand(0).isDead(); + bool Renamable = MBBI->getOperand(0).isRenamable(); + + TII->movImm(MBB, MBBI, DL, DstReg, Val, MachineInstr::NoFlags, Renamable, + DstIsDead); + + MBBI->eraseFromParent(); + return true; +} + +} // end of anonymous namespace + +INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32", + RISCV_POST_RA_EXPAND_PSEUDO_NAME, false, false) +namespace llvm { + +FunctionPass *createRISCVPostRAExpandPseudoPass() { + return new RISCVPostRAExpandPseudo(); +} + +} // end of namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td index 01291001cd7c..58989fd716fa 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -10,6 +10,35 @@ // RISC-V processors supported. //===----------------------------------------------------------------------===// +class RISCVTuneInfo { + bits<8> PrefFunctionAlignment = 1; + bits<8> PrefLoopAlignment = 1; + + // Information needed by LoopDataPrefetch. 
+ bits<16> CacheLineSize = 0; + bits<16> PrefetchDistance = 0; + bits<16> MinPrefetchStride = 1; + bits<32> MaxPrefetchIterationsAhead = -1; + + bits<32> MinimumJumpTableEntries = 5; +} + +def RISCVTuneInfoTable : GenericTable { + let FilterClass = "RISCVTuneInfo"; + let CppTypeName = "RISCVTuneInfo"; + let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment", + "CacheLineSize", "PrefetchDistance", + "MinPrefetchStride", "MaxPrefetchIterationsAhead", + "MinimumJumpTableEntries"]; +} + +def getRISCVTuneInfo : SearchIndex { + let Table = RISCVTuneInfoTable; + let Key = ["Name"]; +} + +class GenericTuneInfo: RISCVTuneInfo; + class RISCVProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f, @@ -27,13 +56,15 @@ class RISCVTuneProcessorModel<string n, def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32", NoSchedModel, - [Feature32Bit]>; + [Feature32Bit]>, + GenericTuneInfo; def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", NoSchedModel, - [Feature64Bit]>; + [Feature64Bit]>, + GenericTuneInfo; // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate rv32/rv64 version. -def : ProcessorModel<"generic", NoSchedModel, []>; +def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo; def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32", RocketModel, @@ -201,3 +232,47 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max", FeatureStdExtM, FeatureStdExtC], [TuneNoDefaultUnroll]>; + +def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", + NoSchedModel, + [Feature64Bit, + FeatureStdExtZifencei, + FeatureStdExtZicsr, + FeatureStdExtZicntr, + FeatureStdExtZihpm, + FeatureStdExtZihintpause, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC, + FeatureStdExtZba, + FeatureStdExtZbb, + FeatureStdExtZbc, + FeatureStdExtZbs, + FeatureStdExtZicbom, + FeatureStdExtZicbop, + FeatureStdExtZicboz, + FeatureVendorXVentanaCondOps], + [TuneVeyronFusions]>; + +def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", + NoSchedModel, + [Feature64Bit, + FeatureStdExtZicsr, + FeatureStdExtZifencei, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC, + FeatureStdExtZba, + FeatureStdExtZbb, + FeatureStdExtZbc, + FeatureStdExtZbs, + FeatureStdExtZkn, + FeatureStdExtZksed, + FeatureStdExtZksh, + FeatureStdExtSvinval, + FeatureStdExtZicbom, + FeatureStdExtZicboz]>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp index f885adca669f..009dcf57f46d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp @@ -1,4 +1,4 @@ -//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===// +//===------- RISCVPushPopOptimizer.cpp - RISC-V Push/Pop opt. pass --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,9 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains a pass that modifies PUSH/POP instructions from Zca -// standard to use their non prolog/epilog related functionalities -// and generates POPRET instruction. +// This file contains a pass that replaces Zcmp POP instructions with +// POPRET[Z] where possible. 
// //===----------------------------------------------------------------------===// @@ -23,9 +22,7 @@ namespace { struct RISCVPushPopOpt : public MachineFunctionPass { static char ID; - RISCVPushPopOpt() : MachineFunctionPass(ID) { - initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry()); - } + RISCVPushPopOpt() : MachineFunctionPass(ID) {} const RISCVInstrInfo *TII; const TargetRegisterInfo *TRI; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp index fed3fa2987e5..735fc1350c00 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp @@ -9,7 +9,8 @@ // This file implements a function pass that initializes undef vector value to // temporary pseudo instruction and remove it in expandpseudo pass to prevent // register allocation resulting in a constraint violated result for vector -// instruction. +// instruction. It also rewrites the NoReg tied operand back to an +// IMPLICIT_DEF. // // RISC-V vector instruction has register overlapping constraint for certain // instructions, and will cause illegal instruction trap if violated, we use @@ -30,10 +31,18 @@ // // See also: https://github.com/llvm/llvm-project/issues/50157 // +// Additionally, this pass rewrites tied operands of vector instructions +// from NoReg to IMPLICIT_DEF. (Not that this is a non-overlapping set of +// operands to the above.) We use NoReg to side step a MachineCSE +// optimization quality problem but need to convert back before +// TwoAddressInstruction. See pr64282 for context. +// //===----------------------------------------------------------------------===// #include "RISCV.h" #include "RISCVSubtarget.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DetectDeadLanes.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -49,12 +58,14 @@ class RISCVInitUndef : public MachineFunctionPass { const RISCVSubtarget *ST; const TargetRegisterInfo *TRI; + // Newly added vregs, assumed to be fully rewritten + SmallSet<Register, 8> NewRegs; + SmallVector<MachineInstr *, 8> DeadInsts; + public: static char ID; - RISCVInitUndef() : MachineFunctionPass(ID) { - initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry()); - } + RISCVInitUndef() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -67,13 +78,13 @@ public: private: bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, const DeadLaneDetector &DLD); - bool handleImplicitDef(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &Inst); bool isVectorRegClass(const Register R); const TargetRegisterClass * getVRLargestSuperClass(const TargetRegisterClass *RC) const; bool handleSubReg(MachineFunction &MF, MachineInstr &MI, const DeadLaneDetector &DLD); + bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO); + bool handleReg(MachineInstr *MI); }; } // end anonymous namespace @@ -118,65 +129,38 @@ static unsigned getUndefInitOpcode(unsigned RegClassID) { } } -bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &Inst) { - const TargetRegisterInfo &TRI = - *MBB.getParent()->getSubtarget().getRegisterInfo(); - - assert(Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF); - - Register Reg = Inst->getOperand(0).getReg(); - if (!Reg.isVirtual()) - return false; - - bool 
NeedPseudoInit = false; - SmallVector<MachineOperand *, 1> UseMOs; - for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { - MachineInstr *UserMI = MO.getParent(); - - bool HasEarlyClobber = false; - bool TiedToDef = false; - for (MachineOperand &UserMO : UserMI->operands()) { - if (!UserMO.isReg()) - continue; - if (UserMO.isEarlyClobber()) - HasEarlyClobber = true; - if (UserMO.isUse() && UserMO.isTied() && - TRI.regsOverlap(UserMO.getReg(), Reg)) - TiedToDef = true; - } - if (HasEarlyClobber && !TiedToDef) { - NeedPseudoInit = true; - UseMOs.push_back(&MO); - } - } - - if (!NeedPseudoInit) - return false; - - LLVM_DEBUG( - dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register " - << Reg << '\n'); - - unsigned RegClassID = getVRLargestSuperClass(MRI->getRegClass(Reg))->getID(); - unsigned Opcode = getUndefInitOpcode(RegClassID); - - BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), Reg); - - Inst = MBB.erase(Inst); - - for (auto MO : UseMOs) - MO->setIsUndef(false); - - return true; -} - static bool isEarlyClobberMI(MachineInstr &MI) { return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) { return DefMO.isReg() && DefMO.isEarlyClobber(); }); } +static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) { + for (auto &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getOpcode() == TargetOpcode::IMPLICIT_DEF) + return true; + } + return false; +} + +bool RISCVInitUndef::handleReg(MachineInstr *MI) { + bool Changed = false; + for (auto &UseMO : MI->uses()) { + if (!UseMO.isReg()) + continue; + if (UseMO.isTied()) + continue; + if (!UseMO.getReg().isVirtual()) + continue; + if (!isVectorRegClass(UseMO.getReg())) + continue; + + if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI)) + Changed |= fixupIllOperand(MI, UseMO); + } + return Changed; +} + bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, const DeadLaneDetector &DLD) { bool Changed = false; @@ -186,8 +170,12 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, continue; if (!UseMO.getReg().isVirtual()) continue; + if (UseMO.isTied()) + continue; Register Reg = UseMO.getReg(); + if (NewRegs.count(Reg)) + continue; DeadLaneDetector::VRegInfo Info = DLD.getVRegInfo(Register::virtReg2Index(Reg)); @@ -235,18 +223,53 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, return Changed; } +bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { + + LLVM_DEBUG( + dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register " + << MO.getReg() << '\n'); + + const TargetRegisterClass *TargetRegClass = + getVRLargestSuperClass(MRI->getRegClass(MO.getReg())); + unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID()); + Register NewReg = MRI->createVirtualRegister(TargetRegClass); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg); + MO.setReg(NewReg); + if (MO.isUndef()) + MO.setIsUndef(false); + return true; +} + bool RISCVInitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, const DeadLaneDetector &DLD) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { MachineInstr &MI = *I; - if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI)) - Changed |= handleSubReg(MF, MI, DLD); - if (MI.isImplicitDef()) { - auto DstReg = MI.getOperand(0).getReg(); - if (isVectorRegClass(DstReg)) - Changed |= handleImplicitDef(MBB, I); + + // If we used NoReg to represent the passthru, switch this back to being 
+ // an IMPLICIT_DEF before TwoAddressInstructions. + unsigned UseOpIdx; + if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { + MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == RISCV::NoRegister) { + const TargetRegisterClass *RC = + TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); + Register NewDest = MRI->createVirtualRegister(RC); + // We don't have a way to update dead lanes, so keep track of the + // new register so that we avoid querying it later. + NewRegs.insert(NewDest); + BuildMI(MBB, I, I->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), NewDest); + UseMO.setReg(NewDest); + Changed = true; + } + } + + if (isEarlyClobberMI(MI)) { + if (ST->enableSubRegLiveness()) + Changed |= handleSubReg(MF, MI, DLD); + Changed |= handleReg(&MI); } } return Changed; @@ -268,6 +291,10 @@ bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &BB : MF) Changed |= processBasicBlock(MF, BB, DLD); + for (auto *DeadMI : DeadInsts) + DeadMI->eraseFromParent(); + DeadInsts.clear(); + return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index c3ba4c1e7fdb..a3c19115bd31 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -14,6 +14,7 @@ #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -84,10 +85,11 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const RISCVFrameLowering *TFI = getFrameLowering(MF); BitVector Reserved(getNumRegs()); + auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); // Mark any registers requested to be reserved as such for (size_t Reg = 0; Reg < getNumRegs(); Reg++) { - if (MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(Reg)) + if (Subtarget.isRegisterReservedByUser(Reg)) markSuperRegs(Reserved, Reg); } @@ -118,6 +120,13 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, RISCV::FRM); markSuperRegs(Reserved, RISCV::FFLAGS); + if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { + if (Subtarget.isRVE()) + report_fatal_error("Graal reserved registers do not exist in RVE"); + markSuperRegs(Reserved, RISCV::X23); + markSuperRegs(Reserved, RISCV::X27); + } + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } @@ -132,7 +141,7 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const { } // Frame indexes representing locations of CSRs which are given a fixed location -// by save/restore libcalls. +// by save/restore libcalls or Zcmp Push/Pop. static const std::pair<unsigned, int> FixedCSRFIMap[] = { {/*ra*/ RISCV::X1, -1}, {/*s0*/ RISCV::X8, -2}, @@ -290,12 +299,20 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { "Unexpected subreg numbering"); Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(LMUL); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); + // Optimize for constant VLEN. 
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); + if (STI.getRealMinVLen() == STI.getRealMaxVLen()) { + const int64_t VLENB = STI.getRealMinVLen() / 8; + int64_t Offset = VLENB * LMUL; + STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); + } else { + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); + uint32_t ShiftAmount = Log2_32(LMUL); + if (ShiftAmount != 0) + BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) + .addReg(VL) + .addImm(ShiftAmount); + } Register SrcReg = II->getOperand(0).getReg(); Register Base = II->getOperand(1).getReg(); @@ -359,12 +376,20 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { "Unexpected subreg numbering"); Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(LMUL); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); + // Optimize for constant VLEN. + const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); + if (STI.getRealMinVLen() == STI.getRealMaxVLen()) { + const int64_t VLENB = STI.getRealMinVLen() / 8; + int64_t Offset = VLENB * LMUL; + STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); + } else { + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); + uint32_t ShiftAmount = Log2_32(LMUL); + if (ShiftAmount != 0) + BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) + .addReg(VL) + .addImm(ShiftAmount); + } Register DestReg = II->getOperand(0).getReg(); Register Base = II->getOperand(1).getReg(); @@ -435,9 +460,16 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // offset can by construction, at worst, a LUI and a ADD. int64_t Val = Offset.getFixed(); int64_t Lo12 = SignExtend64<12>(Val); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12); - Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12, - Offset.getScalable()); + if ((MI.getOpcode() == RISCV::PREFETCH_I || + MI.getOpcode() == RISCV::PREFETCH_R || + MI.getOpcode() == RISCV::PREFETCH_W) && + (Lo12 & 0b11111) != 0) + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); + else { + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12); + Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12, + Offset.getScalable()); + } } } @@ -655,6 +687,14 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const { if (RC == &RISCV::VMV0RegClass) return &RISCV::VRRegClass; + if (RC == &RISCV::VRNoV0RegClass) + return &RISCV::VRRegClass; + if (RC == &RISCV::VRM2NoV0RegClass) + return &RISCV::VRM2RegClass; + if (RC == &RISCV::VRM4NoV0RegClass) + return &RISCV::VRM4RegClass; + if (RC == &RISCV::VRM8NoV0RegClass) + return &RISCV::VRM8RegClass; return RC; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 0b17f54431ef..c59c9b294d79 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -165,15 +165,6 @@ def SP : GPRRegisterClass<(add X2)>; def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9), (sequence "X%u", 18, 23))>; -// Registers saveable by PUSH/POP instruction in Zcmp extension -def PGPR : RegisterClass<"RISCV", [XLenVT], 32, (add - (sequence "X%u", 8, 9), - (sequence "X%u", 18, 27), - X1 - )> { - let RegInfos = XLenRI; -} - // Floating point registers let RegAltNameIndices = 
[ABIRegAltName] in { def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; @@ -313,6 +304,13 @@ defvar vfloat16m2_t = nxv8f16; defvar vfloat16m4_t = nxv16f16; defvar vfloat16m8_t = nxv32f16; +defvar vbfloat16mf4_t = nxv1bf16; +defvar vbfloat16mf2_t = nxv2bf16; +defvar vbfloat16m1_t = nxv4bf16; +defvar vbfloat16m2_t = nxv8bf16; +defvar vbfloat16m4_t = nxv16bf16; +defvar vbfloat16m8_t = nxv32bf16; + defvar vfloat32mf2_t = nxv1f32; defvar vfloat32m1_t = nxv2f32; defvar vfloat32m2_t = nxv4f32; @@ -339,20 +337,21 @@ defvar LMULList = [1, 2, 4, 8]; // Utility classes for segment load/store. //===----------------------------------------------------------------------===// // The set of legal NF for LMUL = lmul. -// LMUL == 1, NF = 2, 3, 4, 5, 6, 7, 8 +// LMUL <= 1, NF = 2, 3, 4, 5, 6, 7, 8 // LMUL == 2, NF = 2, 3, 4 // LMUL == 4, NF = 2 +// LMUL == 8, no legal NF class NFList<int lmul> { - list<int> L = !cond(!eq(lmul, 1): [2, 3, 4, 5, 6, 7, 8], - !eq(lmul, 2): [2, 3, 4], + list<int> L = !cond(!eq(lmul, 8): [], !eq(lmul, 4): [2], - !eq(lmul, 8): []); + !eq(lmul, 2): [2, 3, 4], + true: [2, 3, 4, 5, 6, 7, 8]); } // Generate [start, end) SubRegIndex list. class SubRegSet<int nf, int lmul> { list<SubRegIndex> L = !foldl([]<SubRegIndex>, - [0, 1, 2, 3, 4, 5, 6, 7], + !range(0, 8), AccList, i, !listconcat(AccList, !if(!lt(i, nf), @@ -380,15 +379,9 @@ class IndexSet<int tuple_index, int nf, int lmul, bit isV0 = false> { !foldl([]<int>, !if(isV0, [0], !cond( - !eq(lmul, 1): - [8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 1, 2, 3, 4, 5, 6, 7], - !eq(lmul, 2): - [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3], - !eq(lmul, 4): - [2, 3, 4, 5, 6, 7, 1])), + !eq(lmul, 1): !listconcat(!range(8, 32), !range(1, 8)), + !eq(lmul, 2): !listconcat(!range(4, 16), !range(1, 4)), + !eq(lmul, 4): !listconcat(!range(2, 8), !range(1, 2)))), L, i, !listconcat(L, !if(!le(!mul(!add(i, tuple_index), lmul), @@ -418,12 +411,11 @@ class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> { } // Vector registers -foreach Index = 0-31 in { +foreach Index = !range(0, 32, 1) in { def V#Index : RISCVReg<Index, "v"#Index>, DwarfRegNum<[!add(Index, 96)]>; } -foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, - 24, 26, 28, 30] in { +foreach Index = !range(0, 32, 2) in { def V#Index#M2 : RISCVRegWithSubRegs<Index, "v"#Index, [!cast<Register>("V"#Index), !cast<Register>("V"#!add(Index, 1))]>, @@ -432,7 +424,7 @@ foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, } } -foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in { +foreach Index = !range(0, 32, 4) in { def V#Index#M4 : RISCVRegWithSubRegs<Index, "v"#Index, [!cast<Register>("V"#Index#"M2"), !cast<Register>("V"#!add(Index, 2)#"M2")]>, @@ -441,7 +433,7 @@ foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in { } } -foreach Index = [0, 8, 16, 24] in { +foreach Index = !range(0, 32, 8) in { def V#Index#M8 : RISCVRegWithSubRegs<Index, "v"#Index, [!cast<Register>("V"#Index#"M4"), !cast<Register>("V"#!add(Index, 4)#"M4")]>, @@ -461,6 +453,7 @@ def VLENB : RISCVReg<0, "vlenb">, def VCSR : RegisterClass<"RISCV", [XLenVT], 32, (add VTYPE, VL, VLENB)> { let RegInfos = XLenRI; + let isAllocatable = 0; } @@ -488,19 +481,23 @@ defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t, vbool32_t, vbool64_t]; defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t, - vfloat16m1_t, vfloat32m1_t, vfloat64m1_t, - vint8mf2_t, vint8mf4_t, vint8mf8_t, - vint16mf2_t, vint16mf4_t, vint32mf2_t, - 
vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t]; + vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t, + vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t, + vint16mf2_t, vint16mf4_t, vint32mf2_t, + vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t, + vbfloat16mf2_t, vfloat32mf2_t]; defvar VM2VTs = [vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t, - vfloat16m2_t, vfloat32m2_t, vfloat64m2_t]; + vfloat16m2_t, vbfloat16m2_t, + vfloat32m2_t, vfloat64m2_t]; defvar VM4VTs = [vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t, - vfloat16m4_t, vfloat32m4_t, vfloat64m4_t]; + vfloat16m4_t, vbfloat16m4_t, + vfloat32m4_t, vfloat64m4_t]; defvar VM8VTs = [vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t, - vfloat16m8_t, vfloat32m8_t, vfloat64m8_t]; + vfloat16m8_t, vbfloat16m8_t, + vfloat32m8_t, vfloat64m8_t]; def VR : VReg<!listconcat(VM1VTs, VMaskVTs), (add (sequence "V%u", 8, 31), diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td index b14cdd40f154..bb9dfe5d0124 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -70,11 +70,11 @@ def : WriteRes<WriteIMul32, [RocketUnitIMul]>; // Worst case latency is used. def : WriteRes<WriteIDiv32, [RocketUnitIDiv]> { let Latency = 34; - let ResourceCycles = [34]; + let ReleaseAtCycles = [34]; } def : WriteRes<WriteIDiv, [RocketUnitIDiv]> { let Latency = 33; - let ResourceCycles = [33]; + let ReleaseAtCycles = [33]; } // Memory @@ -157,16 +157,16 @@ def : WriteRes<WriteFMA64, [RocketUnitFPALU]>; // FP division // FP division unit on Rocket is not pipelined, so set resource cycles to latency. -let Latency = 20, ResourceCycles = [20] in { +let Latency = 20, ReleaseAtCycles = [20] in { def : WriteRes<WriteFDiv32, [RocketUnitFPDivSqrt]>; def : WriteRes<WriteFDiv64, [RocketUnitFPDivSqrt]>; } // FP square root unit on Rocket is not pipelined, so set resource cycles to latency. 
def : WriteRes<WriteFSqrt32, [RocketUnitFPDivSqrt]> { let Latency = 20; - let ResourceCycles = [20]; } + let ReleaseAtCycles = [20]; } def : WriteRes<WriteFSqrt64, [RocketUnitFPDivSqrt]> { let Latency = 25; - let ResourceCycles = [25]; } + let ReleaseAtCycles = [25]; } // Others def : WriteRes<WriteCSR, []>; @@ -206,7 +206,9 @@ def : ReadAdvance<ReadFAdd64, 0>; def : ReadAdvance<ReadFMul32, 0>; def : ReadAdvance<ReadFMul64, 0>; def : ReadAdvance<ReadFMA32, 0>; +def : ReadAdvance<ReadFMA32Addend, 0>; def : ReadAdvance<ReadFMA64, 0>; +def : ReadAdvance<ReadFMA64Addend, 0>; def : ReadAdvance<ReadFDiv32, 0>; def : ReadAdvance<ReadFDiv64, 0>; def : ReadAdvance<ReadFSqrt32, 0>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index e22c05b30b7f..45783d482f3b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -154,12 +154,12 @@ class SiFive7GetReductionCycles<string mx, int sew> { !eq(mx, "MF8") : 1 ); int c = !add( - !div(TwoTimesLMUL, DLEN), + TwoTimesLMUL, !mul(5, !add(4, !logtwo(!div(DLEN, sew)))) ); } -/// Cycles for ordered reductions take approximatley 5*VL cycles +/// Cycles for ordered reductions take approximatley 6*VL cycles class SiFive7GetOrderedReductionCycles<string mx, int sew> { defvar VLEN = 512; // (VLEN * LMUL) / SEW @@ -172,7 +172,7 @@ class SiFive7GetOrderedReductionCycles<string mx, int sew> { !eq(mx, "MF4") : !div(!div(VLEN, 4), sew), !eq(mx, "MF8") : !div(!div(VLEN, 8), sew), ); - int c = !mul(5, VLUpperBound); + int c = !mul(6, VLUpperBound); } class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2> @@ -182,6 +182,8 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2> WriteSHXADD, WriteSHXADD32, WriteRotateImm, WriteRotateImm32, WriteRotateReg, WriteRotateReg32, + WriteSingleBit, WriteSingleBitImm, + WriteBEXT, WriteBEXTI, WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32, WriteCPOP, WriteCPOP32, WriteREV8, WriteORCB, WriteSFB, @@ -206,20 +208,26 @@ def SiFive7Model : SchedMachineModel { // Pipe A can handle memory, integer alu and vector operations. // Pipe B can handle integer alu, control flow, integer multiply and divide, // and floating point computation. -// Pipe V can handle the V extension. +// The V pipeline is modeled by the VCQ, VA, VL, and VS resources. let SchedModel = SiFive7Model in { let BufferSize = 0 in { def SiFive7PipeA : ProcResource<1>; def SiFive7PipeB : ProcResource<1>; -def SiFive7PipeV : ProcResource<1>; -} - -let BufferSize = 1 in { -def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division -def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt -def SiFive7VA : ProcResource<1> { let Super = SiFive7PipeV; } // Arithmetic sequencer -def SiFive7VL : ProcResource<1> { let Super = SiFive7PipeV; } // Load sequencer -def SiFive7VS : ProcResource<1> { let Super = SiFive7PipeV; } // Store sequencer +def SiFive7IDiv : ProcResource<1>; // Int Division +def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt +def SiFive7VA : ProcResource<1>; // Arithmetic sequencer +def SiFive7VL : ProcResource<1>; // Load sequencer +def SiFive7VS : ProcResource<1>; // Store sequencer +// The VCQ accepts instructions from the the A Pipe and holds them until the +// vector unit is ready to dequeue them. 
The unit dequeues up to one instruction +// per cycle, in order, as soon as the sequencer for that type of instruction is +// avaliable. This resource is meant to be used for 1 cycle by all vector +// instructions, to model that only one vector instruction may be dequed at a +// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and +// VS sequencer resources below. Each of them will only accept a single +// instruction at a time and remain busy for the number of cycles associated +// with that instruction. +def SiFive7VCQ : ProcResource<1>; // Vector Command Queue } def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>; @@ -256,11 +264,11 @@ def : WriteRes<WriteIMul32, [SiFive7PipeB]>; // Integer division def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> { let Latency = 66; - let ResourceCycles = [1, 65]; + let ReleaseAtCycles = [1, 65]; } def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> { let Latency = 34; - let ResourceCycles = [1, 33]; + let ReleaseAtCycles = [1, 33]; } // Bitmanip @@ -292,6 +300,16 @@ def : WriteRes<WriteSHXADD, [SiFive7PipeB]>; def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>; } +// Single-bit instructions +// BEXT[I] instruction is available on all ALUs and the other instructions +// are only available on the SiFive7B pipe. +let Latency = 3 in { +def : WriteRes<WriteSingleBit, [SiFive7PipeB]>; +def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>; +def : WriteRes<WriteBEXT, [SiFive7PipeAB]>; +def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>; +} + // Memory def : WriteRes<WriteSTB, [SiFive7PipeA]>; def : WriteRes<WriteSTH, [SiFive7PipeA]>; @@ -336,7 +354,7 @@ def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>; def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>; } -let Latency = 14, ResourceCycles = [1, 13] in { +let Latency = 14, ReleaseAtCycles = [1, 13] in { def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>; def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>; } @@ -353,9 +371,9 @@ def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>; } def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27; - let ResourceCycles = [1, 26]; } + let ReleaseAtCycles = [1, 26]; } def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27; - let ResourceCycles = [1, 26]; } + let ReleaseAtCycles = [1, 26]; } // Double precision let Latency = 7 in { @@ -369,9 +387,9 @@ def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>; } def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56; - let ResourceCycles = [1, 55]; } + let ReleaseAtCycles = [1, 55]; } def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56; - let ResourceCycles = [1, 55]; } + let ReleaseAtCycles = [1, 55]; } // Conversions let Latency = 3 in { @@ -421,21 +439,21 @@ def : WriteRes<WriteVSETVL, [SiFive7PipeA]>; foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VL], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in - defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, 
AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } foreach mx = SchedMxList in { defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in - defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, IsWorstCase>; - let Latency = 1, ResourceCycles = [Cycles] in - defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } // Strided loads and stores operate at one element per cycle and should be @@ -445,81 +463,101 @@ foreach mx = SchedMxList in { // specific suffixes, but since SEW is already encoded in the name of the // resource, we do not need to use LMULSEWXXX constructors. However, we do // use the SEW from the name to determine the number of Cycles. + +// This predicate is true when the rs2 operand of vlse or vsse is x0, false +// otherwise. +def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>; + foreach mx = SchedMxList in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], + 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), + [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } } -foreach mx = SchedMxList in { +// TODO: The MxLists need to be filtered by EEW. We only need to support +// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8 +// since LMUL >= 16/64. 
+foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], + 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), + [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } } -foreach mx = SchedMxList in { +foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], + 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), + [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } } -foreach mx = SchedMxList in { +foreach mx = ["M1", "M2", "M4", "M8"] in { + defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = !add(3, Cycles), ResourceCycles = 
[Cycles] in { - defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>; + defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], + 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), + [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } } // VLD*R is LMUL aware -let Latency = 4, ResourceCycles = [2] in - def : WriteRes<WriteVLD1R, [SiFive7VL]>; -let Latency = 4, ResourceCycles = [4] in - def : WriteRes<WriteVLD2R, [SiFive7VL]>; -let Latency = 4, ResourceCycles = [8] in - def : WriteRes<WriteVLD4R, [SiFive7VL]>; -let Latency = 4, ResourceCycles = [16] in - def : WriteRes<WriteVLD8R, [SiFive7VL]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in + def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in + def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in + def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in + def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>; // VST*R is LMUL aware -let Latency = 1, ResourceCycles = [2] in - def : WriteRes<WriteVST1R, [SiFive7VS]>; -let Latency = 1, ResourceCycles = [4] in - def : WriteRes<WriteVST2R, [SiFive7VS]>; -let Latency = 1, ResourceCycles = [8] in - def : WriteRes<WriteVST4R, [SiFive7VS]>; -let Latency = 1, ResourceCycles = [16] in - def : WriteRes<WriteVST8R, [SiFive7VS]>; +let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in + def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>; +let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in + def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>; +let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in + def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>; +let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in + def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>; // Segmented Loads and Stores // Unit-stride segmented loads and stores are effectively converted into strided @@ -532,22 +570,22 @@ foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; // Does not chain so set latency high - let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in { - defm "" : 
LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VL], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in - defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; foreach nf=3-8 in { defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; // Does not chain so set latency high - let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in - defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } } } @@ -557,15 +595,15 @@ foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; // Does not chain so set latency high - let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; } - let Latency = 1, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; + let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; } } } @@ -575,41 +613,41 @@ foreach mx = SchedMxList in { foreach 
mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VA], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + 
defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } // Mask results can't chain. - let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VA], mx, IsWorstCase>; + let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } foreach mx = SchedMxList in { @@ -617,9 +655,9 @@ foreach mx = SchedMxList in { defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4)); defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - let Latency = Cycles, ResourceCycles = [Cycles] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VA], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VA], mx, sew, IsWorstCase>; + let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } } @@ -628,24 +666,24 @@ foreach mx = SchedMxList in { foreach mx = SchedMxListW in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } // Narrowing foreach mx = SchedMxListW in { defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; defvar IsWorstCase = 
SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } @@ -653,27 +691,27 @@ foreach mx = SchedMxListW in { foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } // Narrowing foreach mx = SchedMxListW in { defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } @@ -681,30 +719,30 @@ foreach mx = SchedMxListW in { foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, 
SchedMxList>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } // Mask results can't chain. 
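The recurring change in this SiFive7 scheduling hunk replaces the single-resource ResourceCycles = [Cycles] form with a two-resource form: the new SiFive7VCQ (vector command queue) is listed ahead of the vector unit, with AcquireAtCycles = [0, 1] and ReleaseAtCycles = [1, !add(1, Cycles)]. ReleaseAtCycles is the new name for ResourceCycles (the Syntacore SCR1 hunk further down shows the plain rename), and AcquireAtCycles gives the cycle at which each listed resource is first reserved, so resource i is held over the half-open interval [AcquireAtCycles[i], ReleaseAtCycles[i]). A minimal C++ sketch of that reading follows; the helper name and the example Cycles value are made up for illustration and are not part of the patch:

    #include <cstdio>
    #include <vector>

    struct ResourceUse {
      const char *Name;
      unsigned AcquireAt; // first cycle the resource is reserved
      unsigned ReleaseAt; // cycle at which it is released (exclusive)
    };

    // AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, 1 + Cycles] for
    // [SiFive7VCQ, SiFive7VA]: the command-queue entry is busy for one cycle,
    // the arithmetic unit for the following `Cycles` cycles.
    static std::vector<ResourceUse> sifive7VectorALUOp(unsigned Cycles) {
      return {{"SiFive7VCQ", 0, 1}, {"SiFive7VA", 1, 1 + Cycles}};
    }

    int main() {
      for (const ResourceUse &RU : sifive7VectorALUOp(/*Cycles=*/8))
        std::printf("%-10s busy over cycles [%u, %u)\n", RU.Name, RU.AcquireAt,
                    RU.ReleaseAt);
      return 0;
    }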
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VA], mx, IsWorstCase>; + let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } foreach mx = SchedMxListF in { @@ -712,10 +750,10 @@ foreach mx = SchedMxListF in { defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4)); defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; - let Latency = Cycles, ResourceCycles = [Cycles] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VA], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VA], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VA], mx, sew, IsWorstCase>; + let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } } @@ -724,38 +762,38 @@ foreach mx = SchedMxListF in { foreach mx = SchedMxListW in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } foreach mx = SchedMxListFW in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } // 
Narrowing foreach mx = SchedMxListW in { defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } foreach mx = SchedMxListFW in { defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } @@ -764,11 +802,12 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c; defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - let Latency = Cycles, ResourceCycles = [Cycles] in - defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA], - mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VA], - mx, sew, IsWorstCase>; + let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA], + mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VCQ, SiFive7VA], + mx, sew, IsWorstCase>; + } } } @@ -776,8 +815,8 @@ foreach mx = SchedMxListWRed in { foreach sew = SchedSEWSet<mx, 0, 1>.val in { defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c; defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c; - let Latency = Cycles, ResourceCycles = [Cycles] in - defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA], + let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } @@ -786,15 +825,15 @@ foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, 1>.val in { defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c; defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c; - let Latency = RedCycles, ResourceCycles = [RedCycles] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA], + let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA], + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c; - let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA], + let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } @@ 
-803,12 +842,12 @@ foreach mx = SchedMxListFWRed in { foreach sew = SchedSEWSet<mx, 1, 1>.val in { defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c; defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c; - let Latency = RedCycles, ResourceCycles = [RedCycles] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA], + let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c; - let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA], + let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } @@ -817,35 +856,35 @@ foreach mx = SchedMxListFWRed in { foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesVMask<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } // 16. 
Vector Permutation Instructions -let Latency = 4, ResourceCycles = [1] in { - def : WriteRes<WriteVIMovVX, [SiFive7VA]>; - def : WriteRes<WriteVIMovXV, [SiFive7VA]>; - def : WriteRes<WriteVFMovVF, [SiFive7VA]>; - def : WriteRes<WriteVFMovFV, [SiFive7VA]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in { + def : WriteRes<WriteVIMovVX, [SiFive7VCQ, SiFive7VA]>; + def : WriteRes<WriteVIMovXV, [SiFive7VCQ, SiFive7VA]>; + def : WriteRes<WriteVFMovVF, [SiFive7VCQ, SiFive7VA]>; + def : WriteRes<WriteVFMovFV, [SiFive7VCQ, SiFive7VA]>; } foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 8, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VA], mx, IsWorstCase>; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } @@ -853,9 +892,9 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c; defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in { - defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VA], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VA], mx, sew, IsWorstCase>; + let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } } @@ -863,23 +902,23 @@ foreach mx = SchedMxList in { foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 4, ResourceCycles = [Cycles] in { - defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VA], mx, IsWorstCase>; + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } } // VMov*V is LMUL Aware -let Latency = 4, ResourceCycles = [2] in - def : WriteRes<WriteVMov1V, [SiFive7VA]>; -let Latency = 4, ResourceCycles = [4] in - def : WriteRes<WriteVMov2V, [SiFive7VA]>; -let Latency = 4, ResourceCycles = [8] in - def : WriteRes<WriteVMov4V, [SiFive7VA]>; -let Latency = 4, ResourceCycles = [16] in - def : WriteRes<WriteVMov8V, [SiFive7VA]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in + def : WriteRes<WriteVMov1V, [SiFive7VCQ, SiFive7VA]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in + def : WriteRes<WriteVMov2V, 
[SiFive7VCQ, SiFive7VA]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in + def : WriteRes<WriteVMov4V, [SiFive7VCQ, SiFive7VA]>; +let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in + def : WriteRes<WriteVMov8V, [SiFive7VCQ, SiFive7VA]>; // Others def : WriteRes<WriteCSR, [SiFive7PipeB]>; @@ -922,10 +961,13 @@ def : ReadAdvance<ReadFAdd32, 0>; def : ReadAdvance<ReadFAdd64, 0>; def : ReadAdvance<ReadFMul16, 0>; def : ReadAdvance<ReadFMA16, 0>; +def : ReadAdvance<ReadFMA16Addend, 0>; def : ReadAdvance<ReadFMul32, 0>; def : ReadAdvance<ReadFMul64, 0>; def : ReadAdvance<ReadFMA32, 0>; +def : ReadAdvance<ReadFMA32Addend, 0>; def : ReadAdvance<ReadFMA64, 0>; +def : ReadAdvance<ReadFMA64Addend, 0>; def : ReadAdvance<ReadFDiv16, 0>; def : ReadAdvance<ReadFDiv32, 0>; def : ReadAdvance<ReadFDiv64, 0>; @@ -987,6 +1029,9 @@ def : SiFive7AnyToGPRBypass<ReadORCB>; def : SiFive7AnyToGPRBypass<ReadREV8>; def : SiFive7AnyToGPRBypass<ReadSHXADD>; def : SiFive7AnyToGPRBypass<ReadSHXADD32>; +// Single-bit instructions +def : SiFive7AnyToGPRBypass<ReadSingleBit>; +def : SiFive7AnyToGPRBypass<ReadSingleBitImm>; // 6. Configuration-Setting Instructions def : ReadAdvance<ReadVSETVLI, 2>; @@ -1154,11 +1199,16 @@ def : ReadAdvance<ReadVMov8V, 0>; // Others def : ReadAdvance<ReadVMask, 0>; +def : ReadAdvance<ReadVMergeOp_WorstCase, 0>; +foreach mx = SchedMxList in { + def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>; + foreach sew = SchedSEWSet<mx>.val in + def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>; +} //===----------------------------------------------------------------------===// // Unsupported extensions defm : UnsupportedSchedZbc; -defm : UnsupportedSchedZbs; defm : UnsupportedSchedZbkb; defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index 41eefa0c67d9..06ad2075b073 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -55,14 +55,14 @@ def : WriteRes<WriteIMul, [SCR1_MUL]>; def : WriteRes<WriteIMul32, [SCR1_MUL]>; // Integer division: latency 33, inverse throughput 33 -let Latency = 33, ResourceCycles = [33] in { +let Latency = 33, ReleaseAtCycles = [33] in { def : WriteRes<WriteIDiv32, [SCR1_DIV]>; def : WriteRes<WriteIDiv, [SCR1_DIV]>; } // Load/store instructions on SCR1 have latency 2 and inverse throughput 2 // (SCR1_CFG_RV32IMC_MAX includes TCM) -let Latency = 2, ResourceCycles=[2] in { +let Latency = 2, ReleaseAtCycles=[2] in { // Memory def : WriteRes<WriteSTB, [SCR1_LSU]>; def : WriteRes<WriteSTH, [SCR1_LSU]>; @@ -164,7 +164,9 @@ def : ReadAdvance<ReadFAdd64, 0>; def : ReadAdvance<ReadFMul32, 0>; def : ReadAdvance<ReadFMul64, 0>; def : ReadAdvance<ReadFMA32, 0>; +def : ReadAdvance<ReadFMA32Addend, 0>; def : ReadAdvance<ReadFMA64, 0>; +def : ReadAdvance<ReadFMA64Addend, 0>; def : ReadAdvance<ReadFDiv32, 0>; def : ReadAdvance<ReadFDiv64, 0>; def : ReadAdvance<ReadFSqrt32, 0>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td index af318ea5bf68..f6c1b096ad90 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -150,8 +150,11 @@ def ReadFMul16 : SchedRead; // 
16-bit floating point multiply def ReadFMul32 : SchedRead; // 32-bit floating point multiply def ReadFMul64 : SchedRead; // 64-bit floating point multiply def ReadFMA16 : SchedRead; // 16-bit floating point fused multiply-add +def ReadFMA16Addend : SchedRead; // 16-bit floating point fused multiply-add (addend) def ReadFMA32 : SchedRead; // 32-bit floating point fused multiply-add +def ReadFMA32Addend : SchedRead; // 32-bit floating point fused multiply-add (addend) def ReadFMA64 : SchedRead; // 64-bit floating point fused multiply-add +def ReadFMA64Addend : SchedRead; // 64-bit floating point fused multiply-add (addend) def ReadFDiv16 : SchedRead; // 16-bit floating point divide def ReadFDiv32 : SchedRead; // 32-bit floating point divide def ReadFDiv64 : SchedRead; // 64-bit floating point divide diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td index 676383c5a636..29f2ceec25fa 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -62,6 +62,52 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources, def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>; } +// Define a SchedAlias for the SchedWrite associated with (name, mx) whose +// behavior is aliased to a Variant. The Variant has Latency predLad and +// ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has +// Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite +// is created similiarly if IsWorstCase is true. +multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred, + list<ProcResourceKind> resources, + int predLat, list<int> predAcquireCycles, + list<int> predReleaseCycles, int noPredLat, + list<int> noPredAcquireCycles, + list<int> noPredReleaseCycles, + string mx, bit IsWorstCase> { + defvar nameMX = name # "_" # mx; + + // Define the different behaviors + def nameMX # "_Pred" : SchedWriteRes<resources>{ + let Latency = predLat; + let AcquireAtCycles = predAcquireCycles; + let ReleaseAtCycles = predReleaseCycles; + } + def nameMX # "_NoPred" : SchedWriteRes<resources> { + let Latency = noPredLat; + let AcquireAtCycles = noPredAcquireCycles; + let ReleaseAtCycles = noPredReleaseCycles; + } + + // Tie behavior to predicate + def NAME # nameMX # "_Variant" : SchedWriteVariant<[ + SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>, + SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]> + ]>; + def : SchedAlias< + !cast<SchedReadWrite>(nameMX), + !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>; + + if IsWorstCase then { + def NAME # name # "_WorstCase_Variant" : SchedWriteVariant<[ + SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>, + SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]> + ]>; + def : SchedAlias< + !cast<SchedReadWrite>(name # "_WorstCase"), + !cast<SchedReadWrite>(NAME # name # "_WorstCase_Variant")>; + } +} + // Define multiclasses to define SchedWrite, SchedRead, WriteRes, and // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the // SchedMxList variants above. 
Each multiclass is responsible for defining @@ -687,6 +733,12 @@ def ReadVMov8V : SchedRead; // Others def ReadVMask : SchedRead; +def ReadVMergeOp_WorstCase : SchedRead; +foreach mx = SchedMxList in { + def ReadVMergeOp_ # mx : SchedRead; + foreach sew = SchedSEWSet<mx>.val in + def ReadVMergeOp_ # mx # "_E" # sew : SchedRead; +} //===----------------------------------------------------------------------===// /// Define default scheduler resources for V. @@ -1050,6 +1102,12 @@ def : ReadAdvance<ReadVMov8V, 0>; // Others def : ReadAdvance<ReadVMask, 0>; +def : ReadAdvance<ReadVMergeOp_WorstCase, 0>; +foreach mx = SchedMxList in { + def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>; + foreach sew = SchedSEWSet<mx>.val in + def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>; +} } // Unsupported } // UnsupportedSchedV diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index eec2e7359eda..7b64d3cee9c8 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -29,6 +29,12 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "RISCVGenSubtargetInfo.inc" +namespace llvm::RISCVTuneInfoTable { + +#define GET_RISCVTuneInfoTable_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace llvm::RISCVTuneInfoTable + static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness", cl::init(true), cl::Hidden); @@ -48,6 +54,13 @@ static cl::opt<unsigned> RISCVMaxBuildIntsCost( cl::desc("The maximum cost used for building integers."), cl::init(0), cl::Hidden); +static cl::opt<bool> UseAA("riscv-use-aa", cl::init(true), + cl::desc("Enable the use of AA during codegen.")); + +static cl::opt<unsigned> RISCVMinimumJumpTableEntries( + "riscv-min-jump-table-entries", cl::Hidden, + cl::desc("Set minimum number of entries to use a jump table on RISCV")); + void RISCVSubtarget::anchor() {} RISCVSubtarget & @@ -62,12 +75,13 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU, if (TuneCPU.empty()) TuneCPU = CPU; - ParseSubtargetFeatures(CPU, TuneCPU, FS); - if (Is64Bit) { - XLenVT = MVT::i64; - XLen = 64; - } + TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU); + // If there is no TuneInfo for this CPU, we fail back to generic. + if (!TuneInfo) + TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo("generic"); + assert(TuneInfo && "TuneInfo shouldn't be nullptr!"); + ParseSubtargetFeatures(CPU, TuneCPU, FS); TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName); RISCVFeatures::validate(TT, getFeatureBits()); return *this; @@ -175,3 +189,13 @@ void RISCVSubtarget::getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { Mutations.push_back(createRISCVMacroFusionDAGMutation()); } + + /// Enable use of alias analysis during code generation (during MI + /// scheduling, DAGCombine, etc.). +bool RISCVSubtarget::useAA() const { return UseAA; } + +unsigned RISCVSubtarget::getMinimumJumpTableEntries() const { + return RISCVMinimumJumpTableEntries.getNumOccurrences() > 0 + ? 
RISCVMinimumJumpTableEntries + : TuneInfo->MinimumJumpTableEntries; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h index a831beb7edd9..23d56cfa6e4e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include <bitset> #define GET_SUBTARGETINFO_HEADER #include "RISCVGenSubtargetInfo.inc" @@ -32,13 +33,35 @@ namespace llvm { class StringRef; +namespace RISCVTuneInfoTable { + +struct RISCVTuneInfo { + const char *Name; + uint8_t PrefFunctionAlignment; + uint8_t PrefLoopAlignment; + + // Information needed by LoopDataPrefetch. + uint16_t CacheLineSize; + uint16_t PrefetchDistance; + uint16_t MinPrefetchStride; + unsigned MaxPrefetchIterationsAhead; + + unsigned MinimumJumpTableEntries; +}; + +#define GET_RISCVTuneInfoTable_DECL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVTuneInfoTable + class RISCVSubtarget : public RISCVGenSubtargetInfo { public: + // clang-format off enum RISCVProcFamilyEnum : uint8_t { Others, SiFive7, + VentanaVeyron, }; - + // clang-format on private: virtual void anchor(); @@ -48,16 +71,13 @@ private: bool ATTRIBUTE = DEFAULT; #include "RISCVGenSubtargetInfo.inc" - unsigned XLen = 32; unsigned ZvlLen = 0; - MVT XLenVT = MVT::i32; unsigned RVVVectorBitsMin; unsigned RVVVectorBitsMax; uint8_t MaxInterleaveFactor = 2; RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister; - Align PrefFunctionAlignment; - Align PrefLoopAlignment; + const RISCVTuneInfoTable::RISCVTuneInfo *TuneInfo; RISCVFrameLowering FrameLowering; RISCVInstrInfo InstrInfo; @@ -98,8 +118,16 @@ public: } bool enableMachineScheduler() const override { return true; } - Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } - Align getPrefLoopAlignment() const { return PrefLoopAlignment; } + bool enablePostRAScheduler() const override { + return getSchedModel().PostRAScheduler || UsePostRAScheduler; + } + + Align getPrefFunctionAlignment() const { + return Align(TuneInfo->PrefFunctionAlignment); + } + Align getPrefLoopAlignment() const { + return Align(TuneInfo->PrefLoopAlignment); + } /// Returns RISC-V processor family. /// Avoid this function! CPU specifics should be kept local to this class @@ -124,12 +152,15 @@ public: return hasStdExtZfhOrZfhmin() || hasStdExtZhinxOrZhinxmin(); } bool hasHalfFPLoadStoreMove() const { - return HasStdExtZfh || HasStdExtZfhmin || HasStdExtZfbfmin || - HasStdExtZvfbfwma; + return hasStdExtZfhOrZfhmin() || HasStdExtZfbfmin; } bool is64Bit() const { return IsRV64; } - MVT getXLenVT() const { return XLenVT; } - unsigned getXLen() const { return XLen; } + MVT getXLenVT() const { + return is64Bit() ? MVT::i64 : MVT::i32; + } + unsigned getXLen() const { + return is64Bit() ? 64 : 32; + } unsigned getFLen() const { if (HasStdExtD) return 64; @@ -139,7 +170,7 @@ public: return 0; } - unsigned getELEN() const { + unsigned getELen() const { assert(hasVInstructions() && "Expected V extension"); return hasVInstructionsI64() ? 
64 : 32; } @@ -162,16 +193,21 @@ public: return UserReservedRegister[i]; } - bool hasMacroFusion() const { return hasLUIADDIFusion(); } + bool hasMacroFusion() const { + return hasLUIADDIFusion() || hasAUIPCADDIFusion() || + hasShiftedZExtFusion() || hasLDADDFusion(); + } // Vector codegen related methods. bool hasVInstructions() const { return HasStdExtZve32x; } bool hasVInstructionsI64() const { return HasStdExtZve64x; } + bool hasVInstructionsF16Minimal() const { + return HasStdExtZvfhmin || HasStdExtZvfh; + } bool hasVInstructionsF16() const { return HasStdExtZvfh; } - // FIXME: Consider Zfinx in the future - bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; } - // FIXME: Consider Zdinx in the future - bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; } + bool hasVInstructionsBF16() const { return HasStdExtZvfbfmin; } + bool hasVInstructionsF32() const { return HasStdExtZve32f; } + bool hasVInstructionsF64() const { return HasStdExtZve64d; } // F16 and F64 both require F32. bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); } bool hasVInstructionsFullMultiply() const { return HasStdExtV; } @@ -222,6 +258,26 @@ public: void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const override; + + bool useAA() const override; + + unsigned getCacheLineSize() const override { + return TuneInfo->CacheLineSize; + }; + unsigned getPrefetchDistance() const override { + return TuneInfo->PrefetchDistance; + }; + unsigned getMinPrefetchStride(unsigned NumMemAccesses, + unsigned NumStridedMemAccesses, + unsigned NumPrefetches, + bool HasCall) const override { + return TuneInfo->MinPrefetchStride; + }; + unsigned getMaxPrefetchIterationsAhead() const override { + return TuneInfo->MaxPrefetchIterationsAhead; + }; + + unsigned getMinimumJumpTableEntries() const; }; } // End llvm namespace diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 59dac5c7b57d..3abdb6003659 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" @@ -34,6 +35,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" #include <optional> using namespace llvm; @@ -71,21 +73,55 @@ static cl::opt<bool> EnableRISCVCopyPropagation( cl::desc("Enable the copy propagation with RISC-V copy instr"), cl::init(true), cl::Hidden); +static cl::opt<bool> EnableRISCVDeadRegisterElimination( + "riscv-enable-dead-defs", cl::Hidden, + cl::desc("Enable the pass that removes dead" + " definitons and replaces stores to" + " them with stores to x0"), + cl::init(true)); + +static cl::opt<bool> + EnableSinkFold("riscv-enable-sink-fold", + cl::desc("Enable sinking and folding of instruction copies"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> + EnableLoopDataPrefetch("riscv-enable-loop-data-prefetch", cl::Hidden, + cl::desc("Enable the loop data prefetch pass"), + cl::init(true)); + +static cl::opt<bool> + EnableSplitRegAlloc("riscv-split-regalloc", 
cl::Hidden, + cl::desc("Enable Split RegisterAlloc for RVV"), + cl::init(true)); + +static cl::opt<bool> EnableMISchedLoadClustering( + "riscv-misched-load-clustering", cl::Hidden, + cl::desc("Enable load clustering in the machine scheduler"), + cl::init(false)); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target()); RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target()); auto *PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); + initializeRISCVO0PreLegalizerCombinerPass(*PR); + initializeRISCVPreLegalizerCombinerPass(*PR); + initializeRISCVPostLegalizerCombinerPass(*PR); initializeKCFIPass(*PR); + initializeRISCVDeadRegisterDefinitionsPass(*PR); initializeRISCVMakeCompressibleOptPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); initializeRISCVCodeGenPreparePass(*PR); + initializeRISCVPostRAExpandPseudoPass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVOptWInstrsPass(*PR); initializeRISCVPreRAExpandPseudoPass(*PR); initializeRISCVExpandPseudoPass(*PR); + initializeRISCVFoldMasksPass(*PR); initializeRISCVInsertVSETVLIPass(*PR); initializeRISCVInsertReadWriteCSRPass(*PR); + initializeRISCVInsertWriteVXRMPass(*PR); initializeRISCVDAGToDAGISelPass(*PR); initializeRISCVInitUndefPass(*PR); initializeRISCVMoveMergePass(*PR); @@ -109,7 +145,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, const TargetOptions &Options, std::optional<Reloc::Model> RM, std::optional<CodeModel::Model> CM, - CodeGenOpt::Level OL, bool JIT) + CodeGenOptLevel OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), @@ -177,13 +213,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { llvm::bit_floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 
0 : RVVBitsMax); SmallString<512> Key; - Key += "RVVMin"; - Key += std::to_string(RVVBitsMin); - Key += "RVVMax"; - Key += std::to_string(RVVBitsMax); - Key += CPU; - Key += TuneCPU; - Key += FS; + raw_svector_ostream(Key) << "RVVMin" << RVVBitsMin << "RVVMax" << RVVBitsMax + << CPU << TuneCPU << FS; auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any @@ -228,10 +259,84 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, } namespace { + +class RVVRegisterRegAlloc : public RegisterRegAllocBase<RVVRegisterRegAlloc> { +public: + RVVRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C) + : RegisterRegAllocBase(N, D, C) {} +}; + +static bool onlyAllocateRVVReg(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) { + return RISCV::VRRegClass.hasSubClassEq(&RC) || + RISCV::VRM2RegClass.hasSubClassEq(&RC) || + RISCV::VRM4RegClass.hasSubClassEq(&RC) || + RISCV::VRM8RegClass.hasSubClassEq(&RC) || + RISCV::VRN2M1RegClass.hasSubClassEq(&RC) || + RISCV::VRN2M2RegClass.hasSubClassEq(&RC) || + RISCV::VRN2M4RegClass.hasSubClassEq(&RC) || + RISCV::VRN3M1RegClass.hasSubClassEq(&RC) || + RISCV::VRN3M2RegClass.hasSubClassEq(&RC) || + RISCV::VRN4M1RegClass.hasSubClassEq(&RC) || + RISCV::VRN4M2RegClass.hasSubClassEq(&RC) || + RISCV::VRN5M1RegClass.hasSubClassEq(&RC) || + RISCV::VRN6M1RegClass.hasSubClassEq(&RC) || + RISCV::VRN7M1RegClass.hasSubClassEq(&RC) || + RISCV::VRN8M1RegClass.hasSubClassEq(&RC); +} + +static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } + +static llvm::once_flag InitializeDefaultRVVRegisterAllocatorFlag; + +/// -riscv-rvv-regalloc=<fast|basic|greedy> command line option. +/// This option could designate the rvv register allocator only. +/// For example: -riscv-rvv-regalloc=basic +static cl::opt<RVVRegisterRegAlloc::FunctionPassCtor, false, + RegisterPassParser<RVVRegisterRegAlloc>> + RVVRegAlloc("riscv-rvv-regalloc", cl::Hidden, + cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use for RVV register.")); + +static void initializeDefaultRVVRegisterAllocatorOnce() { + RegisterRegAlloc::FunctionPassCtor Ctor = RVVRegisterRegAlloc::getDefault(); + + if (!Ctor) { + Ctor = RVVRegAlloc; + RVVRegisterRegAlloc::setDefault(RVVRegAlloc); + } +} + +static FunctionPass *createBasicRVVRegisterAllocator() { + return createBasicRegisterAllocator(onlyAllocateRVVReg); +} + +static FunctionPass *createGreedyRVVRegisterAllocator() { + return createGreedyRegisterAllocator(onlyAllocateRVVReg); +} + +static FunctionPass *createFastRVVRegisterAllocator() { + return createFastRegisterAllocator(onlyAllocateRVVReg, false); +} + +static RVVRegisterRegAlloc basicRegAllocRVVReg("basic", + "basic register allocator", + createBasicRVVRegisterAllocator); +static RVVRegisterRegAlloc + greedyRegAllocRVVReg("greedy", "greedy register allocator", + createGreedyRVVRegisterAllocator); + +static RVVRegisterRegAlloc fastRegAllocRVVReg("fast", "fast register allocator", + createFastRVVRegisterAllocator); + class RISCVPassConfig : public TargetPassConfig { public: RISCVPassConfig(RISCVTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + if (TM.getOptLevel() != CodeGenOptLevel::None) + substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); + setEnableSinkAndFold(EnableSinkFold); + } RISCVTargetMachine &getRISCVTargetMachine() const { return getTM<RISCVTargetMachine>(); @@ -240,12 +345,16 @@ public: ScheduleDAGInstrs * 
createMachineScheduler(MachineSchedContext *C) const override { const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); + ScheduleDAGMILive *DAG = nullptr; + if (EnableMISchedLoadClustering) { + DAG = createGenericSchedLive(C); + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + } if (ST.hasMacroFusion()) { - ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG = DAG ? DAG : createGenericSchedLive(C); DAG->addMutation(createRISCVMacroFusionDAGMutation()); - return DAG; } - return nullptr; + return DAG; } ScheduleDAGInstrs * @@ -263,16 +372,22 @@ public: bool addPreISel() override; bool addInstSelector() override; bool addIRTranslator() override; + void addPreLegalizeMachineIR() override; bool addLegalizeMachineIR() override; + void addPreRegBankSelect() override; bool addRegBankSelect() override; bool addGlobalInstructionSelect() override; void addPreEmitPass() override; void addPreEmitPass2() override; void addPreSched2() override; void addMachineSSAOptimization() override; + FunctionPass *createRVVRegAllocPass(bool Optimized); + bool addRegAssignAndRewriteFast() override; + bool addRegAssignAndRewriteOptimized() override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addOptimizedRegAlloc() override; + void addFastRegAlloc() override; }; } // namespace @@ -280,10 +395,42 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { return new RISCVPassConfig(*this, PM); } +FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) { + // Initialize the global default. + llvm::call_once(InitializeDefaultRVVRegisterAllocatorFlag, + initializeDefaultRVVRegisterAllocatorOnce); + + RegisterRegAlloc::FunctionPassCtor Ctor = RVVRegisterRegAlloc::getDefault(); + if (Ctor != useDefaultRegisterAllocator) + return Ctor(); + + if (Optimized) + return createGreedyRVVRegisterAllocator(); + + return createFastRVVRegisterAllocator(); +} + +bool RISCVPassConfig::addRegAssignAndRewriteFast() { + if (EnableSplitRegAlloc) + addPass(createRVVRegAllocPass(false)); + return TargetPassConfig::addRegAssignAndRewriteFast(); +} + +bool RISCVPassConfig::addRegAssignAndRewriteOptimized() { + if (EnableSplitRegAlloc) { + addPass(createRVVRegAllocPass(true)); + addPass(createVirtRegRewriter(false)); + } + return TargetPassConfig::addRegAssignAndRewriteOptimized(); +} + void RISCVPassConfig::addIRPasses() { addPass(createAtomicExpandPass()); - if (getOptLevel() != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOptLevel::None) { + if (EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); + addPass(createRISCVGatherScatterLoweringPass()); addPass(createInterleavedAccessPass()); addPass(createRISCVCodeGenPreparePass()); @@ -293,7 +440,7 @@ void RISCVPassConfig::addIRPasses() { } bool RISCVPassConfig::addPreISel() { - if (TM->getOptLevel() != CodeGenOpt::None) { + if (TM->getOptLevel() != CodeGenOptLevel::None) { // Add a barrier before instruction selection so that we will not get // deleted block address after enabling default outlining. See D99707 for // more details. 
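Two of the larger additions in this RISCVTargetMachine.cpp hunk are worth calling out. First, register allocation for RVV is now split out: with riscv-split-regalloc enabled (the default introduced here), addRegAssignAndRewriteFast and addRegAssignAndRewriteOptimized run a dedicated allocator restricted to the VR/VRM*/VRN* register classes (onlyAllocateRVVReg) before the usual allocation, and the allocator used for that pass can be chosen with the new option documented in the source itself, e.g. -riscv-rvv-regalloc=basic (fast, basic or greedy). Second, the IR pipeline gains an optional loop-data-prefetch pass behind riscv-enable-loop-data-prefetch (default true); its cache-line, distance and stride parameters are the ones surfaced through the new RISCVTuneInfo table added to RISCVSubtarget.{h,cpp} earlier in this diff.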
@@ -320,11 +467,24 @@ bool RISCVPassConfig::addIRTranslator() { return false; } +void RISCVPassConfig::addPreLegalizeMachineIR() { + if (getOptLevel() == CodeGenOptLevel::None) { + addPass(createRISCVO0PreLegalizerCombiner()); + } else { + addPass(createRISCVPreLegalizerCombiner()); + } +} + bool RISCVPassConfig::addLegalizeMachineIR() { addPass(new Legalizer()); return false; } +void RISCVPassConfig::addPreRegBankSelect() { + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createRISCVPostLegalizerCombiner()); +} + bool RISCVPassConfig::addRegBankSelect() { addPass(new RegBankSelect()); return false; @@ -336,6 +496,8 @@ bool RISCVPassConfig::addGlobalInstructionSelect() { } void RISCVPassConfig::addPreSched2() { + addPass(createRISCVPostRAExpandPseudoPass()); + // Emit KCFI checks for indirect calls. addPass(createKCFIPass()); } @@ -349,12 +511,13 @@ void RISCVPassConfig::addPreEmitPass() { // propagation after the machine outliner (which runs after addPreEmitPass) // currently leads to incorrect code-gen, where copies to registers within // outlined functions are removed erroneously. - if (TM->getOptLevel() >= CodeGenOpt::Default && EnableRISCVCopyPropagation) + if (TM->getOptLevel() >= CodeGenOptLevel::Default && + EnableRISCVCopyPropagation) addPass(createMachineCopyPropagationPass(true)); } void RISCVPassConfig::addPreEmitPass2() { - if (TM->getOptLevel() != CodeGenOpt::None) { + if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMoveMergePass()); // Schedule PushPop Optimization before expansion of Pseudo instruction, // ensuring return instruction is detected correctly. @@ -374,32 +537,45 @@ void RISCVPassConfig::addPreEmitPass2() { } void RISCVPassConfig::addMachineSSAOptimization() { + addPass(createRISCVFoldMasksPass()); + TargetPassConfig::addMachineSSAOptimization(); + if (EnableMachineCombiner) addPass(&MachineCombinerID); - if (TM->getTargetTriple().getArch() == Triple::riscv64) { + if (TM->getTargetTriple().isRISCV64()) { addPass(createRISCVOptWInstrsPass()); } } void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVPreRAExpandPseudoPass()); - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() != CodeGenOptLevel::None) addPass(createRISCVMergeBaseOffsetOptPass()); addPass(createRISCVInsertVSETVLIPass()); + if (TM->getOptLevel() != CodeGenOptLevel::None && + EnableRISCVDeadRegisterElimination) + addPass(createRISCVDeadRegisterDefinitionsPass()); addPass(createRISCVInsertReadWriteCSRPass()); + addPass(createRISCVInsertWriteVXRMPass()); } void RISCVPassConfig::addOptimizedRegAlloc() { - if (getOptimizeRegAlloc()) - insertPass(&DetectDeadLanesID, &RISCVInitUndefID); + insertPass(&DetectDeadLanesID, &RISCVInitUndefID); TargetPassConfig::addOptimizedRegAlloc(); } +void RISCVPassConfig::addFastRegAlloc() { + addPass(createRISCVInitUndefPass()); + TargetPassConfig::addFastRegAlloc(); +} + + void RISCVPassConfig::addPostRegAlloc() { - if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) + if (TM->getOptLevel() != CodeGenOptLevel::None && + EnableRedundantCopyElimination) addPass(createRISCVRedundantCopyEliminationPass()); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h index 775422075314..68dfb3c81f2f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h @@ -29,7 +29,7 @@ public: RISCVTargetMachine(const Target &T, const Triple 
&TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional<Reloc::Model> RM, - std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, + std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT); const RISCVSubtarget *getSubtargetImpl(const Function &F) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 62883e962b4c..4614446b2150 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -34,28 +34,6 @@ static cl::opt<unsigned> SLPMaxVF( "exclusively by SLP vectorizer."), cl::Hidden); -InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) { - // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is - // implementation-defined. - if (!VT.isVector()) - return InstructionCost::getInvalid(); - unsigned DLenFactor = ST->getDLenFactor(); - unsigned Cost; - if (VT.isScalableVector()) { - unsigned LMul; - bool Fractional; - std::tie(LMul, Fractional) = - RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT)); - if (Fractional) - Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; - else - Cost = (LMul * DLenFactor); - } else { - Cost = divideCeil(VT.getSizeInBits(), ST->getRealMinVLen() / DLenFactor); - } - return Cost; -} - InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy() && @@ -67,8 +45,7 @@ InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, // Otherwise, we check how many instructions it will take to materialise. const DataLayout &DL = getDataLayout(); - return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), - getST()->getFeatureBits()); + return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *getST()); } // Look for patterns of shift followed by AND that can be turned into a pair of @@ -149,6 +126,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, // Power of 2 is a shift. Negated power of 2 is a shift and a negate. if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2()) return TTI::TCC_Free; + // One more or less than a power of 2 can use SLLI+ADD/SUB. + if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2()) + return TTI::TCC_Free; // FIXME: There is no MULI instruction. Takes12BitImm = true; break; @@ -192,7 +172,9 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, TargetTransformInfo::PopcntSupportKind RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software; + return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip() + ? TTI::PSK_FastHardware + : TTI::PSK_Software; } bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const { @@ -263,19 +245,12 @@ static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C)); } -/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv -/// is generally quadratic in the number of vreg implied by LMUL. Note that -/// operand (index and possibly mask) are handled separately. 
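The removed getLMULCost and getVRGatherVVCost helpers (their call sites below now go through TLI->getLMULCost and TLI->getVRGatherVVCost) make the "quadratic in LMUL" remark concrete. Assuming the versions reached through TLI keep the formula shown in the deleted code, a scalable type at LMUL=4 on a core whose DLEN is half of VLEN (DLenFactor = 2) has getLMULCost = 4 * 2 = 8, so a single vrgather.vv is modelled as

    getVRGatherVVCost = getLMULCost^2 = (4 * 2)^2 = 64

on top of the constant-pool load charged separately for the index operand.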
@@ -192,7 +172,9 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
 TargetTransformInfo::PopcntSupportKind
 RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
+  return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
+             ? TTI::PSK_FastHardware
+             : TTI::PSK_Software;
 }
 
 bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
@@ -263,19 +245,12 @@ static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
   return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
 }
 
-/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
-/// is generally quadratic in the number of vreg implied by LMUL. Note that
-/// operand (index and possibly mask) are handled separately.
-InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) {
-  return getLMULCost(VT) * getLMULCost(VT);
-}
-
 InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                              VectorType *Tp, ArrayRef<int> Mask,
                                              TTI::TargetCostKind CostKind,
                                              int Index, VectorType *SubTp,
                                              ArrayRef<const Value *> Args) {
-  Kind = improveShuffleKindFromMask(Kind, Mask);
+  Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
 
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
 
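The helper removed above made the per-LMUL cost explicit and modeled vrgather.vv as quadratic in that factor; the rest of the diff queries the same quantities through the target-lowering object (TLI) instead. A standalone sketch that mirrors the removed formula, useful for reading the LT.first/LMUL products in the hunks below (plain C++, mirroring the removed code rather than the LLVM API):

// Sketch of the removed getLMULCost()/getVRGatherVVCost() model.
// LMul is the register-group multiplier; Fractional selects mf8/mf4/mf2.
// DLenFactor is 2 when DLEN = VLEN/2 and 1 when DLEN = VLEN.
#include <cassert>

static unsigned lmulCost(unsigned LMul, bool Fractional, unsigned DLenFactor) {
  if (Fractional)
    return LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
  return LMul * DLenFactor;
}

static unsigned vrgatherVVCost(unsigned LMul, bool Fractional,
                               unsigned DLenFactor) {
  unsigned C = lmulCost(LMul, Fractional, DLenFactor);
  return C * C; // quadratic in the number of vector registers touched
}

int main() {
  assert(lmulCost(/*LMul=*/8, /*Fractional=*/false, /*DLenFactor=*/1) == 8);
  assert(vrgatherVVCost(8, false, 1) == 64); // an LMUL=8 gather costs 64
  return 0;
}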
@@ -292,52 +267,86 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // If the size of the element is < ELEN then shuffles of interleaves and
       // deinterleaves of 2 vectors can be lowered into the following
       // sequences
-      if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
+      if (EltTp.getScalarSizeInBits() < ST->getELen()) {
         // Example sequence:
         // vsetivli zero, 4, e8, mf4, ta, ma (ignored)
         // vwaddu.vv v10, v8, v9
         // li a0, -1 (ignored)
         // vwmaccu.vx v10, a0, v9
         if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
-          return 2 * LT.first * getLMULCost(LT.second);
+          return 2 * LT.first * TLI->getLMULCost(LT.second);
 
         if (Mask[0] == 0 || Mask[0] == 1) {
           auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
           // Example sequence:
           // vnsrl.wi v10, v8, 0
           if (equal(DeinterleaveMask, Mask))
-            return LT.first * getLMULCost(LT.second);
+            return LT.first * TLI->getLMULCost(LT.second);
         }
       }
-
-      // vrgather + cost of generating the mask constant.
-      // We model this for an unknown mask with a single vrgather.
-      if (LT.first == 1 &&
-          (LT.second.getScalarSizeInBits() != 8 ||
-           LT.second.getVectorNumElements() <= 256)) {
-        VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
-        InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
-        return IndexCost + getVRGatherVVCost(LT.second);
-      }
     }
-    break;
+    // vrgather + cost of generating the mask constant.
+    // We model this for an unknown mask with a single vrgather.
+    if (LT.second.isFixedLengthVector() && LT.first == 1 &&
+        (LT.second.getScalarSizeInBits() != 8 ||
+         LT.second.getVectorNumElements() <= 256)) {
+      VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
+      InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+      return IndexCost + TLI->getVRGatherVVCost(LT.second);
+    }
+    [[fallthrough]];
   }
   case TTI::SK_Transpose:
   case TTI::SK_PermuteTwoSrc: {
-    if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
-      // 2 x (vrgather + cost of generating the mask constant) + cost of mask
-      // register for the second vrgather. We model this for an unknown
-      // (shuffle) mask.
-      if (LT.first == 1 &&
-          (LT.second.getScalarSizeInBits() != 8 ||
-           LT.second.getVectorNumElements() <= 256)) {
-        auto &C = Tp->getContext();
-        auto EC = Tp->getElementCount();
-        VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
-        VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
-        InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
-        InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
-        return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
+    // 2 x (vrgather + cost of generating the mask constant) + cost of mask
+    // register for the second vrgather. We model this for an unknown
+    // (shuffle) mask.
+    if (LT.second.isFixedLengthVector() && LT.first == 1 &&
+        (LT.second.getScalarSizeInBits() != 8 ||
+         LT.second.getVectorNumElements() <= 256)) {
+      auto &C = Tp->getContext();
+      auto EC = Tp->getElementCount();
+      VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
+      VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
+      InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+      InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
+      return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
+    }
+    [[fallthrough]];
+  }
+  case TTI::SK_Select: {
+    // We are going to permute multiple sources and the result will be in
+    // multiple destinations. Providing an accurate cost only for splits where
+    // the element type remains the same.
+    if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
+        LT.second.isFixedLengthVector() &&
+        LT.second.getVectorElementType().getSizeInBits() ==
+            Tp->getElementType()->getPrimitiveSizeInBits() &&
+        LT.second.getVectorNumElements() <
+            cast<FixedVectorType>(Tp)->getNumElements() &&
+        divideCeil(Mask.size(),
+                   cast<FixedVectorType>(Tp)->getNumElements()) ==
+            static_cast<unsigned>(*LT.first.getValue())) {
+      unsigned NumRegs = *LT.first.getValue();
+      unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
+      unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
+      auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
+
+      InstructionCost Cost = 0;
+      for (unsigned I = 0; I < NumRegs; ++I) {
+        bool IsSingleVector = true;
+        SmallVector<int> SubMask(SubVF, PoisonMaskElem);
+        transform(Mask.slice(I * SubVF,
                              I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
+                  SubMask.begin(), [&](int I) {
+                    bool SingleSubVector = I / VF == 0;
+                    IsSingleVector &= SingleSubVector;
+                    return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
+                  });
+        Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
+                                              : TTI::SK_PermuteTwoSrc,
+                               SubVecTy, SubMask, CostKind, 0, nullptr);
+        return Cost;
       }
     }
     break;
@@ -356,19 +365,19 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslidedown.vi v8, v9, 2
-    return LT.first * getLMULCost(LT.second);
+    return LT.first * TLI->getVSlideCost(LT.second);
   case TTI::SK_InsertSubvector:
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslideup.vi v8, v9, 2
-    return LT.first * getLMULCost(LT.second);
+    return LT.first * TLI->getVSlideCost(LT.second);
   case TTI::SK_Select: {
     // Example sequence:
     // li a0, 90
     // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
     // vmv.s.x v0, a0
     // vmerge.vvm v8, v9, v8, v0
-    return LT.first * 3 * getLMULCost(LT.second);
+    return LT.first * 3 * TLI->getLMULCost(LT.second);
   }
   case TTI::SK_Broadcast: {
     bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -380,7 +389,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
       // vmv.v.x v8, a0
       // vmsne.vi v0, v8, 0
-      return LT.first * getLMULCost(LT.second) * 3;
+      return LT.first * TLI->getLMULCost(LT.second) * 3;
     }
     // Example sequence:
     // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -391,26 +400,24 @@
       // vmv.v.x v8, a0
       // vmsne.vi v0, v8, 0
-      return LT.first * getLMULCost(LT.second) * 6;
+      return LT.first * TLI->getLMULCost(LT.second) * 6;
     }
     if (HasScalar) {
       // Example sequence:
       // vmv.v.x v8, a0
-      return LT.first * getLMULCost(LT.second);
+      return LT.first * TLI->getLMULCost(LT.second);
     }
     // Example sequence:
     // vrgather.vi v9, v8, 0
-    // TODO: vrgather could be slower than vmv.v.x. It is
-    // implementation-dependent.
-    return LT.first * getLMULCost(LT.second);
+    return LT.first * TLI->getVRGatherVICost(LT.second);
   }
   case TTI::SK_Splice:
     // vslidedown+vslideup.
     // TODO: Multiplying by LT.first implies this legalizes into multiple copies
     // of similar code, but I think we expand through memory.
-    return 2 * LT.first * getLMULCost(LT.second);
+    return 2 * LT.first * TLI->getVSlideCost(LT.second);
   case TTI::SK_Reverse: {
     // TODO: Cases to improve here:
     // * Illegal vector types
@@ -430,7 +437,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (LT.second.isFixedLengthVector())
       // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
       LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
-    InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second);
+    InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
     // Mask operation additionally required extend and truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
     return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -495,7 +502,7 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
     InstructionCost Cost = MemCost;
     for (unsigned Index : Indices) {
       FixedVectorType *SubVecTy =
-          FixedVectorType::get(FVTy->getElementType(), VF);
+          FixedVectorType::get(FVTy->getElementType(), VF * Factor);
       auto Mask = createStrideMask(Index, Factor, VF);
       InstructionCost ShuffleCost =
           getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask,
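The interleaved-access hunk above now builds the shuffle source type with VF * Factor elements, which matches the stride mask produced by createStrideMask(Index, Factor, VF): that mask selects every Factor-th lane starting at Index out of the wide, interleaved vector. A small standalone sketch of that mask shape (plain C++ mirroring the semantics, not a call into the LLVM helper):

// Sketch: the stride mask used when costing a de-interleaving shuffle.
// strideMask(Index, Factor, VF) selects lanes Index, Index+Factor, ... out of
// a vector of VF * Factor elements.
#include <cassert>
#include <vector>

static std::vector<int> strideMask(unsigned Index, unsigned Factor,
                                   unsigned VF) {
  std::vector<int> Mask;
  Mask.reserve(VF);
  for (unsigned I = 0; I < VF; ++I)
    Mask.push_back(static_cast<int>(Index + I * Factor));
  return Mask;
}

int main() {
  // Factor = 2, VF = 4: member 1 of each group -> lanes {1, 3, 5, 7} of the
  // 8-element (VF * Factor) interleaved vector.
  assert(strideMask(1, 2, 4) == (std::vector<int>{1, 3, 5, 7}));
  return 0;
}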
@@ -662,6 +669,31 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
     {Intrinsic::rint, MVT::nxv2f64, 7},
     {Intrinsic::rint, MVT::nxv4f64, 7},
     {Intrinsic::rint, MVT::nxv8f64, 7},
+    {Intrinsic::lrint, MVT::v2i32, 1},
+    {Intrinsic::lrint, MVT::v4i32, 1},
+    {Intrinsic::lrint, MVT::v8i32, 1},
+    {Intrinsic::lrint, MVT::v16i32, 1},
+    {Intrinsic::lrint, MVT::nxv1i32, 1},
+    {Intrinsic::lrint, MVT::nxv2i32, 1},
+    {Intrinsic::lrint, MVT::nxv4i32, 1},
+    {Intrinsic::lrint, MVT::nxv8i32, 1},
+    {Intrinsic::lrint, MVT::nxv16i32, 1},
+    {Intrinsic::lrint, MVT::v2i64, 1},
+    {Intrinsic::lrint, MVT::v4i64, 1},
+    {Intrinsic::lrint, MVT::v8i64, 1},
+    {Intrinsic::lrint, MVT::v16i64, 1},
+    {Intrinsic::lrint, MVT::nxv1i64, 1},
+    {Intrinsic::lrint, MVT::nxv2i64, 1},
+    {Intrinsic::lrint, MVT::nxv4i64, 1},
+    {Intrinsic::lrint, MVT::nxv8i64, 1},
+    {Intrinsic::llrint, MVT::v2i64, 1},
+    {Intrinsic::llrint, MVT::v4i64, 1},
+    {Intrinsic::llrint, MVT::v8i64, 1},
+    {Intrinsic::llrint, MVT::v16i64, 1},
+    {Intrinsic::llrint, MVT::nxv1i64, 1},
+    {Intrinsic::llrint, MVT::nxv2i64, 1},
+    {Intrinsic::llrint, MVT::nxv4i64, 1},
+    {Intrinsic::llrint, MVT::nxv8i64, 1},
     {Intrinsic::nearbyint, MVT::v2f32, 9},
     {Intrinsic::nearbyint, MVT::v4f32, 9},
     {Intrinsic::nearbyint, MVT::v8f32, 9},
@@ -1045,6 +1077,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::floor:
   case Intrinsic::trunc:
   case Intrinsic::rint:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint:
   case Intrinsic::round:
   case Intrinsic::roundeven: {
     // These all use the same code.
@@ -1074,6 +1108,12 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       return LT.first;
     break;
   }
+  case Intrinsic::ctpop: {
+    auto LT = getTypeLegalizationCost(RetTy);
+    if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
+      return LT.first;
+    break;
+  }
   case Intrinsic::abs: {
     auto LT = getTypeLegalizationCost(RetTy);
     if (ST->hasVInstructions() && LT.second.isVector()) {
@@ -1142,8 +1182,8 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
 
   // Skip if element size of Dst or Src is bigger than ELEN.
-  if (Src->getScalarSizeInBits() > ST->getELEN() ||
-      Dst->getScalarSizeInBits() > ST->getELEN())
+  if (Src->getScalarSizeInBits() > ST->getELen() ||
+      Dst->getScalarSizeInBits() > ST->getELen())
     return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1226,7 +1266,7 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
     return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
 
   // Skip if scalar size of Ty is bigger than ELEN.
-  if (Ty->getScalarSizeInBits() > ST->getELEN())
+  if (Ty->getScalarSizeInBits() > ST->getELen())
     return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
 
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
@@ -1253,7 +1293,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
     return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
 
   // Skip if scalar size of Ty is bigger than ELEN.
-  if (Ty->getScalarSizeInBits() > ST->getELEN())
+  if (Ty->getScalarSizeInBits() > ST->getELen())
     return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1288,7 +1328,7 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost(
                                            FMF, CostKind);
 
   // Skip if scalar size of ResTy is bigger than ELEN.
-  if (ResTy->getScalarSizeInBits() > ST->getELEN())
+  if (ResTy->getScalarSizeInBits() > ST->getELen())
     return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
                                            FMF, CostKind);
 
@@ -1349,7 +1389,7 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   // handles the LT.first term for us.
   if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
       LT.second.isVector())
-    BaseCost *= getLMULCost(LT.second);
+    BaseCost *= TLI->getLMULCost(LT.second);
   return Cost + BaseCost;
 }
 
@@ -1368,7 +1408,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                      I);
 
   // Skip if scalar size of ValTy is bigger than ELEN.
-  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELEN())
+  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                      I);
 
@@ -1437,6 +1477,15 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                    I);
 }
+InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode,
+                                             TTI::TargetCostKind CostKind,
+                                             const Instruction *I) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return Opcode == Instruction::PHI ? 0 : 1;
+  // Branches are assumed to be predicted.
+  return 0;
+}
+
 InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                  TTI::TargetCostKind CostKind,
                                                  unsigned Index, Value *Op0,
@@ -1451,8 +1500,26 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
 
   // This type is legalized to a scalar type.
-  if (!LT.second.isVector())
-    return 0;
+  if (!LT.second.isVector()) {
+    auto *FixedVecTy = cast<FixedVectorType>(Val);
+    // If Index is a known constant, cost is zero.
+    if (Index != -1U)
+      return 0;
+    // Extract/InsertElement with non-constant index is very costly when
+    // scalarized; estimate cost of loads/stores sequence via the stack:
+    // ExtractElement cost: store vector to stack, load scalar;
+    // InsertElement cost: store vector to stack, store scalar, load vector.
+    Type *ElemTy = FixedVecTy->getElementType();
+    auto NumElems = FixedVecTy->getNumElements();
+    auto Align = DL.getPrefTypeAlign(ElemTy);
+    InstructionCost LoadCost =
+        getMemoryOpCost(Instruction::Load, ElemTy, Align, 0, CostKind);
+    InstructionCost StoreCost =
+        getMemoryOpCost(Instruction::Store, ElemTy, Align, 0, CostKind);
+    return Opcode == Instruction::ExtractElement
+               ? StoreCost * NumElems + LoadCost
+               : (StoreCost + LoadCost) * NumElems + StoreCost;
+  }
 
   // For unsupported scalable vector.
   if (LT.second.isScalableVector() && !LT.first.isValid())
@@ -1461,6 +1528,31 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   if (!isTypeLegal(Val))
     return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
 
+  // Mask vector extract/insert is expanded via e8.
+  if (Val->getScalarSizeInBits() == 1) {
+    VectorType *WideTy =
+      VectorType::get(IntegerType::get(Val->getContext(), 8),
+                      cast<VectorType>(Val)->getElementCount());
+    if (Opcode == Instruction::ExtractElement) {
+      InstructionCost ExtendCost
+        = getCastInstrCost(Instruction::ZExt, WideTy, Val,
+                           TTI::CastContextHint::None, CostKind);
+      InstructionCost ExtractCost
+        = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
+      return ExtendCost + ExtractCost;
+    }
+    InstructionCost ExtendCost
+      = getCastInstrCost(Instruction::ZExt, WideTy, Val,
+                         TTI::CastContextHint::None, CostKind);
+    InstructionCost InsertCost
+      = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
+    InstructionCost TruncCost
+      = getCastInstrCost(Instruction::Trunc, Val, WideTy,
+                         TTI::CastContextHint::None, CostKind);
+    return ExtendCost + InsertCost + TruncCost;
+  }
+
   // In RVV, we could use vslidedown + vmv.x.s to extract element from vector
   // and vslideup + vmv.s.x to insert element to vector.
   unsigned BaseCost = 1;
@@ -1482,30 +1574,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     SlideCost = 1; // With a constant index, we do not need to use addi.
   }
 
-  // Mask vector extract/insert element is different from normal case.
-  if (Val->getScalarSizeInBits() == 1) {
-    // For extractelement, we need the following instructions:
-    // vmv.v.i v8, 0
-    // vmerge.vim v8, v8, 1, v0
-    // vsetivli zero, 1, e8, m2, ta, mu (not count)
-    // vslidedown.vx v8, v8, a0
-    // vmv.x.s a0, v8
-
-    // For insertelement, we need the following instructions:
-    // vsetvli a2, zero, e8, m1, ta, mu (not count)
-    // vmv.s.x v8, a0
-    // vmv.v.i v9, 0
-    // vmerge.vim v9, v9, 1, v0
-    // addi a0, a1, 1
-    // vsetvli zero, a0, e8, m1, tu, mu (not count)
-    // vslideup.vx v9, v8, a1
-    // vsetvli a0, zero, e8, m1, ta, mu (not count)
-    // vand.vi v8, v9, 1
-    // vmsne.vi v0, v8, 0
-
-    // TODO: should we count these special vsetvlis?
-    BaseCost = Opcode == Instruction::InsertElement ? 5 : 3;
-  }
 
   // Extract i64 in the target that has XLEN=32 need more instruction.
   if (Val->getScalarType()->isIntegerTy() &&
       ST->getXLen() < Val->getScalarSizeInBits()) {
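The hunks above replace the old hard-coded i1 special case with a widening-based model and, earlier in the same function, add a scalarization estimate for extracts and inserts with a non-constant index: the vector is spilled to the stack and the element is accessed there. A standalone sketch of the access pattern whose cost that code approximates (plain C++, not the LLVM implementation):

// Sketch: accessing a lane at a runtime index through a stack temporary,
// roughly what the cost model above charges for: ExtractElement = store
// vector + load scalar; InsertElement = store vector + store scalar + load
// vector.
#include <cstdint>
#include <cstring>

struct Vec4 { int32_t Lane[4]; };

static int32_t extractAt(Vec4 V, unsigned Idx) {
  int32_t Stack[4];
  std::memcpy(Stack, V.Lane, sizeof(Stack)); // store the whole vector
  return Stack[Idx];                         // one scalar load
}

static Vec4 insertAt(Vec4 V, unsigned Idx, int32_t Val) {
  int32_t Stack[4];
  std::memcpy(Stack, V.Lane, sizeof(Stack)); // store the whole vector
  Stack[Idx] = Val;                          // one scalar store
  std::memcpy(V.Lane, Stack, sizeof(Stack)); // reload the whole vector
  return V;
}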
@@ -1547,7 +1615,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
                                          Args, CxtI);
 
   // Skip if scalar size of Ty is bigger than ELEN.
-  if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELEN())
+  if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
     return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                          Args, CxtI);
 
@@ -1596,7 +1664,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
   case ISD::FSUB:
   case ISD::FMUL:
   case ISD::FNEG: {
-    return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
+    return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
   }
   default:
     return ConstantMatCost +
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7ffcb4828d0c..efc8350064a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -48,9 +48,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   /// actual target hardware.
   unsigned getEstimatedVLFor(VectorType *Ty);
 
-  /// Return the cost of LMUL. The larger the LMUL, the higher the cost.
-  InstructionCost getLMULCost(MVT VT);
-
   /// Return the cost of accessing a constant pool entry of the specified
   /// type.
   InstructionCost getConstantPoolLoadCost(Type *Ty,
@@ -123,8 +120,6 @@ public:
     return ST->useRVVForFixedLengthVectors() ? 16 : 0;
   }
 
-  InstructionCost getVRGatherVVCost(MVT VT);
-
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,
@@ -174,6 +169,9 @@ public:
                                      TTI::TargetCostKind CostKind,
                                      const Instruction *I = nullptr);
 
+  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                                 const Instruction *I = nullptr);
+
   using BaseT::getVectorInstrCost;
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                      TTI::TargetCostKind CostKind,
@@ -201,7 +199,7 @@ public:
       return false;
 
     EVT ElemType = DataTypeVT.getScalarType();
-    if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
+    if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
       return false;
 
     return TLI->isLegalElementTypeForRVV(ElemType);
@@ -226,7 +224,7 @@ public:
      return false;
 
    EVT ElemType = DataTypeVT.getScalarType();
-    if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
+    if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
      return false;
 
    return TLI->isLegalElementTypeForRVV(ElemType);
@@ -288,9 +286,9 @@ public:
     case RecurKind::UMax:
     case RecurKind::FMin:
     case RecurKind::FMax:
-    case RecurKind::SelectICmp:
-    case RecurKind::SelectFCmp:
     case RecurKind::FMulAdd:
+    case RecurKind::IAnyOf:
+    case RecurKind::FAnyOf:
       return true;
     default:
       return false;
@@ -359,6 +357,10 @@ public:
 
   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                      const TargetTransformInfo::LSRCost &C2);
+
+  bool shouldFoldTerminatingConditionAfterLSR() const {
+    return true;
+  }
 };
 
 } // end namespace llvm
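The header hunks above also rename the unaligned-access predicate (enableUnalignedVectorMem becomes hasFastUnalignedAccess) while keeping the guard's shape: unless the subtarget reports fast unaligned access, the access must be at least element-aligned. A standalone sketch of that guard, with hypothetical helper names rather than the LLVM API:

// Sketch of the alignment guard used by the two legality checks above:
// reject element types wider than the given alignment unless the target is
// known to handle unaligned vector accesses quickly.
#include <cstdint>

static bool isLegalVectorAccess(bool HasFastUnalignedAccess,
                                uint64_t AlignInBytes,
                                uint64_t ElemStoreSizeInBytes) {
  if (!HasFastUnalignedAccess && AlignInBytes < ElemStoreSizeInBytes)
    return false; // would need an element-aligned access
  return true;    // alignment suffices; element-type legality checked elsewhere
}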