author | Dimitry Andric <dim@FreeBSD.org> | 2021-06-13 19:31:46 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-31 18:56:55 +0000
commit | af732203b8f7f006927528db5497f5cbc4c4742a (patch)
tree | 596f112de3b76118552871dbb6114bb7e3e17f40 /contrib/llvm-project/llvm/lib/Target/RISCV
parent | 83dea422ac8d4a8323e64203c2eadaa813768717 (diff)
download | src-af732203b8f7f006927528db5497f5cbc4c4742a.tar.gz, src-af732203b8f7f006927528db5497f5cbc4c4742a.zip
Merge llvm-project 12.0.1 release and follow-up fixes
Merge llvm-project main llvmorg-12-init-17869-g8e464dd76bef
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12-init-17869-g8e464dd76bef, the last commit before the
upstream release/12.x branch was created.
PR: 255570
(cherry picked from commit e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
Merge llvm-project 12.0.0 release
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12.0.0-0-gd28af7c654d8, a.k.a. 12.0.0 release.
PR: 255570
(cherry picked from commit d409305fa3838fb39b38c26fc085fb729b8766d5)
Disable strict-fp for powerpcspe, as it does not work properly yet
Merge commit 5c18d1136665 from llvm git (by Qiu Chaofan):
[SPE] Disable strict-fp for SPE by default
As discussed in PR50385, strict-fp on PowerPC SPE has not been
handled well. This patch disables it by default for SPE.
Reviewed By: nemanjai, vit9696, jhibbits
Differential Revision: https://reviews.llvm.org/D103235
PR: 255570
(cherry picked from commit 715df83abc049b23d9acddc81f2480bd4c056d64)
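For context, a minimal sketch (mine, not from the patch) of the kind of C code
that relies on strict floating-point semantics, which SPE could not yet lower
correctly; under strict-fp the compiler must keep the divide between the two
fenv calls instead of folding or reordering it:

  #include <fenv.h>
  #pragma STDC FENV_ACCESS ON

  double checked_div(double a, double b) {
      feclearexcept(FE_ALL_EXCEPT);   /* reset sticky exception flags */
      double r = a / b;               /* must not move across the fenv calls */
      return fetestexcept(FE_DIVBYZERO) ? 0.0 : r;
  }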
Apply upstream libc++ fix to allow building with devel/xxx-xtoolchain-gcc
Merge commit 52e9d80d5db2 from llvm git (by Jason Liu):
[libc++] add `inline` for __open's definition in ifstream and ofstream
Summary:
When building with gcc on AIX, it seems that gcc does not like
`always_inline` without the `inline` keyword.
So add the `inline` keyword for __open in ifstream and ofstream.
That also makes it consistent with __open in basic_filebuf
(it seems we previously added `inline` there for gcc builds as well).
Differential Revision: https://reviews.llvm.org/D99422
PR: 255570
(cherry picked from commit d099db25464b826c5724cf2fb5b22292bbe15f6e)
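As a standalone C analogue (hypothetical names, not the actual libc++ source),
the pattern the fix addresses looks like this; per the upstream commit, gcc
objects to `always_inline` on a header-defined function unless it is also
declared `inline`:

  /* With only always_inline, gcc (e.g. on AIX) rejects this; adding
   * the inline keyword makes the header-defined function acceptable. */
  static __attribute__((__always_inline__)) inline
  int open_sketch(int fd) { return fd; }

  int main(void) { return open_sketch(0); }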
Undefine HAVE_(DE)REGISTER_FRAME in llvm's config.h on arm
Otherwise, the lli tool (enabled by WITH_CLANG_EXTRAS) won't link on arm,
stating that __register_frame is undefined. This function is normally
provided by libunwind, but explicitly not for the ARM Exception ABI.
Reported by: oh
PR: 255570
(cherry picked from commit f336b45e943c7f9a90ffcea1a6c4c7039e54c73c)
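A hedged sketch of what the change amounts to; the exact preprocessor shape in
the generated config.h may differ:

  /* On arm, libunwind deliberately omits __register_frame /
   * __deregister_frame (ARM EHABI uses a different mechanism), so
   * keep LLVM's JIT from referencing them. */
  #if defined(__arm__)
  #undef HAVE_REGISTER_FRAME
  #undef HAVE_DEREGISTER_FRAME
  #endif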
Merge llvm-project 12.0.1 rc2
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12.0.1-rc2-0-ge7dac564cd0e, a.k.a. 12.0.1 rc2.
PR: 255570
(cherry picked from commit 23408297fbf3089f0388a8873b02fa75ab3f5bb9)
Revert libunwind change to fix backtrace segfault on aarch64
Revert commit 22b615a96593 from llvm git (by Daniel Kiss):
[libunwind] Support for leaf function unwinding.
Unwinding a leaf function is useful in cases where the backtrace finds a
leaf function, for example when it caused a signal.
This patch also adds support for DW_CFA_undefined, because it marks
the end of the frames.
Ryan Prichard provided code for the tests.
Reviewed By: #libunwind, mstorsjo
Differential Revision: https://reviews.llvm.org/D83573
Relanded with the test limited to the x86_64-linux target.
Bisection has shown that this particular upstream commit causes programs
using backtrace(3) on aarch64 to segfault. This affects the lang/rust
port, for instance. Until we can work with upstream to fix this
problem, revert the commit for now.
Reported by: mikael
PR: 256864
(cherry picked from commit 5866c369e4fd917c0d456f0f10b92ee354b82279)
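A minimal sketch of the kind of backtrace(3) use that hit the segfault on
aarch64 (on FreeBSD, link with -lexecinfo):

  #include <execinfo.h>
  #include <stdio.h>
  #include <stdlib.h>

  int main(void) {
      void *frames[32];
      int n = backtrace(frames, 32);            /* walk our own stack */
      char **syms = backtrace_symbols(frames, n);
      for (int i = 0; i < n; i++)
          printf("%s\n", syms[i]);
      free(syms);
      return 0;
  }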
Merge llvm-project 12.0.1 release
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12.0.1-0-gfed41342a82f, a.k.a. 12.0.1 release.
PR: 255570
(cherry picked from commit 4652422eb477731f284b1345afeefef7f269da50)
compiler-rt: build out-of-line LSE atomics helpers for aarch64
Both clang >= 12 and gcc >= 10.1 now default to -moutline-atomics for
aarch64. This requires a bunch of helper functions in libcompiler_rt.a,
to avoid link errors like "undefined symbol: __aarch64_ldadd8_acq_rel".
(Note: of course you can use -mno-outline-atomics as a workaround too,
but this would negate the potential performance benefit of the faster
LSE instructions.)
Bump __FreeBSD_version so ports maintainers can easily detect this.
PR: 257392
(cherry picked from commit cc55ee8009a550810d38777fd6ace9abf3a2f6b4)
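For illustration, any C11 atomic read-modify-write on aarch64 can now lower to
a call into these helpers, which select LSE instructions or an ll/sc fallback
at run time:

  #include <stdatomic.h>

  /* With -moutline-atomics (now the default), this may compile to a
   * call to __aarch64_ldadd8_acq_rel rather than inline atomics, and
   * that symbol must then be satisfied by libcompiler_rt.a. */
  long fetch_add(_Atomic long *p, long v) {
      return atomic_fetch_add(p, v);
  }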
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV')
58 files changed, 11818 insertions, 2648 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 407f980bd35e..dcf7525d7458 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -7,20 +7,18 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVAsmBackend.h" +#include "MCTargetDesc/RISCVBaseInfo.h" +#include "MCTargetDesc/RISCVInstPrinter.h" #include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "MCTargetDesc/RISCVTargetStreamer.h" -#include "RISCVInstrInfo.h" #include "TargetInfo/RISCVTargetInfo.h" -#include "Utils/RISCVBaseInfo.h" -#include "Utils/RISCVMatInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -33,6 +31,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/RISCVAttributes.h" @@ -99,7 +98,7 @@ class RISCVAsmParser : public MCTargetAsmParser { // Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that // synthesize the desired immedate value into the destination register. - void emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out); + void emitLoadImm(MCRegister DestReg, int64_t Value, MCStreamer &Out); // Helper to emit a combination of AUIPC and SecondOpcode. Used to implement // helpers such as emitLoadLocalAddress and emitLoadAddress. @@ -125,6 +124,13 @@ class RISCVAsmParser : public MCTargetAsmParser { void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, MCStreamer &Out, bool HasTmpReg); + // Helper to emit pseudo sign/zero extend instruction. + void emitPseudoExtend(MCInst &Inst, bool SignExtend, int64_t Width, + SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo vmsge{u}.vx instruction. + void emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, MCStreamer &Out); + // Checks that a PseudoAddTPRel is using x4/tp in its second input operand. // Enforcing this using a restricted register class for the second input // operand of PseudoAddTPRel results in a poor diagnostic due to the fact @@ -217,8 +223,7 @@ public: }; static bool classifySymbolRef(const MCExpr *Expr, - RISCVMCExpr::VariantKind &Kind, - int64_t &Addend); + RISCVMCExpr::VariantKind &Kind); RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) @@ -262,7 +267,7 @@ struct RISCVOperand : public MCParsedAsmOperand { bool IsRV64; struct RegOp { - Register RegNum; + MCRegister RegNum; }; struct ImmOp { @@ -277,23 +282,8 @@ struct RISCVOperand : public MCParsedAsmOperand { // e.g.: read/write or user/supervisor/machine privileges. 
}; - enum class VSEW { - SEW_8 = 0, - SEW_16, - SEW_32, - SEW_64, - SEW_128, - SEW_256, - SEW_512, - SEW_1024, - }; - - enum class VLMUL { LMUL_1 = 0, LMUL_2, LMUL_4, LMUL_8 }; - struct VTypeOp { - VSEW Sew; - VLMUL Lmul; - unsigned Encoding; + unsigned Val; }; SMLoc StartLoc, EndLoc; @@ -373,7 +363,7 @@ public: bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); bool IsValid; if (!IsConstantImm) - IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); + IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK); else IsValid = isShiftedInt<N - 1, 1>(Imm); return IsValid && VK == RISCVMCExpr::VK_RISCV_None; @@ -387,7 +377,7 @@ public: // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; - return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) && + return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_RISCV_None; } @@ -397,7 +387,7 @@ public: // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; - return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) && + return RISCVAsmParser::classifySymbolRef(getImm(), VK) && (VK == RISCVMCExpr::VK_RISCV_CALL || VK == RISCVMCExpr::VK_RISCV_CALL_PLT); } @@ -408,7 +398,7 @@ public: // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; - return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) && + return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_RISCV_CALL; } @@ -418,7 +408,7 @@ public: // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; - return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) && + return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_RISCV_TPREL_ADD; } @@ -523,16 +513,6 @@ public: return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } - bool isUImm5NonZero() const { - int64_t Imm; - RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; - if (!isImm()) - return false; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); - return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) && - VK == RISCVMCExpr::VK_RISCV_None; - } - bool isSImm5() const { if (!isImm()) return false; @@ -549,7 +529,7 @@ public: int64_t Imm; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isInt<6>(Imm) && - VK == RISCVMCExpr::VK_RISCV_None; + VK == RISCVMCExpr::VK_RISCV_None; } bool isSImm6NonZero() const { @@ -633,7 +613,7 @@ public: return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); if (!IsConstantImm) - IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); + IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK); else IsValid = isInt<12>(Imm); return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) || @@ -664,7 +644,7 @@ public: return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); if (!IsConstantImm) { - IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); + IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK); return IsValid && (VK == RISCVMCExpr::VK_RISCV_HI || VK == RISCVMCExpr::VK_RISCV_TPREL_HI); } else { @@ -682,7 +662,7 @@ public: return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); if (!IsConstantImm) { - IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); + IsValid = 
RISCVAsmParser::classifySymbolRef(getImm(), VK); return IsValid && (VK == RISCVMCExpr::VK_RISCV_PCREL_HI || VK == RISCVMCExpr::VK_RISCV_GOT_HI || VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI || @@ -730,7 +710,7 @@ public: } StringRef getSysReg() const { - assert(Kind == KindTy::SystemRegister && "Invalid access!"); + assert(Kind == KindTy::SystemRegister && "Invalid type access!"); return StringRef(SysReg.Data, SysReg.Length); } @@ -744,59 +724,25 @@ public: return Tok; } - static StringRef getSEWStr(VSEW Sew) { - switch (Sew) { - case VSEW::SEW_8: - return "e8"; - case VSEW::SEW_16: - return "e16"; - case VSEW::SEW_32: - return "e32"; - case VSEW::SEW_64: - return "e64"; - case VSEW::SEW_128: - return "e128"; - case VSEW::SEW_256: - return "e256"; - case VSEW::SEW_512: - return "e512"; - case VSEW::SEW_1024: - return "e1024"; - } - return ""; - } - - static StringRef getLMULStr(VLMUL Lmul) { - switch (Lmul) { - case VLMUL::LMUL_1: - return "m1"; - case VLMUL::LMUL_2: - return "m2"; - case VLMUL::LMUL_4: - return "m4"; - case VLMUL::LMUL_8: - return "m8"; - } - return ""; - } - - StringRef getVType(SmallString<32> &Buf) const { - assert(Kind == KindTy::VType && "Invalid access!"); - Buf.append(getSEWStr(VType.Sew)); - Buf.append(","); - Buf.append(getLMULStr(VType.Lmul)); - - return Buf.str(); + unsigned getVType() const { + assert(Kind == KindTy::VType && "Invalid type access!"); + return VType.Val; } void print(raw_ostream &OS) const override { + auto RegName = [](unsigned Reg) { + if (Reg) + return RISCVInstPrinter::getRegisterName(Reg); + else + return "noreg"; + }; + switch (Kind) { case KindTy::Immediate: OS << *getImm(); break; case KindTy::Register: - OS << "<register x"; - OS << getReg() << ">"; + OS << "<register " << RegName(getReg()) << ">"; break; case KindTy::Token: OS << "'" << getToken() << "'"; @@ -805,8 +751,9 @@ public: OS << "<sysreg: " << getSysReg() << '>'; break; case KindTy::VType: - SmallString<32> VTypeBuf; - OS << "<vtype: " << getVType(VTypeBuf) << '>'; + OS << "<vtype: "; + RISCVVType::printVType(getVType(), OS); + OS << '>'; break; } } @@ -852,15 +799,10 @@ public: return Op; } - static std::unique_ptr<RISCVOperand> createVType(APInt Sew, APInt Lmul, - SMLoc S, bool IsRV64) { + static std::unique_ptr<RISCVOperand> createVType(unsigned VTypeI, SMLoc S, + bool IsRV64) { auto Op = std::make_unique<RISCVOperand>(KindTy::VType); - Sew.ashrInPlace(3); - unsigned SewLog2 = Sew.logBase2(); - unsigned LmulLog2 = Lmul.logBase2(); - Op->VType.Sew = static_cast<VSEW>(SewLog2); - Op->VType.Lmul = static_cast<VLMUL>(LmulLog2); - Op->VType.Encoding = (SewLog2 << 2) | LmulLog2; + Op->VType.Val = VTypeI; Op->StartLoc = S; Op->IsRV64 = IsRV64; return Op; @@ -889,16 +831,6 @@ public: addExpr(Inst, getImm()); } - void addSImm5Plus1Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - int64_t Imm = 0; - RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; - bool IsConstant = evaluateConstantImm(getImm(), Imm, VK); - assert(IsConstant && "Expect constant value!"); - (void)IsConstant; - Inst.addOperand(MCOperand::createImm(Imm - 1)); - } - void addFenceArgOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // isFenceArg has validated the operand, meaning this cast is safe @@ -925,7 +857,7 @@ public: void addVTypeIOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createImm(VType.Encoding)); + 
Inst.addOperand(MCOperand::createImm(getVType())); } // Returns the rounding mode represented by this RISCVOperand. Should only @@ -952,7 +884,12 @@ public: #define GET_MNEMONIC_SPELL_CHECKER #include "RISCVGenAsmMatcher.inc" -static Register convertFPR64ToFPR32(Register Reg) { +static MCRegister convertFPR64ToFPR16(MCRegister Reg) { + assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); + return Reg - RISCV::F0_D + RISCV::F0_H; +} + +static MCRegister convertFPR64ToFPR32(MCRegister Reg) { assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); return Reg - RISCV::F0_D + RISCV::F0_F; } @@ -963,7 +900,7 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (!Op.isReg()) return Match_InvalidOperand; - Register Reg = Op.getReg(); + MCRegister Reg = Op.getReg(); bool IsRegFPR64 = RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg); bool IsRegFPR64C = @@ -976,6 +913,12 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, Op.Reg.RegNum = convertFPR64ToFPR32(Reg); return Match_Success; } + // As the parser couldn't differentiate an FPR16 from an FPR64, coerce the + // register from FPR64 to FPR16 if necessary. + if (IsRegFPR64 && Kind == MCK_FPR16) { + Op.Reg.RegNum = convertFPR64ToFPR16(Reg); + return Match_Success; + } return Match_InvalidOperand; } @@ -1079,6 +1022,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1); case Match_InvalidUImm5: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1); + case Match_InvalidSImm5: + return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4), + (1 << 4) - 1); case Match_InvalidSImm6: return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5), (1 << 5) - 1); @@ -1181,8 +1127,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); - return Error(ErrorLoc, - "operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8]"); + return Error( + ErrorLoc, + "operand must be " + "e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]"); } case Match_InvalidVMaskRegister: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); @@ -1202,13 +1150,15 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // alternative ABI names), setting RegNo to the matching register. Upon // failure, returns true and sets RegNo to 0. If IsRV32E then registers // x16-x31 will be rejected. -static bool matchRegisterNameHelper(bool IsRV32E, Register &RegNo, +static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo, StringRef Name) { RegNo = MatchRegisterName(Name); - // The 32- and 64-bit FPRs have the same asm name. Check that the initial - // match always matches the 64-bit variant, and not the 32-bit one. + // The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial + // match always matches the 64-bit variant, and not the 16/32-bit one. + assert(!(RegNo >= RISCV::F0_H && RegNo <= RISCV::F31_H)); assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F)); // The default FPR register class is based on the tablegen enum ordering. 
+ static_assert(RISCV::F0_D < RISCV::F0_H, "FPR matching must be updated"); static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated"); if (RegNo == RISCV::NoRegister) RegNo = MatchRegisterAltName(Name); @@ -1233,7 +1183,7 @@ OperandMatchResultTy RISCVAsmParser::tryParseRegister(unsigned &RegNo, RegNo = 0; StringRef Name = getLexer().getTok().getIdentifier(); - if (matchRegisterNameHelper(isRV32E(), (Register &)RegNo, Name)) + if (matchRegisterNameHelper(isRV32E(), (MCRegister &)RegNo, Name)) return MatchOperand_NoMatch; getParser().Lex(); // Eat identifier token. @@ -1265,7 +1215,7 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, return MatchOperand_NoMatch; case AsmToken::Identifier: StringRef Name = getLexer().getTok().getIdentifier(); - Register RegNo; + MCRegister RegNo; matchRegisterNameHelper(isRV32E(), RegNo, Name); if (RegNo == RISCV::NoRegister) { @@ -1549,39 +1499,75 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) { if (getLexer().getKind() != AsmToken::Identifier) return MatchOperand_NoMatch; - // Parse "e8,m1" + // Parse "e8,m1,t[a|u],m[a|u]" StringRef Name = getLexer().getTok().getIdentifier(); if (!Name.consume_front("e")) return MatchOperand_NoMatch; - APInt Sew(16, Name, 10); - if (Sew != 8 && Sew != 16 && Sew != 32 && Sew != 64 && Sew != 128 && - Sew != 256 && Sew != 512 && Sew != 1024) + unsigned Sew; + if (Name.getAsInteger(10, Sew)) + return MatchOperand_NoMatch; + if (!RISCVVType::isValidSEW(Sew)) return MatchOperand_NoMatch; getLexer().Lex(); - if (getLexer().getKind() == AsmToken::EndOfStatement) { - Operands.push_back( - RISCVOperand::createVType(Sew, APInt(16, 1), S, isRV64())); + if (!getLexer().is(AsmToken::Comma)) + return MatchOperand_NoMatch; + getLexer().Lex(); - return MatchOperand_Success; - } + Name = getLexer().getTok().getIdentifier(); + if (!Name.consume_front("m")) + return MatchOperand_NoMatch; + // "m" or "mf" + bool Fractional = Name.consume_front("f"); + unsigned Lmul; + if (Name.getAsInteger(10, Lmul)) + return MatchOperand_NoMatch; + if (!RISCVVType::isValidLMUL(Lmul, Fractional)) + return MatchOperand_NoMatch; + getLexer().Lex(); if (!getLexer().is(AsmToken::Comma)) return MatchOperand_NoMatch; getLexer().Lex(); Name = getLexer().getTok().getIdentifier(); - if (!Name.consume_front("m")) + // ta or tu + bool TailAgnostic; + if (Name == "ta") + TailAgnostic = true; + else if (Name == "tu") + TailAgnostic = false; + else + return MatchOperand_NoMatch; + getLexer().Lex(); + + if (!getLexer().is(AsmToken::Comma)) return MatchOperand_NoMatch; - APInt Lmul(16, Name, 10); - if (Lmul != 1 && Lmul != 2 && Lmul != 4 && Lmul != 8) + getLexer().Lex(); + + Name = getLexer().getTok().getIdentifier(); + // ma or mu + bool MaskAgnostic; + if (Name == "ma") + MaskAgnostic = true; + else if (Name == "mu") + MaskAgnostic = false; + else return MatchOperand_NoMatch; getLexer().Lex(); if (getLexer().getKind() != AsmToken::EndOfStatement) return MatchOperand_NoMatch; - Operands.push_back(RISCVOperand::createVType(Sew, Lmul, S, isRV64())); + unsigned SewLog2 = Log2_32(Sew / 8); + unsigned LmulLog2 = Log2_32(Lmul); + RISCVVSEW VSEW = static_cast<RISCVVSEW>(SewLog2); + RISCVVLMUL VLMUL = + static_cast<RISCVVLMUL>(Fractional ? 
8 - LmulLog2 : LmulLog2); + + unsigned VTypeI = + RISCVVType::encodeVTYPE(VLMUL, VSEW, TailAgnostic, MaskAgnostic); + Operands.push_back(RISCVOperand::createVType(VTypeI, S, isRV64())); return MatchOperand_Success; } @@ -1596,7 +1582,7 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) { Error(getLoc(), "expected '.t' suffix"); return MatchOperand_ParseFail; } - Register RegNo; + MCRegister RegNo; matchRegisterNameHelper(isRV32E(), RegNo, Name); if (RegNo == RISCV::NoRegister) @@ -1788,48 +1774,19 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, } bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr, - RISCVMCExpr::VariantKind &Kind, - int64_t &Addend) { + RISCVMCExpr::VariantKind &Kind) { Kind = RISCVMCExpr::VK_RISCV_None; - Addend = 0; if (const RISCVMCExpr *RE = dyn_cast<RISCVMCExpr>(Expr)) { Kind = RE->getKind(); Expr = RE->getSubExpr(); } - // It's a simple symbol reference or constant with no addend. - if (isa<MCConstantExpr>(Expr) || isa<MCSymbolRefExpr>(Expr)) - return true; - - const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr); - if (!BE) - return false; - - if (!isa<MCSymbolRefExpr>(BE->getLHS())) - return false; - - if (BE->getOpcode() != MCBinaryExpr::Add && - BE->getOpcode() != MCBinaryExpr::Sub) - return false; - - // We are able to support the subtraction of two symbol references - if (BE->getOpcode() == MCBinaryExpr::Sub && - isa<MCSymbolRefExpr>(BE->getRHS())) - return true; - - // See if the addend is a constant, otherwise there's more going - // on here than we can deal with. - auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS()); - if (!AddendExpr) - return false; - - Addend = AddendExpr->getValue(); - if (BE->getOpcode() == MCBinaryExpr::Sub) - Addend = -Addend; - - // It's some symbol reference + a constant addend - return Kind != RISCVMCExpr::VK_RISCV_Invalid; + MCValue Res; + MCFixup Fixup; + if (Expr->evaluateAsRelocatable(Res, nullptr, &Fixup)) + return Res.getRefKind() == RISCVMCExpr::VK_RISCV_None; + return false; } bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) { @@ -2040,7 +1997,33 @@ bool RISCVAsmParser::parseDirectiveAttribute() { else return Error(ValueExprLoc, "bad arch string " + Arch); + // .attribute arch overrides the current architecture, so unset all + // currently enabled extensions + clearFeatureBits(RISCV::FeatureRV32E, "e"); + clearFeatureBits(RISCV::FeatureStdExtM, "m"); + clearFeatureBits(RISCV::FeatureStdExtA, "a"); + clearFeatureBits(RISCV::FeatureStdExtF, "f"); + clearFeatureBits(RISCV::FeatureStdExtD, "d"); + clearFeatureBits(RISCV::FeatureStdExtC, "c"); + clearFeatureBits(RISCV::FeatureStdExtB, "experimental-b"); + clearFeatureBits(RISCV::FeatureStdExtV, "experimental-v"); + clearFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh"); + clearFeatureBits(RISCV::FeatureExtZba, "experimental-zba"); + clearFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb"); + clearFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc"); + clearFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe"); + clearFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf"); + clearFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm"); + clearFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp"); + clearFeatureBits(RISCV::FeatureExtZbproposedc, "experimental-zbproposedc"); + clearFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr"); + clearFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs"); + clearFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt"); + clearFeatureBits(RISCV::FeatureExtZvamo, 
"experimental-zvamo"); + clearFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg"); + while (!Arch.empty()) { + bool DropFirst = true; if (Arch[0] == 'i') clearFeatureBits(RISCV::FeatureRV32E, "e"); else if (Arch[0] == 'e') @@ -2062,19 +2045,57 @@ bool RISCVAsmParser::parseDirectiveAttribute() { setFeatureBits(RISCV::FeatureStdExtD, "d"); } else if (Arch[0] == 'c') { setFeatureBits(RISCV::FeatureStdExtC, "c"); + } else if (Arch[0] == 'b') { + setFeatureBits(RISCV::FeatureStdExtB, "experimental-b"); + } else if (Arch[0] == 'v') { + setFeatureBits(RISCV::FeatureStdExtV, "experimental-v"); + } else if (Arch[0] == 's' || Arch[0] == 'x' || Arch[0] == 'z') { + StringRef Ext = + Arch.take_until([](char c) { return ::isdigit(c) || c == '_'; }); + if (Ext == "zba") + setFeatureBits(RISCV::FeatureExtZba, "experimental-zba"); + else if (Ext == "zbb") + setFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb"); + else if (Ext == "zbc") + setFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc"); + else if (Ext == "zbe") + setFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe"); + else if (Ext == "zbf") + setFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf"); + else if (Ext == "zbm") + setFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm"); + else if (Ext == "zbp") + setFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp"); + else if (Ext == "zbproposedc") + setFeatureBits(RISCV::FeatureExtZbproposedc, + "experimental-zbproposedc"); + else if (Ext == "zbr") + setFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr"); + else if (Ext == "zbs") + setFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs"); + else if (Ext == "zbt") + setFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt"); + else if (Ext == "zfh") + setFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh"); + else if (Ext == "zvamo") + setFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo"); + else if (Ext == "zvlsseg") + setFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg"); + else + return Error(ValueExprLoc, "bad arch string " + Ext); + Arch = Arch.drop_until([](char c) { return ::isdigit(c) || c == '_'; }); + DropFirst = false; } else return Error(ValueExprLoc, "bad arch string " + Arch); - Arch = Arch.drop_front(1); + if (DropFirst) + Arch = Arch.drop_front(1); int major = 0; int minor = 0; Arch.consumeInteger(10, major); Arch.consume_front("p"); Arch.consumeInteger(10, minor); - if (major != 0 || minor != 0) { - Arch = Arch.drop_until([](char c) { return c == '_' || c == '"'; }); - Arch = Arch.drop_while([](char c) { return c == '_'; }); - } + Arch = Arch.drop_while([](char c) { return c == '_'; }); } } @@ -2102,6 +2123,38 @@ bool RISCVAsmParser::parseDirectiveAttribute() { formalArchStr = (Twine(formalArchStr) + "_d2p0").str(); if (getFeatureBits(RISCV::FeatureStdExtC)) formalArchStr = (Twine(formalArchStr) + "_c2p0").str(); + if (getFeatureBits(RISCV::FeatureStdExtB)) + formalArchStr = (Twine(formalArchStr) + "_b0p93").str(); + if (getFeatureBits(RISCV::FeatureStdExtV)) + formalArchStr = (Twine(formalArchStr) + "_v0p10").str(); + if (getFeatureBits(RISCV::FeatureExtZfh)) + formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str(); + if (getFeatureBits(RISCV::FeatureExtZba)) + formalArchStr = (Twine(formalArchStr) + "_zba0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbb)) + formalArchStr = (Twine(formalArchStr) + "_zbb0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbc)) + formalArchStr = (Twine(formalArchStr) + "_zbc0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbe)) + formalArchStr = 
(Twine(formalArchStr) + "_zbe0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbf)) + formalArchStr = (Twine(formalArchStr) + "_zbf0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbm)) + formalArchStr = (Twine(formalArchStr) + "_zbm0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbp)) + formalArchStr = (Twine(formalArchStr) + "_zbp0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbproposedc)) + formalArchStr = (Twine(formalArchStr) + "_zbproposedc0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbr)) + formalArchStr = (Twine(formalArchStr) + "_zbr0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbs)) + formalArchStr = (Twine(formalArchStr) + "_zbs0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZbt)) + formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str(); + if (getFeatureBits(RISCV::FeatureExtZvamo)) + formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str(); + if (getFeatureBits(RISCV::FeatureStdExtZvlsseg)) + formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str(); getTargetStreamer().emitTextAttribute(Tag, formalArchStr); } @@ -2118,12 +2171,12 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) { S.emitInstruction((Res ? CInst : Inst), getSTI()); } -void RISCVAsmParser::emitLoadImm(Register DestReg, int64_t Value, +void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value, MCStreamer &Out) { RISCVMatInt::InstSeq Seq; RISCVMatInt::generateInstSeq(Value, isRV64(), Seq); - Register SrcReg = RISCV::X0; + MCRegister SrcReg = RISCV::X0; for (RISCVMatInt::Inst &Inst : Seq) { if (Inst.Opc == RISCV::LUI) { emitToStreamer( @@ -2149,8 +2202,7 @@ void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg, // OP DestReg, TmpReg, %pcrel_lo(TmpLabel) MCContext &Ctx = getContext(); - MCSymbol *TmpLabel = Ctx.createTempSymbol( - "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false); + MCSymbol *TmpLabel = Ctx.createNamedTempSymbol("pcrel_hi"); Out.emitLabel(TmpLabel); const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx); @@ -2254,6 +2306,88 @@ void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, Opcode, IDLoc, Out); } +void RISCVAsmParser::emitPseudoExtend(MCInst &Inst, bool SignExtend, + int64_t Width, SMLoc IDLoc, + MCStreamer &Out) { + // The sign/zero extend pseudo-instruction does two shifts, with the shift + // amounts dependent on the XLEN. + // + // The expansion looks like this + // + // SLLI rd, rs, XLEN - Width + // SR[A|R]I rd, rd, XLEN - Width + MCOperand DestReg = Inst.getOperand(0); + MCOperand SourceReg = Inst.getOperand(1); + + unsigned SecondOpcode = SignExtend ? RISCV::SRAI : RISCV::SRLI; + int64_t ShAmt = (isRV64() ? 
64 : 32) - Width; + + assert(ShAmt > 0 && "Shift amount must be non-zero."); + + emitToStreamer(Out, MCInstBuilder(RISCV::SLLI) + .addOperand(DestReg) + .addOperand(SourceReg) + .addImm(ShAmt)); + + emitToStreamer(Out, MCInstBuilder(SecondOpcode) + .addOperand(DestReg) + .addOperand(DestReg) + .addImm(ShAmt)); +} + +void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, + MCStreamer &Out) { + if (Inst.getNumOperands() == 3) { + // unmasked va >= x + // + // pseudoinstruction: vmsge{u}.vx vd, va, x + // expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd + emitToStreamer(Out, MCInstBuilder(Opcode) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1)) + .addOperand(Inst.getOperand(2)) + .addReg(RISCV::NoRegister)); + emitToStreamer(Out, MCInstBuilder(RISCV::VMNAND_MM) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(0))); + } else if (Inst.getNumOperands() == 4) { + // masked va >= x, vd != v0 + // + // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t + // expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 + assert(Inst.getOperand(0).getReg() != RISCV::V0 && + "The destination register should not be V0."); + emitToStreamer(Out, MCInstBuilder(Opcode) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1)) + .addOperand(Inst.getOperand(2)) + .addOperand(Inst.getOperand(3))); + emitToStreamer(Out, MCInstBuilder(RISCV::VMXOR_MM) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(0)) + .addReg(RISCV::V0)); + } else if (Inst.getNumOperands() == 5) { + // masked va >= x, vd == v0 + // + // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt + // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt + assert(Inst.getOperand(0).getReg() == RISCV::V0 && + "The destination register should be V0."); + assert(Inst.getOperand(1).getReg() != RISCV::V0 && + "The temporary vector register should not be V0."); + emitToStreamer(Out, MCInstBuilder(Opcode) + .addOperand(Inst.getOperand(1)) + .addOperand(Inst.getOperand(2)) + .addOperand(Inst.getOperand(3)) + .addOperand(Inst.getOperand(4))); + emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1))); + } +} + bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst, OperandVector &Operands) { assert(Inst.getOpcode() == RISCV::PseudoAddTPRel && "Invalid instruction"); @@ -2275,77 +2409,48 @@ std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultMaskRegOp() const { bool RISCVAsmParser::validateInstruction(MCInst &Inst, OperandVector &Operands) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); - unsigned TargetFlags = - (MCID.TSFlags >> RISCV::ConstraintOffset) & RISCV::ConstraintMask; - if (TargetFlags == RISCV::NoConstraint) + unsigned Constraints = + (MCID.TSFlags & RISCVII::ConstraintMask) >> RISCVII::ConstraintShift; + if (Constraints == RISCVII::NoConstraint) return false; unsigned DestReg = Inst.getOperand(0).getReg(); // Operands[1] will be the first operand, DestReg. 
SMLoc Loc = Operands[1]->getStartLoc(); - if ((TargetFlags == RISCV::WidenV) || (TargetFlags == RISCV::WidenW) || - (TargetFlags == RISCV::SlideUp) || (TargetFlags == RISCV::Vrgather) || - (TargetFlags == RISCV::Vcompress)) { - if (TargetFlags != RISCV::WidenW) { - unsigned Src2Reg = Inst.getOperand(1).getReg(); - if (DestReg == Src2Reg) - return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - if (TargetFlags == RISCV::WidenV) { - // Assume DestReg LMUL is 2 at least for widening/narrowing operations. - if (DestReg + 1 == Src2Reg) - return Error(Loc, - "The destination vector register group cannot overlap" - " the source vector register group."); - } - } - if (Inst.getOperand(2).isReg()) { - unsigned Src1Reg = Inst.getOperand(2).getReg(); - if (DestReg == Src1Reg) - return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - if (TargetFlags == RISCV::WidenV || TargetFlags == RISCV::WidenW) { - // Assume DestReg LMUL is 2 at least for widening/narrowing operations. - if (DestReg + 1 == Src1Reg) - return Error(Loc, - "The destination vector register group cannot overlap" - " the source vector register group."); - } - } - if (Inst.getNumOperands() == 4) { - unsigned MaskReg = Inst.getOperand(3).getReg(); - - if (DestReg == MaskReg) - return Error(Loc, "The destination vector register group cannot overlap" - " the mask register."); - } - } else if (TargetFlags == RISCV::Narrow) { - unsigned Src2Reg = Inst.getOperand(1).getReg(); - if (DestReg == Src2Reg) + if (Constraints & RISCVII::VS2Constraint) { + unsigned CheckReg = Inst.getOperand(1).getReg(); + if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" " the source vector register group."); - // Assume Src2Reg LMUL is 2 at least for widening/narrowing operations. - if (DestReg == Src2Reg + 1) + } + if ((Constraints & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) { + unsigned CheckReg = Inst.getOperand(2).getReg(); + if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" " the source vector register group."); - } else if (TargetFlags == RISCV::WidenCvt || TargetFlags == RISCV::Iota) { - unsigned Src2Reg = Inst.getOperand(1).getReg(); - if (DestReg == Src2Reg) + } + if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) { + // vadc, vsbc are special cases. These instructions have no mask register. + // The destination register could not be V0. + unsigned Opcode = Inst.getOpcode(); + if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM || + Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM || + Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM || + Opcode == RISCV::VMERGE_VIM || Opcode == RISCV::VMERGE_VVM || + Opcode == RISCV::VMERGE_VXM) + return Error(Loc, "The destination vector register group cannot be V0."); + + // Regardless masked or unmasked version, the number of operands is the + // same. For example, "viota.m v0, v2" is "viota.m v0, v2, NoRegister" + // actually. We need to check the last operand to ensure whether it is + // masked or not. 
+ unsigned CheckReg = Inst.getOperand(Inst.getNumOperands() - 1).getReg(); + assert((CheckReg == RISCV::V0 || CheckReg == RISCV::NoRegister) && + "Unexpected register for mask operand"); + + if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - if (TargetFlags == RISCV::WidenCvt) { - // Assume DestReg LMUL is 2 at least for widening/narrowing operations. - if (DestReg + 1 == Src2Reg) - return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - } - if (Inst.getNumOperands() == 3) { - unsigned MaskReg = Inst.getOperand(2).getReg(); - - if (DestReg == MaskReg) - return Error(Loc, "The destination vector register group cannot overlap" - " the mask register."); - } + " the mask register."); } return false; } @@ -2359,7 +2464,7 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, default: break; case RISCV::PseudoLI: { - Register Reg = Inst.getOperand(0).getReg(); + MCRegister Reg = Inst.getOperand(0).getReg(); const MCOperand &Op1 = Inst.getOperand(1); if (Op1.isExpr()) { // We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar. @@ -2412,6 +2517,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case RISCV::PseudoLD: emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false); return false; + case RISCV::PseudoFLH: + emitLoadStoreSymbol(Inst, RISCV::FLH, IDLoc, Out, /*HasTmpReg=*/true); + return false; case RISCV::PseudoFLW: emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true); return false; @@ -2430,6 +2538,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case RISCV::PseudoSD: emitLoadStoreSymbol(Inst, RISCV::SD, IDLoc, Out, /*HasTmpReg=*/true); return false; + case RISCV::PseudoFSH: + emitLoadStoreSymbol(Inst, RISCV::FSH, IDLoc, Out, /*HasTmpReg=*/true); + return false; case RISCV::PseudoFSW: emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true); return false; @@ -2440,6 +2551,72 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, if (checkPseudoAddTPRel(Inst, Operands)) return true; break; + case RISCV::PseudoSEXT_B: + emitPseudoExtend(Inst, /*SignExtend=*/true, /*Width=*/8, IDLoc, Out); + return false; + case RISCV::PseudoSEXT_H: + emitPseudoExtend(Inst, /*SignExtend=*/true, /*Width=*/16, IDLoc, Out); + return false; + case RISCV::PseudoZEXT_H: + emitPseudoExtend(Inst, /*SignExtend=*/false, /*Width=*/16, IDLoc, Out); + return false; + case RISCV::PseudoZEXT_W: + emitPseudoExtend(Inst, /*SignExtend=*/false, /*Width=*/32, IDLoc, Out); + return false; + case RISCV::PseudoVMSGEU_VX: + case RISCV::PseudoVMSGEU_VX_M: + case RISCV::PseudoVMSGEU_VX_M_T: + emitVMSGE(Inst, RISCV::VMSLTU_VX, IDLoc, Out); + return false; + case RISCV::PseudoVMSGE_VX: + case RISCV::PseudoVMSGE_VX_M: + case RISCV::PseudoVMSGE_VX_M_T: + emitVMSGE(Inst, RISCV::VMSLT_VX, IDLoc, Out); + return false; + case RISCV::PseudoVMSGE_VI: + case RISCV::PseudoVMSLT_VI: { + // These instructions are signed and so is immediate so we can subtract one + // and change the opcode. + int64_t Imm = Inst.getOperand(2).getImm(); + unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGE_VI ? 
RISCV::VMSGT_VI + : RISCV::VMSLE_VI; + emitToStreamer(Out, MCInstBuilder(Opc) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1)) + .addImm(Imm - 1) + .addOperand(Inst.getOperand(3))); + return false; + } + case RISCV::PseudoVMSGEU_VI: + case RISCV::PseudoVMSLTU_VI: { + int64_t Imm = Inst.getOperand(2).getImm(); + // Unsigned comparisons are tricky because the immediate is signed. If the + // immediate is 0 we can't just subtract one. vmsltu.vi v0, v1, 0 is always + // false, but vmsle.vi v0, v1, -1 is always true. Instead we use + // vmsne v0, v1, v1 which is always false. + if (Imm == 0) { + unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI + ? RISCV::VMSEQ_VV + : RISCV::VMSNE_VV; + emitToStreamer(Out, MCInstBuilder(Opc) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1)) + .addOperand(Inst.getOperand(1)) + .addOperand(Inst.getOperand(3))); + } else { + // Other immediate values can subtract one like signed. + unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI + ? RISCV::VMSGTU_VI + : RISCV::VMSLEU_VI; + emitToStreamer(Out, MCInstBuilder(Opc) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1)) + .addImm(Imm - 1) + .addOperand(Inst.getOperand(3))); + } + + return false; + } } emitToStreamer(Out, Inst); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 37edc19398a5..623552390f53 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -10,10 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "TargetInfo/RISCVTargetInfo.h" -#include "Utils/RISCVBaseInfo.h" -#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" @@ -71,7 +70,18 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 32 || (IsRV32E && RegNo >= 16)) return MCDisassembler::Fail; - Register Reg = RISCV::X0 + RegNo; + MCRegister Reg = RISCV::X0 + RegNo; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + + MCRegister Reg = RISCV::F0_H + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -82,7 +92,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 32) return MCDisassembler::Fail; - Register Reg = RISCV::F0_F + RegNo; + MCRegister Reg = RISCV::F0_F + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -93,7 +103,7 @@ static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 8) { return MCDisassembler::Fail; } - Register Reg = RISCV::F8_F + RegNo; + MCRegister Reg = RISCV::F8_F + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -104,7 +114,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 32) return MCDisassembler::Fail; - Register Reg = RISCV::F0_D + RegNo; + MCRegister Reg = RISCV::F0_D + RegNo; 
Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -115,7 +125,7 @@ static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 8) { return MCDisassembler::Fail; } - Register Reg = RISCV::F8_D + RegNo; + MCRegister Reg = RISCV::F8_D + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -146,7 +156,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 8) return MCDisassembler::Fail; - Register Reg = RISCV::X8 + RegNo; + MCRegister Reg = RISCV::X8 + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -157,14 +167,14 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo, if (RegNo >= 32) return MCDisassembler::Fail; - Register Reg = RISCV::V0 + RegNo; + MCRegister Reg = RISCV::V0 + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - Register Reg = RISCV::NoRegister; + MCRegister Reg = RISCV::NoRegister; switch (RegNo) { default: return MCDisassembler::Fail; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 090132af3585..56991ccf010a 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H +#include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVFixupKinds.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "Utils/RISCVBaseInfo.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 43b1f8b80c5f..fa36234d0f5f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -1,3 +1,16 @@ +//===-- RISCVBaseInfo.cpp - Top level definitions for RISCV MC ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone enum definitions for the RISCV target +// useful for the compiler back-end and the MC libraries. +// +//===----------------------------------------------------------------------===// + #include "RISCVBaseInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Triple.h" @@ -6,7 +19,7 @@ namespace llvm { namespace RISCVSysReg { #define GET_SysRegsList_IMPL -#include "RISCVGenSystemOperands.inc" +#include "RISCVGenSearchableTables.inc" } // namespace RISCVSysReg namespace RISCVABI { @@ -65,7 +78,10 @@ ABI getTargetABI(StringRef ABIName) { // To avoid the BP value clobbered by a function call, we need to choose a // callee saved register to save the value. RV32E only has X8 and X9 as callee // saved registers and X8 will be used as fp. So we choose X9 as bp. 
-Register getBPReg() { return RISCV::X9; } +MCRegister getBPReg() { return RISCV::X9; } + +// Returns the register holding shadow call stack pointer. +MCRegister getSCSPReg() { return RISCV::X18; } } // namespace RISCVABI @@ -78,4 +94,49 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) { } // namespace RISCVFeatures +namespace RISCVVPseudosTable { + +#define GET_RISCVVPseudosTable_IMPL +#include "RISCVGenSearchableTables.inc" + +} // namespace RISCVVPseudosTable + +void RISCVVType::printVType(unsigned VType, raw_ostream &OS) { + RISCVVSEW VSEW = getVSEW(VType); + RISCVVLMUL VLMUL = getVLMUL(VType); + + unsigned Sew = 1 << (static_cast<unsigned>(VSEW) + 3); + OS << "e" << Sew; + + switch (VLMUL) { + case RISCVVLMUL::LMUL_RESERVED: + llvm_unreachable("Unexpected LMUL value!"); + case RISCVVLMUL::LMUL_1: + case RISCVVLMUL::LMUL_2: + case RISCVVLMUL::LMUL_4: + case RISCVVLMUL::LMUL_8: { + unsigned LMul = 1 << static_cast<unsigned>(VLMUL); + OS << ",m" << LMul; + break; + } + case RISCVVLMUL::LMUL_F2: + case RISCVVLMUL::LMUL_F4: + case RISCVVLMUL::LMUL_F8: { + unsigned LMul = 1 << (8 - static_cast<unsigned>(VLMUL)); + OS << ",mf" << LMul; + break; + } + } + + if (isTailAgnostic(VType)) + OS << ",ta"; + else + OS << ",tu"; + + if (isMaskAgnostic(VType)) + OS << ",ma"; + else + OS << ",mu"; +} + } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h new file mode 100644 index 000000000000..6c9f860c204c --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -0,0 +1,406 @@ +//===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone enum definitions for the RISCV target +// useful for the compiler back-end and the MC libraries. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H +#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H + +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/MachineValueType.h" + +namespace llvm { + +// RISCVII - This namespace holds all of the target specific flags that +// instruction info tracks. All definitions must match RISCVInstrFormats.td. +namespace RISCVII { +enum { + InstFormatPseudo = 0, + InstFormatR = 1, + InstFormatR4 = 2, + InstFormatI = 3, + InstFormatS = 4, + InstFormatB = 5, + InstFormatU = 6, + InstFormatJ = 7, + InstFormatCR = 8, + InstFormatCI = 9, + InstFormatCSS = 10, + InstFormatCIW = 11, + InstFormatCL = 12, + InstFormatCS = 13, + InstFormatCA = 14, + InstFormatCB = 15, + InstFormatCJ = 16, + InstFormatOther = 17, + + InstFormatMask = 31, + + ConstraintShift = 5, + ConstraintMask = 0b111 << ConstraintShift, + + VLMulShift = ConstraintShift + 3, + VLMulMask = 0b111 << VLMulShift, + + // Do we need to add a dummy mask op when converting RVV Pseudo to MCInst. 
+ HasDummyMaskOpShift = VLMulShift + 3, + HasDummyMaskOpMask = 1 << HasDummyMaskOpShift, + + // Does this instruction only update element 0 the destination register. + WritesElement0Shift = HasDummyMaskOpShift + 1, + WritesElement0Mask = 1 << WritesElement0Shift, + + // Does this instruction have a merge operand that must be removed when + // converting to MCInst. It will be the first explicit use operand. Used by + // RVV Pseudos. + HasMergeOpShift = WritesElement0Shift + 1, + HasMergeOpMask = 1 << HasMergeOpShift, + + // Does this instruction have a SEW operand. It will be the last explicit + // operand. Used by RVV Pseudos. + HasSEWOpShift = HasMergeOpShift + 1, + HasSEWOpMask = 1 << HasSEWOpShift, + + // Does this instruction have a VL operand. It will be the second to last + // explicit operand. Used by RVV Pseudos. + HasVLOpShift = HasSEWOpShift + 1, + HasVLOpMask = 1 << HasVLOpShift, +}; + +// Match with the definitions in RISCVInstrFormatsV.td +enum RVVConstraintType { + NoConstraint = 0, + VS2Constraint = 0b001, + VS1Constraint = 0b010, + VMConstraint = 0b100, +}; + +// RISC-V Specific Machine Operand Flags +enum { + MO_None = 0, + MO_CALL = 1, + MO_PLT = 2, + MO_LO = 3, + MO_HI = 4, + MO_PCREL_LO = 5, + MO_PCREL_HI = 6, + MO_GOT_HI = 7, + MO_TPREL_LO = 8, + MO_TPREL_HI = 9, + MO_TPREL_ADD = 10, + MO_TLS_GOT_HI = 11, + MO_TLS_GD_HI = 12, + + // Used to differentiate between target-specific "direct" flags and "bitmask" + // flags. A machine operand can only have one "direct" flag, but can have + // multiple "bitmask" flags. + MO_DIRECT_FLAG_MASK = 15 +}; +} // namespace RISCVII + +namespace RISCVOp { +enum OperandType : unsigned { + OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET, + OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM, + OPERAND_UIMM5, + OPERAND_UIMM12, + OPERAND_SIMM12, + OPERAND_UIMM20, + OPERAND_UIMMLOG2XLEN, + OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN +}; +} // namespace RISCVOp + +// Describes the predecessor/successor bits used in the FENCE instruction. +namespace RISCVFenceField { +enum FenceField { + I = 8, + O = 4, + R = 2, + W = 1 +}; +} + +// Describes the supported floating point rounding mode encodings. 
+namespace RISCVFPRndMode { +enum RoundingMode { + RNE = 0, + RTZ = 1, + RDN = 2, + RUP = 3, + RMM = 4, + DYN = 7, + Invalid +}; + +inline static StringRef roundingModeToString(RoundingMode RndMode) { + switch (RndMode) { + default: + llvm_unreachable("Unknown floating point rounding mode"); + case RISCVFPRndMode::RNE: + return "rne"; + case RISCVFPRndMode::RTZ: + return "rtz"; + case RISCVFPRndMode::RDN: + return "rdn"; + case RISCVFPRndMode::RUP: + return "rup"; + case RISCVFPRndMode::RMM: + return "rmm"; + case RISCVFPRndMode::DYN: + return "dyn"; + } +} + +inline static RoundingMode stringToRoundingMode(StringRef Str) { + return StringSwitch<RoundingMode>(Str) + .Case("rne", RISCVFPRndMode::RNE) + .Case("rtz", RISCVFPRndMode::RTZ) + .Case("rdn", RISCVFPRndMode::RDN) + .Case("rup", RISCVFPRndMode::RUP) + .Case("rmm", RISCVFPRndMode::RMM) + .Case("dyn", RISCVFPRndMode::DYN) + .Default(RISCVFPRndMode::Invalid); +} + +inline static bool isValidRoundingMode(unsigned Mode) { + switch (Mode) { + default: + return false; + case RISCVFPRndMode::RNE: + case RISCVFPRndMode::RTZ: + case RISCVFPRndMode::RDN: + case RISCVFPRndMode::RUP: + case RISCVFPRndMode::RMM: + case RISCVFPRndMode::DYN: + return true; + } +} +} // namespace RISCVFPRndMode + +namespace RISCVSysReg { +struct SysReg { + const char *Name; + unsigned Encoding; + const char *AltName; + // FIXME: add these additional fields when needed. + // Privilege Access: Read, Write, Read-Only. + // unsigned ReadWrite; + // Privilege Mode: User, System or Machine. + // unsigned Mode; + // Check field name. + // unsigned Extra; + // Register number without the privilege bits. + // unsigned Number; + FeatureBitset FeaturesRequired; + bool isRV32Only; + + bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const { + // Not in 32-bit mode. + if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit]) + return false; + // No required feature associated with the system register. + if (FeaturesRequired.none()) + return true; + return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; + } +}; + +#define GET_SysRegsList_DECL +#include "RISCVGenSearchableTables.inc" +} // end namespace RISCVSysReg + +namespace RISCVABI { + +enum ABI { + ABI_ILP32, + ABI_ILP32F, + ABI_ILP32D, + ABI_ILP32E, + ABI_LP64, + ABI_LP64F, + ABI_LP64D, + ABI_Unknown +}; + +// Returns the target ABI, or else a StringError if the requested ABIName is +// not supported for the given TT and FeatureBits combination. +ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, + StringRef ABIName); + +ABI getTargetABI(StringRef ABIName); + +// Returns the register used to hold the stack pointer after realignment. +MCRegister getBPReg(); + +// Returns the register holding shadow call stack pointer. +MCRegister getSCSPReg(); + +} // namespace RISCVABI + +namespace RISCVFeatures { + +// Validates if the given combination of features are valid for the target +// triple. Exits with report_fatal_error if not. 
+void validate(const Triple &TT, const FeatureBitset &FeatureBits); + +} // namespace RISCVFeatures + +namespace RISCVVMVTs { + +constexpr MVT vint8mf8_t = MVT::nxv1i8; +constexpr MVT vint8mf4_t = MVT::nxv2i8; +constexpr MVT vint8mf2_t = MVT::nxv4i8; +constexpr MVT vint8m1_t = MVT::nxv8i8; +constexpr MVT vint8m2_t = MVT::nxv16i8; +constexpr MVT vint8m4_t = MVT::nxv32i8; +constexpr MVT vint8m8_t = MVT::nxv64i8; + +constexpr MVT vint16mf4_t = MVT::nxv1i16; +constexpr MVT vint16mf2_t = MVT::nxv2i16; +constexpr MVT vint16m1_t = MVT::nxv4i16; +constexpr MVT vint16m2_t = MVT::nxv8i16; +constexpr MVT vint16m4_t = MVT::nxv16i16; +constexpr MVT vint16m8_t = MVT::nxv32i16; + +constexpr MVT vint32mf2_t = MVT::nxv1i32; +constexpr MVT vint32m1_t = MVT::nxv2i32; +constexpr MVT vint32m2_t = MVT::nxv4i32; +constexpr MVT vint32m4_t = MVT::nxv8i32; +constexpr MVT vint32m8_t = MVT::nxv16i32; + +constexpr MVT vint64m1_t = MVT::nxv1i64; +constexpr MVT vint64m2_t = MVT::nxv2i64; +constexpr MVT vint64m4_t = MVT::nxv4i64; +constexpr MVT vint64m8_t = MVT::nxv8i64; + +constexpr MVT vfloat16mf4_t = MVT::nxv1f16; +constexpr MVT vfloat16mf2_t = MVT::nxv2f16; +constexpr MVT vfloat16m1_t = MVT::nxv4f16; +constexpr MVT vfloat16m2_t = MVT::nxv8f16; +constexpr MVT vfloat16m4_t = MVT::nxv16f16; +constexpr MVT vfloat16m8_t = MVT::nxv32f16; + +constexpr MVT vfloat32mf2_t = MVT::nxv1f32; +constexpr MVT vfloat32m1_t = MVT::nxv2f32; +constexpr MVT vfloat32m2_t = MVT::nxv4f32; +constexpr MVT vfloat32m4_t = MVT::nxv8f32; +constexpr MVT vfloat32m8_t = MVT::nxv16f32; + +constexpr MVT vfloat64m1_t = MVT::nxv1f64; +constexpr MVT vfloat64m2_t = MVT::nxv2f64; +constexpr MVT vfloat64m4_t = MVT::nxv4f64; +constexpr MVT vfloat64m8_t = MVT::nxv8f64; + +constexpr MVT vbool1_t = MVT::nxv64i1; +constexpr MVT vbool2_t = MVT::nxv32i1; +constexpr MVT vbool4_t = MVT::nxv16i1; +constexpr MVT vbool8_t = MVT::nxv8i1; +constexpr MVT vbool16_t = MVT::nxv4i1; +constexpr MVT vbool32_t = MVT::nxv2i1; +constexpr MVT vbool64_t = MVT::nxv1i1; + +} // namespace RISCVVMVTs + +enum class RISCVVSEW { + SEW_8 = 0, + SEW_16, + SEW_32, + SEW_64, + SEW_128, + SEW_256, + SEW_512, + SEW_1024, +}; + +enum class RISCVVLMUL { + LMUL_1 = 0, + LMUL_2, + LMUL_4, + LMUL_8, + LMUL_RESERVED, + LMUL_F8, + LMUL_F4, + LMUL_F2 +}; + +namespace RISCVVType { +// Is this a SEW value that can be encoded into the VTYPE format. +inline static bool isValidSEW(unsigned SEW) { + return isPowerOf2_32(SEW) && SEW >= 8 && SEW <= 1024; +} + +// Is this a LMUL value that can be encoded into the VTYPE format. +inline static bool isValidLMUL(unsigned LMUL, bool Fractional) { + return isPowerOf2_32(LMUL) && LMUL <= 8 && (!Fractional || LMUL != 1); +} + +// Encode VTYPE into the binary format used by the the VSETVLI instruction which +// is used by our MC layer representation. 
+// +// Bits | Name | Description +// -----+------------+------------------------------------------------ +// 7 | vma | Vector mask agnostic +// 6 | vta | Vector tail agnostic +// 5:3 | vsew[2:0] | Standard element width (SEW) setting +// 2:0 | vlmul[2:0] | Vector register group multiplier (LMUL) setting +inline static unsigned encodeVTYPE(RISCVVLMUL VLMUL, RISCVVSEW VSEW, + bool TailAgnostic, bool MaskAgnostic) { + unsigned VLMULBits = static_cast<unsigned>(VLMUL); + unsigned VSEWBits = static_cast<unsigned>(VSEW); + unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7); + if (TailAgnostic) + VTypeI |= 0x40; + if (MaskAgnostic) + VTypeI |= 0x80; + + return VTypeI; +} + +inline static RISCVVLMUL getVLMUL(unsigned VType) { + unsigned VLMUL = VType & 0x7; + return static_cast<RISCVVLMUL>(VLMUL); +} + +inline static RISCVVSEW getVSEW(unsigned VType) { + unsigned VSEW = (VType >> 3) & 0x7; + return static_cast<RISCVVSEW>(VSEW); +} + +inline static bool isTailAgnostic(unsigned VType) { return VType & 0x40; } + +inline static bool isMaskAgnostic(unsigned VType) { return VType & 0x80; } + +void printVType(unsigned VType, raw_ostream &OS); + +} // namespace RISCVVType + +namespace RISCVVPseudosTable { + +struct PseudoInfo { +#include "MCTargetDesc/RISCVBaseInfo.h" + uint16_t Pseudo; + uint16_t BaseInstr; +}; + +using namespace RISCV; + +#define GET_RISCVVPseudosTable_DECL +#include "RISCVGenSearchableTables.inc" + +} // end namespace RISCVVPseudosTable + +} // namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 079dc919928a..7df454be8729 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "RISCVELFStreamer.h" -#include "MCTargetDesc/RISCVAsmBackend.h" +#include "RISCVAsmBackend.h" +#include "RISCVBaseInfo.h" #include "RISCVMCTargetDesc.h" -#include "Utils/RISCVBaseInfo.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index eae3e13dbe40..5f8d6e137518 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// #include "RISCVInstPrinter.h" -#include "MCTargetDesc/RISCVMCExpr.h" -#include "Utils/RISCVBaseInfo.h" +#include "RISCVBaseInfo.h" +#include "RISCVMCExpr.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -102,6 +102,24 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, MO.getExpr()->print(O, &MAI); } +void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address, + unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNo); + if (!MO.isImm()) + return printOperand(MI, OpNo, STI, O); + + if (PrintBranchImmAsAddress) { + uint64_t Target = Address + MO.getImm(); + if (!STI.hasFeature(RISCV::Feature64Bit)) + Target &= 0xffffffff; + O << formatHex(Target); + } else { + O << 
MO.getImm(); + } +} + void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -147,18 +165,12 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, O << "("; printRegName(O, MO.getReg()); O << ")"; - return; } void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); - unsigned Sew = (Imm >> 2) & 0x7; - unsigned Lmul = Imm & 0x3; - - Lmul = 0x1 << Lmul; - Sew = 0x1 << (Sew + 3); - O << "e" << Sew << ",m" << Lmul; + RISCVVType::printVType(Imm, O); } void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo, @@ -174,15 +186,6 @@ void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo, O << ".t"; } -void RISCVInstPrinter::printSImm5Plus1(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNo); - - assert(MO.isImm() && "printSImm5Plus1 can only print constant operands"); - O << MO.getImm() + 1; -} - const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) { return getRegisterName(RegNo, ArchRegNames ? RISCV::NoRegAltName : RISCV::ABIRegAltName); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index fdaa00c5f8eb..d078ead2c8ad 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -32,6 +32,8 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier = nullptr); + void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printCSRSystemRegister(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printFenceArg(const MCInst *MI, unsigned OpNo, @@ -44,10 +46,9 @@ public: raw_ostream &O); void printVMaskReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printSImm5Plus1(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. 
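A standalone sketch of the address arithmetic the new printBranchOperand() performs when PrintBranchImmAsAddress is set (plain C++; branchTarget and the sample addresses are illustrative, not LLVM API):

#include <cassert>
#include <cstdint>

// The printed target is the instruction address plus the branch immediate;
// without Feature64Bit the sum wraps to 32 bits, as in the masking above.
static uint64_t branchTarget(uint64_t Address, int64_t Imm, bool IsRV64) {
  uint64_t Target = Address + static_cast<uint64_t>(Imm);
  if (!IsRV64)
    Target &= 0xffffffff;
  return Target;
}

int main() {
  assert(branchTarget(0x80000004, -8, /*IsRV64=*/false) == 0x7ffffffc);
  assert(branchTarget(0xfffffffc, 8, /*IsRV64=*/false) == 0x4); // RV32 wraparound
  return 0;
}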
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; void printInstruction(const MCInst *MI, uint64_t Address, const MCSubtargetInfo &STI, raw_ostream &O); bool printAliasInstr(const MCInst *MI, uint64_t Address, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 816206c477df..b299541939ec 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -10,12 +10,11 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVFixupKinds.h" #include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -84,6 +83,12 @@ public: unsigned getVMaskReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + +private: + FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; + void + verifyInstructionPredicates(const MCInst &MI, + const FeatureBitset &AvailableFeatures) const; }; } // end anonymous namespace @@ -106,7 +111,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS, const MCSubtargetInfo &STI) const { MCInst TmpInst; MCOperand Func; - Register Ra; + MCRegister Ra; if (MI.getOpcode() == RISCV::PseudoTAIL) { Func = MI.getOperand(0); Ra = RISCV::X6; @@ -185,6 +190,9 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS, void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); // Get byte count of instruction. 
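// (For RISC-V this is 2 bytes for a compressed encoding and 4 bytes otherwise.)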
unsigned Size = Desc.getSize(); @@ -397,4 +405,5 @@ unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo, } } +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "RISCVGenMCCodeEmitter.inc" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index 2a6f372e50be..8ce2184c7a41 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -13,7 +13,6 @@ #include "RISCVMCExpr.h" #include "MCTargetDesc/RISCVAsmBackend.h" -#include "RISCV.h" #include "RISCVFixupKinds.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmLayout.h" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index a474224e1a4e..093118518db6 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -11,14 +11,13 @@ //===----------------------------------------------------------------------===// #include "RISCVMCTargetDesc.h" +#include "RISCVBaseInfo.h" #include "RISCVELFStreamer.h" #include "RISCVInstPrinter.h" #include "RISCVMCAsmInfo.h" #include "RISCVTargetStreamer.h" #include "TargetInfo/RISCVTargetInfo.h" -#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -56,7 +55,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI, const MCTargetOptions &Options) { MCAsmInfo *MAI = new RISCVMCAsmInfo(TT); - Register SP = MRI.getDwarfRegNum(RISCV::X2, true); + MCRegister SP = MRI.getDwarfRegNum(RISCV::X2, true); MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); MAI->addInitialFrameState(Inst); @@ -68,7 +67,7 @@ static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT, std::string CPUName = std::string(CPU); if (CPUName.empty()) CPUName = TT.isArch64Bit() ? 
"generic-rv64" : "generic-rv32"; - return createRISCVMCSubtargetInfoImpl(TT, CPUName, FS); + return createRISCVMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU*/ CPUName, FS); } static MCInstPrinter *createRISCVMCInstPrinter(const Triple &T, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index f390ddb89e3c..1f3dead61011 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -8,10 +8,8 @@ #include "RISCVMatInt.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/MathExtras.h" -#include <cstdint> namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index b12ae2eade99..17ca57458b49 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -9,12 +9,11 @@ #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H #define LLVM_LIB_TARGET_RISCV_MATINT_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" #include <cstdint> namespace llvm { +class APInt; namespace RISCVMatInt { struct Inst { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 54a2fb288579..13c4b84aa300 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "RISCVTargetStreamer.h" -#include "RISCVSubtarget.h" +#include "RISCVMCTargetDesc.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/RISCVAttributes.h" @@ -60,6 +60,38 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { Arch += "_d2p0"; if (STI.hasFeature(RISCV::FeatureStdExtC)) Arch += "_c2p0"; + if (STI.hasFeature(RISCV::FeatureStdExtB)) + Arch += "_b0p93"; + if (STI.hasFeature(RISCV::FeatureStdExtV)) + Arch += "_v0p10"; + if (STI.hasFeature(RISCV::FeatureExtZfh)) + Arch += "_zfh0p1"; + if (STI.hasFeature(RISCV::FeatureExtZba)) + Arch += "_zba0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbb)) + Arch += "_zbb0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbc)) + Arch += "_zbc0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbe)) + Arch += "_zbe0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbf)) + Arch += "_zbf0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbm)) + Arch += "_zbm0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbp)) + Arch += "_zbp0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbproposedc)) + Arch += "_zbproposedc0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbr)) + Arch += "_zbr0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbs)) + Arch += "_zbs0p93"; + if (STI.hasFeature(RISCV::FeatureExtZbt)) + Arch += "_zbt0p93"; + if (STI.hasFeature(RISCV::FeatureExtZvamo)) + Arch += "_zvamo0p10"; + if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg)) + Arch += "_zvlsseg0p10"; emitTextAttribute(RISCVAttrs::ARCH, Arch); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h 
index 9baa2cc2741a..2538d9992de7 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h @@ -14,7 +14,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCV_H #define LLVM_LIB_TARGET_RISCV_RISCV_H -#include "Utils/RISCVBaseInfo.h" +#include "MCTargetDesc/RISCVBaseInfo.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -46,6 +46,9 @@ void initializeRISCVExpandPseudoPass(PassRegistry &); FunctionPass *createRISCVExpandAtomicPseudoPass(); void initializeRISCVExpandAtomicPseudoPass(PassRegistry &); +FunctionPass *createRISCVCleanupVSETVLIPass(); +void initializeRISCVCleanupVSETVLIPass(PassRegistry &); + InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td index 57e7c41c4271..83811dadc9ab 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td @@ -41,6 +41,14 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">, AssemblerPredicate<(all_of FeatureStdExtD), "'D' (Double-Precision Floating-Point)">; +def FeatureExtZfh + : SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true", + "'Zfh' (Half-Precision Floating-Point)", + [FeatureStdExtF]>; +def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">, + AssemblerPredicate<(all_of FeatureExtZfh), + "'Zfh' (Half-Precision Floating-Point)">; + def FeatureStdExtC : SubtargetFeature<"c", "HasStdExtC", "true", "'C' (Compressed Instructions)">; @@ -48,6 +56,14 @@ def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">, AssemblerPredicate<(all_of FeatureStdExtC), "'C' (Compressed Instructions)">; +def FeatureExtZba + : SubtargetFeature<"experimental-zba", "HasStdExtZba", "true", + "'Zba' (Address calculation 'B' Instructions)">; +def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">, + AssemblerPredicate<(all_of FeatureExtZba), + "'Zba' (Address calculation 'B' Instructions)">; +def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">; + def FeatureExtZbb : SubtargetFeature<"experimental-zbb", "HasStdExtZbb", "true", "'Zbb' (Base 'B' Instructions)">; @@ -115,7 +131,9 @@ def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">, // subextensions. They should be enabled if either has been specified. 
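// An instruction guarded by this predicate therefore also assembles under
// "experimental-b", since the B feature defined below implies both Zbb and Zbp.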
def HasStdExtZbbOrZbp : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">, - AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp)>; + AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp), + "'Zbb' (Base 'B' Instructions) or " + "'Zbp' (Permutation 'B' Instructions)">; def FeatureExtZbproposedc : SubtargetFeature<"experimental-zbproposedc", "HasStdExtZbproposedc", "true", @@ -127,7 +145,8 @@ def HasStdExtZbproposedc : Predicate<"Subtarget->hasStdExtZbproposedc()">, def FeatureStdExtB : SubtargetFeature<"experimental-b", "HasStdExtB", "true", "'B' (Bit Manipulation Instructions)", - [FeatureExtZbb, + [FeatureExtZba, + FeatureExtZbb, FeatureExtZbc, FeatureExtZbe, FeatureExtZbf, @@ -145,16 +164,30 @@ def FeatureNoRVCHints "Disable RVC Hint Instructions.">; def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">, AssemblerPredicate<(all_of(not FeatureNoRVCHints)), - "RVC Hint Instructions">; + "RVC Hint Instructions">; def FeatureStdExtV : SubtargetFeature<"experimental-v", "HasStdExtV", "true", - "'V' (Vector Instructions)", - [FeatureStdExtF]>; + "'V' (Vector Instructions)">; def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">, AssemblerPredicate<(all_of FeatureStdExtV), "'V' (Vector Instructions)">; +def FeatureStdExtZvlsseg + : SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true", + "'Zvlsseg' (Vector segment load/store instructions)", + [FeatureStdExtV]>; +def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">, + AssemblerPredicate<(all_of FeatureStdExtZvlsseg), + "'Zvlsseg' (Vector segment load/store instructions)">; +def FeatureExtZvamo + : SubtargetFeature<"experimental-zvamo", "HasStdExtZvamo", "true", + "'Zvamo'(Vector AMO Operations)", + [FeatureStdExtV]>; +def HasStdExtZvamo : Predicate<"Subtarget->hasStdExtZvamo()">, + AssemblerPredicate<(all_of FeatureExtZvamo), + "'Zvamo'(Vector AMO Operations)">; + def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">; def IsRV64 : Predicate<"Subtarget->is64Bit()">, @@ -164,8 +197,8 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<(all_of (not Feature64Bit)), "RV32I Base Instruction Set">; +defvar RV32 = DefaultMode; def RV64 : HwMode<"+64bit">; -def RV32 : HwMode<"-64bit">; def FeatureRV32E : SubtargetFeature<"e", "IsRV32E", "true", @@ -200,31 +233,44 @@ include "RISCVRegisterInfo.td" include "RISCVCallingConv.td" include "RISCVInstrInfo.td" include "RISCVRegisterBanks.td" -include "RISCVSchedRocket32.td" -include "RISCVSchedRocket64.td" +include "RISCVSchedRocket.td" +include "RISCVSchedSiFive7.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. 
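// (RocketModel now serves both rocket-rv32 and rocket-rv64, replacing the
// separate Rocket32/Rocket64 models; SiFive7Model is new in this merge and
// backs the sifive-7-* and sifive-e76/u74 definitions below.)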
//===----------------------------------------------------------------------===// def : ProcessorModel<"generic-rv32", NoSchedModel, []>; - def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; -def : ProcessorModel<"rocket-rv32", Rocket32Model, []>; - -def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit]>; - -def : ProcessorModel<"sifive-e31", Rocket32Model, [FeatureStdExtM, - FeatureStdExtA, - FeatureStdExtC]>; - -def : ProcessorModel<"sifive-u54", Rocket64Model, [Feature64Bit, - FeatureStdExtM, - FeatureStdExtA, - FeatureStdExtF, - FeatureStdExtD, - FeatureStdExtC]>; +def : ProcessorModel<"rocket-rv32", RocketModel, []>; +def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; + +def : ProcessorModel<"sifive-7-rv32", SiFive7Model, []>; +def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit]>; + +def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtC]>; + +def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC]>; + +def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtC]>; + +def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 8955994b1c2e..0a915cbcc1af 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -198,7 +198,9 @@ void RISCVAsmPrinter::emitAttributes() { StringRef CPU = TM.getTargetCPU(); StringRef FS = TM.getTargetFeatureString(); const RISCVTargetMachine &RTM = static_cast<const RISCVTargetMachine &>(TM); - const RISCVSubtarget STI(TT, CPU, FS, /*ABIName=*/"", RTM); + /* TuneCPU doesn't impact the emission of ELF attributes; ELF attributes only + care about arch-related features, so we can set TuneCPU to CPU.
*/ + const RISCVSubtarget STI(TT, CPU, /*TuneCPU=*/CPU, FS, /*ABIName=*/"", RTM); RTS.emitTargetAttributes(STI); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp index c63a84739c4a..d265f3a12b7f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.cpp @@ -22,8 +22,8 @@ RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI) : CallLowering(&TLI) {} bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, - const Value *Val, - ArrayRef<Register> VRegs) const { + const Value *Val, ArrayRef<Register> VRegs, + FunctionLoweringInfo &FLI) const { MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET); @@ -34,9 +34,10 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return true; } -bool RISCVCallLowering::lowerFormalArguments( - MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs) const { +bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, + const Function &F, + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const { if (F.arg_empty()) return true; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h index 7ce074a61f0a..cd7fc4c76123 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallLowering.h @@ -28,10 +28,12 @@ public: RISCVCallLowering(const RISCVTargetLowering &TLI); bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val, - ArrayRef<Register> VRegs) const override; + ArrayRef<Register> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs) const override; + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp new file mode 100644 index 000000000000..ae32cbd1ae59 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp @@ -0,0 +1,154 @@ +//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function pass that removes duplicate vsetvli +// instructions within a basic block. 
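// A vsetvli or vsetivli is removable when it would set VL and VTYPE to the
// same values the previous one in the block established and its own VL
// result is dead.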
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "riscv-cleanup-vsetvli" +#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass" + +namespace { + +class RISCVCleanupVSETVLI : public MachineFunctionPass { +public: + static char ID; + + RISCVCleanupVSETVLI() : MachineFunctionPass(ID) { + initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry()); + } + bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + // This pass modifies the program, but does not modify the CFG + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; } +}; + +} // end anonymous namespace + +char RISCVCleanupVSETVLI::ID = 0; + +INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE, + RISCV_CLEANUP_VSETVLI_NAME, false, false) + +bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + MachineInstr *PrevVSETVLI = nullptr; + + for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) { + MachineInstr &MI = *MII++; + + if (MI.getOpcode() != RISCV::PseudoVSETVLI && + MI.getOpcode() != RISCV::PseudoVSETIVLI) { + if (PrevVSETVLI && + (MI.isCall() || MI.modifiesRegister(RISCV::VL) || + MI.modifiesRegister(RISCV::VTYPE))) { + // Old VL/VTYPE is overwritten. + PrevVSETVLI = nullptr; + } + continue; + } + + // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we + // can't remove this VSETVLI. + if (!PrevVSETVLI || !MI.getOperand(0).isDead()) { + PrevVSETVLI = &MI; + continue; + } + + // If a previous "set vl" instruction opcode is different from this one, we + // can't differentiate the AVL values. + if (PrevVSETVLI->getOpcode() != MI.getOpcode()) { + PrevVSETVLI = &MI; + continue; + } + + // The remaining two cases are + // 1. PrevVSETVLI = PseudoVSETVLI + // MI = PseudoVSETVLI + // + // 2. PrevVSETVLI = PseudoVSETIVLI + // MI = PseudoVSETIVLI + Register AVLReg; + bool SameAVL = false; + if (MI.getOpcode() == RISCV::PseudoVSETVLI) { + AVLReg = MI.getOperand(1).getReg(); + SameAVL = PrevVSETVLI->getOperand(1).getReg() == AVLReg; + } else { // RISCV::PseudoVSETIVLI + SameAVL = + PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm(); + } + int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm(); + int64_t VTYPEImm = MI.getOperand(2).getImm(); + + // Does this VSET{I}VLI use the same AVL register/value and VTYPE immediate? + if (!SameAVL || PrevVTYPEImm != VTYPEImm) { + PrevVSETVLI = &MI; + continue; + } + + // If the AVLReg is X0 we need to look at the output VL of both VSETVLIs. + if ((MI.getOpcode() == RISCV::PseudoVSETVLI) && (AVLReg == RISCV::X0)) { + assert((PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI) && + "Unexpected vsetvli opcode."); + Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg(); + Register OutVL = MI.getOperand(0).getReg(); + // We can't remove if the previous VSETVLI left VL unchanged and the + // current instruction is setting it to VLMAX. Without knowing the VL + // before the previous instruction we don't know if this is a change. 
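// Concretely: "vsetvli x0, x0, <vtype>" leaves VL unchanged, while
// "vsetvli rd, x0, <vtype>" with rd != x0 sets VL to VLMAX, so the second is
// only redundant if VL already equaled VLMAX, which this pass cannot prove.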
+ if (PrevOutVL == RISCV::X0 && OutVL != RISCV::X0) { + PrevVSETVLI = &MI; + continue; + } + } + + // This VSETVLI is redundant, remove it. + MI.eraseFromParent(); + Changed = true; + } + + return Changed; +} + +bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // Skip if the vector extension is not enabled. + const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); + if (!ST.hasStdExtV()) + return false; + + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) + Changed |= runOnMachineBasicBlock(MBB); + + return Changed; +} + +/// Returns an instance of the Cleanup VSETVLI pass. +FunctionPass *llvm::createRISCVCleanupVSETVLIPass() { + return new RISCVCleanupVSETVLI(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 504355fb8bf8..ec9a39569952 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -59,6 +59,9 @@ private: bool expandLoadTLSGDAddress(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandVMSET_VMCLR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned Opcode); }; char RISCVExpandPseudo::ID = 0; @@ -99,6 +102,27 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI); case RISCV::PseudoLA_TLS_GD: return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI); + case RISCV::PseudoVSETVLI: + case RISCV::PseudoVSETIVLI: + return expandVSetVL(MBB, MBBI); + case RISCV::PseudoVMCLR_M_B1: + case RISCV::PseudoVMCLR_M_B2: + case RISCV::PseudoVMCLR_M_B4: + case RISCV::PseudoVMCLR_M_B8: + case RISCV::PseudoVMCLR_M_B16: + case RISCV::PseudoVMCLR_M_B32: + case RISCV::PseudoVMCLR_M_B64: + // vmclr.m vd => vmxor.mm vd, vd, vd + return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXOR_MM); + case RISCV::PseudoVMSET_M_B1: + case RISCV::PseudoVMSET_M_B2: + case RISCV::PseudoVMSET_M_B4: + case RISCV::PseudoVMSET_M_B8: + case RISCV::PseudoVMSET_M_B16: + case RISCV::PseudoVMSET_M_B32: + case RISCV::PseudoVMSET_M_B64: + // vmset.m vd => vmxnor.mm vd, vd, vd + return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM); } return false; @@ -188,6 +212,47 @@ bool RISCVExpandPseudo::expandLoadTLSGDAddress( RISCV::ADDI); } +bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + assert(MBBI->getNumOperands() == 5 && "Unexpected instruction format"); + + DebugLoc DL = MBBI->getDebugLoc(); + + assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI || + MBBI->getOpcode() == RISCV::PseudoVSETIVLI) && + "Unexpected pseudo instruction"); + unsigned Opcode; + if (MBBI->getOpcode() == RISCV::PseudoVSETVLI) + Opcode = RISCV::VSETVLI; + else + Opcode = RISCV::VSETIVLI; + const MCInstrDesc &Desc = TII->get(Opcode); + assert(Desc.getNumOperands() == 3 && "Unexpected instruction format"); + + Register DstReg = MBBI->getOperand(0).getReg(); + bool DstIsDead = MBBI->getOperand(0).isDead(); + BuildMI(MBB, MBBI, DL, Desc) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .add(MBBI->getOperand(1)) // VL + .add(MBBI->getOperand(2)); // VType + + MBBI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return true; +} + +bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Opcode) { + DebugLoc DL = MBBI->getDebugLoc(); + Register DstReg = MBBI->getOperand(0).getReg(); + const MCInstrDesc &Desc = TII->get(Opcode); + BuildMI(MBB, MBBI, DL, Desc, DstReg) + .addReg(DstReg, RegState::Undef) + .addReg(DstReg, RegState::Undef); + MBBI->eraseFromParent(); // The pseudo instruction is gone now. + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo", diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 43adc7426c79..564d97f47d9e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -23,6 +23,105 @@ using namespace llvm; +// For now we use x18, a.k.a. s2, as the pointer to the shadow call stack. +// Users should explicitly set -ffixed-x18 and not use x18 in their asm. +static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL) { + if (!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) + return; + + const auto &STI = MF.getSubtarget<RISCVSubtarget>(); + Register RAReg = STI.getRegisterInfo()->getRARegister(); + + // Do not save RA to the SCS if it's not saved to the regular stack, + // i.e. RA is not at risk of being overwritten. + std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo(); + if (std::none_of(CSI.begin(), CSI.end(), + [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) + return; + + Register SCSPReg = RISCVABI::getSCSPReg(); + + auto &Ctx = MF.getFunction().getContext(); + if (!STI.isRegisterReservedByUser(SCSPReg)) { + Ctx.diagnose(DiagnosticInfoUnsupported{ + MF.getFunction(), "x18 not reserved by user for Shadow Call Stack."}); + return; + } + + const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); + if (RVFI->useSaveRestoreLibCalls(MF)) { + Ctx.diagnose(DiagnosticInfoUnsupported{ + MF.getFunction(), + "Shadow Call Stack cannot be combined with Save/Restore LibCalls."}); + return; + } + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); + int64_t SlotSize = STI.getXLen() / 8; + // Store return address to shadow call stack + // s[w|d] ra, 0(s2) + // addi s2, s2, [4|8] + BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) + .addReg(RAReg) + .addReg(SCSPReg) + .addImm(0); + BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI)) + .addReg(SCSPReg, RegState::Define) + .addReg(SCSPReg) + .addImm(SlotSize); +} + +static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL) { + if (!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) + return; + + const auto &STI = MF.getSubtarget<RISCVSubtarget>(); + Register RAReg = STI.getRegisterInfo()->getRARegister(); + + // See emitSCSPrologue() above.
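// (The epilogue mirrors the prologue: reload ra from the top of the shadow
// call stack, then decrement s2 by one slot.)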
+ std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo(); + if (std::none_of(CSI.begin(), CSI.end(), + [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) + return; + + Register SCSPReg = RISCVABI::getSCSPReg(); + + auto &Ctx = MF.getFunction().getContext(); + if (!STI.isRegisterReservedByUser(SCSPReg)) { + Ctx.diagnose(DiagnosticInfoUnsupported{ + MF.getFunction(), "x18 not reserved by user for Shadow Call Stack."}); + return; + } + + const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); + if (RVFI->useSaveRestoreLibCalls(MF)) { + Ctx.diagnose(DiagnosticInfoUnsupported{ + MF.getFunction(), + "Shadow Call Stack cannot be combined with Save/Restore LibCalls."}); + return; + } + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); + int64_t SlotSize = STI.getXLen() / 8; + // Load return address from shadow call stack + // l[w|d] ra, -[4|8](s2) + // addi s2, s2, -[4|8] + BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::LD : RISCV::LW)) + .addReg(RAReg, RegState::Define) + .addReg(SCSPReg) + .addImm(-SlotSize); + BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI)) + .addReg(SCSPReg, RegState::Define) + .addReg(SCSPReg) + .addImm(-SlotSize); +} + // Get the ID of the libcall used for spilling and restoring callee saved // registers. The ID is representative of the number of registers saved or // restored by the libcall, except it is zero-indexed - ID 0 corresponds to a @@ -39,7 +138,7 @@ static int getLibCallID(const MachineFunction &MF, // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indexes to // registers which can be saved by libcall. if (CS.getFrameIdx() < 0) - MaxReg = std::max(MaxReg.id(), CS.getReg()); + MaxReg = std::max(MaxReg.id(), CS.getReg().id()); if (MaxReg == RISCV::NoRegister) return -1; @@ -136,18 +235,12 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { // Determines the size of the frame and maximum call frame size. void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { MachineFrameInfo &MFI = MF.getFrameInfo(); - const RISCVRegisterInfo *RI = STI.getRegisterInfo(); // Get the number of bytes to allocate from the FrameInfo. uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. Align StackAlign = getStackAlign(); - if (RI->needsStackRealignment(MF)) { - Align MaxStackAlign = std::max(StackAlign, MFI.getMaxAlign()); - FrameSize += (MaxStackAlign.value() - StackAlign.value()); - StackAlign = MaxStackAlign; - } // Set Max Call Frame Size uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); @@ -222,15 +315,23 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, Register SPReg = getSPReg(STI); Register BPReg = RISCVABI::getBPReg(); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; + + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + return; + + // Emit prologue for shadow call stack. + emitSCSPrologue(MF, MBB, MBBI, DL); + // Since spillCalleeSavedRegisters may have inserted a libcall, skip past // any instructions marked as FrameSetup while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) ++MBBI; - // Debug location must be unknown since the first debug location is used - // to determine the end of the prologue. 
- DebugLoc DL; - // Determine the correct frame layout determineFrameLayout(MF); @@ -398,6 +499,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, Register FPReg = getFPReg(STI); Register SPReg = getSPReg(STI); + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + return; + // Get the insert location for the epilogue. If there were no terminators in // the block, get the last instruction. MachineBasicBlock::iterator MBBI = MBB.end(); @@ -457,11 +563,14 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, // Deallocate stack adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); + + // Emit epilogue for shadow call stack. + emitSCSEpilogue(MF, MBB, MBBI, DL); } -int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - Register &FrameReg) const { +StackOffset +RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); @@ -513,7 +622,7 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, Offset += RVFI->getLibCallStackSize(); } } - return Offset; + return StackOffset::getFixed(Offset); } void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -547,14 +656,14 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, for (unsigned i = 0; CSRegs[i]; ++i) SavedRegs.set(CSRegs[i]); - if (MF.getSubtarget<RISCVSubtarget>().hasStdExtD() || - MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) { + if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) { // If interrupt is enabled, this list contains all FP registers. 
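// (The check now keys on F alone, and FPR16 is scanned as well to cover the
// new Zfh half-precision registers.)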
const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs(); for (unsigned i = 0; Regs[i]; ++i) - if (RISCV::FPR32RegClass.contains(Regs[i]) || + if (RISCV::FPR16RegClass.contains(Regs[i]) || + RISCV::FPR32RegClass.contains(Regs[i]) || RISCV::FPR64RegClass.contains(Regs[i])) SavedRegs.set(Regs[i]); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 1517c847a04c..889b9ce2e1a9 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/TypeSize.h" namespace llvm { class RISCVSubtarget; @@ -29,8 +30,8 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - int getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7570385e38e3..43bf16c53a62 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -12,8 +12,9 @@ #include "RISCVISelDAGToDAG.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "Utils/RISCVMatInt.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -48,15 +49,439 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm, return Result; } -// Returns true if the Node is an ISD::AND with a constant argument. If so, -// set Mask to that constant value. 
-static bool isConstantMask(SDNode *Node, uint64_t &Mask) { - if (Node->getOpcode() == ISD::AND && - Node->getOperand(1).getOpcode() == ISD::Constant) { - Mask = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - return true; +static RISCVVLMUL getLMUL(EVT VT) { + switch (VT.getSizeInBits().getKnownMinValue() / 8) { + default: + llvm_unreachable("Invalid LMUL."); + case 1: + return RISCVVLMUL::LMUL_F8; + case 2: + return RISCVVLMUL::LMUL_F4; + case 4: + return RISCVVLMUL::LMUL_F2; + case 8: + return RISCVVLMUL::LMUL_1; + case 16: + return RISCVVLMUL::LMUL_2; + case 32: + return RISCVVLMUL::LMUL_4; + case 64: + return RISCVVLMUL::LMUL_8; } - return false; +} + +static unsigned getSubregIndexByEVT(EVT VT, unsigned Index) { + RISCVVLMUL LMUL = getLMUL(VT); + if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 || + LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) { + static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, + "Unexpected subreg numbering"); + return RISCV::sub_vrm1_0 + Index; + } else if (LMUL == RISCVVLMUL::LMUL_2) { + static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, + "Unexpected subreg numbering"); + return RISCV::sub_vrm2_0 + Index; + } else if (LMUL == RISCVVLMUL::LMUL_4) { + static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, + "Unexpected subreg numbering"); + return RISCV::sub_vrm4_0 + Index; + } + llvm_unreachable("Invalid vector type."); +} + +static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, + unsigned RegClassID, unsigned SubReg0) { + assert(Regs.size() >= 2 && Regs.size() <= 8); + + SDLoc DL(Regs[0]); + SmallVector<SDValue, 8> Ops; + + Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); + + for (unsigned I = 0; I < Regs.size(); ++I) { + Ops.push_back(Regs[I]); + Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); + } + SDNode *N = + CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); + return SDValue(N, 0); +} + +static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, + unsigned NF) { + static const unsigned RegClassIDs[] = { + RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, + RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, + RISCV::VRN8M1RegClassID}; + + return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); +} + +static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, + unsigned NF) { + static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, + RISCV::VRN3M2RegClassID, + RISCV::VRN4M2RegClassID}; + + return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); +} + +static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, + unsigned NF) { + return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, + RISCV::sub_vrm4_0); +} + +static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, + unsigned NF, RISCVVLMUL LMUL) { + switch (LMUL) { + default: + llvm_unreachable("Invalid LMUL."); + case RISCVVLMUL::LMUL_F8: + case RISCVVLMUL::LMUL_F4: + case RISCVVLMUL::LMUL_F2: + case RISCVVLMUL::LMUL_1: + return createM1Tuple(CurDAG, Regs, NF); + case RISCVVLMUL::LMUL_2: + return createM2Tuple(CurDAG, Regs, NF); + case RISCVVLMUL::LMUL_4: + return createM4Tuple(CurDAG, Regs, NF); + } +} + +void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned IntNo, + bool IsStrided) { + SDLoc DL(Node); + unsigned NF = Node->getNumValues() - 1; + EVT VT = Node->getValueType(0); + unsigned 
ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 5> Operands; + Operands.push_back(Node->getOperand(2)); // Base pointer. + if (IsStrided) { + Operands.push_back(Node->getOperand(3)); // Stride. + Operands.push_back(Node->getOperand(4)); // VL. + } else { + Operands.push_back(Node->getOperand(3)); // VL. + } + Operands.push_back(SEW); + Operands.push_back(Node->getOperand(0)); // Chain. + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, ScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(RISCVVLMUL::LMUL_1)); + SDNode *Load = + CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned I = 0; I < NF; ++I) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL, + VT, SuperReg)); + + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); + CurDAG->RemoveDeadNode(Node); +} + +void RISCVDAGToDAGISel::selectVLSEGMask(SDNode *Node, unsigned IntNo, + bool IsStrided) { + SDLoc DL(Node); + unsigned NF = Node->getNumValues() - 1; + EVT VT = Node->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); + SmallVector<SDValue, 7> Operands; + Operands.push_back(MaskedOff); + Operands.push_back(Node->getOperand(NF + 2)); // Base pointer. + if (IsStrided) { + Operands.push_back(Node->getOperand(NF + 3)); // Stride. + Operands.push_back(Node->getOperand(NF + 4)); // Mask. + Operands.push_back(Node->getOperand(NF + 5)); // VL. + } else { + Operands.push_back(Node->getOperand(NF + 3)); // Mask. + Operands.push_back(Node->getOperand(NF + 4)); // VL. + } + Operands.push_back(SEW); + Operands.push_back(Node->getOperand(0)); /// Chain. + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, ScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(RISCVVLMUL::LMUL_1)); + SDNode *Load = + CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned I = 0; I < NF; ++I) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL, + VT, SuperReg)); + + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); + CurDAG->RemoveDeadNode(Node); +} + +void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node) { + SDLoc DL(Node); + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned NF = Node->getNumValues() - 2; // Do not count Chain and Glue. + EVT VT = Node->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 5> Operands; + Operands.push_back(Node->getOperand(2)); // Base pointer. + Operands.push_back(Node->getOperand(3)); // VL. + Operands.push_back(SEW); + Operands.push_back(Node->getOperand(0)); // Chain. 
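// Look up the fault-only-first segment-load pseudo for this intrinsic ID,
// SEW, and LMUL combination.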
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, ScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(RISCVVLMUL::LMUL_1)); + SDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, + MVT::Glue, Operands); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned I = 0; I < NF; ++I) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL, + VT, SuperReg)); + + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // Chain. + ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Glue. + CurDAG->RemoveDeadNode(Node); +} + +void RISCVDAGToDAGISel::selectVLSEGFFMask(SDNode *Node) { + SDLoc DL(Node); + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned NF = Node->getNumValues() - 2; // Do not count Chain and Glue. + EVT VT = Node->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); + SmallVector<SDValue, 7> Operands; + Operands.push_back(MaskedOff); + Operands.push_back(Node->getOperand(NF + 2)); // Base pointer. + Operands.push_back(Node->getOperand(NF + 3)); // Mask. + Operands.push_back(Node->getOperand(NF + 4)); // VL. + Operands.push_back(SEW); + Operands.push_back(Node->getOperand(0)); /// Chain. + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, ScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(RISCVVLMUL::LMUL_1)); + SDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, + MVT::Glue, Operands); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned I = 0; I < NF; ++I) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL, + VT, SuperReg)); + + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // Chain. + ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Glue. + CurDAG->RemoveDeadNode(Node); +} + +void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned IntNo) { + SDLoc DL(Node); + unsigned NF = Node->getNumValues() - 1; + EVT VT = Node->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SDValue Operands[] = { + Node->getOperand(2), // Base pointer. + Node->getOperand(3), // Index. + Node->getOperand(4), // VL. + SEW, Node->getOperand(0) // Chain. 
+ }; + + EVT IndexVT = Node->getOperand(3)->getValueType(0); + RISCVVLMUL IndexLMUL = getLMUL(IndexVT); + unsigned IndexScalarSize = IndexVT.getScalarSizeInBits(); + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, IndexScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(IndexLMUL)); + SDNode *Load = + CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned I = 0; I < NF; ++I) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL, + VT, SuperReg)); + + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); + CurDAG->RemoveDeadNode(Node); +} + +void RISCVDAGToDAGISel::selectVLXSEGMask(SDNode *Node, unsigned IntNo) { + SDLoc DL(Node); + unsigned NF = Node->getNumValues() - 1; + EVT VT = Node->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); + SDValue Operands[] = { + MaskedOff, + Node->getOperand(NF + 2), // Base pointer. + Node->getOperand(NF + 3), // Index. + Node->getOperand(NF + 4), // Mask. + Node->getOperand(NF + 5), // VL. + SEW, + Node->getOperand(0) // Chain. + }; + + EVT IndexVT = Node->getOperand(NF + 3)->getValueType(0); + RISCVVLMUL IndexLMUL = getLMUL(IndexVT); + unsigned IndexScalarSize = IndexVT.getScalarSizeInBits(); + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, IndexScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(IndexLMUL)); + SDNode *Load = + CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned I = 0; I < NF; ++I) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(getSubregIndexByEVT(VT, I), DL, + VT, SuperReg)); + + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); + CurDAG->RemoveDeadNode(Node); +} + +void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned IntNo, + bool IsStrided) { + SDLoc DL(Node); + unsigned NF = Node->getNumOperands() - 4; + if (IsStrided) + NF--; + EVT VT = Node->getOperand(2)->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); + SmallVector<SDValue, 6> Operands; + Operands.push_back(StoreVal); + Operands.push_back(Node->getOperand(2 + NF)); // Base pointer. + if (IsStrided) { + Operands.push_back(Node->getOperand(3 + NF)); // Stride. + Operands.push_back(Node->getOperand(4 + NF)); // VL. + } else { + Operands.push_back(Node->getOperand(3 + NF)); // VL. + } + Operands.push_back(SEW); + Operands.push_back(Node->getOperand(0)); // Chain. 
+ const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, ScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(RISCVVLMUL::LMUL_1)); + SDNode *Store = + CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); + ReplaceNode(Node, Store); +} + +void RISCVDAGToDAGISel::selectVSSEGMask(SDNode *Node, unsigned IntNo, + bool IsStrided) { + SDLoc DL(Node); + unsigned NF = Node->getNumOperands() - 5; + if (IsStrided) + NF--; + EVT VT = Node->getOperand(2)->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); + SmallVector<SDValue, 7> Operands; + Operands.push_back(StoreVal); + Operands.push_back(Node->getOperand(2 + NF)); // Base pointer. + if (IsStrided) { + Operands.push_back(Node->getOperand(3 + NF)); // Stride. + Operands.push_back(Node->getOperand(4 + NF)); // Mask. + Operands.push_back(Node->getOperand(5 + NF)); // VL. + } else { + Operands.push_back(Node->getOperand(3 + NF)); // Mask. + Operands.push_back(Node->getOperand(4 + NF)); // VL. + } + Operands.push_back(SEW); + Operands.push_back(Node->getOperand(0)); // Chain. + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, ScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(RISCVVLMUL::LMUL_1)); + SDNode *Store = + CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); + ReplaceNode(Node, Store); +} + +void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned IntNo) { + SDLoc DL(Node); + unsigned NF = Node->getNumOperands() - 5; + EVT VT = Node->getOperand(2)->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); + SDValue Operands[] = { + StoreVal, + Node->getOperand(2 + NF), // Base pointer. + Node->getOperand(3 + NF), // Index. + Node->getOperand(4 + NF), // VL. + SEW, + Node->getOperand(0) // Chain. + }; + + EVT IndexVT = Node->getOperand(3 + NF)->getValueType(0); + RISCVVLMUL IndexLMUL = getLMUL(IndexVT); + unsigned IndexScalarSize = IndexVT.getScalarSizeInBits(); + const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo( + IntNo, IndexScalarSize, static_cast<unsigned>(LMUL), + static_cast<unsigned>(IndexLMUL)); + SDNode *Store = + CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); + ReplaceNode(Node, Store); +} + +void RISCVDAGToDAGISel::selectVSXSEGMask(SDNode *Node, unsigned IntNo) { + SDLoc DL(Node); + unsigned NF = Node->getNumOperands() - 6; + EVT VT = Node->getOperand(2)->getValueType(0); + unsigned ScalarSize = VT.getScalarSizeInBits(); + MVT XLenVT = Subtarget->getXLenVT(); + RISCVVLMUL LMUL = getLMUL(VT); + SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT); + SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); + SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); + SDValue Operands[] = { + StoreVal, + Node->getOperand(2 + NF), // Base pointer. + Node->getOperand(3 + NF), // Index. + Node->getOperand(4 + NF), // Mask. + Node->getOperand(5 + NF), // VL. 
+      SEW,
+      Node->getOperand(0) // Chain.
+  };
+
+  EVT IndexVT = Node->getOperand(3 + NF)->getValueType(0);
+  RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
+  unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
+  const RISCVZvlssegTable::RISCVZvlsseg *P = RISCVZvlssegTable::getPseudo(
+      IntNo, IndexScalarSize, static_cast<unsigned>(LMUL),
+      static_cast<unsigned>(IndexLMUL));
+  SDNode *Store =
+      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
+  ReplaceNode(Node, Store);
 }
 
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
@@ -86,7 +511,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     if (!(-4096 <= Imm && Imm <= -2049) && !(2048 <= Imm && Imm <= 4094))
       break;
     // Break the imm to imm0+imm1.
-    SDLoc DL(Node);
     EVT VT = Node->getValueType(0);
     const SDValue ImmOp0 = CurDAG->getTargetConstant(Imm - Imm / 2, DL, VT);
     const SDValue ImmOp1 = CurDAG->getTargetConstant(Imm / 2, DL, VT);
@@ -102,14 +526,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
   case ISD::Constant: {
     auto ConstNode = cast<ConstantSDNode>(Node);
     if (VT == XLenVT && ConstNode->isNullValue()) {
-      SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
-                                           RISCV::X0, XLenVT);
+      SDValue New =
+          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
       ReplaceNode(Node, New.getNode());
       return;
     }
     int64_t Imm = ConstNode->getSExtValue();
     if (XLenVT == MVT::i64) {
-      ReplaceNode(Node, selectImm(CurDAG, SDLoc(Node), Imm, XLenVT));
+      ReplaceNode(Node, selectImm(CurDAG, DL, Imm, XLenVT));
       return;
     }
     break;
@@ -121,38 +545,235 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
     return;
   }
-  case ISD::SRL: {
-    if (!Subtarget->is64Bit())
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    // By default we do not custom select any intrinsic.
+    default:
       break;
-    SDValue Op0 = Node->getOperand(0);
-    SDValue Op1 = Node->getOperand(1);
-    uint64_t Mask;
-    // Match (srl (and val, mask), imm) where the result would be a
-    // zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
-    // is equivalent to this (SimplifyDemandedBits may have removed lower bits
-    // from the mask that aren't necessary due to the right-shifting).
-    if (Op1.getOpcode() == ISD::Constant &&
-        isConstantMask(Op0.getNode(), Mask)) {
-      uint64_t ShAmt = cast<ConstantSDNode>(Op1.getNode())->getZExtValue();
-
-      if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
-        SDValue ShAmtVal =
-            CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
-        CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0.getOperand(0),
-                             ShAmtVal);
-        return;
+
+    case Intrinsic::riscv_vsetvli: {
+      if (!Subtarget->hasStdExtV())
+        break;
+
+      assert(Node->getNumOperands() == 5);
+
+      RISCVVSEW VSEW =
+          static_cast<RISCVVSEW>(Node->getConstantOperandVal(3) & 0x7);
+      RISCVVLMUL VLMul =
+          static_cast<RISCVVLMUL>(Node->getConstantOperandVal(4) & 0x7);
+
+      unsigned VTypeI = RISCVVType::encodeVTYPE(
+          VLMul, VSEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
+      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+      SDValue VLOperand = Node->getOperand(2);
+      if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+        uint64_t AVL = C->getZExtValue();
+        if (isUInt<5>(AVL)) {
+          SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+          ReplaceNode(Node,
+                      CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
+                                             MVT::Other, VLImm, VTypeIOp,
+                                             /* Chain */ Node->getOperand(0)));
+          return;
+        }
       }
+
+      ReplaceNode(Node,
+                  CurDAG->getMachineNode(RISCV::PseudoVSETVLI, DL, XLenVT,
+                                         MVT::Other, VLOperand, VTypeIOp,
+                                         /* Chain */ Node->getOperand(0)));
+      return;
+    }
+    case Intrinsic::riscv_vsetvlimax: {
+      if (!Subtarget->hasStdExtV())
+        break;
+
+      assert(Node->getNumOperands() == 4);
+
+      RISCVVSEW VSEW =
+          static_cast<RISCVVSEW>(Node->getConstantOperandVal(2) & 0x7);
+      RISCVVLMUL VLMul =
+          static_cast<RISCVVLMUL>(Node->getConstantOperandVal(3) & 0x7);
+
+      unsigned VTypeI = RISCVVType::encodeVTYPE(
+          VLMul, VSEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
+      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+      SDValue VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+      ReplaceNode(Node,
+                  CurDAG->getMachineNode(RISCV::PseudoVSETVLI, DL, XLenVT,
+                                         MVT::Other, VLOperand, VTypeIOp,
+                                         /* Chain */ Node->getOperand(0)));
+      return;
+    }
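[Editor's note] The only difference between the two vsetvli cases above is whether the AVL fits in the 5-bit unsigned immediate field of vsetivli. A standalone sketch of that decision and of the vtype word that the encodeVTYPE call assembles, assuming the RVV v0.10/v1.0 vtype layout (vlmul in bits 2:0, vsew in bits 5:3, vta in bit 6, vma in bit 7); the names here are illustrative, not the LLVM API:

    #include <cstdint>

    // Approximates RISCVVType::encodeVTYPE under the assumed vtype layout.
    uint32_t encodeVTypeSketch(uint32_t VLMul, uint32_t VSEW,
                               bool TailAgnostic, bool MaskAgnostic) {
      uint32_t VType = ((VSEW & 0x7) << 3) | (VLMul & 0x7);
      if (TailAgnostic)
        VType |= 0x40; // vta
      if (MaskAgnostic)
        VType |= 0x80; // vma
      return VType;
    }

    // Mirrors the isUInt<5>(AVL) test above: small constant AVLs can use
    // PseudoVSETIVLI, which carries the AVL as an immediate.
    bool avlFitsVsetivli(uint64_t AVL) { return AVL < 32; }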
+    case Intrinsic::riscv_vlseg2:
+    case Intrinsic::riscv_vlseg3:
+    case Intrinsic::riscv_vlseg4:
+    case Intrinsic::riscv_vlseg5:
+    case Intrinsic::riscv_vlseg6:
+    case Intrinsic::riscv_vlseg7:
+    case Intrinsic::riscv_vlseg8: {
+      selectVLSEG(Node, IntNo, /*IsStrided=*/false);
+      return;
+    }
+    case Intrinsic::riscv_vlseg2_mask:
+    case Intrinsic::riscv_vlseg3_mask:
+    case Intrinsic::riscv_vlseg4_mask:
+    case Intrinsic::riscv_vlseg5_mask:
+    case Intrinsic::riscv_vlseg6_mask:
+    case Intrinsic::riscv_vlseg7_mask:
+    case Intrinsic::riscv_vlseg8_mask: {
+      selectVLSEGMask(Node, IntNo, /*IsStrided=*/false);
+      return;
+    }
+    case Intrinsic::riscv_vlsseg2:
+    case Intrinsic::riscv_vlsseg3:
+    case Intrinsic::riscv_vlsseg4:
+    case Intrinsic::riscv_vlsseg5:
+    case Intrinsic::riscv_vlsseg6:
+    case Intrinsic::riscv_vlsseg7:
+    case Intrinsic::riscv_vlsseg8: {
+      selectVLSEG(Node, IntNo, /*IsStrided=*/true);
+      return;
+    }
+    case Intrinsic::riscv_vlsseg2_mask:
+    case Intrinsic::riscv_vlsseg3_mask:
+    case Intrinsic::riscv_vlsseg4_mask:
+    case Intrinsic::riscv_vlsseg5_mask:
+    case Intrinsic::riscv_vlsseg6_mask:
+    case Intrinsic::riscv_vlsseg7_mask:
+    case Intrinsic::riscv_vlsseg8_mask: {
+      selectVLSEGMask(Node, IntNo, /*IsStrided=*/true);
+      return;
+    }
+    case Intrinsic::riscv_vloxseg2:
+    case Intrinsic::riscv_vloxseg3:
+    case Intrinsic::riscv_vloxseg4:
+    case Intrinsic::riscv_vloxseg5:
+    case Intrinsic::riscv_vloxseg6:
+    case Intrinsic::riscv_vloxseg7:
+    case Intrinsic::riscv_vloxseg8:
+    case Intrinsic::riscv_vluxseg2:
+    case Intrinsic::riscv_vluxseg3:
+    case Intrinsic::riscv_vluxseg4:
+    case Intrinsic::riscv_vluxseg5:
+    case Intrinsic::riscv_vluxseg6:
+    case Intrinsic::riscv_vluxseg7:
+    case Intrinsic::riscv_vluxseg8: {
+      selectVLXSEG(Node, IntNo);
+      return;
+    }
+    case Intrinsic::riscv_vloxseg2_mask:
+    case Intrinsic::riscv_vloxseg3_mask:
+    case Intrinsic::riscv_vloxseg4_mask:
+    case Intrinsic::riscv_vloxseg5_mask:
+    case Intrinsic::riscv_vloxseg6_mask:
+    case Intrinsic::riscv_vloxseg7_mask:
+    case Intrinsic::riscv_vloxseg8_mask:
+    case Intrinsic::riscv_vluxseg2_mask:
+    case Intrinsic::riscv_vluxseg3_mask:
+    case Intrinsic::riscv_vluxseg4_mask:
+    case Intrinsic::riscv_vluxseg5_mask:
+    case Intrinsic::riscv_vluxseg6_mask:
+    case Intrinsic::riscv_vluxseg7_mask:
+    case Intrinsic::riscv_vluxseg8_mask: {
+      selectVLXSEGMask(Node, IntNo);
+      return;
+    }
     }
     break;
   }
-  case RISCVISD::READ_CYCLE_WIDE:
-    assert(!Subtarget->is64Bit() && "READ_CYCLE_WIDE is only used on riscv32");
-
-    ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ReadCycleWide, DL,
-                                             MVT::i32, MVT::i32, MVT::Other,
-                                             Node->getOperand(0)));
+  case ISD::INTRINSIC_VOID: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    case Intrinsic::riscv_vsseg2:
+    case Intrinsic::riscv_vsseg3:
+    case Intrinsic::riscv_vsseg4:
+    case Intrinsic::riscv_vsseg5:
+    case Intrinsic::riscv_vsseg6:
+    case Intrinsic::riscv_vsseg7:
+    case Intrinsic::riscv_vsseg8: {
+      selectVSSEG(Node, IntNo, /*IsStrided=*/false);
+      return;
+    }
+    case Intrinsic::riscv_vsseg2_mask:
+    case Intrinsic::riscv_vsseg3_mask:
+    case Intrinsic::riscv_vsseg4_mask:
+    case Intrinsic::riscv_vsseg5_mask:
+    case Intrinsic::riscv_vsseg6_mask:
+    case Intrinsic::riscv_vsseg7_mask:
+    case Intrinsic::riscv_vsseg8_mask: {
+      selectVSSEGMask(Node, IntNo, /*IsStrided=*/false);
+      return;
+    }
+    case Intrinsic::riscv_vssseg2:
+    case Intrinsic::riscv_vssseg3:
+    case Intrinsic::riscv_vssseg4:
+    case Intrinsic::riscv_vssseg5:
+    case Intrinsic::riscv_vssseg6:
+    case Intrinsic::riscv_vssseg7:
+    case Intrinsic::riscv_vssseg8: {
+      selectVSSEG(Node, IntNo, /*IsStrided=*/true);
+      return;
+    }
+    case Intrinsic::riscv_vssseg2_mask:
+    case Intrinsic::riscv_vssseg3_mask:
+    case Intrinsic::riscv_vssseg4_mask:
+    case Intrinsic::riscv_vssseg5_mask:
+    case Intrinsic::riscv_vssseg6_mask:
+    case Intrinsic::riscv_vssseg7_mask:
+    case Intrinsic::riscv_vssseg8_mask: {
+      selectVSSEGMask(Node, IntNo, /*IsStrided=*/true);
+      return;
+    }
+    case Intrinsic::riscv_vsoxseg2:
+    case Intrinsic::riscv_vsoxseg3:
+    case Intrinsic::riscv_vsoxseg4:
+    case Intrinsic::riscv_vsoxseg5:
+    case Intrinsic::riscv_vsoxseg6:
+    case Intrinsic::riscv_vsoxseg7:
+    case Intrinsic::riscv_vsoxseg8:
+    case Intrinsic::riscv_vsuxseg2:
+    case Intrinsic::riscv_vsuxseg3:
+    case Intrinsic::riscv_vsuxseg4:
+    case Intrinsic::riscv_vsuxseg5:
+    case Intrinsic::riscv_vsuxseg6:
+    case Intrinsic::riscv_vsuxseg7:
+    case Intrinsic::riscv_vsuxseg8: {
+      selectVSXSEG(Node, IntNo);
+      return;
+    }
+    case Intrinsic::riscv_vsoxseg2_mask:
+    case Intrinsic::riscv_vsoxseg3_mask:
+    case Intrinsic::riscv_vsoxseg4_mask:
+    case Intrinsic::riscv_vsoxseg5_mask:
+    case Intrinsic::riscv_vsoxseg6_mask:
+    case Intrinsic::riscv_vsoxseg7_mask:
+    case Intrinsic::riscv_vsoxseg8_mask:
+    case Intrinsic::riscv_vsuxseg2_mask:
+    case Intrinsic::riscv_vsuxseg3_mask:
+    case Intrinsic::riscv_vsuxseg4_mask:
+    case Intrinsic::riscv_vsuxseg5_mask:
+    case Intrinsic::riscv_vsuxseg6_mask:
+    case Intrinsic::riscv_vsuxseg7_mask:
+    case Intrinsic::riscv_vsuxseg8_mask: {
+      selectVSXSEGMask(Node, IntNo);
+      return;
+    }
+    }
+    break;
+  }
+  case RISCVISD::VLSEGFF: {
+    selectVLSEGFF(Node);
     return;
   }
+  case RISCVISD::VLSEGFF_MASK: {
+    selectVLSEGFFMask(Node);
+    return;
+  }
   }
 
   // Select the default instruction.
   SelectCode(Node);
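[Editor's note] For readers new to the Zvlsseg intrinsics dispatched above: a segment load de-interleaves an array of NF-field structures into NF separate vectors, which is why selection emits a single pseudo producing a register tuple and then peels off the NF results with getTargetExtractSubreg. A scalar model of the unit-stride NF=2 case, illustration only:

    #include <cstddef>

    // Per the RVV spec, field J of segment I comes from element I * NF + J.
    void vlseg2Model(const int *Base, size_t VL, int *V0, int *V1) {
      for (size_t I = 0; I < VL; ++I) {
        V0[I] = Base[2 * I + 0]; // first field of each segment
        V1[I] = Base[2 * I + 1]; // second field of each segment
      }
    }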
@@ -184,328 +805,132 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
   return false;
 }
 
-// Check that it is a SLOI (Shift Left Ones Immediate). We first check that
-// it is the right node tree:
-//
-// (OR (SHL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskTrailingOnes<uint64_t>(VC2)
+// Match (srl (and val, mask), imm) where the result would be a
+// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
+// is equivalent to this (SimplifyDemandedBits may have removed lower bits
+// from the mask that aren't necessary due to the right-shifting).
+bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const {
+  assert(N->getOpcode() == ISD::SRL);
+  assert(N->getOperand(0).getOpcode() == ISD::AND);
+  assert(isa<ConstantSDNode>(N->getOperand(1)));
+  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
 
-bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  MVT XLenVT = Subtarget->getXLenVT();
-  if (N.getOpcode() == ISD::OR) {
-    SDValue Or = N;
-    if (Or.getOperand(0).getOpcode() == ISD::SHL) {
-      SDValue Shl = Or.getOperand(0);
-      if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
-          isa<ConstantSDNode>(Or.getOperand(1))) {
-        if (XLenVT == MVT::i64) {
-          uint64_t VC1 = Or.getConstantOperandVal(1);
-          uint64_t VC2 = Shl.getConstantOperandVal(1);
-          if (VC1 == maskTrailingOnes<uint64_t>(VC2)) {
-            RS1 = Shl.getOperand(0);
-            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                              Shl.getOperand(1).getValueType());
-            return true;
-          }
-        }
-        if (XLenVT == MVT::i32) {
-          uint32_t VC1 = Or.getConstantOperandVal(1);
-          uint32_t VC2 = Shl.getConstantOperandVal(1);
-          if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
-            RS1 = Shl.getOperand(0);
-            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                              Shl.getOperand(1).getValueType());
-            return true;
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
-// Check that it is a SROI (Shift Right Ones Immediate). We first check that
-// it is the right node tree:
-//
-// (OR (SRL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskLeadingOnes<uint64_t>(VC2)
-
-bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  MVT XLenVT = Subtarget->getXLenVT();
-  if (N.getOpcode() == ISD::OR) {
-    SDValue Or = N;
-    if (Or.getOperand(0).getOpcode() == ISD::SRL) {
-      SDValue Srl = Or.getOperand(0);
-      if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
-          isa<ConstantSDNode>(Or.getOperand(1))) {
-        if (XLenVT == MVT::i64) {
-          uint64_t VC1 = Or.getConstantOperandVal(1);
-          uint64_t VC2 = Srl.getConstantOperandVal(1);
-          if (VC1 == maskLeadingOnes<uint64_t>(VC2)) {
-            RS1 = Srl.getOperand(0);
-            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                              Srl.getOperand(1).getValueType());
-            return true;
-          }
-        }
-        if (XLenVT == MVT::i32) {
-          uint32_t VC1 = Or.getConstantOperandVal(1);
-          uint32_t VC2 = Srl.getConstantOperandVal(1);
-          if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
-            RS1 = Srl.getOperand(0);
-            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                              Srl.getOperand(1).getValueType());
-            return true;
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
-// Check that it is a RORI (Rotate Right Immediate). We first check that
-// it is the right node tree:
-//
-// (ROTL RS1, VC)
-//
-// The compiler translates immediate rotations to the right given by the call
-// to the rotateright32/rotateright64 intrinsics as rotations to the left.
-// Since the rotation to the left can be easily emulated as a rotation to the
-// right by negating the constant, there is no encoding for ROLI.
-// We then select the immediate left rotations as RORI by the complementary
-// constant:
-//
-// Shamt == XLen - VC
+  // The IsRV64 predicate is checked after PatFrag predicates so we can get
+  // here even on RV32.
+  if (!Subtarget->is64Bit())
+    return false;
 
-bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  MVT XLenVT = Subtarget->getXLenVT();
-  if (N.getOpcode() == ISD::ROTL) {
-    if (isa<ConstantSDNode>(N.getOperand(1))) {
-      if (XLenVT == MVT::i64) {
-        uint64_t VC = N.getConstantOperandVal(1);
-        Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N),
-                                          N.getOperand(1).getValueType());
-        RS1 = N.getOperand(0);
-        return true;
-      }
-      if (XLenVT == MVT::i32) {
-        uint32_t VC = N.getConstantOperandVal(1);
-        Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N),
-                                          N.getOperand(1).getValueType());
-        RS1 = N.getOperand(0);
-        return true;
-      }
-    }
-  }
-  return false;
+  SDValue And = N->getOperand(0);
+  uint64_t ShAmt = N->getConstantOperandVal(1);
+  uint64_t Mask = And.getConstantOperandVal(1);
+  return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff;
 }
-
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64).
 // SLLIUW is the same as SLLI except for the fact that it clears the bits
 // XLEN-1:32 of the input RS1 before shifting.
-// We first check that it is the right node tree:
+// A PatFrag has already checked that it has the right structure:
 //
 // (AND (SHL RS1, VC2), VC1)
 //
 // We check that VC2, the shamt is less than 32, otherwise the pattern is
 // exactly the same as SLLI and we give priority to that.
-// Eventually we check that that VC1, the mask used to clear the upper 32 bits
+// Eventually we check that VC1, the mask used to clear the upper 32 bits
 // of RS1, is correct:
 //
 // VC1 == (0xFFFFFFFF << VC2)
+//
+bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
+  assert(N->getOpcode() == ISD::AND);
+  assert(N->getOperand(0).getOpcode() == ISD::SHL);
+  assert(isa<ConstantSDNode>(N->getOperand(1)));
+  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
 
-bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) {
-    SDValue And = N;
-    if (And.getOperand(0).getOpcode() == ISD::SHL) {
-      SDValue Shl = And.getOperand(0);
-      if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
-          isa<ConstantSDNode>(And.getOperand(1))) {
-        uint64_t VC1 = And.getConstantOperandVal(1);
-        uint64_t VC2 = Shl.getConstantOperandVal(1);
-        if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) {
-          RS1 = Shl.getOperand(0);
-          Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                            Shl.getOperand(1).getValueType());
-          return true;
-        }
-      }
-    }
-  }
-  return false;
+  // The IsRV64 predicate is checked after PatFrag predicates so we can get
+  // here even on RV32.
+  if (!Subtarget->is64Bit())
+    return false;
+
+  SDValue Shl = N->getOperand(0);
+  uint64_t VC1 = N->getConstantOperandVal(1);
+  uint64_t VC2 = Shl.getConstantOperandVal(1);
+
+  // Immediate range should be enforced by uimm5 predicate.
+  assert(VC2 < 32 && "Unexpected immediate");
+  return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
 }
 
-// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1))
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskTrailingOnes<uint32_t>(VC2)
-
-bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  if (Subtarget->getXLenVT() == MVT::i64 &&
-      N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
-    if (N.getOperand(0).getOpcode() == ISD::OR) {
-      SDValue Or = N.getOperand(0);
-      if (Or.getOperand(0).getOpcode() == ISD::SHL) {
-        SDValue Shl = Or.getOperand(0);
-        if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
-            isa<ConstantSDNode>(Or.getOperand(1))) {
-          uint32_t VC1 = Or.getConstantOperandVal(1);
-          uint32_t VC2 = Shl.getConstantOperandVal(1);
-          if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
-            RS1 = Shl.getOperand(0);
-            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                              Shl.getOperand(1).getValueType());
-            return true;
-          }
-        }
-      }
-    }
-  }
-  return false;
+// X0 has special meaning for vsetvl/vsetvli.
+//  rd | rs1 |   AVL value | Effect on vl
+//--------------------------------------------------------------
+// !X0 |  X0 |       VLMAX | Set vl to VLMAX
+//  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
+bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
+  // If the VL value is a constant 0, manually select it to an ADDI with 0
+  // immediate to prevent the default selection path from matching it to X0.
+  auto *C = dyn_cast<ConstantSDNode>(N);
+  if (C && C->isNullValue())
+    VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
+  else
+    VL = N;
+
+  return true;
 }
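[Editor's note] The X0 table above explains the odd-looking selectVLOp: materializing a constant-zero VL as X0 would not request zero elements, it would change the meaning of the instruction entirely. A small model of the convention, written from the table (illustrative, not LLVM code):

    enum class AvlKind { RegisterValue, VLMax, KeepVL };

    // rs1 == x0 never means "AVL = 0": it selects VLMAX (rd != x0) or
    // "keep the current vl" (rd == x0). A genuine zero must therefore be
    // placed in a real register, e.g. addi rN, x0, 0, as selectImm does.
    AvlKind classifyAvl(bool Rs1IsX0, bool RdIsX0) {
      if (!Rs1IsX0)
        return AvlKind::RegisterValue;
      return RdIsX0 ? AvlKind::KeepVL : AvlKind::VLMax;
    }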
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
-// We first check that it is the right node tree:
-//
-// (OR (SHL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskLeadingOnes<uint32_t>(VC2)
-
-bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) {
-    SDValue Or = N;
-    if (Or.getOperand(0).getOpcode() == ISD::SRL) {
-      SDValue Srl = Or.getOperand(0);
-      if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
-          isa<ConstantSDNode>(Or.getOperand(1))) {
-        uint32_t VC1 = Or.getConstantOperandVal(1);
-        uint32_t VC2 = Srl.getConstantOperandVal(1);
-        if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
-          RS1 = Srl.getOperand(0);
-          Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
-                                            Srl.getOperand(1).getValueType());
-          return true;
-        }
-      }
-    }
-  }
-  return false;
+bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
+  if (N.getOpcode() != ISD::SPLAT_VECTOR &&
+      N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
+    return false;
+  SplatVal = N.getOperand(0);
+  return true;
 }
 
-// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2),
-//                        (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
-//
-// Then we check that the constant operands respect these constraints:
-//
-// VC2 == 32 - VC1
-// VC3 == maskLeadingOnes<uint32_t>(VC2)
-//
-// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
-// and VC3 a 32 bit mask of (32 - VC1) leading ones.
-
-bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
-  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      Subtarget->getXLenVT() == MVT::i64 &&
-      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
-    if (N.getOperand(0).getOpcode() == ISD::OR) {
-      SDValue Or = N.getOperand(0);
-      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
-          Or.getOperand(1).getOpcode() == ISD::SRL) {
-        SDValue Shl = Or.getOperand(0);
-        SDValue Srl = Or.getOperand(1);
-        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
-          SDValue And = Srl.getOperand(0);
-          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
-              isa<ConstantSDNode>(Shl.getOperand(1)) &&
-              isa<ConstantSDNode>(And.getOperand(1))) {
-            uint32_t VC1 = Srl.getConstantOperandVal(1);
-            uint32_t VC2 = Shl.getConstantOperandVal(1);
-            uint32_t VC3 = And.getConstantOperandVal(1);
-            if (VC2 == (32 - VC1) &&
-                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
-              RS1 = Shl.getOperand(0);
-              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
-                                                Srl.getOperand(1).getValueType());
-              return true;
-            }
-          }
-        }
-      }
-    }
-  }
+bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
+  if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
+       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+      !isa<ConstantSDNode>(N.getOperand(0)))
+    return false;
+
+  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+
+  // Both ISD::SPLAT_VECTOR and RISCVISD::SPLAT_VECTOR_I64 share semantics when
+  // the operand type is wider than the resulting vector element type: an
+  // implicit truncation first takes place. Therefore, perform a manual
+  // truncation/sign-extension in order to ignore any truncated bits and catch
+  // any zero-extended immediate.
+  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
+  // sign-extending to (XLenVT -1).
+  auto XLenVT = Subtarget->getXLenVT();
+  assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
+         "Unexpected splat operand type");
+  auto EltVT = N.getValueType().getVectorElementType();
+  if (EltVT.bitsLT(XLenVT)) {
+    SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
   }
-  return false;
+
+  if (!isInt<5>(SplatImm))
+    return false;
+
+  SplatVal = CurDAG->getTargetConstant(SplatImm, SDLoc(N), XLenVT);
+  return true;
 }
 
-// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64).
-// We first check that it is the right node tree:
-//
-// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2),
-//                        (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
-//
-// Then we check that the constant operands respect these constraints:
-//
-// VC2 == 32 - VC1
-// VC3 == maskLeadingOnes<uint32_t>(VC2)
-//
-// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
-// and VC3 a 32 bit mask of (32 - VC1) leading ones.
-
-bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2,
-                                    SDValue &Shamt) {
-  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      Subtarget->getXLenVT() == MVT::i64 &&
-      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
-    if (N.getOperand(0).getOpcode() == ISD::OR) {
-      SDValue Or = N.getOperand(0);
-      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
-          Or.getOperand(1).getOpcode() == ISD::SRL) {
-        SDValue Shl = Or.getOperand(0);
-        SDValue Srl = Or.getOperand(1);
-        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
-          SDValue And = Srl.getOperand(0);
-          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
-              isa<ConstantSDNode>(Shl.getOperand(1)) &&
-              isa<ConstantSDNode>(And.getOperand(1))) {
-            uint32_t VC1 = Srl.getConstantOperandVal(1);
-            uint32_t VC2 = Shl.getConstantOperandVal(1);
-            uint32_t VC3 = And.getConstantOperandVal(1);
-            if (VC2 == (32 - VC1) &&
-                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
-              RS1 = Shl.getOperand(0);
-              RS2 = And.getOperand(0);
-              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
-                                                Srl.getOperand(1).getValueType());
-              return true;
-            }
-          }
-        }
-      }
-    }
-  }
-  return false;
+bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
+  if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
+       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+      !isa<ConstantSDNode>(N.getOperand(0)))
+    return false;
+
+  int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+
+  if (!isUInt<5>(SplatImm))
+    return false;
+
+  SplatVal =
+      CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
+
+  return true;
 }
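[Editor's note] The truncate-then-sign-extend dance in selectVSplatSimm5 is easy to verify standalone. A self-contained check using plain C++ in place of llvm::SignExtend64 and isInt<5>:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low Bits bits of Imm, like llvm::SignExtend64.
    int64_t signExtend64(uint64_t Imm, unsigned Bits) {
      return static_cast<int64_t>(Imm << (64 - Bits)) >> (64 - Bits);
    }

    bool isSimm5(int64_t Imm) { return Imm >= -16 && Imm <= 15; }

    int main() {
      // An i8 splat of -1 arrives as the XLenVT constant 255 (SPLAT_VECTOR
      // implicitly truncates). Re-extending from 8 bits recovers -1, which
      // fits simm5 and so can use a .vi instruction form.
      int64_t SplatImm = signExtend64(255, 8);
      assert(SplatImm == -1 && isSimm5(SplatImm));
      return 0;
    }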
 // Merge an ADDI into the offset of a load/store instruction where possible.
@@ -536,6 +961,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
     case RISCV::LHU:
     case RISCV::LWU:
     case RISCV::LD:
+    case RISCV::FLH:
     case RISCV::FLW:
     case RISCV::FLD:
       BaseOpIdx = 0;
@@ -545,6 +971,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
     case RISCV::SH:
     case RISCV::SW:
     case RISCV::SD:
+    case RISCV::FSH:
    case RISCV::FSW:
     case RISCV::FSD:
       BaseOpIdx = 1;
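[Editor's note] The two hunks above only add the Zfh half-precision FLH/FSH opcodes to an existing peephole that folds an address-forming ADDI into a memory instruction's immediate. A simplified model of the folding constraint (assumption: the combined offset must still fit RISC-V's 12-bit signed immediate):

    #include <cstdint>
    #include <optional>

    // (mem-op (addi base, Off1), Off2) -> (mem-op base, Off1 + Off2),
    // but only when the combined offset stays within simm12.
    std::optional<int32_t> foldAddiOffset(int32_t Off1, int32_t Off2) {
      int64_t Combined = static_cast<int64_t>(Off1) + Off2;
      if (Combined < -2048 || Combined > 2047)
        return std::nullopt;
      return static_cast<int32_t>(Combined);
    }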
#include "RISCVGenDAGISel.inc" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03d9eefd59d0..97f46d9731b1 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "RISCVISelLowering.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" -#include "Utils/RISCVMatInt.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -33,6 +32,7 @@ #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -83,11 +83,73 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Set up the register classes. addRegisterClass(XLenVT, &RISCV::GPRRegClass); + if (Subtarget.hasStdExtZfh()) + addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtF()) addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); if (Subtarget.hasStdExtD()) addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); + if (Subtarget.hasStdExtV()) { + addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass); + + addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass); + addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass); + addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass); + + addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass); + addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass); + addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass); + + addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass); + addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass); + addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass); + + addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass); + addRegisterClass(RISCVVMVTs::vint64m2_t, 
+    addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass);
+    addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass);
+
+    if (Subtarget.hasStdExtZfh()) {
+      addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass);
+      addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass);
+      addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass);
+      addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass);
+      addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass);
+      addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass);
+    }
+
+    if (Subtarget.hasStdExtF()) {
+      addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass);
+      addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass);
+      addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass);
+      addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass);
+      addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass);
+    }
+
+    if (Subtarget.hasStdExtD()) {
+      addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass);
+      addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
+      addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
+      addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
+    }
+  }
+
   // Compute derived properties from the register classes.
   computeRegisterProperties(STI.getRegisterInfo());
 
@@ -101,7 +163,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   setOperationAction(ISD::BR_CC, XLenVT, Expand);
-  setOperationAction(ISD::SELECT, XLenVT, Custom);
   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
 
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -112,8 +173,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 
-  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
-    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  if (!Subtarget.hasStdExtZbb()) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+  }
 
   if (Subtarget.is64Bit()) {
     setOperationAction(ISD::ADD, MVT::i32, Custom);
@@ -135,6 +199,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
     setOperationAction(ISD::MUL, MVT::i32, Custom);
+
+    setOperationAction(ISD::SDIV, MVT::i8, Custom);
+    setOperationAction(ISD::UDIV, MVT::i8, Custom);
+    setOperationAction(ISD::UREM, MVT::i8, Custom);
+    setOperationAction(ISD::SDIV, MVT::i16, Custom);
+    setOperationAction(ISD::UDIV, MVT::i16, Custom);
+    setOperationAction(ISD::UREM, MVT::i16, Custom);
     setOperationAction(ISD::SDIV, MVT::i32, Custom);
     setOperationAction(ISD::UDIV, MVT::i32, Custom);
     setOperationAction(ISD::UREM, MVT::i32, Custom);
@@ -149,46 +220,90 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
 
-  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::ROTL, MVT::i32, Custom);
+      setOperationAction(ISD::ROTR, MVT::i32, Custom);
+    }
+  } else {
     setOperationAction(ISD::ROTL, XLenVT, Expand);
     setOperationAction(ISD::ROTR, XLenVT, Expand);
   }
 
-  if (!Subtarget.hasStdExtZbp())
-    setOperationAction(ISD::BSWAP, XLenVT, Expand);
+  if (Subtarget.hasStdExtZbp()) {
+    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
+    // more combining.
+    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
+    setOperationAction(ISD::BSWAP, XLenVT, Custom);
 
-  if (!Subtarget.hasStdExtZbb()) {
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
+    }
+  } else {
+    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
+    // pattern match it directly in isel.
+    setOperationAction(ISD::BSWAP, XLenVT,
+                       Subtarget.hasStdExtZbb() ? Legal : Expand);
+  }
+
+  if (Subtarget.hasStdExtZbb()) {
+    setOperationAction(ISD::SMIN, XLenVT, Legal);
+    setOperationAction(ISD::SMAX, XLenVT, Legal);
+    setOperationAction(ISD::UMIN, XLenVT, Legal);
+    setOperationAction(ISD::UMAX, XLenVT, Legal);
+  } else {
     setOperationAction(ISD::CTTZ, XLenVT, Expand);
     setOperationAction(ISD::CTLZ, XLenVT, Expand);
     setOperationAction(ISD::CTPOP, XLenVT, Expand);
   }
 
-  if (Subtarget.hasStdExtZbp())
-    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
-
   if (Subtarget.hasStdExtZbt()) {
     setOperationAction(ISD::FSHL, XLenVT, Legal);
     setOperationAction(ISD::FSHR, XLenVT, Legal);
+    setOperationAction(ISD::SELECT, XLenVT, Legal);
+
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::FSHL, MVT::i32, Custom);
+      setOperationAction(ISD::FSHR, MVT::i32, Custom);
+    }
+  } else {
+    setOperationAction(ISD::SELECT, XLenVT, Custom);
   }
 
-  ISD::CondCode FPCCToExtend[] = {
+  ISD::CondCode FPCCToExpand[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
-      ISD::SETGE,  ISD::SETNE};
+      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
 
-  ISD::NodeType FPOpToExtend[] = {
+  ISD::NodeType FPOpToExpand[] = {
       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
       ISD::FP16_TO_FP, ISD::FP_TO_FP16};
 
+  if (Subtarget.hasStdExtZfh())
+    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+
+  if (Subtarget.hasStdExtZfh()) {
+    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+    for (auto CC : FPCCToExpand)
+      setCondCodeAction(CC, MVT::f16, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+    setOperationAction(ISD::SELECT, MVT::f16, Custom);
+    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+    for (auto Op : FPOpToExpand)
+      setOperationAction(Op, MVT::f16, Expand);
+  }
+
   if (Subtarget.hasStdExtF()) {
     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-    for (auto CC : FPCCToExtend)
+    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
     setOperationAction(ISD::SELECT, MVT::f32, Custom);
     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
-    for (auto Op : FPOpToExtend)
+    for (auto Op : FPOpToExpand)
       setOperationAction(Op, MVT::f32, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
@@ -200,21 +315,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtD()) {
     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
-    for (auto CC : FPCCToExtend)
+    for (auto CC : FPCCToExpand)
       setCondCodeAction(CC, MVT::f64, Expand);
     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
     setOperationAction(ISD::SELECT, MVT::f64, Custom);
     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-    for (auto Op : FPOpToExtend)
+    for (auto Op : FPOpToExpand)
       setOperationAction(Op, MVT::f64, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
   }
 
-  if (Subtarget.is64Bit() &&
-      !(Subtarget.hasStdExtD() || Subtarget.hasStdExtF())) {
+  if (Subtarget.is64Bit()) {
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
@@ -224,6 +338,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
+  setOperationAction(ISD::JumpTable, XLenVT, Custom);
 
   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
 
@@ -245,25 +360,133 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   setBooleanContents(ZeroOrOneBooleanContent);
 
+  if (Subtarget.hasStdExtV()) {
+    setBooleanVectorContents(ZeroOrOneBooleanContent);
+
+    setOperationAction(ISD::VSCALE, XLenVT, Custom);
+
+    // RVV intrinsics may have illegal operands.
+    // We also need to custom legalize vmv.x.s.
+    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
+    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
+
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+    if (Subtarget.is64Bit()) {
+      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+    }
+
+    for (auto VT : MVT::integer_scalable_vector_valuetypes()) {
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+
+      setOperationAction(ISD::SMIN, VT, Legal);
+      setOperationAction(ISD::SMAX, VT, Legal);
+      setOperationAction(ISD::UMIN, VT, Legal);
+      setOperationAction(ISD::UMAX, VT, Legal);
+
+      setOperationAction(ISD::ROTL, VT, Expand);
+      setOperationAction(ISD::ROTR, VT, Expand);
+
+      if (isTypeLegal(VT)) {
+        // Custom-lower extensions and truncations from/to mask types.
+        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+
+        // We custom-lower all legally-typed vector truncates:
+        // 1. Mask VTs are custom-expanded into a series of standard nodes
+        // 2. Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
+        // nodes which truncate by one power of two at a time.
+        setOperationAction(ISD::TRUNCATE, VT, Custom);
+
+        // Custom-lower insert/extract operations to simplify patterns.
+        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      }
+    }
+
+    // We must custom-lower certain vXi64 operations on RV32 due to the vector
+    // element type being illegal.
+    if (!Subtarget.is64Bit()) {
+      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
+    }
+
+    // Expand various CCs to best match the RVV ISA, which natively supports UNE
+    // but no other unordered comparisons, and supports all ordered comparisons
+    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
+    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
+    // and we pattern-match those back to the "original", swapping operands once
+    // more. This way we catch both operations and both "vf" and "fv" forms with
+    // fewer patterns.
+    ISD::CondCode VFPCCToExpand[] = {
+        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
+        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
+        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
+    };
+
+    // Sets common operation actions on RVV floating-point vector types.
+    const auto SetCommonVFPActions = [&](MVT VT) {
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+      // Custom-lower insert/extract operations to simplify patterns.
+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      for (auto CC : VFPCCToExpand)
+        setCondCodeAction(CC, VT, Expand);
+    };
+
+    if (Subtarget.hasStdExtZfh()) {
+      for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
+                      RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
+                      RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t})
+        SetCommonVFPActions(VT);
+    }
+
+    if (Subtarget.hasStdExtF()) {
+      for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
+                      RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
+                      RISCVVMVTs::vfloat32m8_t})
+        SetCommonVFPActions(VT);
+    }
+
+    if (Subtarget.hasStdExtD()) {
+      for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
+                      RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t})
+        SetCommonVFPActions(VT);
+    }
+  }
+
   // Function alignments.
   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
   setMinFunctionAlignment(FunctionAlignment);
   setPrefFunctionAlignment(FunctionAlignment);
 
-  // Effectively disable jump table generation.
-  setMinimumJumpTableEntries(INT_MAX);
+  setMinimumJumpTableEntries(5);
 
   // Jumps are expensive, compared to logic
   setJumpIsExpensive();
 
   // We can use any register for comparisons
   setHasMultipleConditionRegisters();
+
+  setTargetDAGCombine(ISD::SETCC);
+  if (Subtarget.hasStdExtZbp()) {
+    setTargetDAGCombine(ISD::OR);
+  }
 }
 
 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                             LLVMContext &, EVT VT) const {
   if (!VT.isVector())
     return getPointerTy(DL);
+  if (Subtarget.hasStdExtV())
+    return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
   return VT.changeVectorElementTypeToInteger();
 }
 
@@ -367,8 +590,18 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
 }
 
+bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
+  return Subtarget.hasStdExtZbb();
+}
+
+bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
+  return Subtarget.hasStdExtZbb();
+}
+
 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
+  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
+    return false;
   if (VT == MVT::f32 && !Subtarget.hasStdExtF())
     return false;
   if (VT == MVT::f64 && !Subtarget.hasStdExtD())
@@ -379,7 +612,8 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 }
 
 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
-  return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
+  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
+         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
 }
 
@@ -433,6 +667,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:
     return lowerConstantPool(Op, DAG);
+  case ISD::JumpTable:
+    return lowerJumpTable(Op, DAG);
   case ISD::GlobalTLSAddress:
     return lowerGlobalTLSAddress(Op, DAG);
   case ISD::SELECT:
@@ -450,18 +686,105 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::SRL_PARTS:
     return lowerShiftRightParts(Op, DAG, false);
   case ISD::BITCAST: {
-    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
+    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
+            Subtarget.hasStdExtZfh()) &&
            "Unexpected custom legalisation");
     SDLoc DL(Op);
     SDValue Op0 = Op.getOperand(0);
-    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
-      return SDValue();
-    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
-    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
-    return FPConv;
+    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
+      if (Op0.getValueType() != MVT::i16)
+        return SDValue();
+      SDValue NewOp0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
+      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
+      return FPConv;
+    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
+               Subtarget.hasStdExtF()) {
+      if (Op0.getValueType() != MVT::i32)
+        return SDValue();
+      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+      SDValue FPConv =
+          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+      return FPConv;
+    }
+    return SDValue();
   }
   case ISD::INTRINSIC_WO_CHAIN:
     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerINTRINSIC_W_CHAIN(Op, DAG);
+  case ISD::BSWAP:
+  case ISD::BITREVERSE: {
+    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    // Start with the maximum immediate value which is the bitwidth - 1.
+    unsigned Imm = VT.getSizeInBits() - 1;
+    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+    if (Op.getOpcode() == ISD::BSWAP)
+      Imm &= ~0x7U;
+    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
+                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
+  }
+  case ISD::TRUNCATE: {
+    SDLoc DL(Op);
+    EVT VT = Op.getValueType();
+    // Only custom-lower vector truncates
+    if (!VT.isVector())
+      return Op;
+
+    // Truncates to mask types are handled differently
+    if (VT.getVectorElementType() == MVT::i1)
+      return lowerVectorMaskTrunc(Op, DAG);
+
+    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
+    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
+    // truncate by one power of two at a time.
+    EVT DstEltVT = VT.getVectorElementType();
+
+    SDValue Src = Op.getOperand(0);
+    EVT SrcVT = Src.getValueType();
+    EVT SrcEltVT = SrcVT.getVectorElementType();
+
+    assert(DstEltVT.bitsLT(SrcEltVT) &&
+           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
+           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
+           "Unexpected vector truncate lowering");
+
+    SDValue Result = Src;
+    LLVMContext &Context = *DAG.getContext();
+    const ElementCount Count = SrcVT.getVectorElementCount();
+    do {
+      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
+      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
+      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
+    } while (SrcEltVT != DstEltVT);
+
+    return Result;
+  }
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
+  case ISD::SIGN_EXTEND:
+    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
+  case ISD::SPLAT_VECTOR:
+    return lowerSPLATVECTOR(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:
+    return lowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
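[Editor's note] A sanity check on the GREVI immediate trick in the BSWAP/BITREVERSE case above: reversing all N bits is grev with immediate N - 1, and clearing the low three bits of that control word turns off the sub-byte swap stages, leaving a pure byte swap. A standalone illustration:

    #include <cstdint>

    // BITREVERSE of an N-bit value maps to GREVI imm = N - 1 (63 for i64);
    // masking off the low 3 bits yields the BSWAP immediate (56, i.e. rev8).
    uint32_t greviImm(uint32_t Bits, bool IsBswap) {
      uint32_t Imm = Bits - 1;
      if (IsBswap)
        Imm &= ~0x7u;
      return Imm;
    }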
+  case ISD::VSCALE: {
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+    // We define our scalable vector types for lmul=1 to use a 64 bit known
+    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
+    // vscale as VLENB / 8.
+    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
+                                 DAG.getConstant(3, DL, VT));
+    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+  }
   }
 }
 
@@ -482,6 +805,11 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                              N->getOffset(), Flags);
 }
 
+static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
 template <class NodeTy>
 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                      bool IsLocal) const {
@@ -559,6 +887,13 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
   return getAddr(N, DAG);
 }
 
+SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+
+  return getAddr(N, DAG);
+}
+
 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG,
                                               bool UseGOT) const {
@@ -642,6 +977,10 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
 
+  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+      CallingConv::GHC)
+    report_fatal_error("In GHC calling convention TLS is not supported");
+
   SDValue Addr;
   switch (Model) {
   case TLSModel::LocalExec:
@@ -689,9 +1028,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     normaliseSetCC(LHS, RHS, CCVal);
 
     SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
-    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
     SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
-    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
+    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
   }
 
   // Otherwise:
@@ -700,10 +1038,9 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
   SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
 
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
 
-  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
+  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
 }
 
 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -865,10 +1202,226 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
   return DAG.getMergeValues(Parts, DL);
 }
 
+// Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
+// illegal (currently only vXi64 RV32).
+// FIXME: We could also catch non-constant sign-extended i32 values and lower
+// them to SPLAT_VECTOR_I64
+SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VecVT = Op.getValueType();
+  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
+         "Unexpected SPLAT_VECTOR lowering");
+  SDValue SplatVal = Op.getOperand(0);
+
+  // If we can prove that the value is a sign-extended 32-bit value, lower this
+  // as a custom node in order to try and match RVV vector/scalar instructions.
+  if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+    if (isInt<32>(CVal->getSExtValue()))
+      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
+                         DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
+  }
+
+  if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
+      SplatVal.getOperand(0).getValueType() == MVT::i32) {
+    return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
+                       SplatVal.getOperand(0));
+  }
+
+  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
+  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
+  // vmv.v.x vX, hi
+  // vsll.vx vX, vX, /*32*/
+  // vmv.v.x vY, lo
+  // vsll.vx vY, vY, /*32*/
+  // vsrl.vx vY, vY, /*32*/
+  // vor.vv vX, vX, vY
+  SDValue One = DAG.getConstant(1, DL, MVT::i32);
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
+
+  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
+  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
+  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
+
+  if (isNullConstant(Hi))
+    return Lo;
+
+  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
+  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
+
+  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
+}
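[Editor's note] The six-instruction splat sequence in the comment above is straightforward to verify on scalars. A standalone sketch of the per-element bit manipulation (illustration, not DAG code):

    #include <cassert>
    #include <cstdint>

    // vmv.v.x sign-extends its 32-bit scalar at SEW=64, so the low half must
    // be cleaned with shl+srl before being merged with the shifted high half.
    uint64_t splatElement(uint32_t Lo, uint32_t Hi) {
      uint64_t LoPart = (static_cast<uint64_t>(Lo) << 32) >> 32;
      uint64_t HiPart = static_cast<uint64_t>(Hi) << 32;
      return HiPart | LoPart;
    }

    int main() {
      assert(splatElement(0xdeadbeef, 0x01234567) == 0x01234567deadbeefULL);
      return 0;
    }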
+
+// Custom-lower extensions from mask vectors by using a vselect either with 1
+// for zero/any-extension or -1 for sign-extension:
+// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
+// Note that any-extension is lowered identically to zero-extension.
+SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
+                                                int64_t ExtTrueVal) const {
+  SDLoc DL(Op);
+  EVT VecVT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  // Only custom-lower extensions from mask types
+  if (!Src.getValueType().isVector() ||
+      Src.getValueType().getVectorElementType() != MVT::i1)
+    return Op;
+
+  // Be careful not to introduce illegal scalar types at this stage, and be
+  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
+  // illegal and must be expanded. Since we know that the constants are
+  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
+  bool IsRV32E64 =
+      !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());
+
+  if (!IsRV32E64) {
+    SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
+    SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
+  } else {
+    SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+    SplatTrueVal =
+        DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
+  }
+
+  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
+}
+
+// Custom-lower truncations from vectors to mask vectors by using a mask and a
+// setcc operation:
+// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
+SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT MaskVT = Op.getValueType();
+  // Only expect to custom-lower truncations to mask types
+  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
+         "Unexpected type for vector mask lowering");
+  SDValue Src = Op.getOperand(0);
+  EVT VecVT = Src.getValueType();
+
+  // Be careful not to introduce illegal scalar types at this stage, and be
+  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
+  // illegal and must be expanded. Since we know that the constants are
+  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
+  bool IsRV32E64 =
+      !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
+  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+
+  if (!IsRV32E64) {
+    SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
+    SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
+  } else {
+    SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
+    SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+  }
+
+  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+
+  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
+}
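[Editor's note] Both mask conversions above reduce to one-line scalar identities, which makes the lowering easy to sanity-check. A standalone per-element model:

    #include <cassert>
    #include <cstdint>

    // (vXi1 = trunc vec) -> setcc(and(x, 1), 0, ne): only bit 0 decides.
    bool truncToMask(int64_t X) { return (X & 1) != 0; }

    // Mask extension: vselect with -1 as "true" for sext, 1 for zext/anyext.
    int64_t extendMask(bool M, int64_t ExtTrueVal) { return M ? ExtTrueVal : 0; }

    int main() {
      assert(truncToMask(7) && !truncToMask(6));
      assert(extendMask(true, -1) == -1 && extendMask(true, 1) == 1);
      return 0;
    }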
+ // Since there is no easy way of getting a single element into a vector when + // XLEN<SEW, we lower the operation to the following sequence: + // splat vVal, rVal + // vid.v vVid + // vmseq.vx mMask, vVid, rIdx + // vmerge.vvm vDest, vSrc, vVal, mMask + // This essentially merges the original vector with the inserted element by + // using a mask whose only set bit is that corresponding to the insert + // index. + SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val); + SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx); + + SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT); + auto SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT); + SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ); + + return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec); +} + +// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then +// extract the first element: (extractelt (slidedown vec, idx), 0). This is +// done to maintain parity with the RV32 vXi64 legalization. +SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Idx = Op.getOperand(1); + if (isNullConstant(Idx)) + return Op; + + SDValue Vec = Op.getOperand(0); + EVT EltVT = Op.getValueType(); + EVT VecVT = Vec.getValueType(); + SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, + DAG.getUNDEF(VecVT), Vec, Idx); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Slidedown, + DAG.getConstant(0, DL, Subtarget.getXLenVT())); +} + SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDLoc DL(Op); + + if (Subtarget.hasStdExtV()) { + // Some RVV intrinsics may claim that they want an integer operand to be + // extended. + if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = + RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { + if (II->ExtendedOperand) { + assert(II->ExtendedOperand < Op.getNumOperands()); + SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); + SDValue &ScalarOp = Operands[II->ExtendedOperand]; + EVT OpVT = ScalarOp.getValueType(); + if (OpVT == MVT::i8 || OpVT == MVT::i16 || + (OpVT == MVT::i32 && Subtarget.is64Bit())) { + // If the operand is a constant, sign extend to increase our chances + // of being able to use a .vi instruction. ANY_EXTEND would become + // a zero extend and the simm5 check in isel would fail. + // FIXME: Should we ignore the upper bits in isel instead? + unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND + : ISD::ANY_EXTEND; + ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + Operands); + } + } + } + } + switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. 
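A per-lane scalar model (sketch only) of the splat/vid/vmseq/vmerge sequence used by lowerINSERT_VECTOR_ELT above when XLEN<SEW:

#include <cstdint>

void insertElementModel(int64_t *Vec, unsigned NumLanes, int64_t Val,
                        unsigned Idx) {
  for (unsigned Lane = 0; Lane < NumLanes; ++Lane) // vid.v enumerates lanes
    if (Lane == Idx)   // vmseq.vx produces a mask whose only set bit is Idx
      Vec[Lane] = Val; // vmerge.vvm takes the splatted Val in that lane
}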
@@ -876,6 +1429,151 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getRegister(RISCV::X4, PtrVT); } + case Intrinsic::riscv_vmv_x_s: + assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!"); + return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), + Op.getOperand(1)); + } +} + +SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + SDLoc DL(Op); + + if (Subtarget.hasStdExtV()) { + // Some RVV intrinsics may claim that they want an integer operand to be + // extended. + if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = + RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) { + if (II->ExtendedOperand) { + // The operands start from the second argument in INTRINSIC_W_CHAIN. + unsigned ExtendOp = II->ExtendedOperand + 1; + assert(ExtendOp < Op.getNumOperands()); + SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); + SDValue &ScalarOp = Operands[ExtendOp]; + EVT OpVT = ScalarOp.getValueType(); + if (OpVT == MVT::i8 || OpVT == MVT::i16 || + (OpVT == MVT::i32 && Subtarget.is64Bit())) { + // If the operand is a constant, sign extend to increase our chances + // of being able to use a .vi instruction. ANY_EXTEND would become + // a zero extend and the simm5 check in isel would fail. + // FIXME: Should we ignore the upper bits in isel instead? + unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND + : ISD::ANY_EXTEND; + ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp); + return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(), + Operands); + } + } + } + } + + unsigned NF = 1; + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. 
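A small self-contained illustration (hypothetical values) of the simm5 concern noted in the comment above: sign extension keeps a negative constant in .vi immediate range, while a zero extension would not:

#include <cstdint>

static bool isSImm5(int64_t V) { return V >= -16 && V <= 15; }

int main() {
  int8_t C = -3;
  int64_t SExt = (int64_t)C;          // SIGN_EXTEND: still -3, matches simm5
  int64_t ZExt = (int64_t)(uint8_t)C; // a zero extend yields 253, fails simm5
  return (isSImm5(SExt) && !isSImm5(ZExt)) ? 0 : 1;
}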
+ case Intrinsic::riscv_vleff: { + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); + SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0), + Op.getOperand(2), Op.getOperand(3)); + VTs = DAG.getVTList(Op->getValueType(1), MVT::Other); + SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2)); + return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); + } + case Intrinsic::riscv_vleff_mask: { + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue); + SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0), + Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4), Op.getOperand(5)); + VTs = DAG.getVTList(Op->getValueType(1), MVT::Other); + SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2)); + return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL); + } + case Intrinsic::riscv_vlseg8ff: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg7ff: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg6ff: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg5ff: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg4ff: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg3ff: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg2ff: { + NF++; + SDLoc DL(Op); + SmallVector<EVT, 8> EVTs(NF, Op.getValueType()); + EVTs.push_back(MVT::Other); + EVTs.push_back(MVT::Glue); + SDVTList VTs = DAG.getVTList(EVTs); + SDValue Load = + DAG.getNode(RISCVISD::VLSEGFF, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other); + SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, + /*Glue*/ Load.getValue(NF + 1)); + SmallVector<SDValue, 8> Results; + for (unsigned i = 0; i < NF; ++i) + Results.push_back(Load.getValue(i)); + Results.push_back(ReadVL); + Results.push_back(Load.getValue(NF)); // Chain. + return DAG.getMergeValues(Results, DL); + } + case Intrinsic::riscv_vlseg8ff_mask: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg7ff_mask: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg6ff_mask: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg5ff_mask: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg4ff_mask: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg3ff_mask: + NF++; + LLVM_FALLTHROUGH; + case Intrinsic::riscv_vlseg2ff_mask: { + NF++; + SDLoc DL(Op); + SmallVector<EVT, 8> EVTs(NF, Op.getValueType()); + EVTs.push_back(MVT::Other); + EVTs.push_back(MVT::Glue); + SDVTList VTs = DAG.getVTList(EVTs); + SmallVector<SDValue, 13> LoadOps; + LoadOps.push_back(Op.getOperand(0)); // Chain. + LoadOps.push_back(Op.getOperand(1)); // Intrinsic ID. + for (unsigned i = 0; i < NF; ++i) + LoadOps.push_back(Op.getOperand(2 + i)); // MaskedOff. + LoadOps.push_back(Op.getOperand(2 + NF)); // Base. + LoadOps.push_back(Op.getOperand(3 + NF)); // Mask. + LoadOps.push_back(Op.getOperand(4 + NF)); // VL. + SDValue Load = DAG.getNode(RISCVISD::VLSEGFF_MASK, DL, VTs, LoadOps); + VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other); + SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, + /*Glue*/ Load.getValue(NF + 1)); + SmallVector<SDValue, 8> Results; + for (unsigned i = 0; i < NF; ++i) + Results.push_back(Load.getValue(i)); + Results.push_back(ReadVL); + Results.push_back(Load.getValue(NF)); // Chain. 
+ return DAG.getMergeValues(Results, DL); + } } } @@ -897,6 +1595,14 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { return RISCVISD::DIVUW; case ISD::UREM: return RISCVISD::REMUW; + case ISD::ROTL: + return RISCVISD::ROLW; + case ISD::ROTR: + return RISCVISD::RORW; + case RISCVISD::GREVI: + return RISCVISD::GREVIW; + case RISCVISD::GORCI: + return RISCVISD::GORCIW; } } @@ -905,14 +1611,15 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W // later on because the fact the operation was originally of type i32 is // lost. -static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { +static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, + unsigned ExtOpc = ISD::ANY_EXTEND) { SDLoc DL(N); RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); - SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); - SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); // ReplaceNodeResults requires we maintain the same type for the return value. - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); } // Converts the given 32-bit operation to an i64 operation with signed extension @@ -942,6 +1649,13 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); + // If the FP type needs to be softened, emit a library call using the 'si' + // version. If we left it to default legalization we'd end up with 'di'. If + // the FP type doesn't need to be softened just let generic type + // legalization promote the result type. + if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != + TargetLowering::TypeSoftenFloat) + return; RTLIB::Libcall LC; if (N->getOpcode() == ISD::FP_TO_SINT || N->getOpcode() == ISD::STRICT_FP_TO_SINT) @@ -991,31 +1705,377 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, return; Results.push_back(customLegalizeToWOp(N, DAG)); break; + case ISD::ROTL: + case ISD::ROTR: + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + Results.push_back(customLegalizeToWOp(N, DAG)); + break; case ISD::SDIV: case ISD::UDIV: - case ISD::UREM: - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - Subtarget.hasStdExtM() && "Unexpected custom legalisation"); + case ISD::UREM: { + MVT VT = N->getSimpleValueType(0); + assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && + Subtarget.is64Bit() && Subtarget.hasStdExtM() && + "Unexpected custom legalisation"); if (N->getOperand(0).getOpcode() == ISD::Constant || N->getOperand(1).getOpcode() == ISD::Constant) return; - Results.push_back(customLegalizeToWOp(N, DAG)); + + // If the input is i32, use ANY_EXTEND since the W instructions don't read + // the upper 32 bits. For other types we need to sign or zero extend + // based on the opcode. + unsigned ExtOpc = ISD::ANY_EXTEND; + if (VT != MVT::i32) + ExtOpc = N->getOpcode() == ISD::SDIV ? 
ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + + Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); break; + } case ISD::BITCAST: { + assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF()) || + (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && + "Unexpected custom legalisation"); + SDValue Op0 = N->getOperand(0); + if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { + if (Op0.getValueType() != MVT::f16) + return; + SDValue FPConv = + DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); + } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF()) { + if (Op0.getValueType() != MVT::f32) + return; + SDValue FPConv = + DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); + } + break; + } + case RISCVISD::GREVI: + case RISCVISD::GORCI: { assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - Subtarget.hasStdExtF() && "Unexpected custom legalisation"); + "Unexpected custom legalisation"); + // This is similar to customLegalizeToWOp, except that we pass the second + // operand (a TargetConstant) straight through: it is already of type + // XLenVT. SDLoc DL(N); - SDValue Op0 = N->getOperand(0); - if (Op0.getValueType() != MVT::f32) - return; - SDValue FPConv = - DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); + RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewRes = + DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1)); + // ReplaceNodeResults requires we maintain the same type for the return + // value. + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); + break; + } + case ISD::BSWAP: + case ISD::BITREVERSE: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, + N->getOperand(0)); + unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24; + SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0, + DAG.getTargetConstant(Imm, DL, + Subtarget.getXLenVT())); + // ReplaceNodeResults requires we maintain the same type for the return + // value. + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW)); + break; + } + case ISD::FSHL: + case ISD::FSHR: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); + SDValue NewOp0 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. + // Mask the shift amount to 5 bits. + NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, + DAG.getConstant(0x1f, DL, MVT::i64)); + unsigned Opc = + N->getOpcode() == ISD::FSHL ? 
RISCVISD::FSLW : RISCVISD::FSRW; + SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element + // type is illegal (currently only vXi64 RV32). + // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are + // transferred to the destination register. We issue two of these from the + // upper and lower halves of the SEW-bit vector element, slid down to the + // first element. + SDLoc DL(N); + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec.getValueType(); + assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && + VecVT.getVectorElementType() == MVT::i64 && + "Unexpected EXTRACT_VECTOR_ELT legalization"); + + SDValue Slidedown = Vec; + // Unless the index is known to be 0, we must slide the vector down to get + // the desired element into index 0. + if (!isNullConstant(Idx)) + Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT, + DAG.getUNDEF(VecVT), Vec, Idx); + + MVT XLenVT = Subtarget.getXLenVT(); + // Extract the lower XLEN bits of the correct vector element. + SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx); + + // To extract the upper XLEN bits of the vector element, shift the first + // element right by 32 bits and re-extract the lower XLEN bits. + SDValue ThirtyTwoV = + DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, + DAG.getConstant(32, DL, Subtarget.getXLenVT())); + SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV); + + SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx); + + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); + break; + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + llvm_unreachable( + "Don't know how to custom type legalize this intrinsic!"); + case Intrinsic::riscv_vmv_x_s: { + EVT VT = N->getValueType(0); + assert((VT == MVT::i8 || VT == MVT::i16 || + (Subtarget.is64Bit() && VT == MVT::i32)) && + "Unexpected custom legalisation!"); + SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, + Subtarget.getXLenVT(), N->getOperand(1)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); + break; + } + } + break; } } } +// A structure to hold one of the bit-manipulation patterns below. 
Together, a +// SHL and non-SHL pattern may form a bit-manipulation pair on a single source: +// (or (and (shl x, 1), 0xAAAAAAAA), +// (and (srl x, 1), 0x55555555)) +struct RISCVBitmanipPat { + SDValue Op; + unsigned ShAmt; + bool IsSHL; + + bool formsPairWith(const RISCVBitmanipPat &Other) const { + return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; + } +}; + +// Matches any of the following bit-manipulation patterns: +// (and (shl x, 1), (0x55555555 << 1)) +// (and (srl x, 1), 0x55555555) +// (shl (and x, 0x55555555), 1) +// (srl (and x, (0x55555555 << 1)), 1) +// where the shift amount and mask may vary thus: +// [1] = 0x55555555 / 0xAAAAAAAA +// [2] = 0x33333333 / 0xCCCCCCCC +// [4] = 0x0F0F0F0F / 0xF0F0F0F0 +// [8] = 0x00FF00FF / 0xFF00FF00 +// [16] = 0x0000FFFF / 0xFFFF0000 +// [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) +static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) { + Optional<uint64_t> Mask; + // Optionally consume a mask around the shift operation. + if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getConstantOperandVal(1); + Op = Op.getOperand(0); + } + if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) + return None; + bool IsSHL = Op.getOpcode() == ISD::SHL; + + if (!isa<ConstantSDNode>(Op.getOperand(1))) + return None; + auto ShAmt = Op.getConstantOperandVal(1); + + if (!isPowerOf2_64(ShAmt)) + return None; + + // These are the unshifted masks which we use to match bit-manipulation + // patterns. They may be shifted left in certain circumstances. + static const uint64_t BitmanipMasks[] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, + 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL, + }; + + unsigned MaskIdx = Log2_64(ShAmt); + if (MaskIdx >= array_lengthof(BitmanipMasks)) + return None; + + auto Src = Op.getOperand(0); + + unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; + auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); + + // The expected mask is shifted left when the AND is found around SHL + // patterns. + // ((x >> 1) & 0x55555555) + // ((x << 1) & 0xAAAAAAAA) + bool SHLExpMask = IsSHL; + + if (!Mask) { + // Sometimes LLVM keeps the mask as an operand of the shift, typically when + // the mask is all ones: consume that now. + if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { + Mask = Src.getConstantOperandVal(1); + Src = Src.getOperand(0); + // The expected mask is now in fact shifted left for SRL, so reverse the + // decision. + // ((x & 0xAAAAAAAA) >> 1) + // ((x & 0x55555555) << 1) + SHLExpMask = !SHLExpMask; + } else { + // Use a default shifted mask of all-ones if there's no AND, truncated + // down to the expected width. This simplifies the logic later on. + Mask = maskTrailingOnes<uint64_t>(Width); + *Mask &= (IsSHL ? 
*Mask << ShAmt : *Mask >> ShAmt); + } + } + + if (SHLExpMask) + ExpMask <<= ShAmt; + + if (Mask != ExpMask) + return None; + + return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; +} + +// Match the following pattern as a GREVI(W) operation +// (or (BITMANIP_SHL x), (BITMANIP_SRL x)) +static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + EVT VT = Op.getValueType(); + + if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { + auto LHS = matchRISCVBitmanipPat(Op.getOperand(0)); + auto RHS = matchRISCVBitmanipPat(Op.getOperand(1)); + if (LHS && RHS && LHS->formsPairWith(*RHS)) { + SDLoc DL(Op); + return DAG.getNode( + RISCVISD::GREVI, DL, VT, LHS->Op, + DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); + } + } + return SDValue(); +} + +// Matches any of the following patterns as a GORCI(W) operation +// 1. (or (GREVI x, shamt), x) if shamt is a power of 2 +// 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 +// 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) +// Note that with the variant of 3., +// (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) +// the inner pattern will first be matched as GREVI and then the outer +// pattern will be matched to GORC via the first rule above. +// 4. (or (rotl/rotr x, bitwidth/2), x) +static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + EVT VT = Op.getValueType(); + + if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { + if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X && + isPowerOf2_32(Reverse.getConstantOperandVal(1))) + return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1)); + // We can also form GORCI from ROTL/ROTR by half the bitwidth. + if ((Reverse.getOpcode() == ISD::ROTL || + Reverse.getOpcode() == ISD::ROTR) && + Reverse.getOperand(0) == X && + isa<ConstantSDNode>(Reverse.getOperand(1))) { + uint64_t RotAmt = Reverse.getConstantOperandVal(1); + if (RotAmt == (VT.getSizeInBits() / 2)) + return DAG.getNode( + RISCVISD::GORCI, DL, VT, X, + DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT())); + } + return SDValue(); + }; + + // Check for either commutable permutation of (or (GREVI x, shamt), x) + if (SDValue V = MatchOROfReverse(Op0, Op1)) + return V; + if (SDValue V = MatchOROfReverse(Op1, Op0)) + return V; + + // OR is commutable so canonicalize its OR operand to the left + if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) + std::swap(Op0, Op1); + if (Op0.getOpcode() != ISD::OR) + return SDValue(); + SDValue OrOp0 = Op0.getOperand(0); + SDValue OrOp1 = Op0.getOperand(1); + auto LHS = matchRISCVBitmanipPat(OrOp0); + // OR is commutable so swap the operands and try again: x might have been + // on the left + if (!LHS) { + std::swap(OrOp0, OrOp1); + LHS = matchRISCVBitmanipPat(OrOp0); + } + auto RHS = matchRISCVBitmanipPat(Op1); + if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { + return DAG.getNode( + RISCVISD::GORCI, DL, VT, LHS->Op, + DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT())); + } + } + return SDValue(); +} + +// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is +// non-zero, and to x when it is. Any repeated GREVI stage undoes itself. +// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). 
A repeated stage does +// not undo itself, but it is redundant. +static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { + unsigned ShAmt1 = N->getConstantOperandVal(1); + SDValue Src = N->getOperand(0); + + if (Src.getOpcode() != N->getOpcode()) + return SDValue(); + + unsigned ShAmt2 = Src.getConstantOperandVal(1); + Src = Src.getOperand(0); + + unsigned CombinedShAmt; + if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW) + CombinedShAmt = ShAmt1 | ShAmt2; + else + CombinedShAmt = ShAmt1 ^ ShAmt2; + + if (CombinedShAmt == 0) + return Src; + + SDLoc DL(N); + return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src, + DAG.getTargetConstant(CombinedShAmt, DL, + N->getOperand(1).getValueType())); +} + SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1067,17 +2127,53 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } case RISCVISD::SLLW: case RISCVISD::SRAW: - case RISCVISD::SRLW: { + case RISCVISD::SRLW: + case RISCVISD::ROLW: + case RISCVISD::RORW: { // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); - if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) || - (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))) - return SDValue(); + if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) || + SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) { + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); + return SDValue(N, 0); + } + break; + } + case RISCVISD::FSLW: + case RISCVISD::FSRW: { + // Only the lower 32 bits of the values and lower 6 bits of the shift + // amount are read. + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue ShAmt = N->getOperand(2); + APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); + APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6); + if (SimplifyDemandedBits(Op0, OpMask, DCI) || + SimplifyDemandedBits(Op1, OpMask, DCI) || + SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) { + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); + return SDValue(N, 0); + } break; } + case RISCVISD::GREVIW: + case RISCVISD::GORCIW: { + // Only the lower 32 bits of the first operand are read + SDValue Op0 = N->getOperand(0); + APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32); + if (SimplifyDemandedBits(Op0, Mask, DCI)) { + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); + return SDValue(N, 0); + } + + return combineGREVI_GORCI(N, DCI.DAG); + } case RISCVISD::FMV_X_ANYEXTW_RV64: { SDLoc DL(N); SDValue Op0 = N->getOperand(0); @@ -1085,9 +2181,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // conversion is unnecessary and can be replaced with an ANY_EXTEND // of the FMV_W_X_RV64 operand. 
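The masked-shift pairs matched above and both combine rules can be sanity-checked with a scalar sketch (shift amount 1 on 32 bits, illustrative only):

#include <cassert>
#include <cstdint>

static uint32_t grev1(uint32_t X) { // one GREVI stage: swap adjacent bits
  return ((X << 1) & 0xAAAAAAAA) | ((X >> 1) & 0x55555555);
}

static uint32_t gorc1(uint32_t X) { // one GORCI stage: OR adjacent bits
  return X | ((X << 1) & 0xAAAAAAAA) | ((X >> 1) & 0x55555555);
}

int main() {
  uint32_t X = 0x12345678;
  assert(grev1(grev1(X)) == X);        // GREVI: C1 ^ C2 == 0 undoes the stage
  assert(gorc1(gorc1(X)) == gorc1(X)); // GORCI: a repeat is redundant (C1|C2)
  return 0;
}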
if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { - SDValue AExtOp = - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0)); - return DCI.CombineTo(N, AExtOp); + assert(Op0.getOperand(0).getValueType() == MVT::i64 && + "Unexpected value type!"); + return Op0.getOperand(0); } // This is a target-specific version of a DAGCombine performed in @@ -1100,15 +2196,61 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0.getOperand(0)); APInt SignBit = APInt::getSignMask(32).sext(64); - if (Op0.getOpcode() == ISD::FNEG) { - return DCI.CombineTo(N, - DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, - DAG.getConstant(SignBit, DL, MVT::i64))); - } + if (Op0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, + DAG.getConstant(SignBit, DL, MVT::i64)); + assert(Op0.getOpcode() == ISD::FABS); - return DCI.CombineTo(N, - DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, - DAG.getConstant(~SignBit, DL, MVT::i64))); + return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, + DAG.getConstant(~SignBit, DL, MVT::i64)); + } + case RISCVISD::GREVI: + case RISCVISD::GORCI: + return combineGREVI_GORCI(N, DCI.DAG); + case ISD::OR: + if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget)) + return GREV; + if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget)) + return GORC; + break; + case RISCVISD::SELECT_CC: { + // Transform + // (select_cc (xor X, 1), 0, setne, trueV, falseV) -> + // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. + // This can occur when legalizing some floating point comparisons. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2)); + APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); + if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) && + LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) && + DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) { + SDLoc DL(N); + CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); + SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT()); + return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), + {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3), + N->getOperand(4)}); + } + break; + } + case ISD::SETCC: { + // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1. + // Comparing with 0 may allow us to fold into bnez/beqz. 
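A scalar model of the SETCC rewrite above (a sketch; X is assumed provably 0 or 1), showing why the two forms agree and the rewritten one can fold to beqz:

#include <cassert>

static bool setccBefore(unsigned X) { return X != 1; } // (setcc X, 1, setne)
static bool setccAfter(unsigned X) { return X == 0; }  // folds to beqz

int main() {
  for (unsigned X = 0; X <= 1; ++X) // X is known to be 0 or 1
    assert(setccBefore(X) == setccAfter(X));
  return 0;
}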
+ SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getValueType().isScalableVector()) + break; + auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); + if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) && + DAG.MaskedValueIsZero(LHS, Mask)) { + SDLoc DL(N); + SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType()); + CC = ISD::getSetCCInverse(CC, LHS.getValueType()); + return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC); + } + break; } } @@ -1129,7 +2271,7 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (C1 && C2) { - APInt C1Int = C1->getAPIntValue(); + const APInt &C1Int = C1->getAPIntValue(); APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); // We can materialise `c1 << c2` into an add immediate, so it's "free", @@ -1161,6 +2303,116 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( return true; } +bool RISCVTargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + TargetLoweringOpt &TLO) const { + // Delay this optimization as late as possible. + if (!TLO.LegalOps) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + // Only handle AND for now. + if (Op.getOpcode() != ISD::AND) + return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) + return false; + + const APInt &Mask = C->getAPIntValue(); + + // Clear all non-demanded bits initially. + APInt ShrunkMask = Mask & DemandedBits; + + // If the shrunk mask fits in sign extended 12 bits, let the target + // independent code apply it. + if (ShrunkMask.isSignedIntN(12)) + return false; + + // Try to make a smaller immediate by setting undemanded bits. + + // We need to be able to make a negative number through a combination of mask + // and undemanded bits. + APInt ExpandedMask = Mask | ~DemandedBits; + if (!ExpandedMask.isNegative()) + return false; + + // What is the fewest number of bits we need to represent the negative number. + unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); + + // Try to make a 12 bit negative immediate. If that fails try to make a 32 + // bit negative immediate unless the shrunk immediate already fits in 32 bits. + APInt NewMask = ShrunkMask; + if (MinSignedBits <= 12) + NewMask.setBitsFrom(11); + else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) + NewMask.setBitsFrom(31); + else + return false; + + // Sanity check that our new mask is a subset of the demanded mask. + assert(NewMask.isSubsetOf(ExpandedMask)); + + // If we aren't changing the mask, just return true to keep it and prevent + // the caller from optimizing. + if (NewMask == Mask) + return true; + + // Replace the constant with the new mask. 
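A worked example (values assumed for illustration) of the mask widening performed by targetShrinkDemandedConstant above: when only the low 32 bits are demanded, setting the undemanded high bits can turn a mask that needs LUI+ADDI into a simm12 ANDI immediate:

#include <cstdint>

int main() {
  uint64_t DemandedBits = 0x00000000FFFFFFFFULL; // only the low word is used
  uint64_t Mask = 0x00000000FFFFFF00ULL;         // not a simm12 as-is
  uint64_t Expanded = Mask | ~DemandedBits;      // 0xFFFFFFFFFFFFFF00 == -256
  // -256 fits in a 12-bit signed immediate, so ANDI encodes it directly; the
  // extra high bits it sets are harmless because they are never demanded.
  return ((int64_t)Expanded == -256) ? 0 : 1;
}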
+ SDLoc DL(Op); + SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); + SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); + return TLO.CombineTo(Op, NewOp); +} + +void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + unsigned BitWidth = Known.getBitWidth(); + unsigned Opc = Op.getOpcode(); + assert((Opc >= ISD::BUILTIN_OP_END || + Opc == ISD::INTRINSIC_WO_CHAIN || + Opc == ISD::INTRINSIC_W_CHAIN || + Opc == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + + Known.resetAll(); + switch (Opc) { + default: break; + case RISCVISD::REMUW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + // We only care about the lower 32 bits. + Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } + case RISCVISD::DIVUW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + // We only care about the lower 32 bits. + Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } + case RISCVISD::READ_VLENB: + // We assume VLENB is at least 8 bytes. + // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits. + Known.Zero.setLowBits(3); + break; + } +} + unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { @@ -1173,10 +2425,25 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::DIVW: case RISCVISD::DIVUW: case RISCVISD::REMUW: + case RISCVISD::ROLW: + case RISCVISD::RORW: + case RISCVISD::GREVIW: + case RISCVISD::GORCIW: + case RISCVISD::FSLW: + case RISCVISD::FSRW: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. return 33; + case RISCVISD::VMV_X_S: + // The number of sign bits of the scalar result is computed by obtaining the + // element type of the input vector operand, subtracting its width from the + // XLEN, and then adding one (sign bit within the element type). If the + // element type is wider than XLen, the least-significant XLEN bits are + // taken. 
+ if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) + return 1; + return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; } return 1; @@ -1260,17 +2527,19 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, RI); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), - MachineMemOperand::MOLoad, 8, Align(8)); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); + MachineMemOperand *MMOLo = + MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); + MachineMemOperand *MMOHi = MF.getMachineMemOperand( + MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) .addFrameIndex(FI) .addImm(0) - .addMemOperand(MMO); + .addMemOperand(MMOLo); BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) .addFrameIndex(FI) .addImm(4) - .addMemOperand(MMO); + .addMemOperand(MMOHi); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1290,19 +2559,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), - MachineMemOperand::MOStore, 8, Align(8)); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); + MachineMemOperand *MMOLo = + MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); + MachineMemOperand *MMOHi = MF.getMachineMemOperand( + MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) .addFrameIndex(FI) .addImm(0) - .addMemOperand(MMO); + .addMemOperand(MMOLo); BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) .addFrameIndex(FI) .addImm(4) - .addMemOperand(MMO); + .addMemOperand(MMOHi); TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -1313,6 +2584,7 @@ static bool isSelectPseudo(MachineInstr &MI) { default: return false; case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR16_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: return true; @@ -1442,9 +2714,80 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, return TailMBB; } +static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, + int VLIndex, unsigned SEWIndex, + RISCVVLMUL VLMul, bool WritesElement0) { + MachineFunction &MF = *BB->getParent(); + DebugLoc DL = MI.getDebugLoc(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + unsigned SEW = MI.getOperand(SEWIndex).getImm(); + assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); + RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8)); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // VL and VTYPE are alive here. + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)); + + if (VLIndex >= 0) { + // Set VL (rs1 != X0). 
+ Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + MIB.addReg(DestReg, RegState::Define | RegState::Dead) + .addReg(MI.getOperand(VLIndex).getReg()); + } else + // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0). + MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill); + + // Default to tail agnostic unless the destination is tied to a source. In + // that case the user would have some control over the tail values. The tail + // policy is also ignored on instructions that only update element 0 like + // vmv.s.x or reductions so use agnostic there to match the common case. + // FIXME: This is conservatively correct, but we might want to detect that + // the input is undefined. + bool TailAgnostic = true; + unsigned UseOpIdx; + if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) { + TailAgnostic = false; + // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. + const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); + if (UseMI && UseMI->isImplicitDef()) + TailAgnostic = true; + } + + // For simplicity we reuse the vtype representation here. + MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, + /*TailAgnostic*/ TailAgnostic, + /*MaskAgnostic*/ false)); + + // Remove (now) redundant operands from pseudo + MI.getOperand(SEWIndex).setImm(-1); + if (VLIndex >= 0) { + MI.getOperand(VLIndex).setReg(RISCV::NoRegister); + MI.getOperand(VLIndex).setIsKill(false); + } + + return BB; +} + MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { + uint64_t TSFlags = MI.getDesc().TSFlags; + + if (TSFlags & RISCVII::HasSEWOpMask) { + unsigned NumOperands = MI.getNumExplicitOperands(); + int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1; + unsigned SEWIndex = NumOperands - 1; + bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask; + + RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> + RISCVII::VLMulShift); + return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0); + } + switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); @@ -1453,6 +2796,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, "ReadCycleWrite is only to be used on riscv32"); return emitReadCycleWidePseudo(MI, BB); case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR16_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: return emitSelectPseudo(MI, BB); @@ -1492,6 +2836,10 @@ static const MCPhysReg ArgGPRs[] = { RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; +static const MCPhysReg ArgFPR16s[] = { + RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, + RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H +}; static const MCPhysReg ArgFPR32s[] = { RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F @@ -1500,6 +2848,17 @@ static const MCPhysReg ArgFPR64s[] = { RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D }; +// This is an interim calling convention and it may be changed in the future. 
+static const MCPhysReg ArgVRs[] = { + RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, + RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, + RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; +static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, + RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, + RISCV::V20M2, RISCV::V22M2}; +static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, + RISCV::V20M4}; +static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. @@ -1544,7 +2903,8 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, - bool IsRet, Type *OrigTy) { + bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, + Optional<unsigned> FirstMaskArgument) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; @@ -1554,9 +2914,9 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, if (IsRet && ValNo > 1) return true; - // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a - // variadic argument, or if no F32 argument registers are available. - bool UseGPRForF32 = true; + // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a + // variadic argument, or if no F16/F32 argument registers are available. + bool UseGPRForF16_F32 = true; // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a // variadic argument, or if no F64 argument registers are available. bool UseGPRForF64 = true; @@ -1569,24 +2929,26 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: - UseGPRForF32 = !IsFixed; + UseGPRForF16_F32 = !IsFixed; break; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: - UseGPRForF32 = !IsFixed; + UseGPRForF16_F32 = !IsFixed; UseGPRForF64 = !IsFixed; break; } - if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) - UseGPRForF32 = true; - if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s)) + // FPR16, FPR32, and FPR64 alias each other. + if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { + UseGPRForF16_F32 = true; UseGPRForF64 = true; + } - // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local - // variables rather than directly checking against the target ABI. + // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and + // similar local variables rather than directly checking against the target + // ABI. - if (UseGPRForF32 && ValVT == MVT::f32) { + if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { LocVT = XLenVT; LocInfo = CCValAssign::BCvt; } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { @@ -1669,11 +3031,40 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // Allocate to a register if possible, or else a stack slot. 
Register Reg; - if (ValVT == MVT::f32 && !UseGPRForF32) - Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s); + if (ValVT == MVT::f16 && !UseGPRForF16_F32) + Reg = State.AllocateReg(ArgFPR16s); + else if (ValVT == MVT::f32 && !UseGPRForF16_F32) + Reg = State.AllocateReg(ArgFPR32s); else if (ValVT == MVT::f64 && !UseGPRForF64) - Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s); - else + Reg = State.AllocateReg(ArgFPR64s); + else if (ValVT.isScalableVector()) { + const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); + if (RC == &RISCV::VRRegClass) { + // Assign the first mask argument to V0. + // This is an interim calling convention and it may be changed in the + // future. + if (FirstMaskArgument.hasValue() && + ValNo == FirstMaskArgument.getValue()) { + Reg = State.AllocateReg(RISCV::V0); + } else { + Reg = State.AllocateReg(ArgVRs); + } + } else if (RC == &RISCV::VRM2RegClass) { + Reg = State.AllocateReg(ArgVRM2s); + } else if (RC == &RISCV::VRM4RegClass) { + Reg = State.AllocateReg(ArgVRM4s); + } else if (RC == &RISCV::VRM8RegClass) { + Reg = State.AllocateReg(ArgVRM8s); + } else { + llvm_unreachable("Unhandled class register for ValueType"); + } + if (!Reg) { + LocInfo = CCValAssign::Indirect; + // Try using a GPR to pass the address + Reg = State.AllocateReg(ArgGPRs); + LocVT = XLenVT; + } + } else Reg = State.AllocateReg(ArgGPRs); unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); @@ -1696,16 +3087,18 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, return false; } - assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) && - "Expected an XLenVT at this stage"); + assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || + (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) && + "Expected an XLenVT or scalable vector types at this stage"); if (Reg) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } - // When an f32 or f64 is passed on the stack, no bit-conversion is needed. - if (ValVT == MVT::f32 || ValVT == MVT::f64) { + // When a floating-point value is passed on the stack, no bit-conversion is + // needed. 
+ if (ValVT.isFloatingPoint()) { LocVT = ValVT; LocInfo = CCValAssign::Full; } @@ -1713,12 +3106,27 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, return false; } +template <typename ArgTy> +static Optional<unsigned> preAssignMask(const ArgTy &Args) { + for (const auto &ArgIdx : enumerate(Args)) { + MVT ArgVT = ArgIdx.value().VT; + if (ArgVT.isScalableVector() && + ArgVT.getVectorElementType().SimpleTy == MVT::i1) + return ArgIdx.index(); + } + return None; +} + void RISCVTargetLowering::analyzeInputArgs( MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { unsigned NumArgs = Ins.size(); FunctionType *FType = MF.getFunction().getFunctionType(); + Optional<unsigned> FirstMaskArgument; + if (Subtarget.hasStdExtV()) + FirstMaskArgument = preAssignMask(Ins); + for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; @@ -1731,7 +3139,8 @@ void RISCVTargetLowering::analyzeInputArgs( RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { + ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, + FirstMaskArgument)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'); llvm_unreachable(nullptr); @@ -1745,6 +3154,10 @@ void RISCVTargetLowering::analyzeOutputArgs( CallLoweringInfo *CLI) const { unsigned NumArgs = Outs.size(); + Optional<unsigned> FirstMaskArgument; + if (Subtarget.hasStdExtV()) + FirstMaskArgument = preAssignMask(Outs); + for (unsigned i = 0; i != NumArgs; i++) { MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; @@ -1752,7 +3165,8 @@ void RISCVTargetLowering::analyzeOutputArgs( RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { + ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, + FirstMaskArgument)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"); llvm_unreachable(nullptr); @@ -1770,11 +3184,12 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, case CCValAssign::Full: break; case CCValAssign::BCvt: - if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) + Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); + else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); - break; - } - Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); + else + Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; } return Val; @@ -1783,28 +3198,13 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, // The caller is responsible for loading the full value if the argument is // passed with CCValAssign::Indirect. 
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, - const CCValAssign &VA, const SDLoc &DL) { + const CCValAssign &VA, const SDLoc &DL, + const RISCVTargetLowering &TLI) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); EVT LocVT = VA.getLocVT(); SDValue Val; - const TargetRegisterClass *RC; - - switch (LocVT.getSimpleVT().SimpleTy) { - default: - llvm_unreachable("Unexpected register type"); - case MVT::i32: - case MVT::i64: - RC = &RISCV::GPRRegClass; - break; - case MVT::f32: - RC = &RISCV::FPR32RegClass; - break; - case MVT::f64: - RC = &RISCV::FPR64RegClass; - break; - } - + const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); Register VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); @@ -1825,11 +3225,12 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, case CCValAssign::Full: break; case CCValAssign::BCvt: - if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); + else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); - break; - } - Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); + else + Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); break; } return Val; @@ -1920,6 +3321,18 @@ static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, } } + if (LocVT == MVT::f16) { + static const MCPhysReg FPR16List[] = { + RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, + RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, + RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, + RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; + if (unsigned Reg = State.AllocateReg(FPR16List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + if (LocVT == MVT::f32) { static const MCPhysReg FPR32List[] = { RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, @@ -1959,22 +3372,71 @@ static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, return true; // CC didn't match. } +static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32 || LocVT == MVT::i64) { + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim + // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 + static const MCPhysReg GPRList[] = { + RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, + RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; + if (unsigned Reg = State.AllocateReg(GPRList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f32) { + // Pass in STG registers: F1, ..., F6 + // fs0 ... fs5 + static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, + RISCV::F18_F, RISCV::F19_F, + RISCV::F20_F, RISCV::F21_F}; + if (unsigned Reg = State.AllocateReg(FPR32List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f64) { + // Pass in STG registers: D1, ..., D6 + // fs6 ... 
fs11 + static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, + RISCV::F24_D, RISCV::F25_D, + RISCV::F26_D, RISCV::F27_D}; + if (unsigned Reg = State.AllocateReg(FPR64List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + report_fatal_error("No registers left in GHC calling convention"); + return true; +} + // Transform physical registers into virtual registers. SDValue RISCVTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + switch (CallConv) { default: report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: break; + case CallingConv::GHC: + if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || + !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) + report_fatal_error( + "GHC calling convention requires the F and D instruction set extensions"); } - MachineFunction &MF = DAG.getMachineFunction(); - const Function &Func = MF.getFunction(); if (Func.hasFnAttribute("interrupt")) { if (!Func.arg_empty()) @@ -2001,6 +3463,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments( if (CallConv == CallingConv::Fast) CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); + else if (CallConv == CallingConv::GHC) + CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); else analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); @@ -2012,7 +3476,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); else if (VA.isRegLoc()) - ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL); + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); else ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); @@ -2201,6 +3665,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CallConv == CallingConv::Fast) ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); + else if (CallConv == CallingConv::GHC) + ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); @@ -2458,12 +3924,18 @@ bool RISCVTargetLowering::CanLowerReturn( const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + + Optional<unsigned> FirstMaskArgument; + if (Subtarget.hasStdExtV()) + FirstMaskArgument = preAssignMask(Outs); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, - ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) + ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, + *this, FirstMaskArgument)) return false; } return true; @@ -2488,6 +3960,9 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, nullptr); + if (CallConv == CallingConv::GHC && !RVLocs.empty()) + report_fatal_error("GHC functions return void only"); + SDValue Glue; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -2574,7 +4049,7 @@ void RISCVTargetLowering::validateCCReservedRegs( const Function &F = MF.getFunction(); const RISCVSubtarget &STI 
= MF.getSubtarget<RISCVSubtarget>(); - if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) { + if (llvm::any_of(Regs, [&STI](auto Reg) { return STI.isRegisterReservedByUser(Reg.first); })) F.getContext().diagnose(DiagnosticInfoUnsupported{ @@ -2586,47 +4061,57 @@ bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { } const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { +#define NODE_NAME_CASE(NODE) \ + case RISCVISD::NODE: \ + return "RISCVISD::" #NODE; + // clang-format off switch ((RISCVISD::NodeType)Opcode) { case RISCVISD::FIRST_NUMBER: break; - case RISCVISD::RET_FLAG: - return "RISCVISD::RET_FLAG"; - case RISCVISD::URET_FLAG: - return "RISCVISD::URET_FLAG"; - case RISCVISD::SRET_FLAG: - return "RISCVISD::SRET_FLAG"; - case RISCVISD::MRET_FLAG: - return "RISCVISD::MRET_FLAG"; - case RISCVISD::CALL: - return "RISCVISD::CALL"; - case RISCVISD::SELECT_CC: - return "RISCVISD::SELECT_CC"; - case RISCVISD::BuildPairF64: - return "RISCVISD::BuildPairF64"; - case RISCVISD::SplitF64: - return "RISCVISD::SplitF64"; - case RISCVISD::TAIL: - return "RISCVISD::TAIL"; - case RISCVISD::SLLW: - return "RISCVISD::SLLW"; - case RISCVISD::SRAW: - return "RISCVISD::SRAW"; - case RISCVISD::SRLW: - return "RISCVISD::SRLW"; - case RISCVISD::DIVW: - return "RISCVISD::DIVW"; - case RISCVISD::DIVUW: - return "RISCVISD::DIVUW"; - case RISCVISD::REMUW: - return "RISCVISD::REMUW"; - case RISCVISD::FMV_W_X_RV64: - return "RISCVISD::FMV_W_X_RV64"; - case RISCVISD::FMV_X_ANYEXTW_RV64: - return "RISCVISD::FMV_X_ANYEXTW_RV64"; - case RISCVISD::READ_CYCLE_WIDE: - return "RISCVISD::READ_CYCLE_WIDE"; + NODE_NAME_CASE(RET_FLAG) + NODE_NAME_CASE(URET_FLAG) + NODE_NAME_CASE(SRET_FLAG) + NODE_NAME_CASE(MRET_FLAG) + NODE_NAME_CASE(CALL) + NODE_NAME_CASE(SELECT_CC) + NODE_NAME_CASE(BuildPairF64) + NODE_NAME_CASE(SplitF64) + NODE_NAME_CASE(TAIL) + NODE_NAME_CASE(SLLW) + NODE_NAME_CASE(SRAW) + NODE_NAME_CASE(SRLW) + NODE_NAME_CASE(DIVW) + NODE_NAME_CASE(DIVUW) + NODE_NAME_CASE(REMUW) + NODE_NAME_CASE(ROLW) + NODE_NAME_CASE(RORW) + NODE_NAME_CASE(FSLW) + NODE_NAME_CASE(FSRW) + NODE_NAME_CASE(FMV_H_X) + NODE_NAME_CASE(FMV_X_ANYEXTH) + NODE_NAME_CASE(FMV_W_X_RV64) + NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) + NODE_NAME_CASE(READ_CYCLE_WIDE) + NODE_NAME_CASE(GREVI) + NODE_NAME_CASE(GREVIW) + NODE_NAME_CASE(GORCI) + NODE_NAME_CASE(GORCIW) + NODE_NAME_CASE(VMV_X_S) + NODE_NAME_CASE(SPLAT_VECTOR_I64) + NODE_NAME_CASE(READ_VLENB) + NODE_NAME_CASE(TRUNCATE_VECTOR) + NODE_NAME_CASE(VLEFF) + NODE_NAME_CASE(VLEFF_MASK) + NODE_NAME_CASE(VLSEGFF) + NODE_NAME_CASE(VLSEGFF_MASK) + NODE_NAME_CASE(READ_VL) + NODE_NAME_CASE(VSLIDEUP) + NODE_NAME_CASE(VSLIDEDOWN) + NODE_NAME_CASE(VID) } + // clang-format on return nullptr; +#undef NODE_NAME_CASE } /// getConstraintType - Given a constraint letter, return the type of @@ -2661,6 +4146,8 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'r': return std::make_pair(0U, &RISCV::GPRRegClass); case 'f': + if (Subtarget.hasStdExtZfh() && VT == MVT::f16) + return std::make_pair(0U, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtF() && VT == MVT::f32) return std::make_pair(0U, &RISCV::FPR32RegClass); if (Subtarget.hasStdExtD() && VT == MVT::f64) @@ -2675,7 +4162,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // official names. However, other frontends like `rustc` do not. 
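The NODE_NAME_CASE macro above replaces eighteen hand-written case/return pairs; the preprocessor stringizing operator (#NODE) guarantees the returned string can never drift out of sync with the enumerator it names. A self-contained illustration of the same idiom, using a hypothetical DemoISD namespace:

    #include <cstdio>

    namespace DemoISD { enum NodeType { FIRST_NUMBER, RET_FLAG, CALL }; }

    #define NODE_NAME_CASE(NODE)                                             \
      case DemoISD::NODE:                                                    \
        return "DemoISD::" #NODE;

    static const char *getNodeName(DemoISD::NodeType Opcode) {
      switch (Opcode) {
      case DemoISD::FIRST_NUMBER:
        break;
      NODE_NAME_CASE(RET_FLAG)
      NODE_NAME_CASE(CALL)
      }
      return nullptr;
    }
    #undef NODE_NAME_CASE

    int main() { std::printf("%s\n", getNodeName(DemoISD::CALL)); }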
This allows // users of these frontends to use the ABI names for registers in LLVM-style // register constraints. - Register XRegFromAlias = StringSwitch<Register>(Constraint.lower()) + unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) .Case("{zero}", RISCV::X0) .Case("{ra}", RISCV::X1) .Case("{sp}", RISCV::X2) @@ -2719,46 +4206,50 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // // The second case is the ABI name of the register, so that frontends can also // use the ABI names in register constraint lists. - if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) { - std::pair<Register, Register> FReg = - StringSwitch<std::pair<Register, Register>>(Constraint.lower()) - .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D}) - .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D}) - .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D}) - .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D}) - .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D}) - .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D}) - .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D}) - .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D}) - .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D}) - .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D}) - .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D}) - .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D}) - .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D}) - .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D}) - .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D}) - .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D}) - .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D}) - .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D}) - .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D}) - .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D}) - .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D}) - .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D}) - .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D}) - .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D}) - .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D}) - .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D}) - .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D}) - .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D}) - .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D}) - .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D}) - .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D}) - .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D}) - .Default({RISCV::NoRegister, RISCV::NoRegister}); - if (FReg.first != RISCV::NoRegister) - return Subtarget.hasStdExtD() - ? 
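The renumbering trick that replaces the paired StringSwitch above depends on the generated register enumerators F0_F..F31_F and F0_D..F31_D each being contiguous and laid out in the same order, which the new assert documents. The same mapping as a standalone helper (name hypothetical):

    // Map RISCV::F<n>_F to RISCV::F<n>_D by index arithmetic; valid only
    // because both enumerator ranges are contiguous and parallel.
    static unsigned toFPR64(unsigned FReg32) {
      assert(RISCV::F0_F <= FReg32 && FReg32 <= RISCV::F31_F && "Unknown fp-reg");
      return RISCV::F0_D + (FReg32 - RISCV::F0_F);
    }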
std::make_pair(FReg.second, &RISCV::FPR64RegClass) - : std::make_pair(FReg.first, &RISCV::FPR32RegClass); + if (Subtarget.hasStdExtF()) { + unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) + .Cases("{f0}", "{ft0}", RISCV::F0_F) + .Cases("{f1}", "{ft1}", RISCV::F1_F) + .Cases("{f2}", "{ft2}", RISCV::F2_F) + .Cases("{f3}", "{ft3}", RISCV::F3_F) + .Cases("{f4}", "{ft4}", RISCV::F4_F) + .Cases("{f5}", "{ft5}", RISCV::F5_F) + .Cases("{f6}", "{ft6}", RISCV::F6_F) + .Cases("{f7}", "{ft7}", RISCV::F7_F) + .Cases("{f8}", "{fs0}", RISCV::F8_F) + .Cases("{f9}", "{fs1}", RISCV::F9_F) + .Cases("{f10}", "{fa0}", RISCV::F10_F) + .Cases("{f11}", "{fa1}", RISCV::F11_F) + .Cases("{f12}", "{fa2}", RISCV::F12_F) + .Cases("{f13}", "{fa3}", RISCV::F13_F) + .Cases("{f14}", "{fa4}", RISCV::F14_F) + .Cases("{f15}", "{fa5}", RISCV::F15_F) + .Cases("{f16}", "{fa6}", RISCV::F16_F) + .Cases("{f17}", "{fa7}", RISCV::F17_F) + .Cases("{f18}", "{fs2}", RISCV::F18_F) + .Cases("{f19}", "{fs3}", RISCV::F19_F) + .Cases("{f20}", "{fs4}", RISCV::F20_F) + .Cases("{f21}", "{fs5}", RISCV::F21_F) + .Cases("{f22}", "{fs6}", RISCV::F22_F) + .Cases("{f23}", "{fs7}", RISCV::F23_F) + .Cases("{f24}", "{fs8}", RISCV::F24_F) + .Cases("{f25}", "{fs9}", RISCV::F25_F) + .Cases("{f26}", "{fs10}", RISCV::F26_F) + .Cases("{f27}", "{fs11}", RISCV::F27_F) + .Cases("{f28}", "{ft8}", RISCV::F28_F) + .Cases("{f29}", "{ft9}", RISCV::F29_F) + .Cases("{f30}", "{ft10}", RISCV::F30_F) + .Cases("{f31}", "{ft11}", RISCV::F31_F) + .Default(RISCV::NoRegister); + if (FReg != RISCV::NoRegister) { + assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); + if (Subtarget.hasStdExtD()) { + unsigned RegNo = FReg - RISCV::F0_F; + unsigned DReg = RISCV::F0_D + RegNo; + return std::make_pair(DReg, &RISCV::FPR64RegClass); + } + return std::make_pair(FReg, &RISCV::FPR32RegClass); + } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); @@ -2974,6 +4465,27 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( return Result; } +bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f16: + return Subtarget.hasStdExtZfh(); + case MVT::f32: + return Subtarget.hasStdExtF(); + case MVT::f64: + return Subtarget.hasStdExtD(); + default: + break; + } + + return false; +} + Register RISCVTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return RISCV::X10; @@ -2994,20 +4506,39 @@ bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { return true; } +bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + if (Subtarget.is64Bit() && Type == MVT::i32) + return true; + + return IsSigned; +} + bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const { // Check integral scalar types. if (VT.isScalarInteger()) { - // Do not perform the transformation on riscv32 with the M extension. - if (!Subtarget.is64Bit() && Subtarget.hasStdExtM()) + // Omit the optimization if the sub target has the M extension and the data + // size exceeds XLen. + if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) return false; if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { - if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t)) + // Break the MUL to a SLLI and an ADD/SUB. 
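The new isFMAFasterThanFMulAndFAdd hook reduces vector types to their scalar element type, then gates each floating-point width on the extension that supplies its fused multiply-add instructions: Zfh for f16, F for f32, D for f64. The same check restated as a free function, for reference:

    // Mirror of the hook's logic above (standalone sketch, not the hook).
    static bool fmaIsProfitable(EVT VT, const RISCVSubtarget &ST) {
      VT = VT.getScalarType(); // vectors decide by element type
      if (!VT.isSimple())
        return false;
      switch (VT.getSimpleVT().SimpleTy) {
      case MVT::f16: return ST.hasStdExtZfh();
      case MVT::f32: return ST.hasStdExtF();
      case MVT::f64: return ST.hasStdExtD();
      default:       return false;
      }
    }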
+ const APInt &Imm = ConstNode->getAPIntValue(); + if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || + (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) + return true; + // Omit the following optimization if the sub target has the M extension + // and the data size >= XLen. + if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) return false; - int64_t Imm = ConstNode->getSExtValue(); - if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) || - isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm)) + // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs + // a pair of LUI/ADDI. + if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { + APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); + if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || + (1 - ImmS).isPowerOf2()) return true; + } } } @@ -3032,3 +4563,19 @@ RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, StringRef(RegName) + "\".")); return Reg; } + +namespace llvm { +namespace RISCVVIntrinsicsTable { + +#define GET_RISCVVIntrinsicsTable_IMPL +#include "RISCVGenSearchableTables.inc" + +} // namespace RISCVVIntrinsicsTable + +namespace RISCVZvlssegTable { + +#define GET_RISCVZvlssegTable_IMPL +#include "RISCVGenSearchableTables.inc" + +} // namespace RISCVZvlssegTable +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h index e420e879efc9..40b1a45c6d15 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -28,6 +28,12 @@ enum NodeType : unsigned { SRET_FLAG, MRET_FLAG, CALL, + /// Select with condition operator - This selects between a true value and + /// a false value (ops #3 and #4) based on the boolean result of comparing + /// the lhs and rhs (ops #0 and #1) of a conditional expression with the + /// condition code in op #2, a XLenVT constant from the ISD::CondCode enum. + /// The lhs and rhs are XLenVT integers. The true and false values can be + /// integer or floating point. SELECT_CC, BuildPairF64, SplitF64, @@ -38,22 +44,75 @@ enum NodeType : unsigned { SRAW, SRLW, // 32-bit operations from RV64M that can't be simply matched with a pattern - // at instruction selection time. + // at instruction selection time. These have undefined behavior for division + // by 0 or overflow (divw) like their target independent counterparts. DIVW, DIVUW, REMUW, - // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast - // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X. + // RV64IB rotates, directly matching the semantics of the named RISC-V + // instructions. + ROLW, + RORW, + // RV64IB funnel shifts, with the semantics of the named RISC-V instructions, + // but the same operand order as fshl/fshr intrinsics. + FSRW, + FSLW, + // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as + // XLEN is the only legal integer width. + // + // FMV_H_X matches the semantics of the FMV.H.X. + // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result. + // FMV_W_X_RV64 matches the semantics of the FMV.W.X. // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result. + // // This is a more convenient semantic for producing dagcombines that remove // unnecessary GPR->FPR->GPR moves. 
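A worked example for the decomposeMulByConstant branch above that handles immediates needing a LUI/ADDI pair: for Imm = 16388 = 4097 << 2, countTrailingZeros is 2 (less than 12), ImmS is 4097, and ImmS - 1 = 4096 is a power of two, so the multiply lowers to two SLLIs and one ADD instead of a constant materialization plus MUL:

    #include <cstdint>

    // x * 16388 == ((x << 12) + x) << 2, i.e. slli / add / slli.
    static uint64_t mulBy16388(uint64_t x) { return ((x << 12) + x) << 2; }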
+ FMV_H_X, + FMV_X_ANYEXTH, FMV_W_X_RV64, FMV_X_ANYEXTW_RV64, // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target // (returns (Lo, Hi)). It takes a chain operand. - READ_CYCLE_WIDE + READ_CYCLE_WIDE, + // Generalized Reverse and Generalized Or-Combine - directly matching the + // semantics of the named RISC-V instructions. Lowered as custom nodes as + // TableGen chokes when faced with commutative permutations in deeply-nested + // DAGs. Each node takes an input operand and a TargetConstant immediate + // shift amount, and outputs a bit-manipulated version of input. All operands + // are of type XLenVT. + GREVI, + GREVIW, + GORCI, + GORCIW, + // Vector Extension + // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT + // sign extended from the vector element size. NOTE: The result size will + // never be less than the vector element size. + VMV_X_S, + // Splats an i64 scalar to a vector type (with element type i64) where the + // scalar is a sign-extended i32. + SPLAT_VECTOR_I64, + // Read VLENB CSR + READ_VLENB, + // Truncates a RVV integer vector by one power-of-two. + TRUNCATE_VECTOR, + // Unit-stride fault-only-first load + VLEFF, + VLEFF_MASK, + // Unit-stride fault-only-first segment load + VLSEGFF, + VLSEGFF_MASK, + // read vl CSR + READ_VL, + // Matches the semantics of vslideup/vslidedown. The first operand is the + // pass-thru operand, the second is the source vector, and the third is the + // XLenVT index (either constant or non-constant). + VSLIDEUP, + VSLIDEDOWN, + // Matches the semantics of the unmasked vid.v instruction. + VID, }; -} +} // namespace RISCVISD class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &Subtarget; @@ -62,6 +121,8 @@ public: explicit RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI); + const RISCVSubtarget &getSubtarget() const { return Subtarget; } + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override; @@ -74,6 +135,8 @@ public: bool isTruncateFree(EVT SrcVT, EVT DstVT) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; @@ -86,6 +149,15 @@ public: SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + TargetLoweringOpt &TLO) const override; + + void computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const override; unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, @@ -126,6 +198,9 @@ public: Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } @@ -153,6 +228,7 @@ public: getExceptionSelectorRegister(const Constant *PersonalityFn) const override; bool shouldExtendTypeInLibCall(EVT Type) const override; + bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; /// Returns the register with the specified architectural or ABI name. 
This /// method is necessary to lower the llvm.read_register.* and @@ -220,6 +296,7 @@ private: SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; @@ -227,7 +304,14 @@ private: SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; + SDValue lowerSPLATVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, + int64_t ExtTrueVal) const; + SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, @@ -239,6 +323,37 @@ private: const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, MachineFunction &MF) const; }; + +namespace RISCVVIntrinsicsTable { + +struct RISCVVIntrinsicInfo { + unsigned int IntrinsicID; + unsigned int ExtendedOperand; +}; + +using namespace RISCV; + +#define GET_RISCVVIntrinsicsTable_DECL +#include "RISCVGenSearchableTables.inc" + +} // end namespace RISCVVIntrinsicsTable + +namespace RISCVZvlssegTable { + +struct RISCVZvlsseg { + unsigned int IntrinsicID; + unsigned int SEW; + unsigned int LMUL; + unsigned int IndexLMUL; + unsigned int Pseudo; +}; + +using namespace RISCV; + +#define GET_RISCVZvlssegTable_DECL +#include "RISCVGenSearchableTables.inc" + +} // namespace RISCVZvlssegTable } #endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td index a47945a6a515..7be74b79d99b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -49,18 +49,61 @@ def InstFormatCB : InstFormat<15>; def InstFormatCJ : InstFormat<16>; def InstFormatOther : InstFormat<17>; -class RISCVVConstraint<bits<4> val> { - bits<4> Value = val; +class RISCVVConstraint<bits<3> val> { + bits<3> Value = val; } -def NoConstraint : RISCVVConstraint<0>; -def WidenV : RISCVVConstraint<1>; -def WidenW : RISCVVConstraint<2>; -def WidenCvt : RISCVVConstraint<3>; -def Narrow : RISCVVConstraint<4>; -def Iota : RISCVVConstraint<5>; -def SlideUp : RISCVVConstraint<6>; -def Vrgather : RISCVVConstraint<7>; -def Vcompress : RISCVVConstraint<8>; +def NoConstraint : RISCVVConstraint<0b000>; +def VS2Constraint : RISCVVConstraint<0b001>; +def VS1Constraint : RISCVVConstraint<0b010>; +def VMConstraint : RISCVVConstraint<0b100>; + +// Illegal instructions: +// +// * The destination vector register group for a masked vector instruction +// cannot overlap the source mask register (v0), unless the destination vector +// register is being written with a mask value (e.g., comparisons) or the +// scalar result of a reduction. 
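The GET_*_DECL / GET_*_IMPL pairs above follow TableGen's searchable-table idiom: the header slice declares the row structs and lookup interface, and exactly one .cpp defines the sorted table plus the lookup body. The generated lookup is shaped like the hand-written sketch below (table contents and the lookup name are illustrative, not copied from the generated file):

    #include <algorithm>
    #include <iterator>

    struct RISCVVIntrinsicInfo {
      unsigned IntrinsicID;
      unsigned ExtendedOperand;
    };

    // Rows sorted by IntrinsicID; the values here are placeholders.
    static const RISCVVIntrinsicInfo Table[] = {{1, 2}, {4, 1}, {9, 3}};

    static const RISCVVIntrinsicInfo *getRISCVVIntrinsicInfo(unsigned ID) {
      auto It = std::lower_bound(std::begin(Table), std::end(Table), ID,
                                 [](const RISCVVIntrinsicInfo &Row, unsigned Key) {
                                   return Row.IntrinsicID < Key;
                                 });
      return (It != std::end(Table) && It->IntrinsicID == ID) ? It : nullptr;
    }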
+// +// * Widening: The destination EEW is greater than the source EEW, the source +// EMUL is at least 1. The destination vector register group cannot overlap +// with the source vector register groups besides the highest-numbered part of +// the destination register group. +// +// * Narrowing: The destination EEW is smaller than the source EEW. The +// destination vector register group cannot overlap with the source vector +// register groups besides the lowest-numbered part of the source register +// group. +// +// * vmsbf.m/vmsif.m/vmsof.m: The destination register cannot overlap the +// source register and, if masked, cannot overlap the mask register ('v0'). +// +// * viota: The destination register cannot overlap the source register and, +// if masked, cannot overlap the mask register ('v0'). +// +// * v[f]slide[1]up: The destination vector register group for vslideup cannot +// overlap the source vector register group. +// +// * vrgather: The destination vector register group cannot overlap with the +// source vector register groups. +// +// * vcompress: The destination vector register group cannot overlap the +// source vector register group or the source mask register +def WidenV : RISCVVConstraint<!or(VS2Constraint.Value, + VS1Constraint.Value, + VMConstraint.Value)>; +def WidenW : RISCVVConstraint<!or(VS1Constraint.Value, + VMConstraint.Value)>; +def WidenCvt : RISCVVConstraint<!or(VS2Constraint.Value, + VMConstraint.Value)>; +def Iota : RISCVVConstraint<!or(VS2Constraint.Value, + VMConstraint.Value)>; +def SlideUp : RISCVVConstraint<!or(VS2Constraint.Value, + VMConstraint.Value)>; +def Vrgather : RISCVVConstraint<!or(VS2Constraint.Value, + VS1Constraint.Value, + VMConstraint.Value)>; +def Vcompress : RISCVVConstraint<!or(VS2Constraint.Value, + VS1Constraint.Value)>; // The following opcode names match those given in Table 19.1 in the // RISC-V User-level ISA specification ("RISC-V base opcode map"). 
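The rewrite above turns the old one-of-nine constraint enum into a three-bit mask of primitive overlap constraints, so a checker can test each bit independently instead of switching over composite cases. In C++ terms the !or combinations are plain bitwise unions (bit values as in the .td):

    enum RVVConstraintBits : unsigned {
      NoConstraint  = 0b000,
      VS2Constraint = 0b001, // destination must not overlap the vs2 group
      VS1Constraint = 0b010, // destination must not overlap the vs1 group
      VMConstraint  = 0b100, // destination must not overlap the mask (v0)
    };

    constexpr unsigned WidenV    = VS2Constraint | VS1Constraint | VMConstraint;
    constexpr unsigned Vcompress = VS2Constraint | VS1Constraint;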
@@ -116,7 +159,25 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr, // Defaults RISCVVConstraint RVVConstraint = NoConstraint; - let TSFlags{8-5} = RVVConstraint.Value; + let TSFlags{7-5} = RVVConstraint.Value; + + bits<3> VLMul = 0; + let TSFlags{10-8} = VLMul; + + bit HasDummyMask = 0; + let TSFlags{11} = HasDummyMask; + + bit WritesElement0 = 0; + let TSFlags{12} = WritesElement0; + + bit HasMergeOp = 0; + let TSFlags{13} = HasMergeOp; + + bit HasSEWOp = 0; + let TSFlags{14} = HasSEWOp; + + bit HasVLOp = 0; + let TSFlags{15} = HasVLOp; } // Pseudo instructions diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td index e5f154966ba6..80f46b73bfd7 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td @@ -21,37 +21,67 @@ def OPIVX : RISCVVFormat<0b100>; def OPFVF : RISCVVFormat<0b101>; def OPMVX : RISCVVFormat<0b110>; -class RISCVMOP<bits<3> val> { - bits<3> Value = val; +class RISCVMOP<bits<2> val> { + bits<2> Value = val; } -def MOPLDUnitStrideU : RISCVMOP<0b000>; -def MOPLDStridedU : RISCVMOP<0b010>; -def MOPLDIndexedU : RISCVMOP<0b011>; -def MOPLDUnitStrideS : RISCVMOP<0b100>; -def MOPLDStridedS : RISCVMOP<0b110>; -def MOPLDIndexedS : RISCVMOP<0b111>; - -def MOPSTUnitStride : RISCVMOP<0b000>; -def MOPSTStrided : RISCVMOP<0b010>; -def MOPSTIndexedOrder: RISCVMOP<0b011>; -def MOPSTIndexedUnOrd: RISCVMOP<0b111>; +def MOPLDUnitStride : RISCVMOP<0b00>; +def MOPLDIndexedUnord : RISCVMOP<0b01>; +def MOPLDStrided : RISCVMOP<0b10>; +def MOPLDIndexedOrder : RISCVMOP<0b11>; + +def MOPSTUnitStride : RISCVMOP<0b00>; +def MOPSTIndexedUnord : RISCVMOP<0b01>; +def MOPSTStrided : RISCVMOP<0b10>; +def MOPSTIndexedOrder : RISCVMOP<0b11>; class RISCVLSUMOP<bits<5> val> { bits<5> Value = val; } def LUMOPUnitStride : RISCVLSUMOP<0b00000>; +def LUMOPUnitStrideMask : RISCVLSUMOP<0b01011>; def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>; def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>; def SUMOPUnitStride : RISCVLSUMOP<0b00000>; +def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>; def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>; -class RISCVWidth<bits<3> val> { - bits<3> Value = val; +class RISCVAMOOP<bits<5> val> { + bits<5> Value = val; +} +def AMOOPVamoSwap : RISCVAMOOP<0b00001>; +def AMOOPVamoAdd : RISCVAMOOP<0b00000>; +def AMOOPVamoXor : RISCVAMOOP<0b00100>; +def AMOOPVamoAnd : RISCVAMOOP<0b01100>; +def AMOOPVamoOr : RISCVAMOOP<0b01000>; +def AMOOPVamoMin : RISCVAMOOP<0b10000>; +def AMOOPVamoMax : RISCVAMOOP<0b10100>; +def AMOOPVamoMinu : RISCVAMOOP<0b11000>; +def AMOOPVamoMaxu : RISCVAMOOP<0b11100>; + +class RISCVWidth<bits<4> val> { + bits<4> Value = val; +} +def LSWidth8 : RISCVWidth<0b0000>; +def LSWidth16 : RISCVWidth<0b0101>; +def LSWidth32 : RISCVWidth<0b0110>; +def LSWidth64 : RISCVWidth<0b0111>; + +class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { + bits<5> uimm; + bits<5> rd; + bits<10> vtypei; + + let Inst{31} = 1; + let Inst{30} = 1; + let Inst{29-20} = vtypei{9-0}; + let Inst{19-15} = uimm; + let Inst{14-12} = 0b111; + let Inst{11-7} = rd; + let Opcode = OPC_OP_V.Value; + + let Defs = [VTYPE, VL]; } -def LSWidthVByte : RISCVWidth<0b000>; -def LSWidthVHalf : RISCVWidth<0b101>; -def LSWidthVWord : RISCVWidth<0b110>; -def LSWidthVSEW : RISCVWidth<0b111>; class RVInstSetVLi<dag outs, dag ins, string opcodestr, string 
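The TSFlags assignments above pack per-instruction RVV properties into the target-specific flags word; the C++ side reads them back with shift-and-mask accessors. A sketch matching the bit positions given in the .td (accessor names hypothetical):

    #include <cstdint>

    static unsigned getRVVConstraint(uint64_t TSFlags) {
      return (TSFlags >> 5) & 0x7; // TSFlags{7-5}
    }
    static unsigned getVLMul(uint64_t TSFlags) {
      return (TSFlags >> 8) & 0x7; // TSFlags{10-8}
    }
    static bool hasDummyMask(uint64_t TSFlags) { return (TSFlags >> 11) & 1; }
    static bool hasSEWOp(uint64_t TSFlags) { return (TSFlags >> 14) & 1; }
    static bool hasVLOp(uint64_t TSFlags) { return (TSFlags >> 15) & 1; }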
argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { @@ -103,6 +133,7 @@ class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, @@ -122,6 +153,7 @@ class RVInstVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstV2<bits<6> funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins, @@ -140,6 +172,7 @@ class RVInstV2<bits<6> funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstIVI<bits<6> funct6, dag outs, dag ins, string opcodestr, @@ -159,6 +192,7 @@ class RVInstIVI<bits<6> funct6, dag outs, dag ins, string opcodestr, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstV<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, dag outs, @@ -177,10 +211,11 @@ class RVInstV<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, dag outs, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVLU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP lumop, - RISCVWidth width, dag outs, dag ins, string opcodestr, +class RVInstVLU<bits<3> nf, bit mew, RISCVLSUMOP lumop, + bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs1; @@ -188,18 +223,20 @@ class RVInstVLU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP lumop, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPLDUnitStride.Value; let Inst{25} = vm; let Inst{24-20} = lumop.Value; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vd; let Opcode = OPC_LOAD_FP.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVLS<bits<3> nf, RISCVMOP mop, RISCVWidth width, +class RVInstVLS<bits<3> nf, bit mew, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs2; @@ -208,18 +245,20 @@ class RVInstVLS<bits<3> nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPLDStrided.Value; let Inst{25} = vm; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vd; let Opcode = OPC_LOAD_FP.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVLX<bits<3> nf, RISCVMOP mop, RISCVWidth width, +class RVInstVLX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> vs2; @@ -228,19 +267,21 @@ class RVInstVLX<bits<3> nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = mop.Value; let Inst{25} = vm; let Inst{24-20} = vs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vd; let Opcode = OPC_LOAD_FP.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVSU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP sumop, - RISCVWidth width, dag outs, dag ins, string opcodestr, +class RVInstVSU<bits<3> 
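Each RVInst class above is a fixed bit layout; for the unit-stride vector load format the fields sit at nf[31:29], mew[28], mop[27:26], vm[25], lumop[24:20], rs1[19:15], width[14:12], vd[11:7], opcode[6:0]. A hand-rolled packer for that layout (illustrative only; the real encoder is generated from the .td):

    #include <cstdint>

    static uint32_t encodeVLUnitStride(unsigned nf, unsigned mew, unsigned vm,
                                       unsigned lumop, unsigned rs1,
                                       unsigned width, unsigned vd) {
      const uint32_t mop = 0b00;         // MOPLDUnitStride
      const uint32_t opcode = 0b0000111; // OPC_LOAD_FP
      return (nf & 0x7) << 29 | (mew & 0x1) << 28 | mop << 26 |
             (vm & 0x1) << 25 | (lumop & 0x1F) << 20 | (rs1 & 0x1F) << 15 |
             (width & 0x7) << 12 | (vd & 0x1F) << 7 | opcode;
    }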
nf, bit mew, RISCVLSUMOP sumop, + bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs1; @@ -248,18 +289,19 @@ class RVInstVSU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP sumop, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPSTUnitStride.Value; let Inst{25} = vm; let Inst{24-20} = sumop.Value; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vs3; let Opcode = OPC_STORE_FP.Value; let Uses = [VTYPE, VL]; } -class RVInstVSS<bits<3> nf, RISCVMOP mop, RISCVWidth width, +class RVInstVSS<bits<3> nf, bit mew, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs2; @@ -268,18 +310,19 @@ class RVInstVSS<bits<3> nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPSTStrided.Value; let Inst{25} = vm; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vs3; let Opcode = OPC_STORE_FP.Value; let Uses = [VTYPE, VL]; } -class RVInstVSX<bits<3> nf, RISCVMOP mop, RISCVWidth width, +class RVInstVSX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> vs2; @@ -288,13 +331,33 @@ class RVInstVSX<bits<3> nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = mop.Value; let Inst{25} = vm; let Inst{24-20} = vs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vs3; let Opcode = OPC_STORE_FP.Value; let Uses = [VTYPE, VL]; } + +class RVInstVAMO<RISCVAMOOP amoop, bits<3> width, dag outs, + dag ins, string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { + bits<5> vs2; + bits<5> rs1; + bit wd; + bit vm; + + let Inst{31-27} = amoop.Value; + let Inst{26} = wd; + let Inst{25} = vm; + let Inst{24-20} = vs2; + let Inst{19-15} = rs1; + let Inst{14-12} = width; + let Opcode = OPC_AMO.Value; + + let Uses = [VTYPE, VL]; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 7b6ea002c7b7..45a5e10e26a3 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -11,10 +11,10 @@ //===----------------------------------------------------------------------===// #include "RISCVInstrInfo.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "RISCV.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" -#include "Utils/RISCVMatInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -45,6 +45,7 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case RISCV::LBU: case RISCV::LH: case RISCV::LHU: + case RISCV::FLH: case RISCV::LW: case RISCV::FLW: case RISCV::LWU: @@ -70,6 +71,7 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, case RISCV::SB: case RISCV::SH: case RISCV::SW: + case RISCV::FSH: case RISCV::FSW: case RISCV::SD: case RISCV::FSD: @@ -96,18 +98,37 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // FPR->FPR copies + 
// FPR->FPR copies and VR->VR copies. unsigned Opc; - if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) + bool IsScalableVector = false; + if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) + Opc = RISCV::FSGNJ_H; + else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) Opc = RISCV::FSGNJ_S; else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) Opc = RISCV::FSGNJ_D; - else + else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + IsScalableVector = true; + } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV2R_V; + IsScalableVector = true; + } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV4R_V; + IsScalableVector = true; + } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV8R_V; + IsScalableVector = true; + } else llvm_unreachable("Impossible reg-to-reg copy"); - BuildMI(MBB, MBBI, DL, get(Opc), DstReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(SrcReg, getKillRegState(KillSrc)); + if (IsScalableVector) + BuildMI(MBB, MBBI, DL, get(Opc), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else + BuildMI(MBB, MBBI, DL, get(Opc), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SrcReg, getKillRegState(KillSrc)); } void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -119,11 +140,18 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (I != MBB.end()) DL = I->getDebugLoc(); - unsigned Opcode; + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + unsigned Opcode; if (RISCV::GPRRegClass.hasSubClassEq(RC)) Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; + else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) + Opcode = RISCV::FSH; else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) Opcode = RISCV::FSW; else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) @@ -134,7 +162,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(IsKill)) .addFrameIndex(FI) - .addImm(0); + .addImm(0) + .addMemOperand(MMO); } void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -146,11 +175,18 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (I != MBB.end()) DL = I->getDebugLoc(); - unsigned Opcode; + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + unsigned Opcode; if (RISCV::GPRRegClass.hasSubClassEq(RC)) Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
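copyPhysReg above gains half-precision and vector-register cases; the scalar FP copies stay fsgnj rd, rs, rs, which degenerates to a plain copy because magnitude and sign both come from the same source. The sign-injection semantics in scalar C++:

    #include <cmath>

    // fsgnj.d rd, rs1, rs2 takes rs1's magnitude and rs2's sign; with
    // rs1 == rs2 it is a register copy, hence its use as the FP move.
    static double fsgnj(double rs1, double rs2) {
      return std::copysign(rs1, rs2);
    }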
RISCV::LW : RISCV::LD; + else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) + Opcode = RISCV::FLH; else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) Opcode = RISCV::FLW; else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) @@ -158,7 +194,10 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, else llvm_unreachable("Can't load this register from stack slot"); - BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(Opcode), DstReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); } void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, @@ -512,17 +551,48 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { const unsigned Opcode = MI.getOpcode(); - switch(Opcode) { - default: - break; - case RISCV::ADDI: - case RISCV::ORI: - case RISCV::XORI: - return (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0); + switch (Opcode) { + default: + break; + case RISCV::FSGNJ_D: + case RISCV::FSGNJ_S: + // The canonical floating-point move is fsgnj rd, rs, rs. + return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && + MI.getOperand(1).getReg() == MI.getOperand(2).getReg(); + case RISCV::ADDI: + case RISCV::ORI: + case RISCV::XORI: + return (MI.getOperand(1).isReg() && + MI.getOperand(1).getReg() == RISCV::X0) || + (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0); } return MI.isAsCheapAsAMove(); } +Optional<DestSourcePair> +RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { + if (MI.isMoveReg()) + return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; + switch (MI.getOpcode()) { + default: + break; + case RISCV::ADDI: + // Operand 1 can be a frameindex but callers expect registers + if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) + return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; + break; + case RISCV::FSGNJ_D: + case RISCV::FSGNJ_S: + // The canonical floating-point move is fsgnj rd, rs, rs. + if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && + MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) + return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; + break; + } + return None; +} + bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { const MCInstrInfo *MCII = STI.getInstrInfo(); @@ -551,15 +621,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_SIMM12: Ok = isInt<12>(Imm); break; - case RISCVOp::OPERAND_SIMM13_LSB0: - Ok = isShiftedInt<12, 1>(Imm); - break; case RISCVOp::OPERAND_UIMM20: Ok = isUInt<20>(Imm); break; - case RISCVOp::OPERAND_SIMM21_LSB0: - Ok = isShiftedInt<20, 1>(Imm); - break; case RISCVOp::OPERAND_UIMMLOG2XLEN: if (STI.getTargetTriple().isArch64Bit()) Ok = isUInt<6>(Imm); @@ -699,10 +763,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo( return !LRU.available(RISCV::X5); }; - RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), - RepeatedSequenceLocs.end(), - CannotInsertCall), - RepeatedSequenceLocs.end()); + llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall); // If the sequence doesn't have enough candidates left, then we're done. 
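isAsCheapAsAMove and the new isCopyInstrImpl above recognize the same canonical move shapes: addi rd, rs, 0 (or an x0 operand) for integer copies and fsgnj rd, rs, rs for floating point. The FP test in isolation (helper name hypothetical):

    // True for FSGNJ_S/FSGNJ_D in the canonical "fsgnj rd, rs, rs" form,
    // i.e. both source operands name the same register.
    static bool isCanonicalFPMove(const MachineInstr &MI) {
      return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
             MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
    }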
if (RepeatedSequenceLocs.size() < 2) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 21bc508cdc9c..0b034210aa55 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -83,6 +83,9 @@ public: bool isAsCheapAsAMove(const MachineInstr &MI) const override; + Optional<DestSourcePair> + isCopyInstrImpl(const MachineInstr &MI) const override; + bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; @@ -134,23 +137,5 @@ protected: const RISCVSubtarget &STI; }; -namespace RISCV { -// Match with the definitions in RISCVInstrFormatsV.td -enum RVVConstraintType { - NoConstraint = 0, - WidenV = 1, - WidenW = 2, - WidenCvt = 3, - Narrow = 4, - Iota = 5, - SlideUp = 6, - Vrgather = 7, - Vcompress = 8, - - ConstraintOffset = 5, - ConstraintMask = 0b1111 -}; -} // end namespace RISCV - } // end namespace llvm #endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 8547f791092b..a07b589e77fb 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -25,6 +25,8 @@ def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>; def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>, SDTCisSameAs<0, 4>, SDTCisSameAs<4, 5>]>; +def SDT_RISCVReadCycleWide : SDTypeProfile<2, 0, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; // Target-independent nodes, but with target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, @@ -44,8 +46,7 @@ def riscv_sret_flag : SDNode<"RISCVISD::SRET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; def riscv_mret_flag : SDNode<"RISCVISD::MRET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; -def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC, - [SDNPInGlue]>; +def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC>; def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -53,6 +54,10 @@ def riscv_sllw : SDNode<"RISCVISD::SLLW", SDTIntShiftOp>; def riscv_sraw : SDNode<"RISCVISD::SRAW", SDTIntShiftOp>; def riscv_srlw : SDNode<"RISCVISD::SRLW", SDTIntShiftOp>; +def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE", + SDT_RISCVReadCycleWide, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// @@ -161,6 +166,7 @@ def simm12_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, // A 13-bit signed immediate where the least significant bit is zero. 
def simm13_lsb0 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<13, "Lsb0">; + let PrintMethod = "printBranchOperand"; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<13>"; let MCOperandPredicate = [{ @@ -169,8 +175,7 @@ def simm13_lsb0 : Operand<OtherVT> { return isShiftedInt<12, 1>(Imm); return MCOp.isBareSymbolRef(); }]; - let OperandType = "OPERAND_SIMM13_LSB0"; - let OperandNamespace = "RISCVOp"; + let OperandType = "OPERAND_PCREL"; } class UImm20Operand : Operand<XLenVT> { @@ -200,6 +205,7 @@ def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> { // A 21-bit signed immediate where the least significant bit is zero. def simm21_lsb0_jal : Operand<OtherVT> { let ParserMatchClass = Simm21Lsb0JALAsmOperand; + let PrintMethod = "printBranchOperand"; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<21>"; let MCOperandPredicate = [{ @@ -208,8 +214,7 @@ def simm21_lsb0_jal : Operand<OtherVT> { return isShiftedInt<20, 1>(Imm); return MCOp.isBareSymbolRef(); }]; - let OperandType = "OPERAND_SIMM21_LSB0"; - let OperandNamespace = "RISCVOp"; + let OperandType = "OPERAND_PCREL"; } def BareSymbol : AsmOperandClass { @@ -291,6 +296,11 @@ def immbottomxlenset : ImmLeaf<XLenVT, [{ return countTrailingOnes<uint64_t>(Imm) >= 5; }]>; +// A 6-bit constant greater than 32. +def uimm6gt32 : ImmLeaf<XLenVT, [{ + return isUInt<6>(Imm) && Imm > 32; +}]>; + // Addressing modes. // Necessary because a frameindex can't be matched directly in a pattern. def AddrFI : ComplexPattern<iPTR, 1, "SelectAddrFI", [frameindex], []>; @@ -316,6 +326,25 @@ def NegImm : SDNodeXForm<imm, [{ N->getValueType(0)); }]>; +// Return an immediate value minus 32. +def ImmSub32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue() - 32, SDLoc(N), + N->getValueType(0)); +}]>; + +// Return an immediate subtracted from XLen. +def ImmSubFromXLen : SDNodeXForm<imm, [{ + uint64_t XLen = Subtarget->getXLen(); + return CurDAG->getTargetConstant(XLen - N->getZExtValue(), SDLoc(N), + N->getValueType(0)); +}]>; + +// Return an immediate subtracted from 32. +def ImmSubFrom32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), + N->getValueType(0)); +}]>; + //===----------------------------------------------------------------------===// // Instruction Formats //===----------------------------------------------------------------------===// @@ -368,12 +397,14 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr> : RVInstR<funct7, funct3, OPC_OP, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">; -let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +let hasNoSchedulingInfo = 1, + hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ir<bits<3> funct3, string opcodestr> : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1), opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR, ReadCSR]>; -let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +let hasNoSchedulingInfo = 1, + hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ii<bits<3> funct3, string opcodestr> : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, uimm5:$rs1), @@ -791,6 +822,11 @@ def : MnemonicAlias<"move", "mv">; def : MnemonicAlias<"scall", "ecall">; def : MnemonicAlias<"sbreak", "ebreak">; +// This alias was added to the spec in December 2020. 
Don't print it by default +// to allow assembly we print to be compatible with versions of GNU assembler +// that don't support this alias. +def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>; + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns // @@ -815,18 +851,30 @@ def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ return isOrEquivalentToAdd(N); }]>; def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{ - return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32; + return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32); }]>; def sexti32 : PatFrags<(ops node:$src), [(sext_inreg node:$src, i32), (assertsexti32 node:$src)]>; def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ - return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32; + return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32); }]>; def zexti32 : PatFrags<(ops node:$src), [(and node:$src, 0xffffffff), (assertzexti32 node:$src)]>; +def SRLIWPat : PatFrag<(ops node:$A, node:$B), + (srl (and node:$A, imm), node:$B), [{ + return MatchSRLIW(N); +}]>; + +// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32 +// on RV64). Also used to optimize the same sequence without SLLIUW. +def SLLIUWPat : PatFrag<(ops node:$A, node:$B), + (and (shl node:$A, node:$B), imm), [{ + return MatchSLLIUW(N); +}]>; + /// Immediates def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>; @@ -857,6 +905,10 @@ class shiftop<SDPatternOperator operator> : PatFrags<(ops node:$val, node:$count), [(operator node:$val, node:$count), (operator node:$val, (and node:$count, immbottomxlenset))]>; +class shiftopw<SDPatternOperator operator> + : PatFrags<(ops node:$val, node:$count), + [(operator node:$val, node:$count), + (operator node:$val, (and node:$count, (XLenVT 31)))]>; def : PatGprGpr<shiftop<shl>, SLL>; def : PatGprGpr<shiftop<srl>, SRL>; @@ -873,10 +925,10 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd), /// FrameIndex calculations -def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12), - (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; -def : Pat<(IsOrAdd (i32 AddrFI:$Rs), simm12:$imm12), - (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; +def : Pat<(add (XLenVT AddrFI:$Rs), simm12:$imm12), + (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>; +def : Pat<(IsOrAdd (XLenVT AddrFI:$Rs), simm12:$imm12), + (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>; /// Setcc @@ -938,15 +990,18 @@ def : BccSwapPat<setle, BGE>; def : BccSwapPat<setugt, BLTU>; def : BccSwapPat<setule, BGEU>; -// An extra pattern is needed for a brcond without a setcc (i.e. where the +// Extra patterns are needed for a brcond without a setcc (i.e. where the // condition was calculated elsewhere). def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>; +// In this pattern, the `(xor $cond, 1)` functions like (boolean) `not`, as the +// `brcond` only uses the lowest bit. 
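The shiftopw fragment above lets W-form shift patterns match whether or not the IR has already masked the shift amount: SLLW/SRLW/SRAW read only bits [4:0] of rs2, so an explicit and with 31 is redundant and folds away. The invariant in C++ terms:

    #include <cstdint>

    // On RV64, sllw ignores all but the low five count bits, so the
    // masked and unmasked forms compute the same value.
    static uint32_t sllwSemantics(uint32_t x, uint64_t count) {
      return x << (count & 31);
    }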
+def : Pat<(brcond (XLenVT (xor GPR:$cond, 1)), bb:$imm12), + (BEQ GPR:$cond, X0, bb:$imm12)>; let isBarrier = 1, isBranch = 1, isTerminator = 1 in def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>, PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>; -let isCall = 1, Defs=[X1] in let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>, PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>; @@ -1038,6 +1093,25 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; + +/// Sign/Zero Extends + +// There are single-instruction versions of these in Zbb, so disable these +// Pseudos if that extension is present. +let hasSideEffects = 0, mayLoad = 0, + mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in { +def PseudoSEXT_B : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "sext.b", "$rd, $rs">; +def PseudoSEXT_H : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "sext.h", "$rd, $rs">; +// rv64's sext.w is defined above, using InstAlias<"sext.w ... +// zext.b is defined above, using InstAlias<"zext.b ... +def PseudoZEXT_H : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.h", "$rd, $rs">; +} // hasSideEffects = 0, ... + +let Predicates = [IsRV64], hasSideEffects = 0, mayLoad = 0, mayStore = 0, + isCodeGenOnly = 0, isAsmParserOnly = 1 in { +def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs">; +} // Predicates = [IsRV64], ... + /// Loads multiclass LdPat<PatFrag LoadOp, RVInst Inst> { @@ -1108,12 +1182,23 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), /// RV64 patterns +let Predicates = [IsRV64, NotHasStdExtZba] in { +def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>; + +// If we're shifting a 32-bit zero extended value left by 0-31 bits, use 2 +// shifts instead of 3. This can occur when unsigned is used to index an array. +def : Pat<(shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt), + (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>; +// shl/and can appear in the other order too. +def : Pat<(SLLIUWPat GPR:$rs1, uimm5:$shamt), + (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>; +} + let Predicates = [IsRV64] in { /// sext and zext def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>; -def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>; /// ALU operations @@ -1125,14 +1210,18 @@ def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32), (SUBW GPR:$rs1, GPR:$rs2)>; def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32), (SLLIW GPR:$rs1, uimm5:$shamt)>; -// (srl (zexti32 ...), uimm5:$shamt) is matched with custom code due to the -// need to undo manipulation of the mask value performed by DAGCombine. 
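A worked example for the two-shift zero-extension pattern above, with shamt = 5: ImmSubFrom32 rewrites the amount to 27, selecting slli rd, rs, 32 followed by srli rd, rd, 27 in place of the three shifts the generic lowering would otherwise use (slli 32, srli 32, then slli 5):

    #include <cstdint>

    // (x & 0xffffffff) << 5 in the two-shift form the pattern selects
    // on RV64 without Zba.
    static uint64_t zextThenShl5(uint64_t x) { return (x << 32) >> 27; }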
+def : Pat<(SRLIWPat GPR:$rs1, uimm5:$shamt), + (SRLIW GPR:$rs1, uimm5:$shamt)>; +def : Pat<(srl (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt), + (SRLIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>; def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt), (SRAIW GPR:$rs1, uimm5:$shamt)>; +def : Pat<(sra (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt), + (SRAIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>; -def : PatGprGpr<riscv_sllw, SLLW>; -def : PatGprGpr<riscv_srlw, SRLW>; -def : PatGprGpr<riscv_sraw, SRAW>; +def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>; +def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>; +def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>; /// Loads @@ -1153,9 +1242,10 @@ let Predicates = [IsRV64] in def : Pat<(readcyclecounter), (CSRRS CYCLE.Encoding, X0)>; // On RV32, ReadCycleWide will be expanded to the suggested loop reading both // halves of the 64-bit "cycle" CSR. -let Predicates = [IsRV32], usesCustomInserter = 1, hasSideEffects = 0, -mayLoad = 0, mayStore = 0, hasNoSchedulingInfo = 1 in -def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), [], "", "">; +let Predicates = [IsRV32], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in +def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), + [(set GPR:$lo, GPR:$hi, (riscv_read_cycle_wide))], + "", "">; /// traps @@ -1178,3 +1268,4 @@ include "RISCVInstrInfoD.td" include "RISCVInstrInfoC.td" include "RISCVInstrInfoB.td" include "RISCVInstrInfoV.td" +include "RISCVInstrInfoZfh.td" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td index afac509f743d..7888ac7bac8e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -7,16 +7,21 @@ //===----------------------------------------------------------------------===// // // This file describes the RISC-V instructions from the standard 'B' Bitmanip -// extension, version 0.92. +// extension, version 0.93. // This version is still experimental as the 'B' extension hasn't been // ratified yet. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Operand definitions. +// Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// +def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>; +def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>; +def riscv_fslw : SDNode<"RISCVISD::FSLW", SDTIntShiftDOp>; +def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDTIntShiftDOp>; + def UImmLog2XLenHalfAsmOperand : AsmOperandClass { let Name = "UImmLog2XLenHalf"; let RenderMethod = "addImmOperands"; @@ -40,6 +45,44 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{ }]; } +// Checks if this mask has a single 0 bit and cannot be used with ANDI. +def BCLRMask : ImmLeaf<XLenVT, [{ + if (Subtarget->is64Bit()) + return !isInt<12>(Imm) && isPowerOf2_64(~Imm); + return !isInt<12>(Imm) && isPowerOf2_32(~Imm); +}]>; + +// Checks if this mask has a single 1 bit and cannot be used with ORI/XORI. +def BSETINVMask : ImmLeaf<XLenVT, [{ + if (Subtarget->is64Bit()) + return !isInt<12>(Imm) && isPowerOf2_64(Imm); + return !isInt<12>(Imm) && isPowerOf2_32(Imm); +}]>; + +def BCLRXForm : SDNodeXForm<imm, [{ + // Find the lowest 0. 
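Similarly, the uimm6gt32 patterns above fold a shift past 32 of a left-shifted value into one W-form shift. For shamt = 35, ImmSub32 yields 3 and (x << 32) >> 35 becomes srliw rd, rs, 3; the logical result's upper bits are already zero, so the W-form sign extension is harmless:

    #include <cstdint>

    // Equal to zero-extending ((uint32_t)x >> 3), i.e. srliw rd, rs, 3.
    static uint64_t shlThenSrl35(uint64_t x) { return (x << 32) >> 35; }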
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(), + SDLoc(N), N->getValueType(0)); +}]>; + +def BSETINVXForm : SDNodeXForm<imm, [{ + // Find the lowest 1. + return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(), + SDLoc(N), N->getValueType(0)); +}]>; + +// Similar to above, but makes sure the immediate has 33 sign bits. When used +// with an AND/OR/XOR where the other operand has at least 33 sign bits, the +// result will have 33 sign bits. This can match BCLRIW/BSETIW/BINVIW. +def BCLRWMask : ImmLeaf<i64, [{ + // After checking the sign bits, truncate to 32 bits for power of 2 check. + return isInt<32>(Imm) && !isInt<12>(Imm) && isPowerOf2_32(~Imm); +}]>; + +def BSETINVWMask : ImmLeaf<i64, [{ + return isInt<32>(Imm) && !isInt<12>(Imm) && isPowerOf2_32(Imm); +}]>; + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -56,11 +99,6 @@ class RVBUnary<bits<7> funct7, bits<5> funct5, bits<3> funct3, } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVBALUW_ri<bits<3> funct3, string opcodestr> - : RVInstI<funct3, OPC_OP_IMM_32, (outs GPR:$rd), - (ins GPR:$rs1, simm12:$imm12), opcodestr, "$rd, $rs1, $imm12">; - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVBShift_ri<bits<5> funct5, bits<3> funct3, RISCVOpcode opcode, string opcodestr> : RVInstI<funct3, opcode, (outs GPR:$rd), @@ -147,10 +185,11 @@ def ORN : ALU_rr<0b0100000, 0b110, "orn">, Sched<[]>; def XNOR : ALU_rr<0b0100000, 0b100, "xnor">, Sched<[]>; } // Predicates = [HasStdExtZbbOrZbp] -let Predicates = [HasStdExtZbb] in { -def SLO : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>; -def SRO : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>; -} // Predicates = [HasStdExtZbb] +let Predicates = [HasStdExtZba] in { +def SH1ADD : ALU_rr<0b0010000, 0b010, "sh1add">, Sched<[]>; +def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">, Sched<[]>; +def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[]>; +} // Predicates = [HasStdExtZba] let Predicates = [HasStdExtZbbOrZbp] in { def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>; @@ -158,10 +197,10 @@ def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>; } // Predicates = [HasStdExtZbbOrZbp] let Predicates = [HasStdExtZbs] in { -def SBCLR : ALU_rr<0b0100100, 0b001, "sbclr">, Sched<[]>; -def SBSET : ALU_rr<0b0010100, 0b001, "sbset">, Sched<[]>; -def SBINV : ALU_rr<0b0110100, 0b001, "sbinv">, Sched<[]>; -def SBEXT : ALU_rr<0b0100100, 0b101, "sbext">, Sched<[]>; +def BCLR : ALU_rr<0b0100100, 0b001, "bclr">, Sched<[]>; +def BSET : ALU_rr<0b0010100, 0b001, "bset">, Sched<[]>; +def BINV : ALU_rr<0b0110100, 0b001, "binv">, Sched<[]>; +def BEXT : ALU_rr<0b0100100, 0b101, "bext">, Sched<[]>; } // Predicates = [HasStdExtZbs] let Predicates = [HasStdExtZbp] in { @@ -169,19 +208,20 @@ def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>; def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>; } // Predicates = [HasStdExtZbp] -let Predicates = [HasStdExtZbb] in { -def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>; -def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>; -} // Predicates = [HasStdExtZbb] +let Predicates = [HasStdExtZbp] in { +def XPERMN : ALU_rr<0b0010100, 0b010, "xperm.n">, Sched<[]>; +def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>; +def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>; +} // Predicates = [HasStdExtZbp] let Predicates = 
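A worked example for BCLRMask/BCLRXForm above, clearing bit 13: the AND mask ~(1 << 13) = 0xffffffffffffdfff has a single zero bit and does not fit a 12-bit signed immediate, so ANDI cannot encode it; countTrailingOnes of the mask recovers the bit index and the pattern selects bclri rd, rs, 13:

    #include <cstdint>

    // Selects bclri rd, rs, 13 under the Zbs patterns discussed above.
    static uint64_t clearBit13(uint64_t x) { return x & ~(1ULL << 13); }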
[HasStdExtZbbOrZbp] in def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>; let Predicates = [HasStdExtZbs] in { -def SBCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "sbclri">, Sched<[]>; -def SBSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "sbseti">, Sched<[]>; -def SBINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "sbinvi">, Sched<[]>; -def SBEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "sbexti">, Sched<[]>; +def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">, Sched<[]>; +def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">, Sched<[]>; +def BINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "binvi">, Sched<[]>; +def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">, Sched<[]>; } // Predicates = [HasStdExtZbs] let Predicates = [HasStdExtZbp] in { @@ -207,7 +247,7 @@ def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0010011>, "clz">, Sched<[]>; def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0010011>, "ctz">, Sched<[]>; -def PCNT : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "pcnt">, +def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "cpop">, Sched<[]>; } // Predicates = [HasStdExtZbb] @@ -256,8 +296,8 @@ def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">, Sched<[]>; let Predicates = [HasStdExtZbb] in { def MIN : ALU_rr<0b0000101, 0b100, "min">, Sched<[]>; -def MAX : ALU_rr<0b0000101, 0b101, "max">, Sched<[]>; -def MINU : ALU_rr<0b0000101, 0b110, "minu">, Sched<[]>; +def MINU : ALU_rr<0b0000101, 0b101, "minu">, Sched<[]>; +def MAX : ALU_rr<0b0000101, 0b110, "max">, Sched<[]>; def MAXU : ALU_rr<0b0000101, 0b111, "maxu">, Sched<[]>; } // Predicates = [HasStdExtZbb] @@ -267,23 +307,23 @@ def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>; } // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbe] in { -def BDEP : ALU_rr<0b0100100, 0b110, "bdep">, Sched<[]>; -def BEXT : ALU_rr<0b0000100, 0b110, "bext">, Sched<[]>; +// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with +// bext in the 0.93 spec. 
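As a reading aid for the renamed pair defined just below: bcompress gathers the rs1 bits selected by the rs2 mask into the low bits of the result (the x86 PEXT analogue), and bdecompress is the inverse scatter (PDEP). A minimal C++ reference, written for this note rather than taken from LLVM or the spec:

#include <cstdint>
#include <cstdio>

// Gather the bits of x selected by mask into the low bits of the result.
static uint32_t bcompress32(uint32_t x, uint32_t mask) {
  uint32_t result = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < 32; ++i)
    if (mask & (1u << i))
      result |= ((x >> i) & 1u) << j++;
  return result;
}

int main() {
  // Mask 0x0F0F keeps nibbles 0 and 2 of 0xABCD (0xD and 0xB) -> bd.
  printf("%x\n", bcompress32(0xABCD, 0x0F0F));
  return 0;
}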
+def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>; +def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>; } // Predicates = [HasStdExtZbe] -let Predicates = [HasStdExtZbbOrZbp] in { +let Predicates = [HasStdExtZbp] in { def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>; def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>; -} // Predicates = [HasStdExtZbbOrZbp] +def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>; +} // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbm, IsRV64] in { def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>; def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>; } // Predicates = [HasStdExtZbm, IsRV64] -let Predicates = [HasStdExtZbbOrZbp] in -def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>; - let Predicates = [HasStdExtZbf] in def BFP : ALU_rr<0b0100100, 0b111, "bfp">, Sched<[]>; @@ -292,18 +332,12 @@ def SHFLI : RVBShfl_ri<0b000010, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>; def UNSHFLI : RVBShfl_ri<0b000010, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>; } // Predicates = [HasStdExtZbp] -let Predicates = [HasStdExtZbb, IsRV64] in { -def ADDIWU : RVBALUW_ri<0b100, "addiwu">, Sched<[]>; -def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slliu.w">, Sched<[]>; -def ADDWU : ALUW_rr<0b0000101, 0b000, "addwu">, Sched<[]>; -def SUBWU : ALUW_rr<0b0100101, 0b000, "subwu">, Sched<[]>; -def ADDUW : ALUW_rr<0b0000100, 0b000, "addu.w">, Sched<[]>; -def SUBUW : ALUW_rr<0b0100100, 0b000, "subu.w">, Sched<[]>; -} // Predicates = [HasStdExtZbb, IsRV64] - -let Predicates = [HasStdExtZbb, IsRV64] in { -def SLOW : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>; -def SROW : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>; +let Predicates = [HasStdExtZba, IsRV64] in { +def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">, Sched<[]>; +def ADDUW : ALUW_rr<0b0000100, 0b000, "add.uw">, Sched<[]>; +def SH1ADDUW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">, Sched<[]>; +def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[]>; +def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[]>; } // Predicates = [HasStdExtZbb, IsRV64] let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { @@ -312,10 +346,10 @@ def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>; } // Predicates = [HasStdExtZbbOrZbp, IsRV64] let Predicates = [HasStdExtZbs, IsRV64] in { -def SBCLRW : ALUW_rr<0b0100100, 0b001, "sbclrw">, Sched<[]>; -def SBSETW : ALUW_rr<0b0010100, 0b001, "sbsetw">, Sched<[]>; -def SBINVW : ALUW_rr<0b0110100, 0b001, "sbinvw">, Sched<[]>; -def SBEXTW : ALUW_rr<0b0100100, 0b101, "sbextw">, Sched<[]>; +def BCLRW : ALUW_rr<0b0100100, 0b001, "bclrw">, Sched<[]>; +def BSETW : ALUW_rr<0b0010100, 0b001, "bsetw">, Sched<[]>; +def BINVW : ALUW_rr<0b0110100, 0b001, "binvw">, Sched<[]>; +def BEXTW : ALUW_rr<0b0100100, 0b101, "bextw">, Sched<[]>; } // Predicates = [HasStdExtZbs, IsRV64] let Predicates = [HasStdExtZbp, IsRV64] in { @@ -323,20 +357,19 @@ def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>; def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>; } // Predicates = [HasStdExtZbp, IsRV64] -let Predicates = [HasStdExtZbb, IsRV64] in { -def SLOIW : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>; -def SROIW : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>; -} // Predicates = [HasStdExtZbb, IsRV64] +let Predicates = [HasStdExtZbp, IsRV64] in { +def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>; +} // Predicates = [HasStdExtZbp, IsRV64] let Predicates = 
[HasStdExtZbbOrZbp, IsRV64] in def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>; let Predicates = [HasStdExtZbs, IsRV64] in { -def SBCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "sbclriw">, +def BCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "bclriw">, Sched<[]>; -def SBSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "sbsetiw">, +def BSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "bsetiw">, Sched<[]>; -def SBINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "sbinviw">, +def BINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "binviw">, Sched<[]>; } // Predicates = [HasStdExtZbs, IsRV64] @@ -359,34 +392,77 @@ def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0011011>, "clzw">, Sched<[]>; def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0011011>, "ctzw">, Sched<[]>; -def PCNTW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>, - "pcntw">, Sched<[]>; +def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>, + "cpopw">, Sched<[]>; } // Predicates = [HasStdExtZbb, IsRV64] -let Predicates = [HasStdExtZbc, IsRV64] in { -def CLMULW : ALUW_rr<0b0000101, 0b001, "clmulw">, Sched<[]>; -def CLMULRW : ALUW_rr<0b0000101, 0b010, "clmulrw">, Sched<[]>; -def CLMULHW : ALUW_rr<0b0000101, 0b011, "clmulhw">, Sched<[]>; -} // Predicates = [HasStdExtZbc, IsRV64] - let Predicates = [HasStdExtZbp, IsRV64] in { def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>; def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>; } // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbe, IsRV64] in { -def BDEPW : ALUW_rr<0b0100100, 0b110, "bdepw">, Sched<[]>; -def BEXTW : ALUW_rr<0b0000100, 0b110, "bextw">, Sched<[]>; +// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with +// bextw in the 0.93 spec. +def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>; +def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>; } // Predicates = [HasStdExtZbe, IsRV64] -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +let Predicates = [HasStdExtZbp, IsRV64] in { def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>; def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>; -} // Predicates = [HasStdExtZbbOrZbp, IsRV64] +} // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbf, IsRV64] in def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">, Sched<[]>; +let Predicates = [HasStdExtZbbOrZbp, IsRV32] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def ZEXTH_RV32 : RVInstR<0b0000100, 0b100, OPC_OP, (outs GPR:$rd), + (ins GPR:$rs1), "zext.h", "$rd, $rs1">, Sched<[]> { + let rs2 = 0b00000; +} +} // Predicates = [HasStdExtZbbOrZbp, IsRV32] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd), + (ins GPR:$rs1), "zext.h", "$rd, $rs1">, Sched<[]> { + let rs2 = 0b00000; +} +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] + +// We treat rev8 and orc.b as standalone instructions even though they use a +// portion of the encodings for grevi and gorci. This allows us to support only +// those encodings when only Zbb is enabled. We do this even when grevi and +// gorci are available with Zbp. Trying to use 'HasStdExtZbb, NotHasStdExtZbp' +// causes diagnostics to suggest that Zbp rather than Zbb is required for rev8 +// or gorci. Since Zbb is closer to being finalized than Zbp this will be +// misleading to users. 
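To make the encoding overlap described above concrete: grev and gorc are ladders of block swaps and block ORs, and rev8/orc.b are the two rungs that Zbb keeps. A C++ sketch of the 0.93-draft semantics (my reading of the draft, not code from this patch), ahead of the rev8 and orc.b definitions that follow:

#include <cstdint>
#include <cstdio>

// One grev stage per set bit of the control value: swap adjacent blocks
// of width 1, 2, 4, 8, 16.
static uint32_t grev32(uint32_t x, unsigned k) {
  if (k & 1)  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);
  if (k & 2)  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);
  if (k & 4)  x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);
  if (k & 8)  x = ((x & 0x00FF00FFu) << 8) | ((x >> 8) & 0x00FF00FFu);
  if (k & 16) x = (x << 16) | (x >> 16);
  return x;
}

// gorc is the same ladder, but each stage ORs instead of swapping.
static uint32_t gorc32(uint32_t x, unsigned k) {
  if (k & 1)  x |= ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);
  if (k & 2)  x |= ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);
  if (k & 4)  x |= ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);
  if (k & 8)  x |= ((x & 0x00FF00FFu) << 8) | ((x >> 8) & 0x00FF00FFu);
  if (k & 16) x |= (x << 16) | (x >> 16);
  return x;
}

int main() {
  // rev8 on RV32 is grev with control 24: a full byte swap.
  printf("%08x\n", grev32(0x11223344u, 24)); // 44332211
  // orc.b is gorc with control 7: each nonzero byte saturates to 0xFF.
  printf("%08x\n", gorc32(0x00103000u, 7));  // 00ffff00
  return 0;
}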
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), + "rev8", "$rd, $rs1">, Sched<[]> { + let imm12 = { 0b01101, 0b0011000 }; +} +} // Predicates = [HasStdExtZbbOrZbp, IsRV32] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), + "rev8", "$rd, $rs1">, Sched<[]> { + let imm12 = { 0b01101, 0b0111000 }; +} +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] + +let Predicates = [HasStdExtZbbOrZbp] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), + "orc.b", "$rd, $rs1">, Sched<[]> { + let imm12 = { 0b00101, 0b0000111 }; +} +} // Predicates = [HasStdExtZbbOrZbp] + //===----------------------------------------------------------------------===// // Future compressed instructions //===----------------------------------------------------------------------===// @@ -415,208 +491,123 @@ def C_NOT : RVBInstC<0b00, "c.not">, Sched<[]>; def C_NEG : RVBInstC<0b01, "c.neg">, Sched<[]>; } // DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC] -let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] in +let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in def C_ZEXTW : RVBInstC<0b10, "c.zext.w">, Sched<[]>; //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZbb, IsRV32] in { -def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF)>; -def : InstAlias<"zext.h $rd, $rs", (PACK GPR:$rd, GPR:$rs, X0)>; -} // Predicates = [HasStdExtZbb, IsRV32] - -let Predicates = [HasStdExtZbb, IsRV64] in { -def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF)>; -def : InstAlias<"zext.h $rd, $rs", (PACKW GPR:$rd, GPR:$rs, X0)>; -def : InstAlias<"zext.w $rd, $rs", (PACK GPR:$rd, GPR:$rs, X0)>; -} // Predicates = [HasStdExtZbb, IsRV64] +let Predicates = [HasStdExtZba, IsRV64] in { +// NOTE: The 0.93 spec shows zext.w as an alias of pack/packw. It has been +// changed to add.uw in a draft after 0.94. 
+def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>; +} -let Predicates = [HasStdExtZbbOrZbp] in { -def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>, - Sched<[]>; -def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>, - Sched<[]>; -def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>, - Sched<[]>; -def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>, - Sched<[]>; -def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>, - Sched<[]>; -def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>, - Sched<[]>; -def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>, - Sched<[]>; -def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>, - Sched<[]>; -def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>, - Sched<[]>; -def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>, - Sched<[]>; - -def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>, - Sched<[]>; -def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>, - Sched<[]>; -def : InstAlias<"zip2.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0010)>, - Sched<[]>; -def : InstAlias<"unzip2.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0010)>, - Sched<[]>; -def : InstAlias<"zip.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0011)>, - Sched<[]>; -def : InstAlias<"unzip.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0011)>, - Sched<[]>; -def : InstAlias<"zip4.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0100)>, - Sched<[]>; -def : InstAlias<"unzip4.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0100)>, - Sched<[]>; -def : InstAlias<"zip2.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0110)>, - Sched<[]>; -def : InstAlias<"unzip2.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0110)>, - Sched<[]>; -def : InstAlias<"zip.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0111)>, - Sched<[]>; -def : InstAlias<"unzip.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0111)>, - Sched<[]>; - -def : InstAlias<"orc.p $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00001)>, - Sched<[]>; -def : InstAlias<"orc2.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00010)>, - Sched<[]>; -def : InstAlias<"orc.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00011)>, - Sched<[]>; -def : InstAlias<"orc4.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00100)>, - Sched<[]>; -def : InstAlias<"orc2.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00110)>, - Sched<[]>; -def : InstAlias<"orc.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00111)>, - Sched<[]>; -def : InstAlias<"orc8.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01000)>, - Sched<[]>; -def : InstAlias<"orc4.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01100)>, - Sched<[]>; -def : InstAlias<"orc2.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01110)>, - Sched<[]>; -def : InstAlias<"orc.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01111)>, - Sched<[]>; -} // Predicates = [HasStdExtZbbOrZbp] +let Predicates = [HasStdExtZbp] in { +def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>; +def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>; +def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>; +def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>; +def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>; +def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>; +def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>; +def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>; +def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 
0b01110)>; +def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>; + +def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>; +def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>; +def : InstAlias<"zip2.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0010)>; +def : InstAlias<"unzip2.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0010)>; +def : InstAlias<"zip.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0011)>; +def : InstAlias<"unzip.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0011)>; +def : InstAlias<"zip4.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0100)>; +def : InstAlias<"unzip4.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0100)>; +def : InstAlias<"zip2.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0110)>; +def : InstAlias<"unzip2.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0110)>; +def : InstAlias<"zip.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0111)>; +def : InstAlias<"unzip.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0111)>; + +def : InstAlias<"orc.p $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00001)>; +def : InstAlias<"orc2.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00010)>; +def : InstAlias<"orc.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00011)>; +def : InstAlias<"orc4.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00100)>; +def : InstAlias<"orc2.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00110)>; +// orc.b is considered an instruction rather than an alias. +def : InstAlias<"orc8.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01000)>; +def : InstAlias<"orc4.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01100)>; +def : InstAlias<"orc2.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01110)>; +def : InstAlias<"orc.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01111)>; +} // Predicates = [HasStdExtZbp] -let Predicates = [HasStdExtZbbOrZbp, IsRV32] in { -def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b10000)>, Sched<[]>; -def : InstAlias<"rev8 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11000)>, Sched<[]>; -def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11100)>, Sched<[]>; -def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11110)>, Sched<[]>; -def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11111)>, Sched<[]>; - -def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1000)>, - Sched<[]>; -def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1000)>, - Sched<[]>; -def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>, - Sched<[]>; -def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>, - Sched<[]>; -def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>, - Sched<[]>; -def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>, - Sched<[]>; -def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>, - Sched<[]>; -def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>, - Sched<[]>; - -def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>, Sched<[]>; -def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>, Sched<[]>; -def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11100)>, Sched<[]>; -def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11110)>, Sched<[]>; -def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11111)>, Sched<[]>; -} // Predicates = [HasStdExtZbbOrZbp, IsRV32] +let Predicates = [HasStdExtZbp, IsRV32] in { +def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b10000)>; +// rev8 is considered an instruction rather than an alias. 
+def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11100)>; +def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11110)>; +def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11111)>; + +def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1000)>; +def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1000)>; +def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>; +def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>; +def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>; +def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>; +def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>; +def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>; + +def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>; +def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>; +def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11100)>; +def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11110)>; +def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11111)>; +} // Predicates = [HasStdExtZbp, IsRV32] -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { -def : InstAlias<"rev16.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b010000)>, - Sched<[]>; -def : InstAlias<"rev8.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011000)>, - Sched<[]>; -def : InstAlias<"rev4.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011100)>, - Sched<[]>; -def : InstAlias<"rev2.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011110)>, - Sched<[]>; -def : InstAlias<"rev.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011111)>, - Sched<[]>; -def : InstAlias<"rev32 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b100000)>, - Sched<[]>; -def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b110000)>, - Sched<[]>; -def : InstAlias<"rev8 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111000)>, - Sched<[]>; -def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111100)>, - Sched<[]>; -def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111110)>, - Sched<[]>; -def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111111)>, - Sched<[]>; - -def : InstAlias<"zip8.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01000)>, - Sched<[]>; -def : InstAlias<"unzip8.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01000)>, - Sched<[]>; -def : InstAlias<"zip4.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01100)>, - Sched<[]>; -def : InstAlias<"unzip4.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01100)>, - Sched<[]>; -def : InstAlias<"zip2.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01110)>, - Sched<[]>; -def : InstAlias<"unzip2.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01110)>, - Sched<[]>; -def : InstAlias<"zip.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01111)>, - Sched<[]>; -def : InstAlias<"unzip.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01111)>, - Sched<[]>; -def : InstAlias<"zip16 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b10000)>, - Sched<[]>; -def : InstAlias<"unzip16 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b10000)>, - Sched<[]>; -def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11000)>, - Sched<[]>; -def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11000)>, - Sched<[]>; -def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11100)>, - Sched<[]>; -def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11100)>, - Sched<[]>; -def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11110)>, - Sched<[]>; -def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11110)>, - Sched<[]>; -def : 
InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11111)>, - Sched<[]>; -def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11111)>, - Sched<[]>; - -def : InstAlias<"orc16.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b010000)>, - Sched<[]>; -def : InstAlias<"orc8.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011000)>, - Sched<[]>; -def : InstAlias<"orc4.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011100)>, - Sched<[]>; -def : InstAlias<"orc2.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011110)>, - Sched<[]>; -def : InstAlias<"orc.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011111)>, - Sched<[]>; -def : InstAlias<"orc32 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b100000)>, - Sched<[]>; -def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b110000)>, - Sched<[]>; -def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111000)>, - Sched<[]>; -def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111100)>, - Sched<[]>; -def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111110)>, - Sched<[]>; -def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111111)>, - Sched<[]>; -} // Predicates = [HasStdExtZbbOrZbp, IsRV64] +let Predicates = [HasStdExtZbp, IsRV64] in { +def : InstAlias<"rev16.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b010000)>; +def : InstAlias<"rev8.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011000)>; +def : InstAlias<"rev4.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011100)>; +def : InstAlias<"rev2.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011110)>; +def : InstAlias<"rev.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011111)>; +def : InstAlias<"rev32 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b100000)>; +def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b110000)>; +// rev8 is considered an instruction rather than an alias. +def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111100)>; +def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111110)>; +def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111111)>; + +def : InstAlias<"zip8.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01000)>; +def : InstAlias<"unzip8.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01000)>; +def : InstAlias<"zip4.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01100)>; +def : InstAlias<"unzip4.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01100)>; +def : InstAlias<"zip2.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01110)>; +def : InstAlias<"unzip2.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01110)>; +def : InstAlias<"zip.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01111)>; +def : InstAlias<"unzip.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01111)>; +def : InstAlias<"zip16 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b10000)>; +def : InstAlias<"unzip16 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b10000)>; +def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11000)>; +def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11000)>; +def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11100)>; +def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11100)>; +def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11110)>; +def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11110)>; +def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11111)>; +def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11111)>; + +def : InstAlias<"orc16.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b010000)>; +def : InstAlias<"orc8.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011000)>; +def : InstAlias<"orc4.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011100)>; +def : InstAlias<"orc2.w $rd, $rs", (GORCI 
GPR:$rd, GPR:$rs, 0b011110)>; +def : InstAlias<"orc.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011111)>; +def : InstAlias<"orc32 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b100000)>; +def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b110000)>; +def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111000)>; +def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111100)>; +def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111110)>; +def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111111)>; +} // Predicates = [HasStdExtZbp, IsRV64] //===----------------------------------------------------------------------===// // Compressed Instruction patterns @@ -628,22 +619,14 @@ def : CompressPat<(SUB GPRC:$rs1, X0, GPRC:$rs1), (C_NEG GPRC:$rs1)>; } // Predicates = [HasStdExtZbproposedc, HasStdExtC] -let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] in { -def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0), +let Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in { +def : CompressPat<(ADDUW GPRC:$rs1, GPRC:$rs1, X0), (C_ZEXTW GPRC:$rs1)>; } // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64] //===----------------------------------------------------------------------===// // Codegen patterns //===----------------------------------------------------------------------===// -def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>; -def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>; -def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>; -def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>; -def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>; -def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>; -def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>; -def FSRIWPat : ComplexPattern<i64, 3, "SelectFSRIW", [sext_inreg]>; let Predicates = [HasStdExtZbbOrZbp] in { def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>; @@ -651,221 +634,180 @@ def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>; def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbbOrZbp] -let Predicates = [HasStdExtZbb] in { -def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1), - (SLO GPR:$rs1, GPR:$rs2)>; -def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1), - (SRO GPR:$rs1, GPR:$rs2)>; -} // Predicates = [HasStdExtZbb] - let Predicates = [HasStdExtZbbOrZbp] in { def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>; -def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>; def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>; -def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbbOrZbp] -let Predicates = [HasStdExtZbs, IsRV32] in -def : Pat<(and (xor (shl 1, (and GPR:$rs2, 31)), -1), GPR:$rs1), - (SBCLR GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbs, IsRV64] in -def : Pat<(and (xor (shl 1, (and GPR:$rs2, 63)), -1), GPR:$rs1), - (SBCLR GPR:$rs1, GPR:$rs2)>; - -let Predicates = [HasStdExtZbs] in -def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (SBCLR GPR:$rs1, GPR:$rs2)>; - -let Predicates = [HasStdExtZbs, IsRV32] in -def : Pat<(or (shl 1, (and GPR:$rs2, 31)), GPR:$rs1), - (SBSET GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbs, IsRV64] in -def : Pat<(or (shl 1, (and GPR:$rs2, 63)), GPR:$rs1), - (SBSET GPR:$rs1, GPR:$rs2)>; - -let Predicates = [HasStdExtZbs, IsRV32] in -def : Pat<(xor (shl 1, (and GPR:$rs2, 31)), 
GPR:$rs1), - (SBINV GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbs, IsRV64] in -def : Pat<(xor (shl 1, (and GPR:$rs2, 63)), GPR:$rs1), - (SBINV GPR:$rs1, GPR:$rs2)>; - -let Predicates = [HasStdExtZbs, IsRV32] in -def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 31)), 1), - (SBEXT GPR:$rs1, GPR:$rs2)>; - -let Predicates = [HasStdExtZbs, IsRV64] in -def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 63)), 1), - (SBEXT GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbs] in { +def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1), + (BCLR GPR:$rs1, GPR:$rs2)>; +def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (BCLR GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (shiftop<shl> 1, GPR:$rs2), GPR:$rs1), + (BSET GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor (shiftop<shl> 1, GPR:$rs2), GPR:$rs1), + (BINV GPR:$rs1, GPR:$rs2)>; +def : Pat<(and (shiftop<srl> GPR:$rs1, GPR:$rs2), 1), + (BEXT GPR:$rs1, GPR:$rs2)>; + +def : Pat<(shiftop<shl> 1, GPR:$rs2), + (BSET X0, GPR:$rs2)>; + +def : Pat<(and GPR:$rs1, BCLRMask:$mask), + (BCLRI GPR:$rs1, (BCLRXForm imm:$mask))>; +def : Pat<(or GPR:$rs1, BSETINVMask:$mask), + (BSETI GPR:$rs1, (BSETINVXForm imm:$mask))>; +def : Pat<(xor GPR:$rs1, BSETINVMask:$mask), + (BINVI GPR:$rs1, (BSETINVXForm imm:$mask))>; -let Predicates = [HasStdExtZbb] in { -def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt), - (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>; -def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt), - (SROI GPR:$rs1, uimmlog2xlen:$shamt)>; -} // Predicates = [HasStdExtZbb] +def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)), + (BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>; +} -// There's no encoding for roli in the current version of the 'B' extension -// (v0.92) as it can be implemented with rori by negating the immediate. -// For this reason we pattern-match only against rori[w]. +// There's no encoding for roli in the 'B' extension as it can be +// implemented with rori by negating the immediate. let Predicates = [HasStdExtZbbOrZbp] in { def : Pat<(rotr GPR:$rs1, uimmlog2xlen:$shamt), (RORI GPR:$rs1, uimmlog2xlen:$shamt)>; +def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt), + (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>; +} -// We don't pattern-match sbclri[w], sbseti[w], sbinvi[w] because they are -// pattern-matched by simple andi, ori, and xori.
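Two of the rewrites above are easy to lose in the noise. First, rotl by an immediate is matched as rori with XLen minus the amount (the ImmSubFromXLen transform). Second, the new BCLRI/BSETI/BINVI patterns fire only for masks that ANDI/ORI/XORI cannot encode. A C++ paraphrase of that second decision, with an invented helper name selectBclri, purely as a sketch:

#include <cstdint>
#include <cstdio>

// Roughly what BCLRMask + BCLRXForm decide together: reject masks that
// ANDI could handle (signed 12-bit), require exactly one clear bit, and
// return its index (countTrailingOnes of the mask).
static bool selectBclri(int64_t mask, unsigned &shamt) {
  if (mask >= -2048 && mask < 2048)         // isInt<12>: ANDI wins
    return false;
  uint64_t inv = ~(uint64_t)mask;           // the single 0 bit, inverted
  if (inv == 0 || (inv & (inv - 1)) != 0)   // not exactly one clear bit
    return false;
  shamt = (unsigned)__builtin_ctzll(inv);
  return true;
}

int main() {
  unsigned shamt;
  if (selectBclri(~(1LL << 45), shamt))
    printf("bclri rd, rs, %u\n", shamt);    // bclri rd, rs, 45
  return 0;
}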
-let Predicates = [HasStdExtZbs] in -def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)), - (SBEXTI GPR:$rs1, uimmlog2xlen:$shamt)>; +def riscv_grevi : SDNode<"RISCVISD::GREVI", SDTIntBinOp, []>; +def riscv_greviw : SDNode<"RISCVISD::GREVIW", SDTIntBinOp, []>; +def riscv_gorci : SDNode<"RISCVISD::GORCI", SDTIntBinOp, []>; +def riscv_gorciw : SDNode<"RISCVISD::GORCIW", SDTIntBinOp, []>; -let Predicates = [HasStdExtZbp, IsRV32] in { -def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1), - (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))), - (GORCI GPR:$rs1, (i32 1))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333)), GPR:$rs1), - (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC))), - (GORCI GPR:$rs1, (i32 2))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F)), GPR:$rs1), - (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0))), - (GORCI GPR:$rs1, (i32 4))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF)), GPR:$rs1), - (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00))), - (GORCI GPR:$rs1, (i32 8))>; -def : Pat<(or (or (srl GPR:$rs1, (i32 16)), GPR:$rs1), - (shl GPR:$rs1, (i32 16))), - (GORCI GPR:$rs1, (i32 16))>; -} // Predicates = [HasStdExtZbp, IsRV32] +let Predicates = [HasStdExtZbp] in { +def : Pat<(riscv_grevi GPR:$rs1, timm:$shamt), (GREVI GPR:$rs1, timm:$shamt)>; +def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>; -let Predicates = [HasStdExtZbp, IsRV64] in { -def : Pat<(or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA))), - (GORCI GPR:$rs1, (i64 1))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC))), - (GORCI GPR:$rs1, (i64 2))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0))), - (GORCI GPR:$rs1, (i64 4))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00))), - (GORCI GPR:$rs1, (i64 8))>; -def : Pat<(or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000))), - (GORCI GPR:$rs1, (i64 16))>; -def : Pat<(or (or (srl GPR:$rs1, (i64 32)), GPR:$rs1), - (shl GPR:$rs1, (i64 32))), - (GORCI GPR:$rs1, (i64 32))>; -} // Predicates = [HasStdExtZbp, IsRV64] +// We treat orc.b as a separate instruction, so match it directly. 
+def : Pat<(riscv_gorci GPR:$rs1, (XLenVT 7)), (ORCB GPR:$rs1)>; +} // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbp, IsRV32] in { -def : Pat<(or (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA)), - (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555))), - (GREVI GPR:$rs1, (i32 1))>; -def : Pat<(or (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC)), - (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333))), - (GREVI GPR:$rs1, (i32 2))>; -def : Pat<(or (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0)), - (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F))), - (GREVI GPR:$rs1, (i32 4))>; -def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)), - (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))), - (GREVI GPR:$rs1, (i32 8))>; -def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>; -def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))), - (GREVI GPR:$rs1, (i32 16))>; -def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>; -def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>; -def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>; +def : Pat<(rotr (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>; +def : Pat<(rotl (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>; + +// We treat rev8 as a separate instruction, so match it directly. +def : Pat<(riscv_grevi GPR:$rs1, (i32 24)), (REV8_RV32 GPR:$rs1)>; } // Predicates = [HasStdExtZbp, IsRV32] let Predicates = [HasStdExtZbp, IsRV64] in { -def : Pat<(or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA)), - (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555))), - (GREVI GPR:$rs1, (i64 1))>; -def : Pat<(or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC)), - (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333))), - (GREVI GPR:$rs1, (i64 2))>; -def : Pat<(or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0)), - (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F))), - (GREVI GPR:$rs1, (i64 4))>; -def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)), - (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF))), - (GREVI GPR:$rs1, (i64 8))>; -def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)), - (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))), - (GREVI GPR:$rs1, (i64 16))>; -def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))), - (GREVI GPR:$rs1, (i64 32))>; -def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>; -def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>; -def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>; +// We treat rev8 as a separate instruction, so match it directly. 
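The rotr/rotl-of-grevi patterns above encode a small identity worth spelling out: rotating the full byte swap (grevi 24 on RV32) by 16 bits swaps the bytes within each halfword, which is grevi 8. A quick check using the GCC/Clang byte-swap builtin (illustrative only), before the matching RV64 rev8 pattern below:

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t x, unsigned s) {
  return (x >> s) | (x << (32 - s));        // s in 1..31 here
}

int main() {
  uint32_t x = 0x11223344u;
  // bswap32 models grev(x, 24); rotating it by 16 gives grev(x, 8).
  printf("%08x\n", rotr32(__builtin_bswap32(x), 16)); // 22114433
  return 0;
}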
+def : Pat<(riscv_grevi GPR:$rs1, (i64 56)), (REV8_RV64 GPR:$rs1)>; } // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbt] in { -def : Pat<(or (and (xor GPR:$rs2, -1), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)), +def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)), (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1), + +def : Pat<(select (XLenVT (setne GPR:$rs2, 0)), GPR:$rs1, GPR:$rs3), (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(fshl GPR:$rs1, GPR:$rs2, GPR:$rs3), - (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(fshr GPR:$rs1, GPR:$rs2, GPR:$rs3), - (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(fshr GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt), - (FSRI GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>; +def : Pat<(select (XLenVT (seteq GPR:$rs2, 0)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (setne GPR:$x, simm12_plus1:$y)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq GPR:$x, simm12_plus1:$y)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>; +def : Pat<(select (XLenVT (setne GPR:$x, GPR:$y)), GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setuge GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setule GPR:$y, GPR:$x)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setge GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setle GPR:$y, GPR:$x)), GPR:$rs3, GPR:$rs1), + (CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select GPR:$rs2, GPR:$rs1, GPR:$rs3), + (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +} // Predicates = [HasStdExtZbt] + +// fshl and fshr concatenate their operands in the same order. fsr and fsl +// instructions use different orders. fshl will return its first operand for +// shift of zero, fshr will return its second operand. fsl and fsr both return +// $rs1 so the patterns need to have different operand orders. +// +// fshl and fshr only read the lower log2(xlen) bits of the shift amount, but +// fsl/fsr instructions read log2(xlen)+1 bits. DAG combine may have removed +// an AND mask on the shift amount that we need to add back to avoid a one in +// the extra bit. +// FIXME: If we can prove that the extra bit in the shift amount is zero, we +// don't need this mask. +let Predicates = [HasStdExtZbt, IsRV32] in { +def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2), + (FSL GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>; +def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2), + (FSR GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>; +} +let Predicates = [HasStdExtZbt, IsRV64] in { +def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2), + (FSL GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>; +def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2), + (FSR GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>; +} +let Predicates = [HasStdExtZbt] in { +def : Pat<(fshr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt), + (FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>; +// We can use FSRI for fshl by immediate if we subtract the immediate from +// XLen and swap the operands.
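A compact C++ model of the comment block above, using invented function names and my reading of the 0.93 draft for fsl; the matching fshl-by-immediate pattern continues right below this note:

#include <cstdint>
#include <cstdio>

// fshl/fshr as LLVM defines them on 32 bits: only the low 5 bits of the
// shift amount matter; a shift of 0 returns a (fshl) or b (fshr).
static uint32_t fshl32(uint32_t a, uint32_t b, unsigned s) {
  s &= 31;
  return s ? (a << s) | (b >> (32 - s)) : a;
}
static uint32_t fshr32(uint32_t a, uint32_t b, unsigned s) {
  s &= 31;
  return s ? (a << (32 - s)) | (b >> s) : b;
}

// fsl as I read the 0.93 draft: the shift amount has log2(xlen)+1 = 6
// bits, and bit 5 swaps the two inputs. That is the extra bit the
// (ANDI $rs2, 31) in the patterns above clears.
static uint32_t fsl32(uint32_t rs1, uint32_t rs3, unsigned rs2) {
  unsigned s = rs2 & 63;
  if (s >= 32) { uint32_t t = rs1; rs1 = rs3; rs3 = t; s -= 32; }
  return s ? (rs1 << s) | (rs3 >> (32 - s)) : rs1;
}

int main() {
  uint32_t a = 0x12345678u, b = 0x9ABCDEF0u;
  // With the extra bit masked off, fsl agrees with fshl...
  printf("%08x %08x\n", fshl32(a, b, 36), fsl32(a, b, 36 & 31));
  // ...and fshl by k equals fshr by 32-k, which is the FSRI trick below.
  printf("%08x %08x\n", fshl32(a, b, 4), fshr32(a, b, 32 - 4));
  return 0;
}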
+def : Pat<(fshl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt), + (FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>; } // Predicates = [HasStdExtZbt] let Predicates = [HasStdExtZbb] in { def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>; def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>; -def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>; +def : Pat<(ctpop GPR:$rs1), (CPOP GPR:$rs1)>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbb, IsRV32] in -def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>; -let Predicates = [HasStdExtZbb, IsRV64] in -def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>; - -let Predicates = [HasStdExtZbb, IsRV32] in -def : Pat<(sra (shl GPR:$rs1, (i32 16)), (i32 16)), (SEXTH GPR:$rs1)>; -let Predicates = [HasStdExtZbb, IsRV64] in -def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>; +let Predicates = [HasStdExtZbb] in { +def : Pat<(sext_inreg GPR:$rs1, i8), (SEXTB GPR:$rs1)>; +def : Pat<(sext_inreg GPR:$rs1, i16), (SEXTH GPR:$rs1)>; +} let Predicates = [HasStdExtZbb] in { def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>; -def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2), - (MIN GPR:$rs1, GPR:$rs2)>; def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>; -def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2), - (MAX GPR:$rs1, GPR:$rs2)>; def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>; -def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2), - (MINU GPR:$rs1, GPR:$rs2)>; def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>; -def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2), - (MAXU GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbb] -let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +let Predicates = [HasStdExtZbb, IsRV32] in { +def : Pat<(bswap GPR:$rs1), (REV8_RV32 GPR:$rs1)>; +} // Predicates = [HasStdExtZbb, IsRV32] + +let Predicates = [HasStdExtZbb, IsRV64] in { +def : Pat<(bswap GPR:$rs1), (REV8_RV64 GPR:$rs1)>; +} // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbp, IsRV32] in def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))), (PACK GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +let Predicates = [HasStdExtZbp, IsRV64] in def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))), (PACK GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +let Predicates = [HasStdExtZbp, IsRV32] in def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))), (PACKU GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +let Predicates = [HasStdExtZbp, IsRV64] in def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))), (PACKU GPR:$rs1, GPR:$rs2)>; -let Predicates = [HasStdExtZbbOrZbp] in +let Predicates = [HasStdExtZbp] in def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00), (and GPR:$rs1, 0x00FF)), (PACKH GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +def : Pat<(and GPR:$rs, 0x0000FFFF), (ZEXTH_RV32 GPR:$rs)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +def : Pat<(and GPR:$rs, 0x000000000000FFFF), (ZEXTH_RV64 GPR:$rs)>; +} + let Predicates = [HasStdExtZbp, IsRV32] in { def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)), (and GPR:$rs1, (i32 0xFF0000FF))), @@ -908,156 +850,115 @@ def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)), (SHFLI GPR:$rs1, 
(i64 1))>; } // Predicates = [HasStdExtZbp, IsRV64] -let Predicates = [HasStdExtZbb, IsRV64] in { -def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)), - (ADDIWU GPR:$rs, simm12:$simm12)>; -def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt), - (SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>; -def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)), - (ADDWU GPR:$rs1, GPR:$rs2)>; -def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)), - (SUBWU GPR:$rs1, GPR:$rs2)>; -def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))), +let Predicates = [HasStdExtZba] in { +def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), GPR:$rs2), + (SH1ADD GPR:$rs1, GPR:$rs2)>; +def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), GPR:$rs2), + (SH2ADD GPR:$rs1, GPR:$rs2)>; +def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), GPR:$rs2), + (SH3ADD GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZba] + +let Predicates = [HasStdExtZba, IsRV64] in { +def : Pat<(SLLIUWPat GPR:$rs1, uimm5:$shamt), + (SLLIUW GPR:$rs1, uimm5:$shamt)>; +def : Pat<(shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt), + (SLLIUW GPR:$rs1, uimm5:$shamt)>; +def : Pat<(add (and GPR:$rs1, (i64 0xFFFFFFFF)), GPR:$rs2), (ADDUW GPR:$rs1, GPR:$rs2)>; -def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))), - (SUBUW GPR:$rs1, GPR:$rs2)>; -def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1), - (SLOW GPR:$rs1, GPR:$rs2)>; -def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1), - (SROW GPR:$rs1, GPR:$rs2)>; -} // Predicates = [HasStdExtZbb, IsRV64] +def : Pat<(and GPR:$rs, 0x00000000FFFFFFFF), (ADDUW GPR:$rs, X0)>; + +def : Pat<(add (shl (and GPR:$rs1, (i64 0xFFFFFFFF)), (XLenVT 1)), GPR:$rs2), + (SH1ADDUW GPR:$rs1, GPR:$rs2)>; +def : Pat<(add (shl (and GPR:$rs1, (i64 0xFFFFFFFF)), (XLenVT 2)), GPR:$rs2), + (SH2ADDUW GPR:$rs1, GPR:$rs2)>; +def : Pat<(add (shl (and GPR:$rs1, (i64 0xFFFFFFFF)), (XLenVT 3)), GPR:$rs2), + (SH3ADDUW GPR:$rs1, GPR:$rs2)>; + +def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 1)), GPR:$rs2), + (SH1ADDUW GPR:$rs1, GPR:$rs2)>; +def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 2)), GPR:$rs2), + (SH2ADDUW GPR:$rs1, GPR:$rs2)>; +def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 3)), GPR:$rs2), + (SH3ADDUW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { -def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)), - (riscv_srlw (assertsexti32 GPR:$rs1), - (sub (i64 0), (assertsexti32 GPR:$rs2)))), +def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2), (ROLW GPR:$rs1, GPR:$rs2)>; -def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), - (sub (i64 0), (assertsexti32 GPR:$rs2))), - (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))), +def : Pat<(riscv_rorw GPR:$rs1, GPR:$rs2), (RORW GPR:$rs1, GPR:$rs2)>; +def : Pat<(riscv_rorw GPR:$rs1, uimm5:$rs2), + (RORIW GPR:$rs1, uimm5:$rs2)>; +def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), + (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; } // Predicates = [HasStdExtZbbOrZbp, IsRV64] let Predicates = [HasStdExtZbs, IsRV64] in { -def : Pat<(and (xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)), -1), - (assertsexti32 GPR:$rs1)), - (SBCLRW GPR:$rs1, GPR:$rs2)>; -def : Pat<(or (riscv_sllw 1, (assertsexti32 GPR:$rs2)), - (assertsexti32 GPR:$rs1)), - (SBSETW GPR:$rs1, GPR:$rs2)>; -def : Pat<(xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)), - (assertsexti32 GPR:$rs1)), - (SBINVW GPR:$rs1, GPR:$rs2)>; -def : Pat<(and (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)), - 1), - (SBEXTW GPR:$rs1, GPR:$rs2)>; -} // Predicates = 
[HasStdExtZbs, IsRV64] - -let Predicates = [HasStdExtZbb, IsRV64] in { -def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt), - (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>; -def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt), - (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>; -} // Predicates = [HasStdExtZbb, IsRV64] +def : Pat<(and (not (riscv_sllw 1, GPR:$rs2)), (assertsexti32 GPR:$rs1)), + (BCLRW GPR:$rs1, GPR:$rs2)>; +def : Pat<(sext_inreg (and (not (riscv_sllw 1, GPR:$rs2)), GPR:$rs1), i32), + (BCLRW GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (riscv_sllw 1, GPR:$rs2), (assertsexti32 GPR:$rs1)), + (BSETW GPR:$rs1, GPR:$rs2)>; +def : Pat<(sext_inreg (or (riscv_sllw 1, GPR:$rs2), GPR:$rs1), i32), + (BSETW GPR:$rs1, GPR:$rs2)>; +def : Pat<(xor (riscv_sllw 1, GPR:$rs2), (assertsexti32 GPR:$rs1)), + (BINVW GPR:$rs1, GPR:$rs2)>; +def : Pat<(sext_inreg (xor (riscv_sllw 1, GPR:$rs2), GPR:$rs1), i32), + (BINVW GPR:$rs1, GPR:$rs2)>; +def : Pat<(and (riscv_srlw GPR:$rs1, GPR:$rs2), 1), + (BEXTW GPR:$rs1, GPR:$rs2)>; + +def : Pat<(riscv_sllw 1, GPR:$rs2), + (BSETW X0, GPR:$rs2)>; + +def : Pat<(and (assertsexti32 GPR:$rs1), BCLRWMask:$mask), + (BCLRIW GPR:$rs1, (BCLRXForm imm:$mask))>; +def : Pat<(or (assertsexti32 GPR:$rs1), BSETINVWMask:$mask), + (BSETIW GPR:$rs1, (BSETINVXForm imm:$mask))>; +def : Pat<(xor (assertsexti32 GPR:$rs1), BSETINVWMask:$mask), + (BINVIW GPR:$rs1, (BSETINVXForm imm:$mask))>; -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in -def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt), - (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>; +} // Predicates = [HasStdExtZbs, IsRV64] let Predicates = [HasStdExtZbp, IsRV64] in { -def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA))), - i32), - (GORCIW GPR:$rs1, (i64 1))>; -def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC))), - i32), - (GORCIW GPR:$rs1, (i64 2))>; -def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0))), - i32), - (GORCIW GPR:$rs1, (i64 4))>; -def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00))), - i32), - (GORCIW GPR:$rs1, (i64 8))>; -def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF)), - GPR:$rs1), - (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000))), - i32), - (GORCIW GPR:$rs1, (i64 16))>; -def : Pat<(sext_inreg (or (or (srl (and GPR:$rs1, (i64 0xFFFF0000)), (i64 16)), - GPR:$rs1), - (shl GPR:$rs1, (i64 16))), i32), - (GORCIW GPR:$rs1, (i64 16))>; - -def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA)), - (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555))), - i32), - (GREVIW GPR:$rs1, (i64 1))>; -def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC)), - (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333))), - i32), - (GREVIW GPR:$rs1, (i64 2))>; -def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0)), - (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F))), - i32), - (GREVIW GPR:$rs1, (i64 4))>; -def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00)), - (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF))), - i32), - (GREVIW GPR:$rs1, (i64 8))>; -def : Pat<(sext_inreg (or (shl GPR:$rs1, (i64 16)), - (srl (and GPR:$rs1, 0xFFFF0000), (i64 16))), i32), - (GREVIW GPR:$rs1, (i64 16))>; -def : Pat<(sra (bswap GPR:$rs1), 
(i64 32)), (GREVIW GPR:$rs1, (i64 24))>; -def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>; +def : Pat<(riscv_rorw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>; +def : Pat<(riscv_rolw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>; +def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>; +def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>; } // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbt, IsRV64] in { -def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31), - (i64 0), - (i64 17), - (assertsexti32 GPR:$rs1), - (or (riscv_sllw (assertsexti32 GPR:$rs1), - (and (assertsexti32 GPR:$rs3), 31)), - (riscv_srlw (assertsexti32 GPR:$rs2), - (sub (i64 32), - (assertsexti32 GPR:$rs3))))), +def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2), (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31), - (i64 0), - (i64 17), - (assertsexti32 GPR:$rs2), - (or (riscv_sllw (assertsexti32 GPR:$rs1), - (sub (i64 32), - (assertsexti32 GPR:$rs3))), - (riscv_srlw (assertsexti32 GPR:$rs2), - (and (assertsexti32 GPR:$rs3), 31)))), +def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, GPR:$rs2), (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>; -def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt), - (FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>; +def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, uimm5:$shamt), + (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>; +def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt), + (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>; } // Predicates = [HasStdExtZbt, IsRV64] let Predicates = [HasStdExtZbb, IsRV64] in { def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)), (CLZW GPR:$rs1)>; -// We don't pattern-match CTZW here as it has the same pattern and result as -// RV64 CTZ -def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>; +// computeKnownBits can't figure out that the and mask on the add result is +// unnecessary so we need to pattern match it away. 
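Before the count patterns that follow, the three W-form identities being matched, restated numerically. The builtins stand in for the RV64 clz/ctz/cpop semantics; this is a sketch, not LLVM code:

#include <cstdint>
#include <cstdio>

// clzw is a 64-bit clz of the zero-extended low word minus 32 (nonzero
// input assumed, since __builtin_clzll(0) is undefined); ctzw guards a
// zero input by planting bit 32, exactly like the (or $rs1, 0x100000000)
// pattern below; cpopw counts the zero-extended low word.
static int clzw(uint64_t x)  { return __builtin_clzll(x & 0xFFFFFFFFu) - 32; }
static int ctzw(uint64_t x)  { return __builtin_ctzll(x | 0x100000000ULL); }
static int cpopw(uint64_t x) { return __builtin_popcountll(x & 0xFFFFFFFFu); }

int main() {
  printf("%d %d %d\n", clzw(0x00010000u), ctzw(0),
         cpopw(0xFFFF0000FFFF0000ULL)); // 15 32 16
  return 0;
}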
+def : Pat<(and (add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)), + (i64 0xFFFFFFFF)), + (CLZW GPR:$rs1)>; +def : Pat<(cttz (or GPR:$rs1, (i64 0x100000000))), + (CTZW GPR:$rs1)>; +def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (CPOPW GPR:$rs1)>; } // Predicates = [HasStdExtZbb, IsRV64] -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { -def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)), - (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)), +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(sext_inreg (or (shl GPR:$rs2, (i64 16)), + (and GPR:$rs1, 0x000000000000FFFF)), i32), (PACKW GPR:$rs1, GPR:$rs2)>; def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), - (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000), - (i64 16))), + (SRLIWPat GPR:$rs1, (i64 16))), (PACKUW GPR:$rs1, GPR:$rs2)>; -} // Predicates = [HasStdExtZbbOrZbp, IsRV64] +} // Predicates = [HasStdExtZbp, IsRV64] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index f68767847ade..30df455c1927 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -140,6 +140,7 @@ def uimm8_lsb000 : Operand<XLenVT>, def simm9_lsb0 : Operand<OtherVT>, ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; + let PrintMethod = "printBranchOperand"; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; let MCOperandPredicate = [{ @@ -149,6 +150,7 @@ def simm9_lsb0 : Operand<OtherVT>, return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_PCREL"; } // A 9-bit unsigned immediate where the least significant three bits are zero. 
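For the compressed-branch operand touched above: simm9_lsb0 is constrained by isShiftedInt<8, 1>, an even offset representable in 9 signed bits, which gives c.beqz/c.bnez a reach of -256 to +254 bytes. A one-line rendering of that predicate (assumed equivalent to the LLVM helper):

#include <cstdint>
#include <cstdio>

// isShiftedInt<8, 1>: a 9-bit signed value whose low bit is zero.
static bool isShiftedInt8_1(int64_t imm) {
  return (imm & 1) == 0 && imm >= -256 && imm <= 254;
}

int main() {
  printf("%d %d %d\n", isShiftedInt8_1(-256), isShiftedInt8_1(254),
         isShiftedInt8_1(255)); // 1 1 0
  return 0;
}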
@@ -200,6 +202,7 @@ def simm10_lsb0000nonzero : Operand<XLenVT>, def simm12_lsb0 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; + let PrintMethod = "printBranchOperand"; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; let MCOperandPredicate = [{ @@ -208,6 +211,7 @@ def simm12_lsb0 : Operand<XLenVT>, return isShiftedInt<11, 1>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_PCREL"; } //===----------------------------------------------------------------------===// @@ -239,7 +243,7 @@ class CStore_rri<bits<3> funct3, string OpcodeStr, OpcodeStr, "$rs2, ${imm}(${rs1})">; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class Bcz<bits<3> funct3, string OpcodeStr, PatFrag CondOp, +class Bcz<bits<3> funct3, string OpcodeStr, RegisterClass cls> : RVInst16CB<funct3, 0b01, (outs), (ins cls:$rs1, simm9_lsb0:$imm), OpcodeStr, "$rs1, $imm"> { @@ -469,8 +473,8 @@ def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset), let isBarrier=1; } -def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>, Sched<[WriteJmp]>; -def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>, Sched<[WriteJmp]>; +def C_BEQZ : Bcz<0b110, "c.beqz", GPRC>, Sched<[WriteJmp]>; +def C_BNEZ : Bcz<0b111, "c.bnez", GPRC>, Sched<[WriteJmp]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), @@ -519,7 +523,8 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), let rs2 = 0; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1, + isAsCheapAsAMove = 1 in def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2), "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]>; @@ -744,6 +749,7 @@ class CompressPat<dag input, dag output> { dag Input = input; dag Output = output; list<Predicate> Predicates = []; + bit isCompressOnly = false; } // Patterns are defined in the same order the compressed instructions appear @@ -829,25 +835,30 @@ def : CompressPat<(SUB GPRC:$rs1, GPRC:$rs1, GPRC:$rs2), (C_SUB GPRC:$rs1, GPRC:$rs2)>; def : CompressPat<(XOR GPRC:$rs1, GPRC:$rs1, GPRC:$rs2), (C_XOR GPRC:$rs1, GPRC:$rs2)>; +let isCompressOnly = true in def : CompressPat<(XOR GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), (C_XOR GPRC:$rs1, GPRC:$rs2)>; def : CompressPat<(OR GPRC:$rs1, GPRC:$rs1, GPRC:$rs2), (C_OR GPRC:$rs1, GPRC:$rs2)>; +let isCompressOnly = true in def : CompressPat<(OR GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), (C_OR GPRC:$rs1, GPRC:$rs2)>; def : CompressPat<(AND GPRC:$rs1, GPRC:$rs1, GPRC:$rs2), (C_AND GPRC:$rs1, GPRC:$rs2)>; +let isCompressOnly = true in def : CompressPat<(AND GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), (C_AND GPRC:$rs1, GPRC:$rs2)>; } // Predicates = [HasStdExtC] let Predicates = [HasStdExtC, IsRV64] in { +let isCompressOnly = true in def : CompressPat<(ADDIW GPRNoX0:$rd, X0, simm6:$imm), (C_LI GPRNoX0:$rd, simm6:$imm)>; def : CompressPat<(SUBW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2), (C_SUBW GPRC:$rs1, GPRC:$rs2)>; def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2), (C_ADDW GPRC:$rs1, GPRC:$rs2)>; +let isCompressOnly = true in def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), (C_ADDW GPRC:$rs1, GPRC:$rs2)>; } // Predicates = [HasStdExtC, IsRV64] @@ -890,10 +901,12 @@ def : CompressPat<(LD GPRNoX0:$rd, SP:$rs1, uimm9_lsb000:$imm), let Predicates = [HasStdExtC] in { def : CompressPat<(JALR X0, GPRNoX0:$rs1, 0), (C_JR GPRNoX0:$rs1)>; +let 
isCompressOnly = true in { def : CompressPat<(ADD GPRNoX0:$rs1, X0, GPRNoX0:$rs2), (C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>; def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, X0), (C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>; +} def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs2, 0), (C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>; def : CompressPat<(EBREAK), (C_EBREAK)>; @@ -902,6 +915,7 @@ def : CompressPat<(JALR X1, GPRNoX0:$rs1, 0), (C_JALR GPRNoX0:$rs1)>; def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs1, GPRNoX0:$rs2), (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>; +let isCompressOnly = true in def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1), (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>; } // Predicates = [HasStdExtC] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 6c36f53cd563..133599e13b8b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -299,23 +299,6 @@ def : PatFpr64Fpr64<setolt, FLT_D>; def : PatFpr64Fpr64<setle, FLE_D>; def : PatFpr64Fpr64<setole, FLE_D>; -// Define pattern expansions for setcc operations which aren't directly -// handled by a RISC-V instruction and aren't expanded in the SelectionDAG -// Legalizer. - -def : Pat<(seto FPR64:$rs1, FPR64:$rs2), - (AND (FEQ_D FPR64:$rs1, FPR64:$rs1), - (FEQ_D FPR64:$rs2, FPR64:$rs2))>; -def : Pat<(seto FPR64:$rs1, FPR64:$rs1), - (FEQ_D $rs1, $rs1)>; - -def : Pat<(setuo FPR64:$rs1, FPR64:$rs2), - (SLTIU (AND (FEQ_D FPR64:$rs1, FPR64:$rs1), - (FEQ_D FPR64:$rs2, FPR64:$rs2)), - 1)>; -def : Pat<(setuo FPR64:$rs1, FPR64:$rs1), - (SLTIU (FEQ_D $rs1, $rs1), 1)>; - def Select_FPR64_Using_CC_GPR : SelectCC_rrirr<FPR64, GPR>; /// Loads @@ -361,6 +344,7 @@ let Predicates = [HasStdExtD, IsRV64] in { /// Float constants def : Pat<(f64 (fpimm0)), (FMV_D_X X0)>; +// Moves (no conversion) def : Pat<(bitconvert GPR:$rs1), (FMV_D_X GPR:$rs1)>; def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>; @@ -368,11 +352,11 @@ def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>; // because fpto[u|s]i produce poison if the value can't fit into the target. // We match the single case below because fcvt.wu.d sign-extends its result so // is cheaper than fcvt.lu.d+sext.w. -def : Pat<(sext_inreg (zexti32 (fp_to_uint FPR64:$rs1)), i32), +def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR64:$rs1)), i32), (FCVT_WU_D $rs1, 0b001)>; // [u]int32->fp -def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_D_W $rs1)>; +def : Pat<(sint_to_fp (sexti32 GPR:$rs1)), (FCVT_D_W $rs1)>; def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_D_WU $rs1)>; def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_L_D FPR64:$rs1, 0b001)>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index ce5c3abb6a06..4529949f693e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -303,10 +303,6 @@ def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>; /// Float conversion operations -// Moves (no conversion) -def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>; -def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>; - // [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so // are defined later. 
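For context on the seto/setuo expansions deleted from the double-precision file above (their single-precision twins go in the next hunk): "ordered" means neither operand is NaN, and feq.d x, x is 1 exactly when x is not NaN, so seto was composed from two self-compares and setuo from the same AND followed by sltiu ..., 1. A standalone C++ sketch of the deleted expansion (illustrative, not patch content); presumably these condition codes are now handled earlier by generic legalization, which is why the patterns could go:

#include <cassert>
#include <cmath>

// feq.d-style quiet equality: false whenever either input is NaN.
int feq(double x, double y) { return x == y; }

int seto(double a, double b)  { return feq(a, a) & feq(b, b); }        // (AND (FEQ a,a) (FEQ b,b))
int setuo(double a, double b) { return (feq(a, a) & feq(b, b)) < 1; }  // (SLTIU ..., 1)

int main() {
  double n = std::nan("");
  assert(seto(1.0, 2.0) == 1 && setuo(1.0, 2.0) == 0);
  assert(seto(n, 2.0) == 0 && setuo(n, 2.0) == 1);
  assert(seto(n, n) == 0 && setuo(n, n) == 1);
}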
@@ -359,23 +355,6 @@ def : PatFpr32Fpr32<setolt, FLT_S>; def : PatFpr32Fpr32<setle, FLE_S>; def : PatFpr32Fpr32<setole, FLE_S>; -// Define pattern expansions for setcc operations which aren't directly -// handled by a RISC-V instruction and aren't expanded in the SelectionDAG -// Legalizer. - -def : Pat<(seto FPR32:$rs1, FPR32:$rs2), - (AND (FEQ_S FPR32:$rs1, FPR32:$rs1), - (FEQ_S FPR32:$rs2, FPR32:$rs2))>; -def : Pat<(seto FPR32:$rs1, FPR32:$rs1), - (FEQ_S $rs1, $rs1)>; - -def : Pat<(setuo FPR32:$rs1, FPR32:$rs2), - (SLTIU (AND (FEQ_S FPR32:$rs1, FPR32:$rs1), - (FEQ_S FPR32:$rs2, FPR32:$rs2)), - 1)>; -def : Pat<(setuo FPR32:$rs1, FPR32:$rs1), - (SLTIU (FEQ_S $rs1, $rs1), 1)>; - def Select_FPR32_Using_CC_GPR : SelectCC_rrirr<FPR32, GPR>; /// Loads @@ -389,6 +368,10 @@ defm : StPat<store, FSW, FPR32>; } // Predicates = [HasStdExtF] let Predicates = [HasStdExtF, IsRV32] in { +// Moves (no conversion) +def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>; +def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>; + // float->[u]int. Round-to-zero must be used. def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; @@ -399,9 +382,10 @@ def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV32] let Predicates = [HasStdExtF, IsRV64] in { +// Moves (no conversion) def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>; def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>; -def : Pat<(sexti32 (riscv_fmv_x_anyextw_rv64 FPR32:$src)), +def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32), (FMV_X_W FPR32:$src)>; // FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe @@ -416,7 +400,7 @@ def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_L_S $rs1, 0b001)>; def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_LU_S $rs1, 0b001)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(sint_to_fp (sexti32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_L $rs1, 0b111)>; def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_LU $rs1, 0b111)>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index 987534aadd79..8cfb903a173c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -81,9 +81,11 @@ def : PatGprGpr<riscv_remuw, REMUW>; // Handle the specific cases where using DIVU/REMU would be correct and result // in fewer instructions than emitting DIVUW/REMUW then zero-extending the // result. 
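The comment closing this chunk is worth one sentence of expansion before the patterns that follow it: if both operands are already zero-extended 32-bit values, a plain 64-bit unsigned divide or remainder itself yields a zero-extended 32-bit value, so selecting DIVU/REMU avoids a DIVUW/REMUW plus an explicit re-extension. A quick standalone C++ check of that equivalence (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // Inputs with known-zero upper halves, as assertzexti32 guarantees.
  uint64_t a = 0x00000000FFFFFFF0ull, b = 0x0000000000000007ull;
  // 64-bit DIVU/REMU on such inputs...
  uint64_t divu = a / b, remu = a % b;
  // ...equal DIVUW/REMUW (32-bit ops) followed by zero extension.
  assert(divu == uint64_t(uint32_t(a) / uint32_t(b)));
  assert(remu == uint64_t(uint32_t(a) % uint32_t(b)));
}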
-def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))), +def : Pat<(and (riscv_divuw (assertzexti32 GPR:$rs1), + (assertzexti32 GPR:$rs2)), 0xffffffff), (DIVU GPR:$rs1, GPR:$rs2)>; -def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))), +def : Pat<(and (riscv_remuw (assertzexti32 GPR:$rs1), + (assertzexti32 GPR:$rs2)), 0xffffffff), (REMU GPR:$rs1, GPR:$rs2)>; // Although the sexti32 operands may not have originated from an i32 srem, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 1c7f53fecb8c..b3fc76aee161 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file describes the RISC-V instructions from the standard 'V' Vector -/// extension, version 0.8. +/// extension, version 0.10. /// This version is still experimental as the 'V' extension hasn't been /// ratified yet. /// @@ -31,18 +31,6 @@ def VTypeIOp : Operand<XLenVT> { let DecoderMethod = "decodeUImmOperand<11>"; } -def VRegAsmOperand : AsmOperandClass { - let Name = "RVVRegOpOperand"; - let RenderMethod = "addRegOperands"; - let PredicateMethod = "isReg"; - let ParserMethod = "parseRegister"; -} - -def VRegOp : RegisterOperand<VR> { - let ParserMatchClass = VRegAsmOperand; - let PrintMethod = "printOperand"; -} - def VMaskAsmOperand : AsmOperandClass { let Name = "RVVMaskRegOpOperand"; let RenderMethod = "addRegOperands"; @@ -74,14 +62,13 @@ def simm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> { def SImm5Plus1AsmOperand : AsmOperandClass { let Name = "SImm5Plus1"; - let RenderMethod = "addSImm5Plus1Operands"; + let RenderMethod = "addImmOperands"; let DiagnosticType = "InvalidSImm5Plus1"; } def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm - 1);}]> { let ParserMatchClass = SImm5Plus1AsmOperand; - let PrintMethod = "printSImm5Plus1"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) @@ -95,162 +82,242 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { +// load vd, (rs1) +class VUnitStrideLoadMask<string opcodestr> + : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">; + // load vd, (rs1), vm -class VUnitStrideLoad<RISCVMOP mop, RISCVLSUMOP lumop, RISCVWidth width, - string opcodestr> - : RVInstVLU<0b000, mop, lumop, width, (outs VRegOp:$vd), +class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width, + string opcodestr> + : RVInstVLU<0b000, width.Value{3}, lumop, width.Value{2-0}, + (outs VR:$vd), (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">; // load vd, (rs1), rs2, vm -class VStridedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr> - : RVInstVLS<0b000, mop, width, (outs VRegOp:$vd), +class VStridedLoad<RISCVWidth width, string opcodestr> + : RVInstVLS<0b000, width.Value{3}, width.Value{2-0}, + (outs VR:$vd), (ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, "$vd, (${rs1}), $rs2$vm">; // load vd, (rs1), vs2, vm class VIndexedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr> - : RVInstVLX<0b000, mop, width, (outs VRegOp:$vd), - (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), opcodestr, + : 
RVInstVLX<0b000, width.Value{3}, mop, width.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, (${rs1}), $vs2$vm">; // vl<nf>r.v vd, (rs1) -class VWholeLoad<bits<3> nf, string opcodestr> - : RVInstVLU<nf, MOPLDUnitStrideU, LUMOPUnitStrideWholeReg, - LSWidthVSEW, (outs VRegOp:$vd), (ins GPR:$rs1), +class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr> + : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg, + width.Value{2-0}, (outs VR:$vd), (ins GPR:$rs1), opcodestr, "$vd, (${rs1})"> { let vm = 1; let Uses = []; + let RVVConstraint = NoConstraint; } + +// segment load vd, (rs1), vm +class VUnitStrideSegmentLoad<bits<3> nf, RISCVLSUMOP lumop, + RISCVWidth width, string opcodestr> + : RVInstVLU<nf, width.Value{3}, lumop, width.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">; + +// segment load vd, (rs1), rs2, vm +class VStridedSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr> + : RVInstVLS<nf, width.Value{3}, width.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, + "$vd, (${rs1}), $rs2$vm">; + +// segment load vd, (rs1), vs2, vm +class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width, + string opcodestr> + : RVInstVLX<nf, width.Value{3}, mop, width.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, + "$vd, (${rs1}), $vs2$vm">; } // hasSideEffects = 0, mayLoad = 1, mayStore = 0 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { // store vd, vs3, (rs1), vm -class VUnitStrideStore<RISCVMOP mop, RISCVLSUMOP sumop, RISCVWidth width, +class VUnitStrideStoreMask<string opcodestr> + : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0}, + (outs), (ins VR:$vs3, GPR:$rs1), opcodestr, + "$vs3, (${rs1})">; + +// store vd, vs3, (rs1), vm +class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width, string opcodestr> - : RVInstVSU<0b000, mop, sumop, width, (outs), - (ins VRegOp:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, + : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0}, + (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, "$vs3, (${rs1})$vm">; // store vd, vs3, (rs1), rs2, vm -class VStridedStore<RISCVMOP mop, RISCVWidth width, string opcodestr> - : RVInstVSS<0b000, mop, width, (outs), - (ins VRegOp:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm), +class VStridedStore<RISCVWidth width, string opcodestr> + : RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs), + (ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, "$vs3, (${rs1}), $rs2$vm">; // store vd, vs3, (rs1), vs2, vm class VIndexedStore<RISCVMOP mop, RISCVWidth width, string opcodestr> - : RVInstVSX<0b000, mop, width, (outs), - (ins VRegOp:$vs3, GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), + : RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs), + (ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vs3, (${rs1}), $vs2$vm">; // vs<nf>r.v vd, (rs1) class VWholeStore<bits<3> nf, string opcodestr> - : RVInstVSU<nf, MOPSTUnitStride, SUMOPUnitStrideWholeReg, - LSWidthVSEW, (outs), (ins VRegOp:$vs3, GPR:$rs1), + : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg, + 0b000, (outs), (ins VR:$vs3, GPR:$rs1), opcodestr, "$vs3, (${rs1})"> { let vm = 1; let Uses = []; } + +// segment store vd, vs3, (rs1), vm +class VUnitStrideSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr> + : RVInstVSU<nf, width.Value{3}, SUMOPUnitStride, width.Value{2-0}, + (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, + "$vs3, (${rs1})$vm">; + +// 
segment store vd, vs3, (rs1), rs2, vm +class VStridedSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr> + : RVInstVSS<nf, width.Value{3}, width.Value{2-0}, (outs), + (ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm), + opcodestr, "$vs3, (${rs1}), $rs2$vm">; + +// segment store vd, vs3, (rs1), vs2, vm +class VIndexedSegmentStore<bits<3> nf, RISCVMOP mop, RISCVWidth width, + string opcodestr> + : RVInstVSX<nf, width.Value{3}, mop, width.Value{2-0}, (outs), + (ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm), + opcodestr, "$vs3, (${rs1}), $vs2$vm">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 1 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { // op vd, vs2, vs1, vm class VALUVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVV<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, VRegOp:$vs1, VMaskOp:$vm), + : RVInstVV<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, VR:$vs1, VMaskOp:$vm), opcodestr, "$vd, $vs2, $vs1$vm">; // op vd, vs2, vs1, v0 (without mask, use v0 as carry input) class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVV<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, VRegOp:$vs1, VMV0:$v0), + : RVInstVV<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, VR:$vs1, VMV0:$v0), opcodestr, "$vd, $vs2, $vs1, v0"> { let vm = 0; } // op vd, vs1, vs2, vm (reverse the order of vs1 and vs2) class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVV<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs1, VRegOp:$vs2, VMaskOp:$vm), + : RVInstVV<funct6, opv, (outs VR:$vd), + (ins VR:$vs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $vs1, $vs2$vm">; -// op vd, vs1, vs2 +// op vd, vs2, vs1 class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVV<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, VRegOp:$vs1), + : RVInstVV<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, VR:$vs1), opcodestr, "$vd, $vs2, $vs1"> { let vm = 1; } // op vd, vs2, rs1, vm class VALUVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm), + : RVInstVX<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, $vs2, $rs1$vm">; // op vd, vs2, rs1, v0 (without mask, use v0 as carry input) class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, GPR:$rs1, VMV0:$v0), + : RVInstVX<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, GPR:$rs1, VMV0:$v0), opcodestr, "$vd, $vs2, $rs1, v0"> { let vm = 0; } // op vd, rs1, vs2, vm (reverse the order of rs1 and vs2) class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VRegOp:$vd), - (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), + : RVInstVX<funct6, opv, (outs VR:$vd), + (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $rs1, $vs2$vm">; // op vd, vs1, vs2 class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, GPR:$rs1), + : RVInstVX<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, GPR:$rs1), opcodestr, "$vd, $vs2, $rs1"> { let vm = 1; } // op vd, vs2, imm, vm class VALUVI<bits<6> funct6, string opcodestr, Operand optype = simm5> - : RVInstIVI<funct6, (outs VRegOp:$vd), - (ins VRegOp:$vs2, optype:$imm, VMaskOp:$vm), + : RVInstIVI<funct6, (outs VR:$vd), + (ins VR:$vs2, optype:$imm, VMaskOp:$vm), opcodestr, "$vd, $vs2, $imm$vm">; // op vd, vs2, imm, v0 (without mask, use v0 as carry input) class VALUmVI<bits<6> 
funct6, string opcodestr, Operand optype = simm5> - : RVInstIVI<funct6, (outs VRegOp:$vd), - (ins VRegOp:$vs2, optype:$imm, VMV0:$v0), + : RVInstIVI<funct6, (outs VR:$vd), + (ins VR:$vs2, optype:$imm, VMV0:$v0), opcodestr, "$vd, $vs2, $imm, v0"> { let vm = 0; } // op vd, vs2, imm, vm class VALUVINoVm<bits<6> funct6, string opcodestr, Operand optype = simm5> - : RVInstIVI<funct6, (outs VRegOp:$vd), - (ins VRegOp:$vs2, optype:$imm), + : RVInstIVI<funct6, (outs VR:$vd), + (ins VR:$vs2, optype:$imm), opcodestr, "$vd, $vs2, $imm"> { let vm = 1; } // op vd, vs2, rs1, vm (Float) class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, FPR32:$rs1, VMaskOp:$vm), + : RVInstVX<funct6, opv, (outs VR:$vd), + (ins VR:$vs2, FPR32:$rs1, VMaskOp:$vm), opcodestr, "$vd, $vs2, $rs1$vm">; // op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2) class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr> - : RVInstVX<funct6, opv, (outs VRegOp:$vd), - (ins FPR32:$rs1, VRegOp:$vs2, VMaskOp:$vm), + : RVInstVX<funct6, opv, (outs VR:$vd), + (ins FPR32:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $rs1, $vs2$vm">; // op vd, vs2, vm (use vs1 as instruction encoding) class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr> - : RVInstV<funct6, vs1, opv, (outs VRegOp:$vd), - (ins VRegOp:$vs2, VMaskOp:$vm), + : RVInstV<funct6, vs1, opv, (outs VR:$vd), + (ins VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $vs2$vm">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 +let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in { +// vamo vd, (rs1), vs2, vd, vm +class VAMOWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> + : RVInstVAMO<amoop, width.Value{2-0}, (outs VR:$vd_wd), + (ins GPR:$rs1, VR:$vs2, VR:$vd, VMaskOp:$vm), + opcodestr, "$vd_wd, (${rs1}), $vs2, $vd$vm"> { + let Constraints = "$vd_wd = $vd"; + let wd = 1; + bits<5> vd; + let Inst{11-7} = vd; +} + +// vamo x0, (rs1), vs2, vs3, vm +class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> + : RVInstVAMO<amoop, width.Value{2-0}, (outs), + (ins GPR:$rs1, VR:$vs2, VR:$vs3, VMaskOp:$vm), + opcodestr, "x0, (${rs1}), $vs2, $vs3$vm"> { + bits<5> vs3; + let Inst{11-7} = vs3; +} + +} // hasSideEffects = 0, mayLoad = 1, mayStore = 1 + //===----------------------------------------------------------------------===// // Combination of instruction classes. // Use these multiclasses to define instructions more easily. 
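Before the multiclasses that instantiate them, a brief note on the two VAMO classes defined just above: the wd bit selects whether the old memory values come back in vd (VAMOWd, hence the "$vd_wd = $vd" tie) or are discarded (VAMONoWd, the x0-destination form). A toy element-wise model in standalone C++ (masking, SEW, and byte addressing are deliberately omitted; all names are illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Per-element swap as a vamoswap would perform it; 'wd' mirrors the
// instruction bit: return old memory values in vd (true) or drop them.
void vamoswap(std::vector<uint32_t> &mem, const std::vector<uint32_t> &idx,
              std::vector<uint32_t> &vsrc, bool wd) {
  for (size_t i = 0; i < idx.size(); ++i) {
    uint32_t old = mem[idx[i]];
    mem[idx[i]] = vsrc[i];   // store the source element
    if (wd)
      vsrc[i] = old;         // wd = 1: old value written back to vd
  }
}

int main() {
  std::vector<uint32_t> mem = {10, 20, 30}, idx = {2, 0}, v = {7, 8};
  vamoswap(mem, idx, v, /*wd=*/true);
  assert(mem[2] == 7 && mem[0] == 8 && v[0] == 30 && v[1] == 10);
}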
@@ -358,6 +425,18 @@ multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>; } +multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> { + def _WD : VAMOWd<amoop, width, opcodestr>; + def _UNWD : VAMONoWd<amoop, width, opcodestr>; +} + +multiclass VWholeLoad<bits<3> nf, string opcodestr> { + def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v">; + def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v">; + def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v">; + def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v">; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -367,82 +446,85 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), "vsetvli", "$rd, $rs1, $vtypei">; +def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei), + "vsetivli", "$rd, $uimm, $vtypei">; + def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), "vsetvl", "$rd, $rs1, $rs2">; } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Vector Unit-Stride Instructions -def VLB_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVByte, "vlb.v">; -def VLH_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVHalf, "vlh.v">; -def VLW_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVWord, "vlw.v">; - -def VLBU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVByte, "vlbu.v">; -def VLHU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVHalf, "vlhu.v">; -def VLWU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVWord, "vlwu.v">; - -def VLE_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVSEW, "vle.v">; - -def VLBFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVByte, "vlbff.v">; -def VLHFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVHalf, "vlhff.v">; -def VLWFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVWord, "vlwff.v">; - -def VLBUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVByte, "vlbuff.v">; -def VLHUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVHalf, "vlhuff.v">; -def VLWUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVWord, "vlwuff.v">; +def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">; +def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">; +def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">; +def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">; -def VLEFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVSEW, "vleff.v">; +def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">; +def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">; +def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">; +def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">; -def VSB_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVByte, "vsb.v">; -def VSH_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVHalf, "vsh.v">; -def VSW_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVWord, "vsw.v">; +def VLE1_V : VUnitStrideLoadMask<"vle1.v">; +def VSE1_V : VUnitStrideStoreMask<"vse1.v">; -def VSE_V : 
VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVSEW, "vse.v">; +def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">; +def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">; +def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">; +def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">; // Vector Strided Instructions -def VLSB_V : VStridedLoad<MOPLDStridedS, LSWidthVByte, "vlsb.v">; -def VLSH_V : VStridedLoad<MOPLDStridedS, LSWidthVHalf, "vlsh.v">; -def VLSW_V : VStridedLoad<MOPLDStridedS, LSWidthVWord, "vlsw.v">; +def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">; +def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">; +def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">; +def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">; -def VLSBU_V : VStridedLoad<MOPLDStridedU, LSWidthVByte, "vlsbu.v">; -def VLSHU_V : VStridedLoad<MOPLDStridedU, LSWidthVHalf, "vlshu.v">; -def VLSWU_V : VStridedLoad<MOPLDStridedU, LSWidthVWord, "vlswu.v">; - -def VLSE_V : VStridedLoad<MOPLDStridedU, LSWidthVSEW, "vlse.v">; - -def VSSB_V : VStridedStore<MOPSTStrided, LSWidthVByte, "vssb.v">; -def VSSH_V : VStridedStore<MOPSTStrided, LSWidthVHalf, "vssh.v">; -def VSSW_V : VStridedStore<MOPSTStrided, LSWidthVWord, "vssw.v">; -def VSSE_V : VStridedStore<MOPSTStrided, LSWidthVSEW, "vsse.v">; +def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">; +def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">; +def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">; +def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">; // Vector Indexed Instructions -def VLXB_V : VIndexedLoad<MOPLDIndexedS, LSWidthVByte, "vlxb.v">; -def VLXH_V : VIndexedLoad<MOPLDIndexedS, LSWidthVHalf, "vlxh.v">; -def VLXW_V : VIndexedLoad<MOPLDIndexedS, LSWidthVWord, "vlxw.v">; - -def VLXBU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVByte, "vlxbu.v">; -def VLXHU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVHalf, "vlxhu.v">; -def VLXWU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVWord, "vlxwu.v">; - -def VLXE_V : VIndexedLoad<MOPLDIndexedU, LSWidthVSEW, "vlxe.v">; - -def VSXB_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVByte, "vsxb.v">; -def VSXH_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVHalf, "vsxh.v">; -def VSXW_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVWord, "vsxw.v">; -def VSXE_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVSEW, "vsxe.v">; +def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">; +def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">; +def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">; +def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">; + +def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">; +def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">; +def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">; +def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">; + +def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">; +def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">; +def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">; +def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">; + +def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">; +def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">; +def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">; +def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, 
LSWidth64, "vsoxei64.v">; + +defm VL1R : VWholeLoad<0, "vl1r">; +defm VL2R : VWholeLoad<1, "vl2r">; +defm VL4R : VWholeLoad<3, "vl4r">; +defm VL8R : VWholeLoad<7, "vl8r">; +def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; +def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>; +def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>; +def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>; -def VSUXB_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVByte, "vsuxb.v">; -def VSUXH_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVHalf, "vsuxh.v">; -def VSUXW_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVWord, "vsuxw.v">; -def VSUXE_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVSEW, "vsuxe.v">; - -def VL1R_V : VWholeLoad<0, "vl1r.v">; def VS1R_V : VWholeStore<0, "vs1r.v">; +def VS2R_V : VWholeStore<1, "vs2r.v">; +def VS4R_V : VWholeStore<3, "vs4r.v">; +def VS8R_V : VWholeStore<7, "vs8r.v">; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; defm VSUB_V : VALU_IV_V_X<"vsub", 0b000010>; defm VRSUB_V : VALU_IV_X_I<"vrsub", 0b000011>; +def : InstAlias<"vneg.v $vd, $vs$vm", (VRSUB_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; + // Vector Widening Integer Add/Subtract // Refer to 11.2 Widening Vector Arithmetic Instructions // The destination vector register group cannot overlap a source vector @@ -468,17 +550,29 @@ defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">; } // Constraints = "@earlyclobber $vd" def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm", - (VWADD_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>; + (VWADD_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; def : InstAlias<"vwcvtu.x.x.v $vd, $vs$vm", - (VWADDU_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>; + (VWADDU_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; + +// Vector Integer Extension +defm VZEXT_VF8 : VALU_MV_VS2<"vzext.vf8", 0b010010, 0b00010>; +defm VSEXT_VF8 : VALU_MV_VS2<"vsext.vf8", 0b010010, 0b00011>; +defm VZEXT_VF4 : VALU_MV_VS2<"vzext.vf4", 0b010010, 0b00100>; +defm VSEXT_VF4 : VALU_MV_VS2<"vsext.vf4", 0b010010, 0b00101>; +defm VZEXT_VF2 : VALU_MV_VS2<"vzext.vf2", 0b010010, 0b00110>; +defm VSEXT_VF2 : VALU_MV_VS2<"vsext.vf2", 0b010010, 0b00111>; // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions defm VADC_V : VALUm_IV_V_X_I<"vadc", 0b010000>; +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>; defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>; +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>; +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>; defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>; +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint // Vector Bitwise Logical Instructions defm VAND_V : VALU_IV_V_X_I<"vand", 0b001001>; @@ -486,7 +580,7 @@ defm VOR_V : VALU_IV_V_X_I<"vor", 0b001010>; defm VXOR_V : VALU_IV_V_X_I<"vxor", 0b001011>; def : InstAlias<"vnot.v $vd, $vs$vm", - (VXOR_VI VRegOp:$vd, VRegOp:$vs, -1, VMaskOp:$vm)>; + (VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>; // Vector Single-Width Bit Shift Instructions defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>; @@ -498,12 +592,16 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; // The destination vector register group cannot overlap the first source // vector register group (specified by vs2). 
The destination vector register // group cannot overlap the mask register if used, unless LMUL=1. -let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in { +let Constraints = "@earlyclobber $vd" in { defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow +} // Constraints = "@earlyclobber $vd" + +def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", + (VNSRL_WX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>; // Vector Integer Comparison Instructions +let RVVConstraint = NoConstraint in { defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>; defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>; defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>; @@ -512,27 +610,61 @@ defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>; defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>; defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>; defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>; +} // RVVConstraint = NoConstraint def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm", - (VMSLTU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; + (VMSLTU_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; def : InstAlias<"vmsgt.vv $vd, $va, $vb$vm", - (VMSLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; + (VMSLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; def : InstAlias<"vmsgeu.vv $vd, $va, $vb$vm", - (VMSLEU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; + (VMSLEU_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; def : InstAlias<"vmsge.vv $vd, $va, $vb$vm", - (VMSLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; -def : InstAlias<"vmsltu.vi $vd, $va, $imm$vm", - (VMSLEU_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm, - VMaskOp:$vm), 0>; -def : InstAlias<"vmslt.vi $vd, $va, $imm$vm", - (VMSLE_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm, - VMaskOp:$vm), 0>; -def : InstAlias<"vmsgeu.vi $vd, $va, $imm$vm", - (VMSGTU_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm, - VMaskOp:$vm), 0>; -def : InstAlias<"vmsge.vi $vd, $va, $imm$vm", - (VMSGT_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm, - VMaskOp:$vm), 0>; + (VMSLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; + +let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0, + mayStore = 0 in { +// For unsigned comparisons we need to special case 0 immediate to maintain +// the always true/false semantics we would invert if we just decremented the +// immediate like we do for signed. To match the GNU assembler we will use +// vmseq/vmsne.vv with the same register for both operands which we can't do +// from an InstAlias. +def PseudoVMSGEU_VI : Pseudo<(outs VR:$vd), + (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm), + [], "vmsgeu.vi", "$vd, $vs2, $imm$vm">; +def PseudoVMSLTU_VI : Pseudo<(outs VR:$vd), + (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm), + [], "vmsltu.vi", "$vd, $vs2, $imm$vm">; +// Handle signed with pseudos as well for more consistency in the +// implementation. 
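A worked illustration of the unsigned special case the comment above describes: for any nonzero immediate i, unsigned x >= i is equivalent to x > i-1, but at i = 0 the rewrite breaks, since x >= 0 is always true while x > 0xFFFFFFFF... is always false. That is why a zero-immediate vmsgeu must become a vmseq.vv of a register with itself rather than a decremented vmsgtu.vi. A standalone C++ check (illustrative only); the signed pseudos that follow reuse the same machinery purely for consistency:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 5u, 0xFFFFFFFFu}) {
    for (uint32_t i = 1; i <= 3; ++i)
      assert((x >= i) == (x > i - 1));    // safe rewrite for imm != 0
    assert(x >= 0u);                      // vmsgeu ..., 0: always true
    assert(!(x > 0xFFFFFFFFu));           // naive decrement would flip it
  }
}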
+def PseudoVMSGE_VI : Pseudo<(outs VR:$vd), + (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm), + [], "vmsge.vi", "$vd, $vs2, $imm$vm">; +def PseudoVMSLT_VI : Pseudo<(outs VR:$vd), + (ins VR:$vs2, simm5_plus1:$imm, VMaskOp:$vm), + [], "vmslt.vi", "$vd, $vs2, $imm$vm">; +} + +let isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 0, + mayStore = 0 in { +def PseudoVMSGEU_VX : Pseudo<(outs VR:$vd), + (ins VR:$vs2, GPR:$rs1), + [], "vmsgeu.vx", "$vd, $vs2, $rs1">; +def PseudoVMSGE_VX : Pseudo<(outs VR:$vd), + (ins VR:$vs2, GPR:$rs1), + [], "vmsge.vx", "$vd, $vs2, $rs1">; +def PseudoVMSGEU_VX_M : Pseudo<(outs VRNoV0:$vd), + (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsgeu.vx", "$vd, $vs2, $rs1$vm">; +def PseudoVMSGE_VX_M : Pseudo<(outs VRNoV0:$vd), + (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsge.vx", "$vd, $vs2, $rs1$vm">; +def PseudoVMSGEU_VX_M_T : Pseudo<(outs VMV0:$vd, VR:$scratch), + (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsgeu.vx", "$vd, $vs2, $rs1$vm, $scratch">; +def PseudoVMSGE_VX_M_T : Pseudo<(outs VMV0:$vd, VR:$scratch), + (ins VR:$vs2, GPR:$rs1, VMaskOp:$vm), + [], "vmsge.vx", "$vd, $vs2, $rs1$vm, $scratch">; +} // Vector Integer Min/Max Instructions defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>; @@ -577,15 +709,16 @@ defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>; defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>; // Vector Integer Move Instructions -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1 in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, + RVVConstraint = NoConstraint in { // op vd, vs1 -def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VRegOp:$vd), - (ins VRegOp:$vs1), "vmv.v.v", "$vd, $vs1">; +def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), + (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">; // op vd, rs1 -def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VRegOp:$vd), +def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">; // op vd, imm -def VMV_V_I : RVInstIVI<0b010111, (outs VRegOp:$vd), +def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), (ins simm5:$imm), "vmv.v.i", "$vd, $imm">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 @@ -609,11 +742,13 @@ defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>; defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>; // Vector Narrowing Fixed-Point Clip Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in { +let Constraints = "@earlyclobber $vd" in { defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow +} // Constraints = "@earlyclobber $vd" +} // Predicates = [HasStdExtV] +let Predicates = [HasStdExtV, HasStdExtF] in { // Vector Single-Width Floating-Point Add/Subtract Instructions defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>; defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>; @@ -664,7 +799,9 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Floating-Point Square-Root Instruction -defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b100011, 0b00000>; +defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; +defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; +defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; // Vector Floating-Point MIN/MAX Instructions defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>; @@ -675,32 +812,38 @@ defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 
0b001000>; defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>; defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>; +def : InstAlias<"vfneg.v $vd, $vs$vm", + (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>; + // Vector Floating-Point Compare Instructions +let RVVConstraint = NoConstraint in { defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>; defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>; defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>; defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>; defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>; defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>; +} // RVVConstraint = NoConstraint def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", - (VMFLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; + (VMFLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; def : InstAlias<"vmfge.vv $vd, $va, $vb$vm", - (VMFLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; + (VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; // Vector Floating-Point Classify Instruction -defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b100011, 0b10000>; +defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { // Vector Floating-Point Merge Instruction -def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd), - (ins VRegOp:$vs2, FPR32:$rs1, VMV0:$v0), +def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd), + (ins VR:$vs2, FPR32:$rs1, VMV0:$v0), "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> { let vm = 0; } // Vector Floating-Point Move Instruction -def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd), +let RVVConstraint = NoConstraint in +def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> { let vs2 = 0; let vm = 1; @@ -708,31 +851,40 @@ def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd), } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Single-Width Floating-Point/Integer Type-Convert Instructions -defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b100010, 0b00000>; -defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b100010, 0b00001>; -defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b100010, 0b00010>; -defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b100010, 0b00011>; +defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; +defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; +defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; +defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; +defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; +defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in { -defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b100010, 0b01000>; -defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b100010, 0b01001>; -defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b100010, 0b01010>; -defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b100010, 0b01011>; -defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b100010, 0b01100>; +defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; +defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; +defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; +defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; +defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; +defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; +defm 
VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt // Narrowing Floating-Point/Integer Type-Convert Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in { -defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b100010, 0b10000>; -defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b100010, 0b10001>; -defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b100010, 0b10010>; -defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b100010, 0b10011>; -defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b100010, 0b10100>; -defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b100010, 0b10101>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow +let Constraints = "@earlyclobber $vd" in { +defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; +defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; +defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; +defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; +defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; +defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; +defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; +defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; +} // Constraints = "@earlyclobber $vd" +} // Predicates = [HasStdExtV, HasStdExtF] +let Predicates = [HasStdExtV] in { // Vector Single-Width Integer Reduction Instructions +let RVVConstraint = NoConstraint in { defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>; defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>; defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>; @@ -741,34 +893,42 @@ defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>; defm VREDAND : VALU_MV_V<"vredand", 0b000001>; defm VREDOR : VALU_MV_V<"vredor", 0b000010>; defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>; +} // RVVConstraint = NoConstraint // Vector Widening Integer Reduction Instructions -let Constraints = "@earlyclobber $vd" in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>; defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>; -} // Constraints = "@earlyclobber $vd" +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint +} // Predicates = [HasStdExtV] +let Predicates = [HasStdExtV, HasStdExtF] in { // Vector Single-Width Floating-Point Reduction Instructions +let RVVConstraint = NoConstraint in { defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>; defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>; defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>; defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>; +} // RVVConstraint = NoConstraint // Vector Widening Floating-Point Reduction Instructions -let Constraints = "@earlyclobber $vd" in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. 
defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>; defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>; -} // Constraints = "@earlyclobber $vd" +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint +} // Predicates = [HasStdExtV, HasStdExtF] +let Predicates = [HasStdExtV] in { // Vector Mask-Register Logical Instructions +let RVVConstraint = NoConstraint in { defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">; defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">; defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">; @@ -777,82 +937,95 @@ defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">; defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">; defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">; defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">; +} -def : InstAlias<"vmcpy.m $vd, $vs", - (VMAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>; +def : InstAlias<"vmmv.m $vd, $vs", + (VMAND_MM VR:$vd, VR:$vs, VR:$vs)>; def : InstAlias<"vmclr.m $vd", - (VMXOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>; + (VMXOR_MM VR:$vd, VR:$vd, VR:$vd)>; def : InstAlias<"vmset.m $vd", - (VMXNOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>; + (VMXNOR_MM VR:$vd, VR:$vd, VR:$vd)>; def : InstAlias<"vmnot.m $vd, $vs", - (VMNAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>; + (VMNAND_MM VR:$vd, VR:$vs, VR:$vs)>; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, + RVVConstraint = NoConstraint in { // Vector mask population count vpopc def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), - (ins VRegOp:$vs2, VMaskOp:$vm), + (ins VR:$vs2, VMaskOp:$vm), "vpopc.m", "$vd, $vs2$vm">; // vfirst find-first-set mask bit def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), - (ins VRegOp:$vs2, VMaskOp:$vm), + (ins VR:$vs2, VMaskOp:$vm), "vfirst.m", "$vd, $vs2$vm">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 +let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in { // vmsbf.m set-before-first mask bit defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>; - // vmsif.m set-including-first mask bit defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>; - // vmsof.m set-only-first mask bit defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>; - // Vector Iota Instruction -let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in { defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Iota // Vector Element Index Instruction let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { -def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VRegOp:$vd), +def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd), (ins VMaskOp:$vm), "vid.v", "$vd$vm"> { let vs2 = 0; } // Integer Scalar Move Instructions -let vm = 1 in { +let vm = 1, RVVConstraint = NoConstraint in { def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd), - (ins VRegOp:$vs2), "vmv.x.s", "$vd, $vs2">; -def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VRegOp:$vd), - (ins GPR:$rs1), "vmv.s.x", "$vd, $rs1">; + (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">; +let Constraints = "$vd = $vd_wb" in +def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb), + (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">; } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 +} // Predicates = [HasStdExtV] -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 in { +let Predicates = [HasStdExtV, HasStdExtF] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1, + RVVConstraint = NoConstraint in { // 
Floating-Point Scalar Move Instructions def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd), - (ins VRegOp:$vs2), "vfmv.f.s", "$vd, $vs2">; -def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VRegOp:$vd), - (ins FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">; + (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">; +let Constraints = "$vd = $vd_wb" in +def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb), + (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 +} // Predicates = [HasStdExtV, HasStdExtF] +let Predicates = [HasStdExtV] in { // Vector Slide Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>; +defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>; +defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>; +} // Predicates = [HasStdExtV] +let Predicates = [HasStdExtV, HasStdExtF] in { let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>; +defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>; +defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>; +} // Predicates = [HasStdExtV, HasStdExtF] +let Predicates = [HasStdExtV] in { // Vector Register Gather Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>; +def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather // Vector Compress Instruction @@ -860,10 +1033,11 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in { defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, + RVVConstraint = NoConstraint in { foreach nf = [1, 2, 4, 8] in { - def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VRegOp:$vd), - (ins VRegOp:$vs2), "vmv" # nf # "r.v", + def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd), + (ins VR:$vs2), "vmv" # nf # "r.v", "$vd, $vs2"> { let Uses = []; let vm = 1; @@ -871,3 +1045,122 @@ foreach nf = [1, 2, 4, 8] in { } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 } // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtZvlsseg] in { + foreach nf=2-8 in { + def VLSEG#nf#E8_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth8, "vlseg"#nf#"e8.v">; + def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">; + def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">; + def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">; + + def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">; + def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">; + def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">; + def VLSEG#nf#E64FF_V : 
VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">; + + def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">; + def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">; + def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">; + def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">; + + // Vector Strided Instructions + def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">; + def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">; + def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">; + def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">; + + def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">; + def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">; + def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">; + def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">; + + // Vector Indexed Instructions + def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, + LSWidth8, "vluxseg"#nf#"ei8.v">; + def VLUXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, + LSWidth16, "vluxseg"#nf#"ei16.v">; + def VLUXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, + LSWidth32, "vluxseg"#nf#"ei32.v">; + def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, + LSWidth64, "vluxseg"#nf#"ei64.v">; + + def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, + LSWidth8, "vloxseg"#nf#"ei8.v">; + def VLOXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, + LSWidth16, "vloxseg"#nf#"ei16.v">; + def VLOXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, + LSWidth32, "vloxseg"#nf#"ei32.v">; + def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, + LSWidth64, "vloxseg"#nf#"ei64.v">; + + def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, + LSWidth8, "vsuxseg"#nf#"ei8.v">; + def VSUXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, + LSWidth16, "vsuxseg"#nf#"ei16.v">; + def VSUXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, + LSWidth32, "vsuxseg"#nf#"ei32.v">; + def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, + LSWidth64, "vsuxseg"#nf#"ei64.v">; + + def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, + LSWidth8, "vsoxseg"#nf#"ei8.v">; + def VSOXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, + LSWidth16, "vsoxseg"#nf#"ei16.v">; + def VSOXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, + LSWidth32, "vsoxseg"#nf#"ei32.v">; + def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, + LSWidth64, "vsoxseg"#nf#"ei64.v">; + } +} // Predicates = [HasStdExtZvlsseg] + +let Predicates = [HasStdExtZvamo, HasStdExtA] in { + defm VAMOSWAPEI8 : VAMO<AMOOPVamoSwap, LSWidth8, "vamoswapei8.v">; + defm VAMOSWAPEI16 : VAMO<AMOOPVamoSwap, LSWidth16, "vamoswapei16.v">; + defm VAMOSWAPEI32 : VAMO<AMOOPVamoSwap, LSWidth32, "vamoswapei32.v">; + + defm VAMOADDEI8 : VAMO<AMOOPVamoAdd, LSWidth8, "vamoaddei8.v">; + defm VAMOADDEI16 : VAMO<AMOOPVamoAdd, LSWidth16, 
"vamoaddei16.v">; + defm VAMOADDEI32 : VAMO<AMOOPVamoAdd, LSWidth32, "vamoaddei32.v">; + + defm VAMOXOREI8 : VAMO<AMOOPVamoXor, LSWidth8, "vamoxorei8.v">; + defm VAMOXOREI16 : VAMO<AMOOPVamoXor, LSWidth16, "vamoxorei16.v">; + defm VAMOXOREI32 : VAMO<AMOOPVamoXor, LSWidth32, "vamoxorei32.v">; + + defm VAMOANDEI8 : VAMO<AMOOPVamoAnd, LSWidth8, "vamoandei8.v">; + defm VAMOANDEI16 : VAMO<AMOOPVamoAnd, LSWidth16, "vamoandei16.v">; + defm VAMOANDEI32 : VAMO<AMOOPVamoAnd, LSWidth32, "vamoandei32.v">; + + defm VAMOOREI8 : VAMO<AMOOPVamoOr, LSWidth8, "vamoorei8.v">; + defm VAMOOREI16 : VAMO<AMOOPVamoOr, LSWidth16, "vamoorei16.v">; + defm VAMOOREI32 : VAMO<AMOOPVamoOr, LSWidth32, "vamoorei32.v">; + + defm VAMOMINEI8 : VAMO<AMOOPVamoMin, LSWidth8, "vamominei8.v">; + defm VAMOMINEI16 : VAMO<AMOOPVamoMin, LSWidth16, "vamominei16.v">; + defm VAMOMINEI32 : VAMO<AMOOPVamoMin, LSWidth32, "vamominei32.v">; + + defm VAMOMAXEI8 : VAMO<AMOOPVamoMax, LSWidth8, "vamomaxei8.v">; + defm VAMOMAXEI16 : VAMO<AMOOPVamoMax, LSWidth16, "vamomaxei16.v">; + defm VAMOMAXEI32 : VAMO<AMOOPVamoMax, LSWidth32, "vamomaxei32.v">; + + defm VAMOMINUEI8 : VAMO<AMOOPVamoMinu, LSWidth8, "vamominuei8.v">; + defm VAMOMINUEI16 : VAMO<AMOOPVamoMinu, LSWidth16, "vamominuei16.v">; + defm VAMOMINUEI32 : VAMO<AMOOPVamoMinu, LSWidth32, "vamominuei32.v">; + + defm VAMOMAXUEI8 : VAMO<AMOOPVamoMaxu, LSWidth8, "vamomaxuei8.v">; + defm VAMOMAXUEI16 : VAMO<AMOOPVamoMaxu, LSWidth16, "vamomaxuei16.v">; + defm VAMOMAXUEI32 : VAMO<AMOOPVamoMaxu, LSWidth32, "vamomaxuei32.v">; +} // Predicates = [HasStdExtZvamo, HasStdExtA] + +let Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] in { + defm VAMOSWAPEI64 : VAMO<AMOOPVamoSwap, LSWidth64, "vamoswapei64.v">; + defm VAMOADDEI64 : VAMO<AMOOPVamoAdd, LSWidth64, "vamoaddei64.v">; + defm VAMOXOREI64 : VAMO<AMOOPVamoXor, LSWidth64, "vamoxorei64.v">; + defm VAMOANDEI64 : VAMO<AMOOPVamoAnd, LSWidth64, "vamoandei64.v">; + defm VAMOOREI64 : VAMO<AMOOPVamoOr, LSWidth64, "vamoorei64.v">; + defm VAMOMINEI64 : VAMO<AMOOPVamoMin, LSWidth64, "vamominei64.v">; + defm VAMOMAXEI64 : VAMO<AMOOPVamoMax, LSWidth64, "vamomaxei64.v">; + defm VAMOMINUEI64 : VAMO<AMOOPVamoMinu, LSWidth64, "vamominuei64.v">; + defm VAMOMAXUEI64 : VAMO<AMOOPVamoMaxu, LSWidth64, "vamomaxuei64.v">; +} // Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] + +include "RISCVInstrInfoVPseudos.td" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td new file mode 100644 index 000000000000..5c228820f0cc --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -0,0 +1,4416 @@ +//===-- RISCVInstrInfoVPseudos.td - RISC-V 'V' Pseudos -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file contains the required infrastructure to support code generation +/// for the standard 'V' (Vector) extension, version 0.10. This version is still +/// experimental as the 'V' extension hasn't been ratified yet. 
+/// +/// This file is included from RISCVInstrInfoV.td +/// +//===----------------------------------------------------------------------===// + +def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>, + SDTCisInt<1>]>>; +def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB", + SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>; + +def riscv_vleff : SDNode<"RISCVISD::VLEFF", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, + SDTCisVT<2, XLenVT>]>, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, + SDNPSideEffect]>; +def riscv_vleff_mask : SDNode<"RISCVISD::VLEFF_MASK", + SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisPtrTy<2>, + SDTCVecEltisVT<3, i1>, + SDTCisVT<4, XLenVT>]>, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, + SDNPSideEffect]>; +def riscv_read_vl : SDNode<"RISCVISD::READ_VL", + SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>, + [SDNPInGlue]>; + +// X0 has special meaning for vsetvl/vsetvli. +// rd | rs1 | AVL value | Effect on vl +//-------------------------------------------------------------- +// !X0 | X0 | VLMAX | Set vl to VLMAX +// X0 | X0 | Value in vl | Keep current vl, just change vtype. +def VLOp : ComplexPattern<XLenVT, 1, "selectVLOp">; + +def DecImm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue() - 1, SDLoc(N), + N->getValueType(0)); +}]>; + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +// This class describes information associated to the LMUL. +class LMULInfo<int lmul, VReg regclass, VReg wregclass, + VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> { + bits<3> value = lmul; // This is encoded as the vlmul field of vtype. + VReg vrclass = regclass; + VReg wvrclass = wregclass; + VReg f8vrclass = f8regclass; + VReg f4vrclass = f4regclass; + VReg f2vrclass = f2regclass; + string MX = mx; +} + +// Associate LMUL with tablegen records of register classes. +def V_M1 : LMULInfo<0b000, VR, VRM2, VR, VR, VR, "M1">; +def V_M2 : LMULInfo<0b001, VRM2, VRM4, VR, VR, VR, "M2">; +def V_M4 : LMULInfo<0b010, VRM4, VRM8, VRM2, VR, VR, "M4">; +def V_M8 : LMULInfo<0b011, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">; + +def V_MF8 : LMULInfo<0b101, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">; +def V_MF4 : LMULInfo<0b110, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">; +def V_MF2 : LMULInfo<0b111, VR, VR, VR, VR,/*NoVReg*/VR, "MF2">; + +// Used to iterate over all possible LMULs. 
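+// For example, V_M2 above encodes vlmul=0b001 and operates on the VRM2
+// register class, with VRM4 as its widened (2*LMUL) class; the fractional
+// LMULs (V_MF8, V_MF4, V_MF2) all use the plain VR class. The list below is
+// ordered from smallest to largest LMUL, so a slice such as MxList.m[0-5]
+// selects MF8..M4, i.e. every LMUL that still has a real widened class.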
+def MxList { + list<LMULInfo> m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8]; +} + +class FPR_Info<RegisterClass regclass, string fx> { + RegisterClass fprclass = regclass; + string FX = fx; +} + +def SCALAR_F16 : FPR_Info<FPR16, "F16">; +def SCALAR_F32 : FPR_Info<FPR32, "F32">; +def SCALAR_F64 : FPR_Info<FPR64, "F64">; + +def FPList { + list<FPR_Info> fpinfo = [SCALAR_F16, SCALAR_F32, SCALAR_F64]; +} + +class MxSet<int eew> { + list<LMULInfo> m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], + !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8], + !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8], + !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]); +} + +class NFSet<LMULInfo m> { + list<int> L = !cond(!eq(m.value, V_M8.value): [], + !eq(m.value, V_M4.value): [2], + !eq(m.value, V_M2.value): [2, 3, 4], + true: [2, 3, 4, 5, 6, 7, 8]); +} + +class shift_amount<int num> { + int val = !if(!eq(num, 1), 0, !add(1, shift_amount<!srl(num, 1)>.val)); +} + +class octuple_from_str<string MX> { + int ret = !cond(!eq(MX, "MF8") : 1, + !eq(MX, "MF4") : 2, + !eq(MX, "MF2") : 4, + !eq(MX, "M1") : 8, + !eq(MX, "M2") : 16, + !eq(MX, "M4") : 32, + !eq(MX, "M8") : 64); +} + +class octuple_to_str<int octuple> { + string ret = !if(!eq(octuple, 1), "MF8", + !if(!eq(octuple, 2), "MF4", + !if(!eq(octuple, 4), "MF2", + !if(!eq(octuple, 8), "M1", + !if(!eq(octuple, 16), "M2", + !if(!eq(octuple, 32), "M4", + !if(!eq(octuple, 64), "M8", + "NoDef"))))))); +} + +// Output pattern for X0 used to represent VLMAX in the pseudo instructions. +def VLMax : OutPatFrag<(ops), (XLenVT X0)>; + +// List of EEW. +defvar EEWList = [8, 16, 32, 64]; + +class SegRegClass<LMULInfo m, int nf> { + VReg RC = !cast<VReg>("VRN" # nf # !cond(!eq(m.value, V_MF8.value): V_M1.MX, + !eq(m.value, V_MF4.value): V_M1.MX, + !eq(m.value, V_MF2.value): V_M1.MX, + true: m.MX)); +} + +//===----------------------------------------------------------------------===// +// Vector register and vector group type information. +//===----------------------------------------------------------------------===// + +class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M, + ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> +{ + ValueType Vector = Vec; + ValueType Mask = Mas; + int SEW = Sew; + VReg RegClass = Reg; + LMULInfo LMul = M; + ValueType Scalar = Scal; + RegisterClass ScalarRegClass = ScalarReg; + // The pattern fragment which produces the AVL operand, representing the + // "natural" vector length for this type. For scalable vectors this is VLMax. 
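+  // (VLMax expands to the X0 register, which the vsetvl/vsetvli rules in
+  // the table above interpret as "set vl to VLMAX" when rd is not X0.)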
+ OutPatFrag AVL = VLMax; + + string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X", + !eq(Scal, f16) : "F16", + !eq(Scal, f32) : "F32", + !eq(Scal, f64) : "F64"); +} + +class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew, + VReg Reg, LMULInfo M, ValueType Scal = XLenVT, + RegisterClass ScalarReg = GPR> + : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg> +{ + ValueType VectorM1 = VecM1; +} + +defset list<VTypeInfo> AllVectors = { + defset list<VTypeInfo> AllIntegerVectors = { + defset list<VTypeInfo> NoGroupIntegerVectors = { + def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, VR, V_MF8>; + def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, VR, V_MF4>; + def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, VR, V_MF2>; + def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, VR, V_M1>; + def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, VR, V_MF4>; + def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, VR, V_MF2>; + def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, VR, V_M1>; + def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, VR, V_MF2>; + def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, VR, V_M1>; + def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, VR, V_M1>; + } + defset list<GroupVTypeInfo> GroupIntegerVectors = { + def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, VRM2, V_M2>; + def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, VRM4, V_M4>; + def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, VRM8, V_M8>; + + def VI16M2: GroupVTypeInfo<vint16m2_t,vint16m1_t,vbool8_t, 16,VRM2, V_M2>; + def VI16M4: GroupVTypeInfo<vint16m4_t,vint16m1_t,vbool4_t, 16,VRM4, V_M4>; + def VI16M8: GroupVTypeInfo<vint16m8_t,vint16m1_t,vbool2_t, 16,VRM8, V_M8>; + + def VI32M2: GroupVTypeInfo<vint32m2_t,vint32m1_t,vbool16_t,32,VRM2, V_M2>; + def VI32M4: GroupVTypeInfo<vint32m4_t,vint32m1_t,vbool8_t, 32,VRM4, V_M4>; + def VI32M8: GroupVTypeInfo<vint32m8_t,vint32m1_t,vbool4_t, 32,VRM8, V_M8>; + + def VI64M2: GroupVTypeInfo<vint64m2_t,vint64m1_t,vbool32_t,64,VRM2, V_M2>; + def VI64M4: GroupVTypeInfo<vint64m4_t,vint64m1_t,vbool16_t,64,VRM4, V_M4>; + def VI64M8: GroupVTypeInfo<vint64m8_t,vint64m1_t,vbool8_t, 64,VRM8, V_M8>; + } + } + + defset list<VTypeInfo> AllFloatVectors = { + defset list<VTypeInfo> NoGroupFloatVectors = { + def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, VR, V_MF4, f16, FPR16>; + def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, VR, V_MF2, f16, FPR16>; + def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, VR, V_M1, f16, FPR16>; + + def VF32MF2: VTypeInfo<vfloat32mf2_t,vbool64_t, 32, VR, V_MF2, f32, FPR32>; + def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, VR, V_M1, f32, FPR32>; + + def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, VR, V_M1, f64, FPR64>; + } + + defset list<GroupVTypeInfo> GroupFloatVectors = { + def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16, + VRM2, V_M2, f16, FPR16>; + def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16, + VRM4, V_M4, f16, FPR16>; + def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16, + VRM8, V_M8, f16, FPR16>; + + def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32, + VRM2, V_M2, f32, FPR32>; + def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t, 32, + VRM4, V_M4, f32, FPR32>; + def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t, 32, + VRM8, V_M8, f32, FPR32>; + + def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64, + VRM2, V_M2, f64, FPR64>; + def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, 
vbool16_t, 64, + VRM4, V_M4, f64, FPR64>; + def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t, 64, + VRM8, V_M8, f64, FPR64>; + } + } +} + +// This functor is used to obtain the int vector type that has the same SEW and +// multiplier as the input parameter type +class GetIntVTypeInfo<VTypeInfo vti> +{ + // Equivalent integer vector type. Eg. + // VI8M1 → VI8M1 (identity) + // VF64M4 → VI64M4 + VTypeInfo Vti = !cast<VTypeInfo>(!subst("VF", "VI", !cast<string>(vti))); +} + +class MTypeInfo<ValueType Mas, LMULInfo M, string Bx> { + ValueType Mask = Mas; + // {SEW, VLMul} values set a valid VType to deal with this mask type. + // we assume SEW=8 and set corresponding LMUL. + int SEW = 8; + LMULInfo LMul = M; + string BX = Bx; // Appendix of mask operations. + // The pattern fragment which produces the AVL operand, representing the + // "natural" vector length for this mask type. For scalable masks this is + // VLMax. + OutPatFrag AVL = VLMax; +} + +defset list<MTypeInfo> AllMasks = { + // vbool<n>_t, <n> = SEW/LMUL, we assume SEW=8 and corresponding LMUL. + def : MTypeInfo<vbool64_t, V_MF8, "B1">; + def : MTypeInfo<vbool32_t, V_MF4, "B2">; + def : MTypeInfo<vbool16_t, V_MF2, "B4">; + def : MTypeInfo<vbool8_t, V_M1, "B8">; + def : MTypeInfo<vbool4_t, V_M2, "B16">; + def : MTypeInfo<vbool2_t, V_M4, "B32">; + def : MTypeInfo<vbool1_t, V_M8, "B64">; +} + +class VTypeInfoToWide<VTypeInfo vti, VTypeInfo wti> +{ + VTypeInfo Vti = vti; + VTypeInfo Wti = wti; +} + +class VTypeInfoToFraction<VTypeInfo vti, VTypeInfo fti> +{ + VTypeInfo Vti = vti; + VTypeInfo Fti = fti; +} + +defset list<VTypeInfoToWide> AllWidenableIntVectors = { + def : VTypeInfoToWide<VI8MF8, VI16MF4>; + def : VTypeInfoToWide<VI8MF4, VI16MF2>; + def : VTypeInfoToWide<VI8MF2, VI16M1>; + def : VTypeInfoToWide<VI8M1, VI16M2>; + def : VTypeInfoToWide<VI8M2, VI16M4>; + def : VTypeInfoToWide<VI8M4, VI16M8>; + + def : VTypeInfoToWide<VI16MF4, VI32MF2>; + def : VTypeInfoToWide<VI16MF2, VI32M1>; + def : VTypeInfoToWide<VI16M1, VI32M2>; + def : VTypeInfoToWide<VI16M2, VI32M4>; + def : VTypeInfoToWide<VI16M4, VI32M8>; + + def : VTypeInfoToWide<VI32MF2, VI64M1>; + def : VTypeInfoToWide<VI32M1, VI64M2>; + def : VTypeInfoToWide<VI32M2, VI64M4>; + def : VTypeInfoToWide<VI32M4, VI64M8>; +} + +defset list<VTypeInfoToWide> AllWidenableFloatVectors = { + def : VTypeInfoToWide<VF16MF4, VF32MF2>; + def : VTypeInfoToWide<VF16MF2, VF32M1>; + def : VTypeInfoToWide<VF16M1, VF32M2>; + def : VTypeInfoToWide<VF16M2, VF32M4>; + def : VTypeInfoToWide<VF16M4, VF32M8>; + + def : VTypeInfoToWide<VF32MF2, VF64M1>; + def : VTypeInfoToWide<VF32M1, VF64M2>; + def : VTypeInfoToWide<VF32M2, VF64M4>; + def : VTypeInfoToWide<VF32M4, VF64M8>; +} + +defset list<VTypeInfoToFraction> AllFractionableVF2IntVectors = { + def : VTypeInfoToFraction<VI16MF4, VI8MF8>; + def : VTypeInfoToFraction<VI16MF2, VI8MF4>; + def : VTypeInfoToFraction<VI16M1, VI8MF2>; + def : VTypeInfoToFraction<VI16M2, VI8M1>; + def : VTypeInfoToFraction<VI16M4, VI8M2>; + def : VTypeInfoToFraction<VI16M8, VI8M4>; + def : VTypeInfoToFraction<VI32MF2, VI16MF4>; + def : VTypeInfoToFraction<VI32M1, VI16MF2>; + def : VTypeInfoToFraction<VI32M2, VI16M1>; + def : VTypeInfoToFraction<VI32M4, VI16M2>; + def : VTypeInfoToFraction<VI32M8, VI16M4>; + def : VTypeInfoToFraction<VI64M1, VI32MF2>; + def : VTypeInfoToFraction<VI64M2, VI32M1>; + def : VTypeInfoToFraction<VI64M4, VI32M2>; + def : VTypeInfoToFraction<VI64M8, VI32M4>; +} + +defset list<VTypeInfoToFraction> AllFractionableVF4IntVectors = { + def : 
VTypeInfoToFraction<VI32MF2, VI8MF8>; + def : VTypeInfoToFraction<VI32M1, VI8MF4>; + def : VTypeInfoToFraction<VI32M2, VI8MF2>; + def : VTypeInfoToFraction<VI32M4, VI8M1>; + def : VTypeInfoToFraction<VI32M8, VI8M2>; + def : VTypeInfoToFraction<VI64M1, VI16MF4>; + def : VTypeInfoToFraction<VI64M2, VI16MF2>; + def : VTypeInfoToFraction<VI64M4, VI16M1>; + def : VTypeInfoToFraction<VI64M8, VI16M2>; +} + +defset list<VTypeInfoToFraction> AllFractionableVF8IntVectors = { + def : VTypeInfoToFraction<VI64M1, VI8MF8>; + def : VTypeInfoToFraction<VI64M2, VI8MF4>; + def : VTypeInfoToFraction<VI64M4, VI8MF2>; + def : VTypeInfoToFraction<VI64M8, VI8M1>; +} + +defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = { + def : VTypeInfoToWide<VI8MF8, VF16MF4>; + def : VTypeInfoToWide<VI8MF4, VF16MF2>; + def : VTypeInfoToWide<VI8MF2, VF16M1>; + def : VTypeInfoToWide<VI8M1, VF16M2>; + def : VTypeInfoToWide<VI8M2, VF16M4>; + def : VTypeInfoToWide<VI8M4, VF16M8>; + + def : VTypeInfoToWide<VI16MF4, VF32MF2>; + def : VTypeInfoToWide<VI16MF2, VF32M1>; + def : VTypeInfoToWide<VI16M1, VF32M2>; + def : VTypeInfoToWide<VI16M2, VF32M4>; + def : VTypeInfoToWide<VI16M4, VF32M8>; + + def : VTypeInfoToWide<VI32MF2, VF64M1>; + def : VTypeInfoToWide<VI32M1, VF64M2>; + def : VTypeInfoToWide<VI32M2, VF64M4>; + def : VTypeInfoToWide<VI32M4, VF64M8>; +} + +// This class holds the record of the RISCVVPseudoTable below. +// This represents the information we need in codegen for each pseudo. +// The definition should be consistent with `struct PseudoInfo` in +// RISCVBaseInfo.h. +class CONST8b<bits<8> val> { + bits<8> V = val; +} +def InvalidIndex : CONST8b<0x80>; +class RISCVVPseudo { + Pseudo Pseudo = !cast<Pseudo>(NAME); // Used as a key. + Instruction BaseInstr; +} + +// The actual table. +def RISCVVPseudosTable : GenericTable { + let FilterClass = "RISCVVPseudo"; + let CppTypeName = "PseudoInfo"; + let Fields = [ "Pseudo", "BaseInstr" ]; + let PrimaryKey = [ "Pseudo" ]; + let PrimaryKeyName = "getPseudoInfo"; +} + +def RISCVVIntrinsicsTable : GenericTable { + let FilterClass = "RISCVVIntrinsic"; + let CppTypeName = "RISCVVIntrinsicInfo"; + let Fields = ["IntrinsicID", "ExtendOperand"]; + let PrimaryKey = ["IntrinsicID"]; + let PrimaryKeyName = "getRISCVVIntrinsicInfo"; +} + +class RISCVZvlsseg<string IntrName, bits<11> S, bits<3> L, bits<3> IL = V_M1.value> { + Intrinsic IntrinsicID = !cast<Intrinsic>(IntrName); + bits<11> SEW = S; + bits<3> LMUL = L; + bits<3> IndexLMUL = IL; + Pseudo Pseudo = !cast<Pseudo>(NAME); +} + +def RISCVZvlssegTable : GenericTable { + let FilterClass = "RISCVZvlsseg"; + let Fields = ["IntrinsicID", "SEW", "LMUL", "IndexLMUL", "Pseudo"]; + let PrimaryKey = ["IntrinsicID", "SEW", "LMUL", "IndexLMUL"]; + let PrimaryKeyName = "getPseudo"; +} + +//===----------------------------------------------------------------------===// +// Helpers to define the different pseudo instructions. 
+//===----------------------------------------------------------------------===// + +class PseudoToVInst<string PseudoInst> { + string VInst = !subst("_M8", "", + !subst("_M4", "", + !subst("_M2", "", + !subst("_M1", "", + !subst("_MF2", "", + !subst("_MF4", "", + !subst("_MF8", "", + !subst("_B1", "", + !subst("_B2", "", + !subst("_B4", "", + !subst("_B8", "", + !subst("_B16", "", + !subst("_B32", "", + !subst("_B64", "", + !subst("_MASK", "", + !subst("F16", "F", + !subst("F32", "F", + !subst("F64", "F", + !subst("Pseudo", "", PseudoInst))))))))))))))))))); +} + +class ToLowerCase<string Upper> { + string L = !subst("FF", "ff", + !subst("VLSEG", "vlseg", + !subst("VLSSEG", "vlsseg", + !subst("VSSEG", "vsseg", + !subst("VSSSEG", "vssseg", + !subst("VLOXSEG", "vloxseg", + !subst("VLUXSEG", "vluxseg", + !subst("VSOXSEG", "vsoxseg", + !subst("VSUXSEG", "vsuxseg", Upper))))))))); +} + +// Example: PseudoVLSEG2E32_V_M2 -> int_riscv_vlseg2 +// Example: PseudoVLSEG2E32_V_M2_MASK -> int_riscv_vlseg2_mask +class PseudoToIntrinsic<string PseudoInst, bit IsMasked> { + string Intrinsic = !strconcat("int_riscv_", + ToLowerCase< + !subst("E8", "", + !subst("E16", "", + !subst("E32", "", + !subst("E64", "", + !subst("EI8", "", + !subst("EI16", "", + !subst("EI32", "", + !subst("EI64", "", + !subst("_V", "", PseudoToVInst<PseudoInst>.VInst)))))))))>.L, + !if(IsMasked, "_mask", "")); +} + +// The destination vector register group for a masked vector instruction cannot +// overlap the source mask register (v0), unless the destination vector register +// is being written with a mask value (e.g., comparisons) or the scalar result +// of a reduction. +class GetVRegNoV0<VReg VRegClass> { + VReg R = !cond(!eq(VRegClass, VR) : VRNoV0, + !eq(VRegClass, VRM2) : VRM2NoV0, + !eq(VRegClass, VRM4) : VRM4NoV0, + !eq(VRegClass, VRM8) : VRM8NoV0, + !eq(1, 1) : VRegClass); +} + +// Join strings in list using separator and ignoring empty elements +class Join<list<string> strings, string separator> { + string ret = !foldl(!head(strings), !tail(strings), a, b, + !cond( + !and(!empty(a), !empty(b)) : "", + !empty(a) : b, + !empty(b) : a, + 1 : a#separator#b)); +} + +class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins> : + Pseudo<outs, ins, []>, RISCVVPseudo { + let BaseInstr = instr; + let VLMul = m.value; +} + +class VPseudoUSLoadNoMask<VReg RetClass>: + Pseudo<(outs RetClass:$rd), + (ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUSLoadMask<VReg RetClass>: + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + GPR:$rs1, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSLoadNoMask<VReg RetClass>: + Pseudo<(outs RetClass:$rd), + (ins GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = 
!cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSLoadMask<VReg RetClass>: + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + GPR:$rs1, GPR:$rs2, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass>: + Pseudo<(outs RetClass:$rd), + (ins GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoILoadMask<VReg RetClass, VReg IdxClass>: + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, + GPR:$rs1, IdxClass:$rs2, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUSStoreNoMask<VReg StClass>: + Pseudo<(outs), + (ins StClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUSStoreMask<VReg StClass>: + Pseudo<(outs), + (ins StClass:$rd, GPR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSStoreNoMask<VReg StClass>: + Pseudo<(outs), + (ins StClass:$rd, GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSStoreMask<VReg StClass>: + Pseudo<(outs), + (ins StClass:$rd, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +// Unary instruction that is never masked so HasDummyMask=0. 
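+// (For example, the spec defines vmv.v.v/vmv.v.x/vmv.v.i and vfmv.v.f as
+// unmasked-only, so their pseudos are expected to come from this class via
+// the *_NoDummyMask multiclasses further down.)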
+class VPseudoUnaryNoDummyMask<VReg RetClass, + DAGOperand Op2Class> : + Pseudo<(outs RetClass:$rd), + (ins Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoNullaryNoMask<VReg RegClass>: + Pseudo<(outs RegClass:$rd), + (ins GPR:$vl, ixlenimm:$sew), + []>, RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoNullaryMask<VReg RegClass>: + Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd), + (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, GPR:$vl, + ixlenimm:$sew), []>, RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints ="$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +// Nullary for pseudo instructions. They are expanded in +// RISCVExpandPseudoInsts pass. +class VPseudoNullaryPseudoM<string BaseInst> + : Pseudo<(outs VR:$rd), (ins GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + // BaseInstr is not used in RISCVExpandPseudoInsts pass. + // Just fill a corresponding real v-inst to pass tablegen check. + let BaseInstr = !cast<Instruction>(BaseInst); +} + +// RetClass could be GPR or VReg. 
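+// (GPR is used when the result is a scalar: VPseudoUnaryS_M below
+// instantiates this class as VPseudoUnaryNoMask<GPR, VR> for mask pseudos
+// that read a scalar out of a mask register, e.g. population count or
+// find-first-set.)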
+class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint = ""> : + Pseudo<(outs RetClass:$rd), + (ins OpClass:$rs2, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = Constraint; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> : + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +// mask unary operation without maskedoff +class VPseudoMaskUnarySOutMask: + Pseudo<(outs GPR:$rd), + (ins VR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +// Masked mask operation have no $rd=$merge constraints +class VPseudoUnaryMOutMask: + Pseudo<(outs VR:$rd), + (ins VR:$merge, VR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +// Mask can be V0~V31 +class VPseudoUnaryAnyMask<VReg RetClass, + VReg Op1Class> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, + Op1Class:$rs2, + VR:$vm, GPR:$vl, ixlenimm:$sew), + []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "@earlyclobber $rd, $rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoBinaryNoMask<VReg RetClass, + VReg Op1Class, + DAGOperand Op2Class, + string Constraint> : + Pseudo<(outs RetClass:$rd), + (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = Constraint; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass>: + Pseudo<(outs), + (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoIStoreMask<VReg StClass, VReg IdxClass>: + Pseudo<(outs), + (ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo { + let mayLoad 
= 0;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+  let usesCustomInserter = 1;
+  let Uses = [VL, VTYPE];
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryMask<VReg RetClass,
+                        VReg Op1Class,
+                        DAGOperand Op2Class,
+                        string Constraint> :
+        Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+               (ins GetVRegNoV0<RetClass>.R:$merge,
+                    Op1Class:$rs2, Op2Class:$rs1,
+                    VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+        RISCVVPseudo {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let usesCustomInserter = 1;
+  let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+  let Uses = [VL, VTYPE];
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasMergeOp = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryCarryIn<VReg RetClass,
+                           VReg Op1Class,
+                           DAGOperand Op2Class,
+                           LMULInfo MInfo,
+                           bit CarryIn,
+                           string Constraint> :
+        Pseudo<(outs RetClass:$rd),
+               !if(CarryIn,
+                   (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, GPR:$vl,
+                        ixlenimm:$sew),
+                   (ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew)), []>,
+        RISCVVPseudo {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let usesCustomInserter = 1;
+  let Constraints = Constraint;
+  let Uses = [VL, VTYPE];
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasMergeOp = 0;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+  let VLMul = MInfo.value;
+}
+
+class VPseudoTernaryNoMask<VReg RetClass,
+                           VReg Op1Class,
+                           DAGOperand Op2Class,
+                           string Constraint> :
+        Pseudo<(outs RetClass:$rd),
+               (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+                    GPR:$vl, ixlenimm:$sew),
+               []>,
+        RISCVVPseudo {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let usesCustomInserter = 1;
+  let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
+  let Uses = [VL, VTYPE];
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasMergeOp = 1;
+  let HasDummyMask = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoAMOWDNoMask<VReg RetClass,
+                         VReg Op1Class> :
+        Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
+               (ins GPR:$rs1,
+                    Op1Class:$vs2,
+                    GetVRegNoV0<RetClass>.R:$vd,
+                    GPR:$vl, ixlenimm:$sew), []>,
+        RISCVVPseudo {
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+  let usesCustomInserter = 1;
+  let Constraints = "$vd_wd = $vd";
+  let Uses = [VL, VTYPE];
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasDummyMask = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoAMOWDMask<VReg RetClass,
+                       VReg Op1Class> :
+        Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
+               (ins GPR:$rs1,
+                    Op1Class:$vs2,
+                    GetVRegNoV0<RetClass>.R:$vd,
+                    VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
+        RISCVVPseudo {
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+  let usesCustomInserter = 1;
+  let Constraints = "$vd_wd = $vd";
+  let Uses = [VL, VTYPE];
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+multiclass VPseudoAMOEI<int eew> {
+  // The standard scalar AMOs support 32, 64, and 128 bits of memory data,
+  // and in the base vector "V" extension only SEW up to ELEN = max(XLEN, FLEN)
+  // is required to be supported. Therefore only SEW in [32, 64] is handled
+  // here.
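+  // Worked example: eew=16, sew=32 and lmul=V_M1 give octuple_lmul=8, so
+  // octuple_emul = (16 * 8) >> shift_amount<32>.val = 128 >> 5 = 4, i.e.
+  // EMUL=MF2, and the generated pseudo carries the suffix _WD_M1_MF2.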
+ foreach sew = [32, 64] in { + foreach lmul = MxSet<sew>.m in { + defvar octuple_lmul = octuple_from_str<lmul.MX>.ret; + // Calculate emul = eew * lmul / sew + defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<sew>.val); + if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { + defvar emulMX = octuple_to_str<octuple_emul>.ret; + defvar lmulMX = octuple_to_str<octuple_lmul>.ret; + defvar emul= !cast<LMULInfo>("V_" # emulMX); + defvar lmul = !cast<LMULInfo>("V_" # lmulMX); + let VLMul = lmul.value in { + def "_WD_" # lmulMX # "_" # emulMX : VPseudoAMOWDNoMask<lmul.vrclass, emul.vrclass>; + def "_WD_" # lmulMX # "_" # emulMX # "_MASK" : VPseudoAMOWDMask<lmul.vrclass, emul.vrclass>; + } + } + } + } +} + +multiclass VPseudoAMO { + foreach eew = EEWList in + defm "EI" # eew : VPseudoAMOEI<eew>; +} + +class VPseudoUSSegLoadNoMask<VReg RetClass, bits<11> EEW>: + Pseudo<(outs RetClass:$rd), + (ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUSSegLoadMask<VReg RetClass, bits<11> EEW>: + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSSegLoadNoMask<VReg RetClass, bits<11> EEW>: + Pseudo<(outs RetClass:$rd), + (ins GPR:$rs1, GPR:$offset, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSSegLoadMask<VReg RetClass, bits<11> EEW>: + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1, + GPR:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "$rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>: + Pseudo<(outs RetClass:$rd), + (ins GPR:$rs1, IdxClass:$offset, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul, LMUL> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + // For vector indexed segment loads, the destination vector register groups + // cannot overlap the source vector register group + let Constraints = "@earlyclobber $rd"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + 
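+  // (HasDummyMask marks pseudos whose underlying instruction has a mask
+  // operand slot that the pseudo omits; lowering is expected to fill in the
+  // unmasked encoding, i.e. vm=1.)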
let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>: + Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), + (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1, + IdxClass:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul, LMUL> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + // For vector indexed segment loads, the destination vector register groups + // cannot overlap the source vector register group + let Constraints = "@earlyclobber $rd, $rd = $merge"; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUSSegStoreNoMask<VReg ValClass, bits<11> EEW>: + Pseudo<(outs), + (ins ValClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoUSSegStoreMask<VReg ValClass, bits<11> EEW>: + Pseudo<(outs), + (ins ValClass:$rd, GPR:$rs1, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSSegStoreNoMask<VReg ValClass, bits<11> EEW>: + Pseudo<(outs), + (ins ValClass:$rd, GPR:$rs1, GPR: $offset, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul> { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoSSegStoreMask<VReg ValClass, bits<11> EEW>: + Pseudo<(outs), + (ins ValClass:$rd, GPR:$rs1, GPR: $offset, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul> { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>: + Pseudo<(outs), + (ins ValClass:$rd, GPR:$rs1, IdxClass: $index, + GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVZvlsseg<PseudoToIntrinsic<NAME, false>.Intrinsic, EEW, VLMul, LMUL> { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, bits<11> EEW, bits<3> LMUL>: + Pseudo<(outs), + (ins ValClass:$rd, GPR:$rs1, IdxClass: $index, + VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + 
RISCVZvlsseg<PseudoToIntrinsic<NAME, true>.Intrinsic, EEW, VLMul, LMUL> { + let mayLoad = 0; + let mayStore = 1; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +multiclass VPseudoUSLoad { + foreach lmul = MxList.m in { + defvar LInfo = lmul.MX; + defvar vreg = lmul.vrclass; + let VLMul = lmul.value in { + def "_V_" # LInfo : VPseudoUSLoadNoMask<vreg>; + def "_V_" # LInfo # "_MASK" : VPseudoUSLoadMask<vreg>; + } + } +} + +multiclass VPseudoLoadMask { + foreach mti = AllMasks in { + let VLMul = mti.LMul.value in { + def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR>; + } + } +} + +multiclass VPseudoSLoad { + foreach lmul = MxList.m in { + defvar LInfo = lmul.MX; + defvar vreg = lmul.vrclass; + let VLMul = lmul.value in { + def "_V_" # LInfo : VPseudoSLoadNoMask<vreg>; + def "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg>; + } + } +} + +multiclass VPseudoILoad { + foreach lmul = MxList.m in + foreach idx_lmul = MxList.m in { + defvar LInfo = lmul.MX; + defvar Vreg = lmul.vrclass; + defvar IdxLInfo = idx_lmul.MX; + defvar IdxVreg = idx_lmul.vrclass; + let VLMul = lmul.value in { + def "_V_" # IdxLInfo # "_" # LInfo : VPseudoILoadNoMask<Vreg, IdxVreg>; + def "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : VPseudoILoadMask<Vreg, IdxVreg>; + } + } +} + +multiclass VPseudoUSStore { + foreach lmul = MxList.m in { + defvar LInfo = lmul.MX; + defvar vreg = lmul.vrclass; + let VLMul = lmul.value in { + def "_V_" # LInfo : VPseudoUSStoreNoMask<vreg>; + def "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg>; + } + } +} + +multiclass VPseudoStoreMask { + foreach mti = AllMasks in { + let VLMul = mti.LMul.value in { + def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR>; + } + } +} + +multiclass VPseudoSStore { + foreach lmul = MxList.m in { + defvar LInfo = lmul.MX; + defvar vreg = lmul.vrclass; + let VLMul = lmul.value in { + def "_V_" # LInfo : VPseudoSStoreNoMask<vreg>; + def "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg>; + } + } +} + +multiclass VPseudoIStore { + foreach lmul = MxList.m in + foreach idx_lmul = MxList.m in { + defvar LInfo = lmul.MX; + defvar Vreg = lmul.vrclass; + defvar IdxLInfo = idx_lmul.MX; + defvar IdxVreg = idx_lmul.vrclass; + let VLMul = lmul.value in { + def "_V_" # IdxLInfo # "_" # LInfo : VPseudoIStoreNoMask<Vreg, IdxVreg>; + def "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : VPseudoIStoreMask<Vreg, IdxVreg>; + } + } +} + +multiclass VPseudoUnaryS_M { + foreach mti = AllMasks in + { + let VLMul = mti.LMul.value in { + def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>; + def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask; + } + } +} + +multiclass VPseudoUnaryM_M { + defvar constraint = "@earlyclobber $rd"; + foreach mti = AllMasks in + { + let VLMul = mti.LMul.value in { + def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>; + def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>; + } + } +} + +multiclass VPseudoMaskNullaryV { + foreach m = MxList.m in { + let VLMul = m.value in { + def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>; + def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>; + } + } +} + +multiclass VPseudoNullaryPseudoM <string BaseInst> { + foreach mti = AllMasks in { + let VLMul = mti.LMul.value in { + def "_M_" # mti.BX : VPseudoNullaryPseudoM<BaseInst # "_MM">; + } + } +} + +multiclass VPseudoUnaryV_M { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m in { + let VLMul = m.value in { 
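+    // E.g. for m = V_M2 this produces <prefix>_M2 and <prefix>_M2_MASK,
+    // both marked @earlyclobber so the result is never allocated on top of
+    // the mask source.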
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>; + def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>; + } + } +} + +multiclass VPseudoUnaryV_V_AnyMask { + foreach m = MxList.m in { + let VLMul = m.value in + def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>; + } +} + +multiclass VPseudoBinary<VReg RetClass, + VReg Op1Class, + DAGOperand Op2Class, + LMULInfo MInfo, + string Constraint = ""> { + let VLMul = MInfo.value in { + def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class, + Constraint>; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, + Constraint>; + } +} + +multiclass VPseudoBinaryEmul<VReg RetClass, + VReg Op1Class, + DAGOperand Op2Class, + LMULInfo lmul, + LMULInfo emul, + string Constraint = ""> { + let VLMul = lmul.value in { + def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class, + Constraint>; + def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, + Constraint>; + } +} + +multiclass VPseudoBinaryV_VV<string Constraint = ""> { + foreach m = MxList.m in + defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>; +} + +multiclass VPseudoBinaryV_VV_EEW<int eew, string Constraint = ""> { + foreach m = MxList.m in { + foreach sew = EEWList in { + defvar octuple_lmul = octuple_from_str<m.MX>.ret; + // emul = lmul * eew / sew + defvar octuple_emul = !srl(!mul(octuple_lmul, eew), shift_amount<sew>.val); + if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { + defvar emulMX = octuple_to_str<octuple_emul>.ret; + defvar emul = !cast<LMULInfo>("V_" # emulMX); + defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>; + } + } + } +} + +multiclass VPseudoBinaryV_VX<string Constraint = ""> { + foreach m = MxList.m in + defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>; +} + +multiclass VPseudoBinaryV_VF<string Constraint = ""> { + foreach m = MxList.m in + foreach f = FPList.fpinfo in + defm "_V" # f.FX : VPseudoBinary<m.vrclass, m.vrclass, + f.fprclass, m, Constraint>; +} + +multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, string Constraint = ""> { + foreach m = MxList.m in + defm _VI : VPseudoBinary<m.vrclass, m.vrclass, ImmType, m, Constraint>; +} + +multiclass VPseudoBinaryM_MM { + foreach m = MxList.m in + let VLMul = m.value in { + def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">; + } +} + +// We use earlyclobber here due to +// * The destination EEW is smaller than the source EEW and the overlap is +// in the lowest-numbered part of the source register group is legal. +// Otherwise, it is illegal. +// * The destination EEW is greater than the source EEW, the source EMUL is +// at least 1, and the overlap is in the highest-numbered part of the +// destination register group is legal. Otherwise, it is illegal. 
+multiclass VPseudoBinaryW_VV { + foreach m = MxList.m[0-5] in + defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryW_VX { + foreach m = MxList.m[0-5] in + defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryW_VF { + foreach m = MxList.m[0-5] in + foreach f = FPList.fpinfo[0-1] in + defm "_V" # f.FX : VPseudoBinary<m.wvrclass, m.vrclass, + f.fprclass, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryW_WV { + foreach m = MxList.m[0-5] in + defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryW_WX { + foreach m = MxList.m[0-5] in + defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryW_WF { + foreach m = MxList.m[0-5] in + foreach f = FPList.fpinfo[0-1] in + defm "_W" # f.FX : VPseudoBinary<m.wvrclass, m.wvrclass, + f.fprclass, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryV_WV { + foreach m = MxList.m[0-5] in + defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryV_WX { + foreach m = MxList.m[0-5] in + defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m, + "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryV_WI { + foreach m = MxList.m[0-5] in + defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m, + "@earlyclobber $rd">; +} + +// For vadc and vsbc, the instruction encoding is reserved if the destination +// vector register is v0. +// For vadc and vsbc, CarryIn == 1 and CarryOut == 0 +multiclass VPseudoBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1, + string Constraint = ""> { + foreach m = MxList.m in + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn<!if(CarryOut, VR, + !if(!and(CarryIn, !not(CarryOut)), + GetVRegNoV0<m.vrclass>.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; +} + +multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1, + string Constraint = ""> { + foreach m = MxList.m in + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn<!if(CarryOut, VR, + !if(!and(CarryIn, !not(CarryOut)), + GetVRegNoV0<m.vrclass>.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; +} + +multiclass VPseudoBinaryV_FM { + foreach m = MxList.m in + foreach f = FPList.fpinfo in + def "_V" # f.FX # "M_" # m.MX : + VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, + m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">; +} + +multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1, + string Constraint = ""> { + foreach m = MxList.m in + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn<!if(CarryOut, VR, + !if(!and(CarryIn, !not(CarryOut)), + GetVRegNoV0<m.vrclass>.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; +} + +multiclass VPseudoUnaryV_V_X_I_NoDummyMask { + foreach m = MxList.m in { + let VLMul = m.value in { + def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>; + def "_X_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, GPR>; + def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>; + } + } +} + +multiclass VPseudoUnaryV_F_NoDummyMask { + foreach m = MxList.m in { + foreach f = FPList.fpinfo in { + let VLMul = m.value in { + def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>; + } + } + } +} + +multiclass VPseudoUnaryV_V { + foreach m = MxList.m in { + let VLMul = m.value in { + def "_V_" # m.MX : 
VPseudoUnaryNoMask<m.vrclass, m.vrclass>; + def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>; + } + } +} + +multiclass PseudoUnaryV_VF2 { + defvar constraints = "@earlyclobber $rd"; + foreach m = MxList.m[1-6] in + { + let VLMul = m.value in { + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>; + def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f2vrclass, + constraints>; + } + } +} + +multiclass PseudoUnaryV_VF4 { + defvar constraints = "@earlyclobber $rd"; + foreach m = MxList.m[2-6] in + { + let VLMul = m.value in { + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>; + def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f4vrclass, + constraints>; + } + } +} + +multiclass PseudoUnaryV_VF8 { + defvar constraints = "@earlyclobber $rd"; + foreach m = MxList.m[3-6] in + { + let VLMul = m.value in { + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>; + def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f8vrclass, + constraints>; + } + } +} + +// The destination EEW is 1. +// The source EEW is 8, 16, 32, or 64. +// When the destination EEW is different from source EEW, we need to use +// @earlyclobber to avoid the overlap between destination and source registers. +multiclass VPseudoBinaryM_VV { + foreach m = MxList.m in + defm _VV : VPseudoBinary<VR, m.vrclass, m.vrclass, m, "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryM_VX { + foreach m = MxList.m in + defm "_VX" : + VPseudoBinary<VR, m.vrclass, GPR, m, "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryM_VF { + foreach m = MxList.m in + foreach f = FPList.fpinfo in + defm "_V" # f.FX : + VPseudoBinary<VR, m.vrclass, f.fprclass, m, "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryM_VI { + foreach m = MxList.m in + defm _VI : VPseudoBinary<VR, m.vrclass, simm5, m, "@earlyclobber $rd">; +} + +multiclass VPseudoBinaryV_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoBinaryV_VV<Constraint>; + defm "" : VPseudoBinaryV_VX<Constraint>; + defm "" : VPseudoBinaryV_VI<ImmType, Constraint>; +} + +multiclass VPseudoBinaryV_VV_VX { + defm "" : VPseudoBinaryV_VV; + defm "" : VPseudoBinaryV_VX; +} + +multiclass VPseudoBinaryV_VV_VF { + defm "" : VPseudoBinaryV_VV; + defm "" : VPseudoBinaryV_VF; +} + +multiclass VPseudoBinaryV_VX_VI<Operand ImmType = simm5> { + defm "" : VPseudoBinaryV_VX; + defm "" : VPseudoBinaryV_VI<ImmType>; +} + +multiclass VPseudoBinaryW_VV_VX { + defm "" : VPseudoBinaryW_VV; + defm "" : VPseudoBinaryW_VX; +} + +multiclass VPseudoBinaryW_VV_VF { + defm "" : VPseudoBinaryW_VV; + defm "" : VPseudoBinaryW_VF; +} + +multiclass VPseudoBinaryW_WV_WX { + defm "" : VPseudoBinaryW_WV; + defm "" : VPseudoBinaryW_WX; +} + +multiclass VPseudoBinaryW_WV_WF { + defm "" : VPseudoBinaryW_WV; + defm "" : VPseudoBinaryW_WF; +} + +multiclass VPseudoBinaryV_VM_XM_IM { + defm "" : VPseudoBinaryV_VM; + defm "" : VPseudoBinaryV_XM; + defm "" : VPseudoBinaryV_IM; +} + +multiclass VPseudoBinaryV_VM_XM { + defm "" : VPseudoBinaryV_VM; + defm "" : VPseudoBinaryV_XM; +} + +multiclass VPseudoBinaryM_VM_XM_IM<string Constraint> { + defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>; + defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>; + defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>; +} + +multiclass VPseudoBinaryM_VM_XM<string Constraint> { + defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>; + defm "" : 
VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>; +} + +multiclass VPseudoBinaryM_V_X_I<string Constraint> { + defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>; + defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>; + defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>; +} + +multiclass VPseudoBinaryM_V_X<string Constraint> { + defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>; + defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>; +} + +multiclass VPseudoBinaryV_WV_WX_WI { + defm "" : VPseudoBinaryV_WV; + defm "" : VPseudoBinaryV_WX; + defm "" : VPseudoBinaryV_WI; +} + +multiclass VPseudoTernary<VReg RetClass, + VReg Op1Class, + RegisterClass Op2Class, + LMULInfo MInfo, + string Constraint = ""> { + let VLMul = MInfo.value in { + def "_" # MInfo.MX : VPseudoTernaryNoMask<RetClass, Op1Class, Op2Class, Constraint>; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, Constraint>; + } +} + +multiclass VPseudoTernaryV_VV<string Constraint = ""> { + foreach m = MxList.m in + defm _VV : VPseudoTernary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>; +} + +multiclass VPseudoTernaryV_VX<string Constraint = ""> { + foreach m = MxList.m in + defm _VX : VPseudoTernary<m.vrclass, m.vrclass, GPR, m, Constraint>; +} + +multiclass VPseudoTernaryV_VX_AAXA<string Constraint = ""> { + foreach m = MxList.m in + defm "_VX" : VPseudoTernary<m.vrclass, GPR, m.vrclass, m, Constraint>; +} + +multiclass VPseudoTernaryV_VF_AAXA<string Constraint = ""> { + foreach m = MxList.m in + foreach f = FPList.fpinfo in + defm "_V" # f.FX : VPseudoTernary<m.vrclass, f.fprclass, m.vrclass, + m, Constraint>; +} + +multiclass VPseudoTernaryW_VV { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m[0-5] in + defm _VV : VPseudoTernary<m.wvrclass, m.vrclass, m.vrclass, m, constraint>; +} + +multiclass VPseudoTernaryW_VX { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m[0-5] in + defm "_VX" : VPseudoTernary<m.wvrclass, GPR, m.vrclass, m, constraint>; +} + +multiclass VPseudoTernaryW_VF { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m[0-5] in + foreach f = FPList.fpinfo[0-1] in + defm "_V" # f.FX : VPseudoTernary<m.wvrclass, f.fprclass, m.vrclass, m, + constraint>; +} + +multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> { + foreach m = MxList.m in + defm _VI : VPseudoTernary<m.vrclass, m.vrclass, ImmType, m, Constraint>; +} + +multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> { + defm "" : VPseudoTernaryV_VV<Constraint>; + defm "" : VPseudoTernaryV_VX_AAXA<Constraint>; +} + +multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> { + defm "" : VPseudoTernaryV_VV<Constraint>; + defm "" : VPseudoTernaryV_VF_AAXA<Constraint>; +} + +multiclass VPseudoTernaryV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoTernaryV_VX<Constraint>; + defm "" : VPseudoTernaryV_VI<ImmType, Constraint>; +} + +multiclass VPseudoTernaryW_VV_VX { + defm "" : VPseudoTernaryW_VV; + defm "" : VPseudoTernaryW_VX; +} + +multiclass VPseudoTernaryW_VV_VF { + defm "" : VPseudoTernaryW_VV; + defm "" : VPseudoTernaryW_VF; +} + +multiclass VPseudoBinaryM_VV_VX_VI { + defm "" : VPseudoBinaryM_VV; + defm "" : VPseudoBinaryM_VX; + defm "" : VPseudoBinaryM_VI; +} + +multiclass VPseudoBinaryM_VV_VX { + defm "" : VPseudoBinaryM_VV; + defm "" : VPseudoBinaryM_VX; +} + +multiclass VPseudoBinaryM_VV_VF { + defm "" : 
VPseudoBinaryM_VV; + defm "" : VPseudoBinaryM_VF; +} + +multiclass VPseudoBinaryM_VX_VI { + defm "" : VPseudoBinaryM_VX; + defm "" : VPseudoBinaryM_VI; +} + +multiclass VPseudoReductionV_VS { + foreach m = MxList.m in { + let WritesElement0 = 1 in + defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>; + } +} + +multiclass VPseudoConversion<VReg RetClass, + VReg Op1Class, + LMULInfo MInfo, + string Constraint = ""> { + let VLMul = MInfo.value in { + def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>; + def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class, + Constraint>; + } +} + +multiclass VPseudoConversionV_V { + foreach m = MxList.m in + defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>; +} + +multiclass VPseudoConversionW_V { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m[0-5] in + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>; +} + +multiclass VPseudoConversionV_W { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxList.m[0-5] in + defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>; +} + +multiclass VPseudoUSSegLoad<bit isFF> { + foreach eew = EEWList in { + foreach lmul = MxSet<eew>.m in { + defvar LInfo = lmul.MX; + let VLMul = lmul.value in { + foreach nf = NFSet<lmul>.L in { + defvar vreg = SegRegClass<lmul, nf>.RC; + defvar FFStr = !if(isFF, "FF", ""); + def nf # "E" # eew # FFStr # "_V_" # LInfo : + VPseudoUSSegLoadNoMask<vreg, eew>; + def nf # "E" # eew # FFStr # "_V_" # LInfo # "_MASK" : + VPseudoUSSegLoadMask<vreg, eew>; + } + } + } + } +} + +multiclass VPseudoSSegLoad { + foreach eew = EEWList in { + foreach lmul = MxSet<eew>.m in { + defvar LInfo = lmul.MX; + let VLMul = lmul.value in { + foreach nf = NFSet<lmul>.L in { + defvar vreg = SegRegClass<lmul, nf>.RC; + def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask<vreg, eew>; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask<vreg, eew>; + } + } + } + } +} + +multiclass VPseudoISegLoad { + foreach idx_eew = EEWList in { // EEW for index argument. + foreach idx_lmul = MxSet<idx_eew>.m in { // LMUL for index argument. + foreach val_lmul = MxList.m in { // LMUL for the value. 
+ defvar IdxLInfo = idx_lmul.MX; + defvar IdxVreg = idx_lmul.vrclass; + defvar ValLInfo = val_lmul.MX; + let VLMul = val_lmul.value in { + foreach nf = NFSet<val_lmul>.L in { + defvar ValVreg = SegRegClass<val_lmul, nf>.RC; + def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo : + VPseudoISegLoadNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>; + def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" : + VPseudoISegLoadMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>; + } + } + } + } + } +} + +multiclass VPseudoUSSegStore { + foreach eew = EEWList in { + foreach lmul = MxSet<eew>.m in { + defvar LInfo = lmul.MX; + let VLMul = lmul.value in { + foreach nf = NFSet<lmul>.L in { + defvar vreg = SegRegClass<lmul, nf>.RC; + def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask<vreg, eew>; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegStoreMask<vreg, eew>; + } + } + } + } +} + +multiclass VPseudoSSegStore { + foreach eew = EEWList in { + foreach lmul = MxSet<eew>.m in { + defvar LInfo = lmul.MX; + let VLMul = lmul.value in { + foreach nf = NFSet<lmul>.L in { + defvar vreg = SegRegClass<lmul, nf>.RC; + def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask<vreg, eew>; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegStoreMask<vreg, eew>; + } + } + } + } +} + +multiclass VPseudoISegStore { + foreach idx_eew = EEWList in { // EEW for index argument. + foreach idx_lmul = MxSet<idx_eew>.m in { // LMUL for index argument. + foreach val_lmul = MxList.m in { // LMUL for the value. + defvar IdxLInfo = idx_lmul.MX; + defvar IdxVreg = idx_lmul.vrclass; + defvar ValLInfo = val_lmul.MX; + let VLMul = val_lmul.value in { + foreach nf = NFSet<val_lmul>.L in { + defvar ValVreg = SegRegClass<val_lmul, nf>.RC; + def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo : + VPseudoISegStoreNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>; + def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" : + VPseudoISegStoreMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value>; + } + } + } + } + } +} + +//===----------------------------------------------------------------------===// +// Helpers to define the intrinsic patterns. 
+//===----------------------------------------------------------------------===// + +class VPatUnaryNoMask<string intrinsic_name, + string inst, + string kind, + ValueType result_type, + ValueType op2_type, + int sew, + LMULInfo vlmul, + VReg op2_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (op2_type op2_reg_class:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) + (op2_type op2_reg_class:$rs2), + GPR:$vl, sew)>; + +class VPatUnaryMask<string intrinsic_name, + string inst, + string kind, + ValueType result_type, + ValueType op2_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + VReg op2_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask") + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + (mask_type V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK") + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + (mask_type V0), GPR:$vl, sew)>; + +class VPatMaskUnaryNoMask<string intrinsic_name, + string inst, + MTypeInfo mti> : + Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name) + (mti.Mask VR:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_M_"#mti.BX) + (mti.Mask VR:$rs2), + GPR:$vl, mti.SEW)>; + +class VPatMaskUnaryMask<string intrinsic_name, + string inst, + MTypeInfo mti> : + Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name#"_mask") + (mti.Mask VR:$merge), + (mti.Mask VR:$rs2), + (mti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") + (mti.Mask VR:$merge), + (mti.Mask VR:$rs2), + (mti.Mask V0), GPR:$vl, mti.SEW)>; + +class VPatUnaryAnyMask<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + VReg op1_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic) + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (mask_type VR:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (mask_type VR:$rs2), + GPR:$vl, sew)>; + +class VPatBinaryNoMask<string intrinsic_name, + string inst, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + int sew, + VReg op1_reg_class, + DAGOperand op2_kind> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + +class VPatBinaryMask<string intrinsic_name, + string inst, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + ValueType mask_type, + int sew, + VReg result_reg_class, + VReg op1_reg_class, + DAGOperand op2_kind> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_MASK") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), GPR:$vl, sew)>; + +class VPatTernaryNoMask<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + RegisterClass 
op1_reg_class, + DAGOperand op2_kind> : + Pat<(result_type (!cast<Intrinsic>(intrinsic) + (result_type result_reg_class:$rs3), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) + result_reg_class:$rs3, + (op1_type op1_reg_class:$rs1), + op2_kind:$rs2, + GPR:$vl, sew)>; + +class VPatTernaryMask<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + RegisterClass op1_reg_class, + DAGOperand op2_kind> : + Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask") + (result_type result_reg_class:$rs3), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK") + result_reg_class:$rs3, + (op1_type op1_reg_class:$rs1), + op2_kind:$rs2, + (mask_type V0), + GPR:$vl, sew)>; + +class VPatAMOWDNoMask<string intrinsic_name, + string inst, + ValueType result_type, + ValueType op1_type, + int sew, + LMULInfo vlmul, + LMULInfo emul, + VReg op1_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + GPR:$rs1, + (op1_type op1_reg_class:$vs2), + (result_type vlmul.vrclass:$vd), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX) + $rs1, $vs2, $vd, + GPR:$vl, sew)>; + +class VPatAMOWDMask<string intrinsic_name, + string inst, + ValueType result_type, + ValueType op1_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + LMULInfo emul, + VReg op1_reg_class> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name # "_mask") + GPR:$rs1, + (op1_type op1_reg_class:$vs2), + (result_type vlmul.vrclass:$vd), + (mask_type V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK") + $rs1, $vs2, $vd, + (mask_type V0), GPR:$vl, sew)>; + +multiclass VPatUSLoad<string intrinsic, + string inst, + LLVMType type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + VReg reg_class> +{ + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); + def : Pat<(type (Intr GPR:$rs1, (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, GPR:$vl, sew)>; + defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); + def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge), + GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))), + (PseudoMask $merge, + $rs1, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatUSLoadFF<string inst, + LLVMType type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + VReg reg_class> +{ + defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); + def : Pat<(type (riscv_vleff GPR:$rs1, (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, GPR:$vl, sew)>; + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); + def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge), + GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))), + (PseudoMask $merge, + $rs1, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatSLoad<string intrinsic, + string inst, + LLVMType type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + VReg reg_class> +{ + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); + def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, $rs2, GPR:$vl, sew)>; + defvar 
IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); + def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge), + GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl)))), + (PseudoMask $merge, + $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatILoad<string intrinsic, + string inst, + LLVMType type, + LLVMType idx_type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + LMULInfo idx_vlmul, + VReg reg_class, + VReg idx_reg_class> +{ + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX); + def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, $rs2, GPR:$vl, sew)>; + + defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK"); + def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge), + GPR:$rs1, (idx_type idx_reg_class:$rs2), + (mask_type V0), (XLenVT (VLOp GPR:$vl)))), + (PseudoMask $merge, + $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatUSStore<string intrinsic, + string inst, + LLVMType type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + VReg reg_class> +{ + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); + def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))), + (Pseudo $rs3, $rs1, GPR:$vl, sew)>; + defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); + def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl))), + (PseudoMask $rs3, $rs1, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatSStore<string intrinsic, + string inst, + LLVMType type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + VReg reg_class> +{ + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); + def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl))), + (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>; + defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); + def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl))), + (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatIStore<string intrinsic, + string inst, + LLVMType type, + LLVMType idx_type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + LMULInfo idx_vlmul, + VReg reg_class, + VReg idx_reg_class> +{ + defvar Intr = !cast<Intrinsic>(intrinsic); + defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX); + def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, + (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl))), + (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>; + defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); + defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK"); + def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, + (idx_type idx_reg_class:$rs2), (mask_type V0), (XLenVT (VLOp GPR:$vl))), + (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatUnaryS_M<string intrinsic_name, + string inst> +{ + foreach mti = AllMasks in { + def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name) + (mti.Mask VR:$rs1), (XLenVT (VLOp GPR:$vl)))), + 
(!cast<Instruction>(inst#"_M_"#mti.BX) $rs1, + GPR:$vl, mti.SEW)>; + def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name # "_mask") + (mti.Mask VR:$rs1), (mti.Mask V0), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") $rs1, + (mti.Mask V0), GPR:$vl, mti.SEW)>; + } +} + +multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + def : VPatUnaryAnyMask<intrinsic, instruction, "VM", + vti.Vector, vti.Vector, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, + vti.RegClass>; + } +} + +multiclass VPatUnaryM_M<string intrinsic, + string inst> +{ + foreach mti = AllMasks in { + def : VPatMaskUnaryNoMask<intrinsic, inst, mti>; + def : VPatMaskUnaryMask<intrinsic, inst, mti>; + } +} + +multiclass VPatUnaryV_M<string intrinsic, string instruction> +{ + foreach vti = AllIntegerVectors in { + def : VPatUnaryNoMask<intrinsic, instruction, "M", vti.Vector, vti.Mask, + vti.SEW, vti.LMul, VR>; + def : VPatUnaryMask<intrinsic, instruction, "M", vti.Vector, vti.Mask, + vti.Mask, vti.SEW, vti.LMul, vti.RegClass, VR>; + } +} + +multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix, + list<VTypeInfoToFraction> fractionList> +{ + foreach vtiTofti = fractionList in + { + defvar vti = vtiTofti.Vti; + defvar fti = vtiTofti.Fti; + def : VPatUnaryNoMask<intrinsic, instruction, suffix, + vti.Vector, fti.Vector, + vti.SEW, vti.LMul, fti.RegClass>; + def : VPatUnaryMask<intrinsic, instruction, suffix, + vti.Vector, fti.Vector, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, fti.RegClass>; + } +} + +multiclass VPatUnaryV_V<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + def : VPatUnaryNoMask<intrinsic, instruction, "V", + vti.Vector, vti.Vector, + vti.SEW, vti.LMul, vti.RegClass>; + def : VPatUnaryMask<intrinsic, instruction, "V", + vti.Vector, vti.Vector, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>; + } +} + +multiclass VPatNullaryV<string intrinsic, string instruction> +{ + foreach vti = AllIntegerVectors in { + def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic) + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX) + GPR:$vl, vti.SEW)>; + def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask") + (vti.Vector vti.RegClass:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK") + vti.RegClass:$merge, (vti.Mask V0), + GPR:$vl, vti.SEW)>; + } +} + +multiclass VPatNullaryM<string intrinsic, string inst> { + foreach mti = AllMasks in + def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic) + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_M_"#mti.BX) + GPR:$vl, mti.SEW)>; +} + +multiclass VPatBinary<string intrinsic, + string inst, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + ValueType mask_type, + int sew, + VReg result_reg_class, + VReg op1_reg_class, + DAGOperand op2_kind> +{ + def : VPatBinaryNoMask<intrinsic, inst, result_type, op1_type, op2_type, + sew, op1_reg_class, op2_kind>; + def : VPatBinaryMask<intrinsic, inst, result_type, op1_type, op2_type, + mask_type, sew, result_reg_class, op1_reg_class, + op2_kind>; +} + +multiclass VPatBinaryCarryIn<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg op1_reg_class, + DAGOperand op2_kind> +{ + def : Pat<(result_type (!cast<Intrinsic>(intrinsic) + 
(op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), GPR:$vl, sew)>; +} + +multiclass VPatBinaryMaskOut<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + int sew, + LMULInfo vlmul, + VReg op1_reg_class, + DAGOperand op2_kind> +{ + def : Pat<(result_type (!cast<Intrinsic>(intrinsic) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; +} + +multiclass VPatConversion<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + VReg op1_reg_class> +{ + def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type, + sew, vlmul, op1_reg_class>; + def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type, + mask_type, sew, vlmul, result_reg_class, op1_reg_class>; +} + +multiclass VPatBinaryV_VV<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX, + vti.Vector, vti.Vector, vti.Vector,vti.Mask, + vti.SEW, vti.RegClass, + vti.RegClass, vti.RegClass>; +} + +multiclass VPatBinaryV_VV_INT<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + defvar ivti = GetIntVTypeInfo<vti>.Vti; + defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX, + vti.Vector, vti.Vector, ivti.Vector, vti.Mask, + vti.SEW, vti.RegClass, + vti.RegClass, vti.RegClass>; + } +} + +multiclass VPatBinaryV_VV_INT_EEW<string intrinsic, string instruction, + int eew, list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + // emul = lmul * eew / sew + defvar vlmul = vti.LMul; + defvar octuple_lmul = octuple_from_str<vlmul.MX>.ret; + defvar octuple_emul = !srl(!mul(octuple_lmul, eew), shift_amount<vti.SEW>.val); + if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { + defvar emul_str = octuple_to_str<octuple_emul>.ret; + defvar ivti = !cast<VTypeInfo>("VI" # eew # emul_str); + defvar inst = instruction # "_VV_" # vti.LMul.MX # "_" # emul_str; + defm : VPatBinary<intrinsic, inst, + vti.Vector, vti.Vector, ivti.Vector, vti.Mask, + vti.SEW, vti.RegClass, + vti.RegClass, ivti.RegClass>; + } + } +} + +multiclass VPatBinaryV_VX<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + defvar kind = "V"#vti.ScalarSuffix; + defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX, + vti.Vector, vti.Vector, vti.Scalar, vti.Mask, + vti.SEW, vti.RegClass, + vti.RegClass, vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryV_VX_INT<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatBinary<intrinsic, instruction # "_VX_" # vti.LMul.MX, + vti.Vector, vti.Vector, XLenVT, vti.Mask, + vti.SEW, vti.RegClass, + vti.RegClass, GPR>; +} + +multiclass VPatBinaryV_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand imm_type> { + foreach vti = vtilist in + defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX, + vti.Vector, vti.Vector, XLenVT, vti.Mask, + vti.SEW, vti.RegClass, + vti.RegClass, imm_type>; +} + +multiclass 
VPatBinaryM_MM<string intrinsic, string instruction> { + foreach mti = AllMasks in + def : VPatBinaryNoMask<intrinsic, instruction # "_MM_" # mti.LMul.MX, + mti.Mask, mti.Mask, mti.Mask, + mti.SEW, VR, VR>; +} + +multiclass VPatBinaryW_VV<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defm : VPatBinary<intrinsic, instruction # "_VV_" # Vti.LMul.MX, + Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask, + Vti.SEW, Wti.RegClass, + Vti.RegClass, Vti.RegClass>; + } +} + +multiclass VPatBinaryW_VX<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defvar kind = "V"#Vti.ScalarSuffix; + defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX, + Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask, + Vti.SEW, Wti.RegClass, + Vti.RegClass, Vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryW_WV<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX, + Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask, + Vti.SEW, Wti.RegClass, + Wti.RegClass, Vti.RegClass>; + } +} + +multiclass VPatBinaryW_WX<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defvar kind = "W"#Vti.ScalarSuffix; + defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX, + Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask, + Vti.SEW, Wti.RegClass, + Wti.RegClass, Vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryV_WV<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX, + Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask, + Vti.SEW, Vti.RegClass, + Wti.RegClass, Vti.RegClass>; + } +} + +multiclass VPatBinaryV_WX<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defvar kind = "W"#Vti.ScalarSuffix; + defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX, + Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask, + Vti.SEW, Vti.RegClass, + Wti.RegClass, Vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryV_WI<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach VtiToWti = vtilist in { + defvar Vti = VtiToWti.Vti; + defvar Wti = VtiToWti.Wti; + defm : VPatBinary<intrinsic, instruction # "_WI_" # Vti.LMul.MX, + Vti.Vector, Wti.Vector, XLenVT, Vti.Mask, + Vti.SEW, Vti.RegClass, + Wti.RegClass, uimm5>; + } +} + +multiclass VPatBinaryV_VM<string intrinsic, string instruction, + bit CarryOut = 0, + list<VTypeInfo> vtilist = AllIntegerVectors> { + foreach vti = vtilist in + defm : VPatBinaryCarryIn<intrinsic, instruction, "VVM", + !if(CarryOut, vti.Mask, vti.Vector), + vti.Vector, vti.Vector, vti.Mask, + vti.SEW, vti.LMul, + vti.RegClass, vti.RegClass>; +} + +multiclass VPatBinaryV_XM<string intrinsic, string instruction, + bit CarryOut = 0, + list<VTypeInfo> vtilist = AllIntegerVectors> { + foreach vti = vtilist in + defm : VPatBinaryCarryIn<intrinsic, instruction, + "V"#vti.ScalarSuffix#"M", + !if(CarryOut, 
vti.Mask, vti.Vector), + vti.Vector, vti.Scalar, vti.Mask, + vti.SEW, vti.LMul, + vti.RegClass, vti.ScalarRegClass>; +} + +multiclass VPatBinaryV_IM<string intrinsic, string instruction, + bit CarryOut = 0> { + foreach vti = AllIntegerVectors in + defm : VPatBinaryCarryIn<intrinsic, instruction, "VIM", + !if(CarryOut, vti.Mask, vti.Vector), + vti.Vector, XLenVT, vti.Mask, + vti.SEW, vti.LMul, + vti.RegClass, simm5>; +} + +multiclass VPatBinaryV_V<string intrinsic, string instruction> { + foreach vti = AllIntegerVectors in + defm : VPatBinaryMaskOut<intrinsic, instruction, "VV", + vti.Mask, vti.Vector, vti.Vector, + vti.SEW, vti.LMul, + vti.RegClass, vti.RegClass>; +} + +multiclass VPatBinaryV_X<string intrinsic, string instruction> { + foreach vti = AllIntegerVectors in + defm : VPatBinaryMaskOut<intrinsic, instruction, "VX", + vti.Mask, vti.Vector, XLenVT, + vti.SEW, vti.LMul, + vti.RegClass, GPR>; +} + +multiclass VPatBinaryV_I<string intrinsic, string instruction> { + foreach vti = AllIntegerVectors in + defm : VPatBinaryMaskOut<intrinsic, instruction, "VI", + vti.Mask, vti.Vector, XLenVT, + vti.SEW, vti.LMul, + vti.RegClass, simm5>; +} + +multiclass VPatBinaryM_VV<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX, + vti.Mask, vti.Vector, vti.Vector, vti.Mask, + vti.SEW, VR, + vti.RegClass, vti.RegClass>; +} + +multiclass VPatBinaryM_VX<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + defvar kind = "V"#vti.ScalarSuffix; + defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX, + vti.Mask, vti.Vector, vti.Scalar, vti.Mask, + vti.SEW, VR, + vti.RegClass, vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryM_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX, + vti.Mask, vti.Vector, XLenVT, vti.Mask, + vti.SEW, VR, + vti.RegClass, simm5>; +} + +multiclass VPatBinaryV_VV_VX_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand ImmType = simm5> +{ + defm "" : VPatBinaryV_VV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_VX<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>; +} + +multiclass VPatBinaryV_VV_VX<string intrinsic, string instruction, + list<VTypeInfo> vtilist> +{ + defm "" : VPatBinaryV_VV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_VX<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryV_VX_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist> +{ + defm "" : VPatBinaryV_VX<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_VI<intrinsic, instruction, vtilist, simm5>; +} + +multiclass VPatBinaryW_VV_VX<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> +{ + defm "" : VPatBinaryW_VV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryW_VX<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryW_WV_WX<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> +{ + defm "" : VPatBinaryW_WV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryW_WX<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryV_WV_WX_WI<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> +{ + defm "" : VPatBinaryV_WV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_WX<intrinsic, instruction, vtilist>; + defm "" : 
VPatBinaryV_WI<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryV_VM_XM_IM<string intrinsic, string instruction> +{ + defm "" : VPatBinaryV_VM<intrinsic, instruction>; + defm "" : VPatBinaryV_XM<intrinsic, instruction>; + defm "" : VPatBinaryV_IM<intrinsic, instruction>; +} + +multiclass VPatBinaryM_VM_XM_IM<string intrinsic, string instruction> +{ + defm "" : VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>; + defm "" : VPatBinaryV_XM<intrinsic, instruction, /*CarryOut=*/1>; + defm "" : VPatBinaryV_IM<intrinsic, instruction, /*CarryOut=*/1>; +} + +multiclass VPatBinaryM_V_X_I<string intrinsic, string instruction> +{ + defm "" : VPatBinaryV_V<intrinsic, instruction>; + defm "" : VPatBinaryV_X<intrinsic, instruction>; + defm "" : VPatBinaryV_I<intrinsic, instruction>; +} + +multiclass VPatBinaryV_VM_XM<string intrinsic, string instruction> +{ + defm "" : VPatBinaryV_VM<intrinsic, instruction>; + defm "" : VPatBinaryV_XM<intrinsic, instruction>; +} + +multiclass VPatBinaryM_VM_XM<string intrinsic, string instruction> +{ + defm "" : VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>; + defm "" : VPatBinaryV_XM<intrinsic, instruction, /*CarryOut=*/1>; +} + +multiclass VPatBinaryM_V_X<string intrinsic, string instruction> +{ + defm "" : VPatBinaryV_V<intrinsic, instruction>; + defm "" : VPatBinaryV_X<intrinsic, instruction>; +} + +multiclass VPatTernary<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType op2_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + RegisterClass op1_reg_class, + DAGOperand op2_kind> { + def : VPatTernaryNoMask<intrinsic, inst, kind, result_type, op1_type, op2_type, + mask_type, sew, vlmul, result_reg_class, op1_reg_class, + op2_kind>; + def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type, + mask_type, sew, vlmul, result_reg_class, op1_reg_class, + op2_kind>; +} + +multiclass VPatTernaryV_VV<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatTernary<intrinsic, instruction, "VV", + vti.Vector, vti.Vector, vti.Vector, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, + vti.RegClass, vti.RegClass>; +} + +multiclass VPatTernaryV_VX<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatTernary<intrinsic, instruction, "VX", + vti.Vector, vti.Vector, XLenVT, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, + vti.RegClass, GPR>; +} + +multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in + defm : VPatTernary<intrinsic, instruction, + "V"#vti.ScalarSuffix, + vti.Vector, vti.Scalar, vti.Vector, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, + vti.ScalarRegClass, vti.RegClass>; +} + +multiclass VPatTernaryV_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand Imm_type> { + foreach vti = vtilist in + defm : VPatTernary<intrinsic, instruction, "VI", + vti.Vector, vti.Vector, XLenVT, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, + vti.RegClass, Imm_type>; +} + +multiclass VPatTernaryW_VV<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach vtiToWti = vtilist in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + defm : VPatTernary<intrinsic, instruction, "VV", + wti.Vector, vti.Vector, vti.Vector, + vti.Mask, vti.SEW, vti.LMul, + wti.RegClass, vti.RegClass, vti.RegClass>; + } +} + +multiclass VPatTernaryW_VX<string 
intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + foreach vtiToWti = vtilist in { + defvar vti = vtiToWti.Vti; + defvar wti = vtiToWti.Wti; + defm : VPatTernary<intrinsic, instruction, + "V"#vti.ScalarSuffix, + wti.Vector, vti.Scalar, vti.Vector, + vti.Mask, vti.SEW, vti.LMul, + wti.RegClass, vti.ScalarRegClass, vti.RegClass>; + } +} + +multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + defm "" : VPatTernaryV_VV<intrinsic, instruction, vtilist>; + defm "" : VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>; +} + +multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand Imm_type = simm5> { + defm "" : VPatTernaryV_VX<intrinsic, instruction, vtilist>; + defm "" : VPatTernaryV_VI<intrinsic, instruction, vtilist, Imm_type>; +} + +multiclass VPatBinaryM_VV_VX_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist> +{ + defm "" : VPatBinaryM_VV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryM_VX<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryM_VI<intrinsic, instruction, vtilist>; +} + +multiclass VPatTernaryW_VV_VX<string intrinsic, string instruction, + list<VTypeInfoToWide> vtilist> { + defm "" : VPatTernaryW_VV<intrinsic, instruction, vtilist>; + defm "" : VPatTernaryW_VX<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryM_VV_VX<string intrinsic, string instruction, + list<VTypeInfo> vtilist> +{ + defm "" : VPatBinaryM_VV<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryM_VX<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryM_VX_VI<string intrinsic, string instruction, + list<VTypeInfo> vtilist> +{ + defm "" : VPatBinaryM_VX<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryM_VI<intrinsic, instruction, vtilist>; +} + +multiclass VPatBinaryV_VV_VX_VI_INT<string intrinsic, string instruction, + list<VTypeInfo> vtilist, Operand ImmType = simm5> +{ + defm "" : VPatBinaryV_VV_INT<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_VX_INT<intrinsic, instruction, vtilist>; + defm "" : VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>; +} + +multiclass VPatReductionV_VS<string intrinsic, string instruction, bit IsFloat = 0> { + foreach vti = !if(IsFloat, NoGroupFloatVectors, NoGroupIntegerVectors) in + { + defvar vectorM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # vti.SEW # "M1"); + defm : VPatTernary<intrinsic, instruction, "VS", + vectorM1.Vector, vti.Vector, + vectorM1.Vector, vti.Mask, + vti.SEW, vti.LMul, + VR, vti.RegClass, VR>; + } + foreach gvti = !if(IsFloat, GroupFloatVectors, GroupIntegerVectors) in + { + defm : VPatTernary<intrinsic, instruction, "VS", + gvti.VectorM1, gvti.Vector, + gvti.VectorM1, gvti.Mask, + gvti.SEW, gvti.LMul, + VR, gvti.RegClass, VR>; + } +} + +multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat = 0> { + foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in + { + defvar wtiSEW = !mul(vti.SEW, 2); + if !le(wtiSEW, 64) then { + defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1"); + defm : VPatTernary<intrinsic, instruction, "VS", + wtiM1.Vector, vti.Vector, + wtiM1.Vector, vti.Mask, + vti.SEW, vti.LMul, + wtiM1.RegClass, vti.RegClass, + wtiM1.RegClass>; + } + } +} + +multiclass VPatConversionVI_VF<string intrinsic, + string instruction> +{ + foreach fvti = AllFloatVectors in + { + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + + defm : VPatConversion<intrinsic, instruction, "V", + ivti.Vector, fvti.Vector, 
ivti.Mask, fvti.SEW, + fvti.LMul, ivti.RegClass, fvti.RegClass>; + } +} + +multiclass VPatConversionVF_VI<string intrinsic, + string instruction> +{ + foreach fvti = AllFloatVectors in + { + defvar ivti = GetIntVTypeInfo<fvti>.Vti; + + defm : VPatConversion<intrinsic, instruction, "V", + fvti.Vector, ivti.Vector, fvti.Mask, ivti.SEW, + ivti.LMul, fvti.RegClass, ivti.RegClass>; + } +} + +multiclass VPatConversionWI_VF<string intrinsic, string instruction> { + foreach fvtiToFWti = AllWidenableFloatVectors in + { + defvar fvti = fvtiToFWti.Vti; + defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti; + + defm : VPatConversion<intrinsic, instruction, "V", + iwti.Vector, fvti.Vector, iwti.Mask, fvti.SEW, + fvti.LMul, iwti.RegClass, fvti.RegClass>; + } +} + +multiclass VPatConversionWF_VI<string intrinsic, string instruction> { + foreach vtiToWti = AllWidenableIntToFloatVectors in + { + defvar vti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + + defm : VPatConversion<intrinsic, instruction, "V", + fwti.Vector, vti.Vector, fwti.Mask, vti.SEW, + vti.LMul, fwti.RegClass, vti.RegClass>; + } +} + +multiclass VPatConversionWF_VF <string intrinsic, string instruction> { + foreach fvtiToFWti = AllWidenableFloatVectors in + { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + + defm : VPatConversion<intrinsic, instruction, "V", + fwti.Vector, fvti.Vector, fwti.Mask, fvti.SEW, + fvti.LMul, fwti.RegClass, fvti.RegClass>; + } +} + +multiclass VPatConversionVI_WF <string intrinsic, string instruction> { + foreach vtiToWti = AllWidenableIntToFloatVectors in + { + defvar vti = vtiToWti.Vti; + defvar fwti = vtiToWti.Wti; + + defm : VPatConversion<intrinsic, instruction, "W", + vti.Vector, fwti.Vector, vti.Mask, vti.SEW, + vti.LMul, vti.RegClass, fwti.RegClass>; + } +} + +multiclass VPatConversionVF_WI <string intrinsic, string instruction> { + foreach fvtiToFWti = AllWidenableFloatVectors in + { + defvar fvti = fvtiToFWti.Vti; + defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti; + + defm : VPatConversion<intrinsic, instruction, "W", + fvti.Vector, iwti.Vector, fvti.Mask, fvti.SEW, + fvti.LMul, fvti.RegClass, iwti.RegClass>; + } +} + +multiclass VPatConversionVF_WF <string intrinsic, string instruction> { + foreach fvtiToFWti = AllWidenableFloatVectors in + { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + + defm : VPatConversion<intrinsic, instruction, "W", + fvti.Vector, fwti.Vector, fvti.Mask, fvti.SEW, + fvti.LMul, fvti.RegClass, fwti.RegClass>; + } +} + +multiclass VPatAMOWD<string intrinsic, + string inst, + ValueType result_type, + ValueType offset_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + LMULInfo emul, + VReg op1_reg_class> +{ + def : VPatAMOWDNoMask<intrinsic, inst, result_type, offset_type, + sew, vlmul, emul, op1_reg_class>; + def : VPatAMOWDMask<intrinsic, inst, result_type, offset_type, + mask_type, sew, vlmul, emul, op1_reg_class>; +} + +multiclass VPatAMOV_WD<string intrinsic, + string inst, + list<VTypeInfo> vtilist> { + foreach eew = EEWList in { + foreach vti = vtilist in { + if !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)) then { + defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret; + // Calculate emul = eew * lmul / sew + defvar octuple_emul = !srl(!mul(eew, octuple_lmul), shift_amount<vti.SEW>.val); + if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { + defvar emulMX = octuple_to_str<octuple_emul>.ret; + defvar offsetVti = !cast<VTypeInfo>("VI" # eew # emulMX); + defvar inst_ei = inst # "EI" # eew; + defm : VPatAMOWD<intrinsic, 
inst_ei, + vti.Vector, offsetVti.Vector, + vti.Mask, vti.SEW, vti.LMul, offsetVti.LMul, offsetVti.RegClass>; + } + } + } + } +} + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { + +//===----------------------------------------------------------------------===// +// Pseudo Instructions for CodeGen +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + def PseudoVMV1R_V : VPseudo<VMV1R_V, V_M1, (outs VR:$vd), (ins VR:$vs2)>; + def PseudoVMV2R_V : VPseudo<VMV2R_V, V_M2, (outs VRM2:$vd), (ins VRM2:$vs2)>; + def PseudoVMV4R_V : VPseudo<VMV4R_V, V_M4, (outs VRM4:$vd), (ins VRM4:$vs2)>; + def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, (outs VRM8:$vd), (ins VRM8:$vs2)>; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in { + def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins), + [(set GPR:$rd, (riscv_read_vlenb))]>; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1, + Uses = [VL] in +def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins), + [(set GPR:$rd, (riscv_read_vl))]>; + +//===----------------------------------------------------------------------===// +// 6. Configuration-Setting Instructions +//===----------------------------------------------------------------------===// + +// Pseudos. +let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in { +def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>; +def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>; +} + +//===----------------------------------------------------------------------===// +// 7. Vector Loads and Stores +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 7.4. Vector Unit-Stride Instructions +//===----------------------------------------------------------------------===// + +// Pseudo Unit-Stride Loads and Stores +foreach eew = EEWList in { + defm PseudoVLE # eew : VPseudoUSLoad; + defm PseudoVSE # eew : VPseudoUSStore; +} + +defm PseudoVLE1 : VPseudoLoadMask; +defm PseudoVSE1 : VPseudoStoreMask; + +//===----------------------------------------------------------------------===// +// 7.5. Vector Strided Instructions +//===----------------------------------------------------------------------===// + +// Vector Strided Loads and Stores +foreach eew = EEWList in { + defm PseudoVLSE # eew : VPseudoSLoad; + defm PseudoVSSE # eew : VPseudoSStore; +} + +//===----------------------------------------------------------------------===// +// 7.6. Vector Indexed Instructions +//===----------------------------------------------------------------------===// + +// Vector Indexed Loads and Stores +foreach eew = EEWList in { + defm PseudoVLUXEI # eew : VPseudoILoad; + defm PseudoVLOXEI # eew : VPseudoILoad; + defm PseudoVSOXEI # eew : VPseudoIStore; + defm PseudoVSUXEI # eew : VPseudoIStore; +} + +//===----------------------------------------------------------------------===// +// 7.7. 
Unit-stride Fault-Only-First Loads +//===----------------------------------------------------------------------===// + +// vleff may update VL register +let hasSideEffects = 1, Defs = [VL] in +foreach eew = EEWList in { + defm PseudoVLE # eew # FF : VPseudoUSLoad; +} + +//===----------------------------------------------------------------------===// +// 7.8. Vector Load/Store Segment Instructions +//===----------------------------------------------------------------------===// +defm PseudoVLSEG : VPseudoUSSegLoad</*fault-only-first*/false>; +defm PseudoVLSSEG : VPseudoSSegLoad; +defm PseudoVLOXSEG : VPseudoISegLoad; +defm PseudoVLUXSEG : VPseudoISegLoad; +defm PseudoVSSEG : VPseudoUSSegStore; +defm PseudoVSSSEG : VPseudoSSegStore; +defm PseudoVSOXSEG : VPseudoISegStore; +defm PseudoVSUXSEG : VPseudoISegStore; + +// vlseg<nf>e<eew>ff.v may update VL register +let hasSideEffects = 1, Defs = [VL] in +defm PseudoVLSEG : VPseudoUSSegLoad</*fault-only-first*/true>; + +//===----------------------------------------------------------------------===// +// 8. Vector AMO Operations +//===----------------------------------------------------------------------===// +defm PseudoVAMOSWAP : VPseudoAMO; +defm PseudoVAMOADD : VPseudoAMO; +defm PseudoVAMOXOR : VPseudoAMO; +defm PseudoVAMOAND : VPseudoAMO; +defm PseudoVAMOOR : VPseudoAMO; +defm PseudoVAMOMIN : VPseudoAMO; +defm PseudoVAMOMAX : VPseudoAMO; +defm PseudoVAMOMINU : VPseudoAMO; +defm PseudoVAMOMAXU : VPseudoAMO; + +//===----------------------------------------------------------------------===// +// 12. Vector Integer Arithmetic Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 12.1. Vector Single-Width Integer Add and Subtract +//===----------------------------------------------------------------------===// +defm PseudoVADD : VPseudoBinaryV_VV_VX_VI; +defm PseudoVSUB : VPseudoBinaryV_VV_VX; +defm PseudoVRSUB : VPseudoBinaryV_VX_VI; + +//===----------------------------------------------------------------------===// +// 12.2. Vector Widening Integer Add/Subtract +//===----------------------------------------------------------------------===// +defm PseudoVWADDU : VPseudoBinaryW_VV_VX; +defm PseudoVWSUBU : VPseudoBinaryW_VV_VX; +defm PseudoVWADD : VPseudoBinaryW_VV_VX; +defm PseudoVWSUB : VPseudoBinaryW_VV_VX; +defm PseudoVWADDU : VPseudoBinaryW_WV_WX; +defm PseudoVWSUBU : VPseudoBinaryW_WV_WX; +defm PseudoVWADD : VPseudoBinaryW_WV_WX; +defm PseudoVWSUB : VPseudoBinaryW_WV_WX; + +//===----------------------------------------------------------------------===// +// 12.3. Vector Integer Extension +//===----------------------------------------------------------------------===// +defm PseudoVZEXT_VF2 : PseudoUnaryV_VF2; +defm PseudoVZEXT_VF4 : PseudoUnaryV_VF4; +defm PseudoVZEXT_VF8 : PseudoUnaryV_VF8; +defm PseudoVSEXT_VF2 : PseudoUnaryV_VF2; +defm PseudoVSEXT_VF4 : PseudoUnaryV_VF4; +defm PseudoVSEXT_VF8 : PseudoUnaryV_VF8; + +//===----------------------------------------------------------------------===// +// 12.4. 
Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions +//===----------------------------------------------------------------------===// +defm PseudoVADC : VPseudoBinaryV_VM_XM_IM; +defm PseudoVMADC : VPseudoBinaryM_VM_XM_IM<"@earlyclobber $rd">; +defm PseudoVMADC : VPseudoBinaryM_V_X_I<"@earlyclobber $rd">; + +defm PseudoVSBC : VPseudoBinaryV_VM_XM; +defm PseudoVMSBC : VPseudoBinaryM_VM_XM<"@earlyclobber $rd">; +defm PseudoVMSBC : VPseudoBinaryM_V_X<"@earlyclobber $rd">; + +//===----------------------------------------------------------------------===// +// 12.5. Vector Bitwise Logical Instructions +//===----------------------------------------------------------------------===// +defm PseudoVAND : VPseudoBinaryV_VV_VX_VI; +defm PseudoVOR : VPseudoBinaryV_VV_VX_VI; +defm PseudoVXOR : VPseudoBinaryV_VV_VX_VI; + +//===----------------------------------------------------------------------===// +// 12.6. Vector Single-Width Bit Shift Instructions +//===----------------------------------------------------------------------===// +defm PseudoVSLL : VPseudoBinaryV_VV_VX_VI<uimm5>; +defm PseudoVSRL : VPseudoBinaryV_VV_VX_VI<uimm5>; +defm PseudoVSRA : VPseudoBinaryV_VV_VX_VI<uimm5>; + +//===----------------------------------------------------------------------===// +// 12.7. Vector Narrowing Integer Right Shift Instructions +//===----------------------------------------------------------------------===// +defm PseudoVNSRL : VPseudoBinaryV_WV_WX_WI; +defm PseudoVNSRA : VPseudoBinaryV_WV_WX_WI; + +//===----------------------------------------------------------------------===// +// 12.8. Vector Integer Comparison Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMSEQ : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSNE : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSLTU : VPseudoBinaryM_VV_VX; +defm PseudoVMSLT : VPseudoBinaryM_VV_VX; +defm PseudoVMSLEU : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSLE : VPseudoBinaryM_VV_VX_VI; +defm PseudoVMSGTU : VPseudoBinaryM_VX_VI; +defm PseudoVMSGT : VPseudoBinaryM_VX_VI; + +//===----------------------------------------------------------------------===// +// 12.9. Vector Integer Min/Max Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMINU : VPseudoBinaryV_VV_VX; +defm PseudoVMIN : VPseudoBinaryV_VV_VX; +defm PseudoVMAXU : VPseudoBinaryV_VV_VX; +defm PseudoVMAX : VPseudoBinaryV_VV_VX; + +//===----------------------------------------------------------------------===// +// 12.10. Vector Single-Width Integer Multiply Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMUL : VPseudoBinaryV_VV_VX; +defm PseudoVMULH : VPseudoBinaryV_VV_VX; +defm PseudoVMULHU : VPseudoBinaryV_VV_VX; +defm PseudoVMULHSU : VPseudoBinaryV_VV_VX; + +//===----------------------------------------------------------------------===// +// 12.11. Vector Integer Divide Instructions +//===----------------------------------------------------------------------===// +defm PseudoVDIVU : VPseudoBinaryV_VV_VX; +defm PseudoVDIV : VPseudoBinaryV_VV_VX; +defm PseudoVREMU : VPseudoBinaryV_VV_VX; +defm PseudoVREM : VPseudoBinaryV_VV_VX; + +//===----------------------------------------------------------------------===// +// 12.12. 
Vector Widening Integer Multiply Instructions +//===----------------------------------------------------------------------===// +defm PseudoVWMUL : VPseudoBinaryW_VV_VX; +defm PseudoVWMULU : VPseudoBinaryW_VV_VX; +defm PseudoVWMULSU : VPseudoBinaryW_VV_VX; + +//===----------------------------------------------------------------------===// +// 12.13. Vector Single-Width Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMACC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVNMSAC : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVMADD : VPseudoTernaryV_VV_VX_AAXA; +defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA; + +//===----------------------------------------------------------------------===// +// 12.14. Vector Widening Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVWMACCU : VPseudoTernaryW_VV_VX; +defm PseudoVWMACC : VPseudoTernaryW_VV_VX; +defm PseudoVWMACCSU : VPseudoTernaryW_VV_VX; +defm PseudoVWMACCUS : VPseudoTernaryW_VX; + +//===----------------------------------------------------------------------===// +// 12.16. Vector Integer Merge Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMERGE : VPseudoBinaryV_VM_XM_IM; + +//===----------------------------------------------------------------------===// +// 12.17. Vector Integer Move Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMV_V : VPseudoUnaryV_V_X_I_NoDummyMask; + +//===----------------------------------------------------------------------===// +// 13.1. Vector Single-Width Saturating Add and Subtract +//===----------------------------------------------------------------------===// +let Defs = [VXSAT], hasSideEffects = 1 in { + defm PseudoVSADDU : VPseudoBinaryV_VV_VX_VI; + defm PseudoVSADD : VPseudoBinaryV_VV_VX_VI; + defm PseudoVSSUBU : VPseudoBinaryV_VV_VX; + defm PseudoVSSUB : VPseudoBinaryV_VV_VX; +} + +//===----------------------------------------------------------------------===// +// 13.2. Vector Single-Width Averaging Add and Subtract +//===----------------------------------------------------------------------===// +let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in { + defm PseudoVAADDU : VPseudoBinaryV_VV_VX; + defm PseudoVAADD : VPseudoBinaryV_VV_VX; + defm PseudoVASUBU : VPseudoBinaryV_VV_VX; + defm PseudoVASUB : VPseudoBinaryV_VV_VX; +} + +//===----------------------------------------------------------------------===// +// 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +//===----------------------------------------------------------------------===// +let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in { + defm PseudoVSMUL : VPseudoBinaryV_VV_VX; +} + +//===----------------------------------------------------------------------===// +// 13.4. Vector Single-Width Scaling Shift Instructions +//===----------------------------------------------------------------------===// +let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in { + defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI<uimm5>; + defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI<uimm5>; +} + +//===----------------------------------------------------------------------===// +// 13.5. 
Vector Narrowing Fixed-Point Clip Instructions +//===----------------------------------------------------------------------===// +let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in { + defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI; + defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI; +} + +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { +//===----------------------------------------------------------------------===// +// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFADD : VPseudoBinaryV_VV_VF; +defm PseudoVFSUB : VPseudoBinaryV_VV_VF; +defm PseudoVFRSUB : VPseudoBinaryV_VF; + +//===----------------------------------------------------------------------===// +// 14.3. Vector Widening Floating-Point Add/Subtract Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFWADD : VPseudoBinaryW_VV_VF; +defm PseudoVFWSUB : VPseudoBinaryW_VV_VF; +defm PseudoVFWADD : VPseudoBinaryW_WV_WF; +defm PseudoVFWSUB : VPseudoBinaryW_WV_WF; + +//===----------------------------------------------------------------------===// +// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFMUL : VPseudoBinaryV_VV_VF; +defm PseudoVFDIV : VPseudoBinaryV_VV_VF; +defm PseudoVFRDIV : VPseudoBinaryV_VF; + +//===----------------------------------------------------------------------===// +// 14.5. Vector Widening Floating-Point Multiply +//===----------------------------------------------------------------------===// +defm PseudoVFWMUL : VPseudoBinaryW_VV_VF; + +//===----------------------------------------------------------------------===// +// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFMACC : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFNMACC : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFMSAC : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFNMSAC : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFMADD : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFNMADD : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFMSUB : VPseudoTernaryV_VV_VF_AAXA; +defm PseudoVFNMSUB : VPseudoTernaryV_VV_VF_AAXA; + +//===----------------------------------------------------------------------===// +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFWMACC : VPseudoTernaryW_VV_VF; +defm PseudoVFWNMACC : VPseudoTernaryW_VV_VF; +defm PseudoVFWMSAC : VPseudoTernaryW_VV_VF; +defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VF; + +//===----------------------------------------------------------------------===// +// 14.8. Vector Floating-Point Square-Root Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFSQRT : VPseudoUnaryV_V; + +//===----------------------------------------------------------------------===// +// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFRSQRT7 : VPseudoUnaryV_V; + +//===----------------------------------------------------------------------===// +// 14.10. 
Vector Floating-Point Reciprocal Estimate Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFREC7 : VPseudoUnaryV_V; + +//===----------------------------------------------------------------------===// +// 14.11. Vector Floating-Point Min/Max Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFMIN : VPseudoBinaryV_VV_VF; +defm PseudoVFMAX : VPseudoBinaryV_VV_VF; + +//===----------------------------------------------------------------------===// +// 14.12. Vector Floating-Point Sign-Injection Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFSGNJ : VPseudoBinaryV_VV_VF; +defm PseudoVFSGNJN : VPseudoBinaryV_VV_VF; +defm PseudoVFSGNJX : VPseudoBinaryV_VV_VF; + +//===----------------------------------------------------------------------===// +// 14.13. Vector Floating-Point Compare Instructions +//===----------------------------------------------------------------------===// +defm PseudoVMFEQ : VPseudoBinaryM_VV_VF; +defm PseudoVMFNE : VPseudoBinaryM_VV_VF; +defm PseudoVMFLT : VPseudoBinaryM_VV_VF; +defm PseudoVMFLE : VPseudoBinaryM_VV_VF; +defm PseudoVMFGT : VPseudoBinaryM_VF; +defm PseudoVMFGE : VPseudoBinaryM_VF; + +//===----------------------------------------------------------------------===// +// 14.14. Vector Floating-Point Classify Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFCLASS : VPseudoUnaryV_V; + +//===----------------------------------------------------------------------===// +// 14.15. Vector Floating-Point Merge Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFMERGE : VPseudoBinaryV_FM; + +//===----------------------------------------------------------------------===// +// 14.16. Vector Floating-Point Move Instruction +//===----------------------------------------------------------------------===// +defm PseudoVFMV_V : VPseudoUnaryV_F_NoDummyMask; + +//===----------------------------------------------------------------------===// +// 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFCVT_XU_F : VPseudoConversionV_V; +defm PseudoVFCVT_X_F : VPseudoConversionV_V; +defm PseudoVFCVT_RTZ_XU_F : VPseudoConversionV_V; +defm PseudoVFCVT_RTZ_X_F : VPseudoConversionV_V; +defm PseudoVFCVT_F_XU : VPseudoConversionV_V; +defm PseudoVFCVT_F_X : VPseudoConversionV_V; + +//===----------------------------------------------------------------------===// +// 14.18. Widening Floating-Point/Integer Type-Convert Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFWCVT_XU_F : VPseudoConversionW_V; +defm PseudoVFWCVT_X_F : VPseudoConversionW_V; +defm PseudoVFWCVT_RTZ_XU_F : VPseudoConversionW_V; +defm PseudoVFWCVT_RTZ_X_F : VPseudoConversionW_V; +defm PseudoVFWCVT_F_XU : VPseudoConversionW_V; +defm PseudoVFWCVT_F_X : VPseudoConversionW_V; +defm PseudoVFWCVT_F_F : VPseudoConversionW_V; + +//===----------------------------------------------------------------------===// +// 14.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFNCVT_XU_F : VPseudoConversionV_W; +defm PseudoVFNCVT_X_F : VPseudoConversionV_W; +defm PseudoVFNCVT_RTZ_XU_F : VPseudoConversionV_W; +defm PseudoVFNCVT_RTZ_X_F : VPseudoConversionV_W; +defm PseudoVFNCVT_F_XU : VPseudoConversionV_W; +defm PseudoVFNCVT_F_X : VPseudoConversionV_W; +defm PseudoVFNCVT_F_F : VPseudoConversionV_W; +defm PseudoVFNCVT_ROD_F_F : VPseudoConversionV_W; +} // Predicates = [HasStdExtV, HasStdExtF] + +let Predicates = [HasStdExtV] in { +//===----------------------------------------------------------------------===// +// 15.1. Vector Single-Width Integer Reduction Instructions +//===----------------------------------------------------------------------===// +defm PseudoVREDSUM : VPseudoReductionV_VS; +defm PseudoVREDAND : VPseudoReductionV_VS; +defm PseudoVREDOR : VPseudoReductionV_VS; +defm PseudoVREDXOR : VPseudoReductionV_VS; +defm PseudoVREDMINU : VPseudoReductionV_VS; +defm PseudoVREDMIN : VPseudoReductionV_VS; +defm PseudoVREDMAXU : VPseudoReductionV_VS; +defm PseudoVREDMAX : VPseudoReductionV_VS; + +//===----------------------------------------------------------------------===// +// 15.2. Vector Widening Integer Reduction Instructions +//===----------------------------------------------------------------------===// +defm PseudoVWREDSUMU : VPseudoReductionV_VS; +defm PseudoVWREDSUM : VPseudoReductionV_VS; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { +//===----------------------------------------------------------------------===// +// 15.3. Vector Single-Width Floating-Point Reduction Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFREDOSUM : VPseudoReductionV_VS; +defm PseudoVFREDSUM : VPseudoReductionV_VS; +defm PseudoVFREDMIN : VPseudoReductionV_VS; +defm PseudoVFREDMAX : VPseudoReductionV_VS; + +//===----------------------------------------------------------------------===// +// 15.4. Vector Widening Floating-Point Reduction Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFWREDSUM : VPseudoReductionV_VS; +defm PseudoVFWREDOSUM : VPseudoReductionV_VS; + +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 16. Vector Mask Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 16.1 Vector Mask-Register Logical Instructions +//===----------------------------------------------------------------------===// + +defm PseudoVMAND: VPseudoBinaryM_MM; +defm PseudoVMNAND: VPseudoBinaryM_MM; +defm PseudoVMANDNOT: VPseudoBinaryM_MM; +defm PseudoVMXOR: VPseudoBinaryM_MM; +defm PseudoVMOR: VPseudoBinaryM_MM; +defm PseudoVMNOR: VPseudoBinaryM_MM; +defm PseudoVMORNOT: VPseudoBinaryM_MM; +defm PseudoVMXNOR: VPseudoBinaryM_MM; + +// Pseudo instructions +defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">; +defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
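+ +// vmclr.m and vmset.m have no dedicated encodings: the spec defines them as +// assembler pseudoinstructions for vmxor.mm vd, vd, vd and vmxnor.mm vd, vd, vd, +// which is why those base instructions are named in VPseudoNullaryPseudoM above. + +//===----------------------------------------------------------------------===// +// 16.2. 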
Vector mask population count vpopc +//===----------------------------------------------------------------------===// + +defm PseudoVPOPC: VPseudoUnaryS_M; + +//===----------------------------------------------------------------------===// +// 16.3. vfirst find-first-set mask bit +//===----------------------------------------------------------------------===// + +defm PseudoVFIRST: VPseudoUnaryS_M; + +//===----------------------------------------------------------------------===// +// 16.4. vmsbf.m set-before-first mask bit +//===----------------------------------------------------------------------===// +defm PseudoVMSBF: VPseudoUnaryM_M; + +//===----------------------------------------------------------------------===// +// 16.5. vmsif.m set-including-first mask bit +//===----------------------------------------------------------------------===// +defm PseudoVMSIF: VPseudoUnaryM_M; + +//===----------------------------------------------------------------------===// +// 16.6. vmsof.m set-only-first mask bit +//===----------------------------------------------------------------------===// +defm PseudoVMSOF: VPseudoUnaryM_M; + +//===----------------------------------------------------------------------===// +// 16.8. Vector Iota Instruction +//===----------------------------------------------------------------------===// +defm PseudoVIOTA_M: VPseudoUnaryV_M; + +//===----------------------------------------------------------------------===// +// 16.9. Vector Element Index Instruction +//===----------------------------------------------------------------------===// +defm PseudoVID : VPseudoMaskNullaryV; + +//===----------------------------------------------------------------------===// +// 17. Vector Permutation Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 17.1. Integer Scalar Move Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, + Uses = [VL, VTYPE] in { + foreach m = MxList.m in { + let VLMul = m.value in { + let HasSEWOp = 1, BaseInstr = VMV_X_S in + def PseudoVMV_X_S # "_" # m.MX: Pseudo<(outs GPR:$rd), + (ins m.vrclass:$rs2, ixlenimm:$sew), + []>, RISCVVPseudo; + let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, WritesElement0 = 1, + Constraints = "$rd = $rs1" in + def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd), + (ins m.vrclass:$rs1, GPR:$rs2, + GPR:$vl, ixlenimm:$sew), + []>, RISCVVPseudo; + } + } +} +} // Predicates = [HasStdExtV] + +//===----------------------------------------------------------------------===// +// 17.2. 
Floating-Point Scalar Move Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV, HasStdExtF] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, + Uses = [VL, VTYPE] in { + foreach m = MxList.m in { + foreach f = FPList.fpinfo in { + let VLMul = m.value in { + let HasSEWOp = 1, BaseInstr = VFMV_F_S in + def "PseudoVFMV_" # f.FX # "_S_" # m.MX : + Pseudo<(outs f.fprclass:$rd), + (ins m.vrclass:$rs2, + ixlenimm:$sew), + []>, RISCVVPseudo; + let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, WritesElement0 = 1, + Constraints = "$rd = $rs1" in + def "PseudoVFMV_S_" # f.FX # "_" # m.MX : + Pseudo<(outs m.vrclass:$rd), + (ins m.vrclass:$rs1, f.fprclass:$rs2, + GPR:$vl, ixlenimm:$sew), + []>, RISCVVPseudo; + } + } + } +} +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 17.3. Vector Slide Instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtV] in { + defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI<uimm5, "@earlyclobber $rd">; + defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI<uimm5>; + defm PseudoVSLIDE1UP : VPseudoBinaryV_VX<"@earlyclobber $rd">; + defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + defm PseudoVFSLIDE1UP : VPseudoBinaryV_VF<"@earlyclobber $rd">; + defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VF; +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 17.4. Vector Register Gather Instructions +//===----------------------------------------------------------------------===// +defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">; +defm PseudoVRGATHEREI16 : VPseudoBinaryV_VV_EEW</* eew */ 16, "@earlyclobber $rd">; + +//===----------------------------------------------------------------------===// +// 17.5. Vector Compress Instruction +//===----------------------------------------------------------------------===// +defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask; + +//===----------------------------------------------------------------------===// +// Patterns. +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtV] in { + +//===----------------------------------------------------------------------===// +// 7. 
Vector Loads and Stores +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 7.4 Vector Unit-Stride Instructions +//===----------------------------------------------------------------------===// + +foreach vti = AllVectors in +{ + defm : VPatUSLoad<"int_riscv_vle", + "PseudoVLE" # vti.SEW, + vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; + defm : VPatUSLoadFF<"PseudoVLE" # vti.SEW # "FF", + vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; + defm : VPatUSStore<"int_riscv_vse", + "PseudoVSE" # vti.SEW, + vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; +} + +foreach vti = AllMasks in { + defvar PseudoVLE1 = !cast<Instruction>("PseudoVLE1_V_"#vti.BX); + def : Pat<(vti.Mask (int_riscv_vle1 GPR:$rs1, (XLenVT (VLOp GPR:$vl)))), + (PseudoVLE1 $rs1, GPR:$vl, vti.SEW)>; + defvar PseudoVSE1 = !cast<Instruction>("PseudoVSE1_V_"#vti.BX); + def : Pat<(int_riscv_vse1 (vti.Mask VR:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))), + (PseudoVSE1 $rs3, $rs1, GPR:$vl, vti.SEW)>; +} + +//===----------------------------------------------------------------------===// +// 7.5 Vector Strided Instructions +//===----------------------------------------------------------------------===// + +foreach vti = AllVectors in +{ + defm : VPatSLoad<"int_riscv_vlse", + "PseudoVLSE" # vti.SEW, + vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; + defm : VPatSStore<"int_riscv_vsse", + "PseudoVSSE" # vti.SEW, + vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; +} + +//===----------------------------------------------------------------------===// +// 7.6 Vector Indexed Instructions +//===----------------------------------------------------------------------===// + +foreach vti = AllVectors in +foreach eew = EEWList in { + defvar vlmul = vti.LMul; + defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret; + defvar log_sew = shift_amount<vti.SEW>.val; + // The data vector register group has EEW=SEW, EMUL=LMUL, while the offset + // vector register group has its EEW encoded in the instruction and + // EMUL=(EEW/SEW)*LMUL. + // Calculate the octuple (8x) ELMUL: (eew * octuple_lmul) >> log_sew. + // For example, EEW=32 indices with SEW=8 (log_sew=3) and LMUL=1/2 + // (octuple_lmul=4) give (32 * 4) >> 3 = 16, i.e. EMUL=2. + defvar octuple_elmul = !srl(!mul(eew, octuple_lmul), log_sew); + // A legal octuple ELMUL must be greater than 0 and less than or equal to 64. + if !gt(octuple_elmul, 0) then { + if !le(octuple_elmul, 64) then { + defvar elmul_str = octuple_to_str<octuple_elmul>.ret; + defvar elmul = !cast<LMULInfo>("V_" # elmul_str); + defvar idx_vti = !cast<VTypeInfo>("VI" # eew # elmul_str); + + defm : VPatILoad<"int_riscv_vluxei", + "PseudoVLUXEI"#eew, + vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW, + vlmul, elmul, vti.RegClass, idx_vti.RegClass>; + defm : VPatILoad<"int_riscv_vloxei", + "PseudoVLOXEI"#eew, + vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW, + vlmul, elmul, vti.RegClass, idx_vti.RegClass>; + defm : VPatIStore<"int_riscv_vsoxei", + "PseudoVSOXEI"#eew, + vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW, + vlmul, elmul, vti.RegClass, idx_vti.RegClass>; + defm : VPatIStore<"int_riscv_vsuxei", + "PseudoVSUXEI"#eew, + vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW, + vlmul, elmul, vti.RegClass, idx_vti.RegClass>; + } + } +} +} // Predicates = [HasStdExtV] + +//===----------------------------------------------------------------------===// +// 8. 
Vector AMO Operations +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtZvamo] in { + defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamoadd", "PseudoVAMOADD", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamoxor", "PseudoVAMOXOR", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamoand", "PseudoVAMOAND", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamoor", "PseudoVAMOOR", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamomin", "PseudoVAMOMIN", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamomax", "PseudoVAMOMAX", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamominu", "PseudoVAMOMINU", AllIntegerVectors>; + defm "" : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>; +} // Predicates = [HasStdExtZvamo] + +let Predicates = [HasStdExtZvamo, HasStdExtF] in { + defm "" : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>; +} // Predicates = [HasStdExtZvamo, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 12. Vector Integer Arithmetic Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { +//===----------------------------------------------------------------------===// +// 12.1. Vector Single-Width Integer Add and Subtract +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vadd", "PseudoVADD", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vsub", "PseudoVSUB", AllIntegerVectors>; +defm "" : VPatBinaryV_VX_VI<"int_riscv_vrsub", "PseudoVRSUB", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 12.2. Vector Widening Integer Add/Subtract +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwaddu", "PseudoVWADDU", AllWidenableIntVectors>; +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwsubu", "PseudoVWSUBU", AllWidenableIntVectors>; +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwadd", "PseudoVWADD", AllWidenableIntVectors>; +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwsub", "PseudoVWSUB", AllWidenableIntVectors>; +defm "" : VPatBinaryW_WV_WX<"int_riscv_vwaddu_w", "PseudoVWADDU", AllWidenableIntVectors>; +defm "" : VPatBinaryW_WV_WX<"int_riscv_vwsubu_w", "PseudoVWSUBU", AllWidenableIntVectors>; +defm "" : VPatBinaryW_WV_WX<"int_riscv_vwadd_w", "PseudoVWADD", AllWidenableIntVectors>; +defm "" : VPatBinaryW_WV_WX<"int_riscv_vwsub_w", "PseudoVWSUB", AllWidenableIntVectors>; + +//===----------------------------------------------------------------------===// +// 12.3. 
Vector Integer Extension +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF2", + AllFractionableVF2IntVectors>; +defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF4", + AllFractionableVF4IntVectors>; +defm "" : VPatUnaryV_VF<"int_riscv_vzext", "PseudoVZEXT", "VF8", + AllFractionableVF8IntVectors>; +defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF2", + AllFractionableVF2IntVectors>; +defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF4", + AllFractionableVF4IntVectors>; +defm "" : VPatUnaryV_VF<"int_riscv_vsext", "PseudoVSEXT", "VF8", + AllFractionableVF8IntVectors>; + +//===----------------------------------------------------------------------===// +// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vadc", "PseudoVADC">; +defm "" : VPatBinaryM_VM_XM_IM<"int_riscv_vmadc_carry_in", "PseudoVMADC">; +defm "" : VPatBinaryM_V_X_I<"int_riscv_vmadc", "PseudoVMADC">; + +defm "" : VPatBinaryV_VM_XM<"int_riscv_vsbc", "PseudoVSBC">; +defm "" : VPatBinaryM_VM_XM<"int_riscv_vmsbc_borrow_in", "PseudoVMSBC">; +defm "" : VPatBinaryM_V_X<"int_riscv_vmsbc", "PseudoVMSBC">; + +//===----------------------------------------------------------------------===// +// 12.5. Vector Bitwise Logical Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vand", "PseudoVAND", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vor", "PseudoVOR", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vxor", "PseudoVXOR", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 12.6. Vector Single-Width Bit Shift Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsll", "PseudoVSLL", AllIntegerVectors, + uimm5>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsrl", "PseudoVSRL", AllIntegerVectors, + uimm5>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors, + uimm5>; + +//===----------------------------------------------------------------------===// +// 12.7. Vector Narrowing Integer Right Shift Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnsrl", "PseudoVNSRL", AllWidenableIntVectors>; +defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnsra", "PseudoVNSRA", AllWidenableIntVectors>; + +//===----------------------------------------------------------------------===// +// 12.8. 
Vector Integer Comparison Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmseq", "PseudoVMSEQ", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsne", "PseudoVMSNE", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmsltu", "PseudoVMSLTU", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmslt", "PseudoVMSLT", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsleu", "PseudoVMSLEU", AllIntegerVectors>; +defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsle", "PseudoVMSLE", AllIntegerVectors>; + +defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgtu", "PseudoVMSGTU", AllIntegerVectors>; +defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>; + +// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This +// avoids the user needing to know that there is no vmslt(u).vi instruction. +// This is limited to vmslt(u).vx as there is no vmsge().vx intrinsic or +// instruction. +foreach vti = AllIntegerVectors in { + def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1, + (DecImm simm5_plus1:$rs2), + GPR:$vl, + vti.SEW)>; + def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + (vti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK") + VR:$merge, + vti.RegClass:$rs1, + (DecImm simm5_plus1:$rs2), + (vti.Mask V0), + GPR:$vl, + vti.SEW)>; + + def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, + (DecImm simm5_plus1:$rs2), + GPR:$vl, + vti.SEW)>; + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + (vti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK") + VR:$merge, + vti.RegClass:$rs1, + (DecImm simm5_plus1:$rs2), + (vti.Mask V0), + GPR:$vl, + vti.SEW)>; + + // Special cases to avoid matching vmsltu.vi 0 (always false) to + // vmsleu.vi -1 (always true). Instead match to vmsne.vv. + def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1, + vti.RegClass:$rs1, + GPR:$vl, + vti.SEW)>; + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), + (vti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK") + VR:$merge, + vti.RegClass:$rs1, + vti.RegClass:$rs1, + (vti.Mask V0), + GPR:$vl, + vti.SEW)>; +} + +//===----------------------------------------------------------------------===// +// 12.9. 
Vector Integer Min/Max Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vminu", "PseudoVMINU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmin", "PseudoVMIN", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmaxu", "PseudoVMAXU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmax", "PseudoVMAX", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 12.10. Vector Single-Width Integer Multiply Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmul", "PseudoVMUL", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmulh", "PseudoVMULH", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmulhu", "PseudoVMULHU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vmulhsu", "PseudoVMULHSU", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 12.11. Vector Integer Divide Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vdivu", "PseudoVDIVU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vdiv", "PseudoVDIV", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vremu", "PseudoVREMU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vrem", "PseudoVREM", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 12.12. Vector Widening Integer Multiply Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmul", "PseudoVWMUL", AllWidenableIntVectors>; +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmulu", "PseudoVWMULU", AllWidenableIntVectors>; +defm "" : VPatBinaryW_VV_VX<"int_riscv_vwmulsu", "PseudoVWMULSU", AllWidenableIntVectors>; + +//===----------------------------------------------------------------------===// +// 12.13. Vector Single-Width Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmadd", "PseudoVMADD", AllIntegerVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsub", "PseudoVNMSUB", AllIntegerVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vmacc", "PseudoVMACC", AllIntegerVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vnmsac", "PseudoVNMSAC", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 12.14. Vector Widening Integer Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccu", "PseudoVWMACCU", AllWidenableIntVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmacc", "PseudoVWMACC", AllWidenableIntVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vwmaccsu", "PseudoVWMACCSU", AllWidenableIntVectors>; +defm "" : VPatTernaryW_VX<"int_riscv_vwmaccus", "PseudoVWMACCUS", AllWidenableIntVectors>; + +//===----------------------------------------------------------------------===// +// 12.16. 
Vector Integer Merge Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">; + +//===----------------------------------------------------------------------===// +// 12.17. Vector Integer Move Instructions +//===----------------------------------------------------------------------===// +foreach vti = AllVectors in { + def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX) + $rs1, GPR:$vl, vti.SEW)>; +} + +foreach vti = AllIntegerVectors in { + def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX) + $rs2, GPR:$vl, vti.SEW)>; + def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX) + simm5:$imm5, GPR:$vl, vti.SEW)>; +} + +//===----------------------------------------------------------------------===// +// 13.1. Vector Single-Width Saturating Add and Subtract +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsaddu", "PseudoVSADDU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vsadd", "PseudoVSADD", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vssubu", "PseudoVSSUBU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vssub", "PseudoVSSUB", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 13.2. Vector Single-Width Averaging Add and Subtract +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vaaddu", "PseudoVAADDU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vaadd", "PseudoVAADD", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vasubu", "PseudoVASUBU", AllIntegerVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vasub", "PseudoVASUB", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vsmul", "PseudoVSMUL", AllIntegerVectors>; + +//===----------------------------------------------------------------------===// +// 13.4. Vector Single-Width Scaling Shift Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vssrl", "PseudoVSSRL", AllIntegerVectors, + uimm5>; +defm "" : VPatBinaryV_VV_VX_VI<"int_riscv_vssra", "PseudoVSSRA", AllIntegerVectors, + uimm5>; + +//===----------------------------------------------------------------------===// +// 13.5. Vector Narrowing Fixed-Point Clip Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnclipu", "PseudoVNCLIPU", AllWidenableIntVectors>; +defm "" : VPatBinaryV_WV_WX_WI<"int_riscv_vnclip", "PseudoVNCLIP", AllWidenableIntVectors>; + +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { +//===----------------------------------------------------------------------===// +// 14.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfadd", "PseudoVFADD", AllFloatVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsub", "PseudoVFSUB", AllFloatVectors>; +defm "" : VPatBinaryV_VX<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.3. Vector Widening Floating-Point Add/Subtract Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwadd", "PseudoVFWADD", AllWidenableFloatVectors>; +defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwsub", "PseudoVFWSUB", AllWidenableFloatVectors>; +defm "" : VPatBinaryW_WV_WX<"int_riscv_vfwadd_w", "PseudoVFWADD", AllWidenableFloatVectors>; +defm "" : VPatBinaryW_WV_WX<"int_riscv_vfwsub_w", "PseudoVFWSUB", AllWidenableFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmul", "PseudoVFMUL", AllFloatVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfdiv", "PseudoVFDIV", AllFloatVectors>; +defm "" : VPatBinaryV_VX<"int_riscv_vfrdiv", "PseudoVFRDIV", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.5. Vector Widening Floating-Point Multiply +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryW_VV_VX<"int_riscv_vfwmul", "PseudoVFWMUL", AllWidenableFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>; +defm "" : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmacc", "PseudoVFWMACC", AllWidenableFloatVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmacc", "PseudoVFWNMACC", AllWidenableFloatVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>; +defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.8. 
Vector Floating-Point Square-Root Instruction +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.10. Vector Floating-Point Reciprocal Estimate Instruction +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.11. Vector Floating-Point Min/Max Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.12. Vector Floating-Point Sign-Injection Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors>; +defm "" : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.13. Vector Floating-Point Compare Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ", AllFloatVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE", AllFloatVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT", AllFloatVectors>; +defm "" : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE", AllFloatVectors>; +defm "" : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT", AllFloatVectors>; +defm "" : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE", AllFloatVectors>; + +//===----------------------------------------------------------------------===// +// 14.14. Vector Floating-Point Classify Instruction +//===----------------------------------------------------------------------===// +defm "" : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">; + +//===----------------------------------------------------------------------===// +// 14.15. Vector Floating-Point Merge Instruction +//===----------------------------------------------------------------------===// +// We can use vmerge.vvm to support vector-vector vfmerge. 
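+// There is no vfmerge.vvm encoding in the ISA: merging selects whole elements +// and is type-agnostic, so the integer PseudoVMERGE handles the vector-vector +// case below, while PseudoVFMERGE covers the scalar vfmerge.vfm form.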
+defm "" : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; +defm "" : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; + +foreach fvti = AllFloatVectors in { + defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX); + def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2), + (fvti.Scalar (fpimm0)), + (fvti.Mask V0), (XLenVT (VLOp GPR:$vl)))), + (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.SEW)>; +} + +//===----------------------------------------------------------------------===// +// 14.16. Vector Floating-Point Move Instruction +//===----------------------------------------------------------------------===// +foreach fvti = AllFloatVectors in { + // If we're splatting fpimm0, use vmv.v.x vd, x0. + def : Pat<(fvti.Vector (int_riscv_vfmv_v_f + (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX) + 0, GPR:$vl, fvti.SEW)>; + + def : Pat<(fvti.Vector (int_riscv_vfmv_v_f + (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # + fvti.LMul.MX) + (fvti.Scalar fvti.ScalarRegClass:$rs2), + GPR:$vl, fvti.SEW)>; +} + +//===----------------------------------------------------------------------===// +// 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">; +defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">; +defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">; +defm "" : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_x_f_v", "PseudoVFCVT_RTZ_X_F">; +defm "" : VPatConversionVF_VI<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">; +defm "" : VPatConversionVF_VI<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">; + +//===----------------------------------------------------------------------===// +// 14.18. Widening Floating-Point/Integer Type-Convert Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">; +defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">; +defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">; +defm "" : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">; +defm "" : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">; +defm "" : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">; +defm "" : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">; + +//===----------------------------------------------------------------------===// +// 14.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">; +defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">; +defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">; +defm "" : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">; +defm "" : VPatConversionVF_WI <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">; +defm "" : VPatConversionVF_WI <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">; +defm "" : VPatConversionVF_WF<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">; +defm "" : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">; +} // Predicates = [HasStdExtV, HasStdExtF] + +let Predicates = [HasStdExtV] in { +//===----------------------------------------------------------------------===// +// 15.1. Vector Single-Width Integer Reduction Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatReductionV_VS<"int_riscv_vredsum", "PseudoVREDSUM">; +defm "" : VPatReductionV_VS<"int_riscv_vredand", "PseudoVREDAND">; +defm "" : VPatReductionV_VS<"int_riscv_vredor", "PseudoVREDOR">; +defm "" : VPatReductionV_VS<"int_riscv_vredxor", "PseudoVREDXOR">; +defm "" : VPatReductionV_VS<"int_riscv_vredminu", "PseudoVREDMINU">; +defm "" : VPatReductionV_VS<"int_riscv_vredmin", "PseudoVREDMIN">; +defm "" : VPatReductionV_VS<"int_riscv_vredmaxu", "PseudoVREDMAXU">; +defm "" : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">; + +//===----------------------------------------------------------------------===// +// 15.2. Vector Widening Integer Reduction Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">; +defm "" : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { +//===----------------------------------------------------------------------===// +// 15.3. Vector Single-Width Floating-Point Reduction Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=*/1>; +defm "" : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>; +defm "" : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>; +defm "" : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>; + +//===----------------------------------------------------------------------===// +// 15.4. Vector Widening Floating-Point Reduction Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>; +defm "" : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>; + +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 16. 
Vector Mask Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { +//===----------------------------------------------------------------------===// +// 16.1 Vector Mask-Register Logical Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatBinaryM_MM<"int_riscv_vmand", "PseudoVMAND">; +defm "" : VPatBinaryM_MM<"int_riscv_vmnand", "PseudoVMNAND">; +defm "" : VPatBinaryM_MM<"int_riscv_vmandnot", "PseudoVMANDNOT">; +defm "" : VPatBinaryM_MM<"int_riscv_vmxor", "PseudoVMXOR">; +defm "" : VPatBinaryM_MM<"int_riscv_vmor", "PseudoVMOR">; +defm "" : VPatBinaryM_MM<"int_riscv_vmnor", "PseudoVMNOR">; +defm "" : VPatBinaryM_MM<"int_riscv_vmornot", "PseudoVMORNOT">; +defm "" : VPatBinaryM_MM<"int_riscv_vmxnor", "PseudoVMXNOR">; + +// pseudo instructions +defm "" : VPatNullaryM<"int_riscv_vmclr", "PseudoVMCLR">; +defm "" : VPatNullaryM<"int_riscv_vmset", "PseudoVMSET">; + +//===----------------------------------------------------------------------===// +// 16.2. Vector mask population count vpopc +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryS_M<"int_riscv_vpopc", "PseudoVPOPC">; + +//===----------------------------------------------------------------------===// +// 16.3. vfirst find-first-set mask bit +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryS_M<"int_riscv_vfirst", "PseudoVFIRST">; + +//===----------------------------------------------------------------------===// +// 16.4. vmsbf.m set-before-first mask bit +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryM_M<"int_riscv_vmsbf", "PseudoVMSBF">; + +//===----------------------------------------------------------------------===// +// 16.5. vmsif.m set-including-first mask bit +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryM_M<"int_riscv_vmsif", "PseudoVMSIF">; + +//===----------------------------------------------------------------------===// +// 16.6. vmsof.m set-only-first mask bit +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryM_M<"int_riscv_vmsof", "PseudoVMSOF">; + +//===----------------------------------------------------------------------===// +// 16.8. Vector Iota Instruction +//===----------------------------------------------------------------------===// +defm "" : VPatUnaryV_M<"int_riscv_viota", "PseudoVIOTA">; + +//===----------------------------------------------------------------------===// +// 16.9. Vector Element Index Instruction +//===----------------------------------------------------------------------===// +defm "" : VPatNullaryV<"int_riscv_vid", "PseudoVID">; + +} // Predicates = [HasStdExtV] + +//===----------------------------------------------------------------------===// +// 17. Vector Permutation Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 17.1. 
Integer Scalar Move Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { +foreach vti = AllIntegerVectors in { + def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.SEW)>; + def : Pat<(vti.Vector (int_riscv_vmv_s_x (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVMV_S_X_" # vti.LMul.MX) + (vti.Vector $rs1), $rs2, GPR:$vl, vti.SEW)>; +} +} // Predicates = [HasStdExtV] + +//===----------------------------------------------------------------------===// +// 17.2. Floating-Point Scalar Move Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV, HasStdExtF] in { +foreach fvti = AllFloatVectors in { + defvar instr = !cast<Instruction>("PseudoVFMV_"#fvti.ScalarSuffix#"_S_" # + fvti.LMul.MX); + def : Pat<(fvti.Scalar (int_riscv_vfmv_f_s (fvti.Vector fvti.RegClass:$rs2))), + (instr $rs2, fvti.SEW)>; + + def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1), + (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" # + fvti.LMul.MX) + (fvti.Vector $rs1), + (fvti.Scalar fvti.ScalarRegClass:$rs2), + GPR:$vl, fvti.SEW)>; +} +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 17.3. Vector Slide Instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtV] in { + defm "" : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>; + defm "" : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>; + defm "" : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>; + defm "" : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + defm "" : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>; + defm "" : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>; + defm "" : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>; + defm "" : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>; +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 17.4. Vector Register Gather Instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtV] in { + defm "" : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER", + AllIntegerVectors, uimm5>; + defm "" : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16", "PseudoVRGATHEREI16", + /* eew */ 16, AllIntegerVectors>; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + defm "" : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER", + AllFloatVectors, uimm5>; + defm "" : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16", "PseudoVRGATHEREI16", + /* eew */ 16, AllFloatVectors>; +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// 17.5. 
Vector Compress Instruction +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtV] in { + defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>; +} // Predicates = [HasStdExtV, HasStdExtF] + +// Include the non-intrinsic ISel patterns +include "RISCVInstrInfoVSDPatterns.td" diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td new file mode 100644 index 000000000000..dee67708bed1 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -0,0 +1,643 @@ +//===- RISCVInstrInfoVSDPatterns.td - RVV SDNode patterns --*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file contains the required infrastructure and SDNode patterns to +/// support code generation for the standard 'V' (Vector) extension, version +/// 0.10. This version is still experimental as the 'V' extension hasn't been +/// ratified yet. +/// +/// This file is included from and depends upon RISCVInstrInfoVPseudos.td +/// +/// Note: the patterns for RVV intrinsics are found in +/// RISCVInstrInfoVPseudos.td. +/// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Helpers to define the SDNode patterns. 
+//===----------------------------------------------------------------------===// + +def SDTSplatI64 : SDTypeProfile<1, 1, [ + SDTCVecEltisVT<0, i64>, SDTCisVT<1, i32> +]>; + +def rv32_splat_i64 : SDNode<"RISCVISD::SPLAT_VECTOR_I64", SDTSplatI64>; + +def riscv_trunc_vector : SDNode<"RISCVISD::TRUNCATE_VECTOR", + SDTypeProfile<1, 1, + [SDTCisVec<0>, SDTCisVec<1>]>>; + +// Penalize the generic form with Complexity=1 to give the simm5/uimm5 variants +// precedence +def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [], [], 1>; + +def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", []>; +def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", []>; + +class SwapHelper<dag Prefix, dag A, dag B, dag Suffix, bit swap> { + dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix); +} + +multiclass VPatUSLoadStoreSDNode<LLVMType type, + LLVMType mask_type, + int sew, + LMULInfo vlmul, + OutPatFrag avl, + RegisterClass reg_rs1, + VReg reg_class> +{ + defvar load_instr = !cast<Instruction>("PseudoVLE"#sew#"_V_"#vlmul.MX); + defvar store_instr = !cast<Instruction>("PseudoVSE"#sew#"_V_"#vlmul.MX); + // Load + def : Pat<(type (load reg_rs1:$rs1)), + (load_instr reg_rs1:$rs1, avl, sew)>; + // Store + def : Pat<(store type:$rs2, reg_rs1:$rs1), + (store_instr reg_class:$rs2, reg_rs1:$rs1, avl, sew)>; +} + +multiclass VPatUSLoadStoreSDNodes<RegisterClass reg_rs1> { + foreach vti = AllVectors in + defm "" : VPatUSLoadStoreSDNode<vti.Vector, vti.Mask, vti.SEW, vti.LMul, + vti.AVL, reg_rs1, vti.RegClass>; +} + +class VPatBinarySDNode_VV<SDNode vop, + string instruction_name, + ValueType result_type, + ValueType op_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + OutPatFrag avl, + VReg RetClass, + VReg op_reg_class> : + Pat<(result_type (vop + (op_type op_reg_class:$rs1), + (op_type op_reg_class:$rs2))), + (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX) + op_reg_class:$rs1, + op_reg_class:$rs2, + avl, sew)>; + +class VPatBinarySDNode_XI<SDNode vop, + string instruction_name, + string suffix, + ValueType result_type, + ValueType vop_type, + ValueType xop_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + OutPatFrag avl, + VReg RetClass, + VReg vop_reg_class, + ComplexPattern SplatPatKind, + DAGOperand xop_kind> : + Pat<(result_type (vop + (vop_type vop_reg_class:$rs1), + (vop_type (SplatPatKind xop_kind:$rs2)))), + (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX) + vop_reg_class:$rs1, + xop_kind:$rs2, + avl, sew)>; + +multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name> +{ + foreach vti = AllIntegerVectors in { + def : VPatBinarySDNode_VV<vop, instruction_name, + vti.Vector, vti.Vector, vti.Mask, vti.SEW, + vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>; + def : VPatBinarySDNode_XI<vop, instruction_name, "VX", + vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW, + vti.LMul, vti.AVL, vti.RegClass, vti.RegClass, + SplatPat, GPR>; + } +} + +multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name, + Operand ImmType = simm5> +{ + foreach vti = AllIntegerVectors in { + def : VPatBinarySDNode_VV<vop, instruction_name, + vti.Vector, vti.Vector, vti.Mask, vti.SEW, + vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>; + def : VPatBinarySDNode_XI<vop, instruction_name, "VX", + vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW, + vti.LMul, vti.AVL, vti.RegClass, vti.RegClass, + SplatPat, GPR>; + def : VPatBinarySDNode_XI<vop, instruction_name, "VI", + vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW, + vti.LMul, vti.AVL, 
vti.RegClass, vti.RegClass, + !cast<ComplexPattern>(SplatPat#_#ImmType), + ImmType>; + } +} + +class VPatBinarySDNode_VF<SDNode vop, + string instruction_name, + ValueType result_type, + ValueType vop_type, + ValueType xop_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + OutPatFrag avl, + VReg RetClass, + VReg vop_reg_class, + DAGOperand xop_kind> : + Pat<(result_type (vop (vop_type vop_reg_class:$rs1), + (vop_type (splat_vector xop_kind:$rs2)))), + (!cast<Instruction>(instruction_name#"_"#vlmul.MX) + vop_reg_class:$rs1, + (xop_type xop_kind:$rs2), + avl, sew)>; + +multiclass VPatBinaryFPSDNode_VV_VF<SDNode vop, string instruction_name> { + foreach vti = AllFloatVectors in { + def : VPatBinarySDNode_VV<vop, instruction_name, + vti.Vector, vti.Vector, vti.Mask, vti.SEW, + vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>; + def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix, + vti.Vector, vti.Vector, vti.Scalar, vti.Mask, + vti.SEW, vti.LMul, vti.AVL, vti.RegClass, vti.RegClass, + vti.ScalarRegClass>; + } +} + +multiclass VPatBinaryFPSDNode_R_VF<SDNode vop, string instruction_name> { + foreach fvti = AllFloatVectors in + def : Pat<(fvti.Vector (vop (fvti.Vector (splat_vector fvti.Scalar:$rs2)), + (fvti.Vector fvti.RegClass:$rs1))), + (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, + (fvti.Scalar fvti.ScalarRegClass:$rs2), + fvti.AVL, fvti.SEW)>; +} + +multiclass VPatIntegerSetCCSDNode_VV<CondCode cc, + string instruction_name, + bit swap = 0> { + foreach vti = AllIntegerVectors in { + defvar instruction = !cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX); + def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), cc)), + SwapHelper<(instruction), + (instruction vti.RegClass:$rs1), + (instruction vti.RegClass:$rs2), + (instruction vti.AVL, vti.SEW), + swap>.Value>; + } +} + +multiclass VPatIntegerSetCCSDNode_XI<CondCode cc, + string instruction_name, + string kind, + ComplexPattern SplatPatKind, + DAGOperand xop_kind, + bit swap = 0> { + foreach vti = AllIntegerVectors in { + defvar instruction = !cast<Instruction>(instruction_name#_#kind#_#vti.LMul.MX); + def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)), + SwapHelper<(instruction), + (instruction vti.RegClass:$rs1), + (instruction xop_kind:$rs2), + (instruction vti.AVL, vti.SEW), + swap>.Value>; + } +} + +multiclass VPatIntegerSetCCSDNode_VV_VX_VI<CondCode cc, + string instruction_name, + bit swap = 0> { + defm : VPatIntegerSetCCSDNode_VV<cc, instruction_name, swap>; + defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX", + SplatPat, GPR, swap>; + defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VI", + SplatPat_simm5, simm5, swap>; +} + +multiclass VPatIntegerSetCCSDNode_VV_VX<CondCode cc, + string instruction_name, + bit swap = 0> { + defm : VPatIntegerSetCCSDNode_VV<cc, instruction_name, swap>; + defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX", + SplatPat, GPR, swap>; +} + +multiclass VPatIntegerSetCCSDNode_VX_VI<CondCode cc, + string instruction_name, + bit swap = 0> { + defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX", + SplatPat, GPR, swap>; + defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VI", + SplatPat_simm5, simm5, swap>; +} + +multiclass VPatFPSetCCSDNode_VV<CondCode cc, string instruction_name> { + foreach fvti = AllFloatVectors in + def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1), + 
(fvti.Vector fvti.RegClass:$rs2), + cc)), + (!cast<Instruction>(instruction_name#"_VV_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; +} + +multiclass VPatFPSetCCSDNode_VF<CondCode cc, string instruction_name> { + foreach fvti = AllFloatVectors in + def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1), + (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)), + cc)), + (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, + (fvti.Scalar fvti.ScalarRegClass:$rs2), + fvti.AVL, fvti.SEW)>; +} + +multiclass VPatFPSetCCSDNode_FV<CondCode cc, string swapped_op_instruction_name> { + foreach fvti = AllFloatVectors in + def : Pat<(fvti.Mask (setcc (fvti.Vector (splat_vector fvti.ScalarRegClass:$rs2)), + (fvti.Vector fvti.RegClass:$rs1), + cc)), + (!cast<Instruction>(swapped_op_instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, + (fvti.Scalar fvti.ScalarRegClass:$rs2), + fvti.AVL, fvti.SEW)>; +} + +multiclass VPatFPSetCCSDNode_VV_VF_FV<CondCode cc, + string inst_name, + string swapped_op_inst_name> { + defm : VPatFPSetCCSDNode_VV<cc, inst_name>; + defm : VPatFPSetCCSDNode_VF<cc, inst_name>; + defm : VPatFPSetCCSDNode_FV<cc, swapped_op_inst_name>; +} + +multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix, + list <VTypeInfoToFraction> fraction_list> { + foreach vtiTofti = fraction_list in { + defvar vti = vtiTofti.Vti; + defvar fti = vtiTofti.Fti; + foreach op = ops in + def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))), + (!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX) + fti.RegClass:$rs2, fti.AVL, vti.SEW)>; + } +} + +//===----------------------------------------------------------------------===// +// Patterns. +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { + +// 7.4. Vector Unit-Stride Instructions +defm "" : VPatUSLoadStoreSDNodes<GPR>; +defm "" : VPatUSLoadStoreSDNodes<AddrFI>; + +// 12.1. Vector Single-Width Integer Add and Subtract +defm "" : VPatBinarySDNode_VV_VX_VI<add, "PseudoVADD">; +defm "" : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">; +// Handle VRSUB specially since it's the only integer binary op with reversed +// pattern operands +foreach vti = AllIntegerVectors in { + def : Pat<(sub (vti.Vector (SplatPat XLenVT:$rs2)), + (vti.Vector vti.RegClass:$rs1)), + (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX) + vti.RegClass:$rs1, GPR:$rs2, vti.AVL, vti.SEW)>; + def : Pat<(sub (vti.Vector (SplatPat_simm5 XLenVT:$rs2)), + (vti.Vector vti.RegClass:$rs1)), + (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX) + vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.SEW)>; +} + +// 12.3. Vector Integer Extension +defm "" : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF2", + AllFractionableVF2IntVectors>; +defm "" : VPatExtendSDNode_V<[sext], "PseudoVSEXT", "VF2", + AllFractionableVF2IntVectors>; +defm "" : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF4", + AllFractionableVF4IntVectors>; +defm "" : VPatExtendSDNode_V<[sext], "PseudoVSEXT", "VF4", + AllFractionableVF4IntVectors>; +defm "" : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF8", + AllFractionableVF8IntVectors>; +defm "" : VPatExtendSDNode_V<[sext], "PseudoVSEXT", "VF8", + AllFractionableVF8IntVectors>; + +// 12.5. 
Vector Bitwise Logical Instructions +defm "" : VPatBinarySDNode_VV_VX_VI<and, "PseudoVAND">; +defm "" : VPatBinarySDNode_VV_VX_VI<or, "PseudoVOR">; +defm "" : VPatBinarySDNode_VV_VX_VI<xor, "PseudoVXOR">; + +// 12.6. Vector Single-Width Bit Shift Instructions +defm "" : VPatBinarySDNode_VV_VX_VI<shl, "PseudoVSLL", uimm5>; +defm "" : VPatBinarySDNode_VV_VX_VI<srl, "PseudoVSRL", uimm5>; +defm "" : VPatBinarySDNode_VV_VX_VI<sra, "PseudoVSRA", uimm5>; + +// 12.7. Vector Narrowing Integer Right Shift Instructions +foreach vtiTofti = AllFractionableVF2IntVectors in { + defvar vti = vtiTofti.Vti; + defvar fti = vtiTofti.Fti; + def : Pat<(fti.Vector (riscv_trunc_vector (vti.Vector vti.RegClass:$rs1))), + (!cast<Instruction>("PseudoVNSRL_WI_"#fti.LMul.MX) + vti.RegClass:$rs1, 0, fti.AVL, fti.SEW)>; +} + +// 12.8. Vector Integer Comparison Instructions +defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETEQ, "PseudoVMSEQ">; +defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETNE, "PseudoVMSNE">; + +// FIXME: Support immediate forms of these by choosing SLE and decrementing the +// immediate +defm "" : VPatIntegerSetCCSDNode_VV_VX<SETLT, "PseudoVMSLT">; +defm "" : VPatIntegerSetCCSDNode_VV_VX<SETULT, "PseudoVMSLTU">; + +defm "" : VPatIntegerSetCCSDNode_VV<SETGT, "PseudoVMSLT", /*swap*/1>; +defm "" : VPatIntegerSetCCSDNode_VV<SETUGT, "PseudoVMSLTU", /*swap*/1>; +defm "" : VPatIntegerSetCCSDNode_VX_VI<SETGT, "PseudoVMSGT">; +defm "" : VPatIntegerSetCCSDNode_VX_VI<SETUGT, "PseudoVMSGTU">; + +defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETLE, "PseudoVMSLE">; +defm "" : VPatIntegerSetCCSDNode_VV_VX_VI<SETULE, "PseudoVMSLEU">; + +// FIXME: Support immediate forms of these by choosing SGT and decrementing the +// immediate +defm "" : VPatIntegerSetCCSDNode_VV<SETGE, "PseudoVMSLE", /*swap*/1>; +defm "" : VPatIntegerSetCCSDNode_VV<SETUGE, "PseudoVMSLEU", /*swap*/1>; + +// 12.9. Vector Integer Min/Max Instructions +defm "" : VPatBinarySDNode_VV_VX<umin, "PseudoVMINU">; +defm "" : VPatBinarySDNode_VV_VX<smin, "PseudoVMIN">; +defm "" : VPatBinarySDNode_VV_VX<umax, "PseudoVMAXU">; +defm "" : VPatBinarySDNode_VV_VX<smax, "PseudoVMAX">; + +// 12.10. Vector Single-Width Integer Multiply Instructions +defm "" : VPatBinarySDNode_VV_VX<mul, "PseudoVMUL">; +defm "" : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">; +defm "" : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">; + +// 12.11. Vector Integer Divide Instructions +defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">; +defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">; +defm "" : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">; +defm "" : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">; + +// 12.16. Vector Integer Merge Instructions +foreach vti = AllIntegerVectors in { + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1, + vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX) + vti.RegClass:$rs2, vti.RegClass:$rs1, VMV0:$vm, + vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1), + vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX) + vti.RegClass:$rs2, GPR:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1), + vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX) + vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>; +} + +// 16.1.
Vector Mask-Register Logical Instructions +foreach mti = AllMasks in { + def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)), + (!cast<Instruction>("PseudoVMAND_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + def : Pat<(mti.Mask (or VR:$rs1, VR:$rs2)), + (!cast<Instruction>("PseudoVMOR_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + def : Pat<(mti.Mask (xor VR:$rs1, VR:$rs2)), + (!cast<Instruction>("PseudoVMXOR_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + + def : Pat<(mti.Mask (vnot (and VR:$rs1, VR:$rs2))), + (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + def : Pat<(mti.Mask (vnot (or VR:$rs1, VR:$rs2))), + (!cast<Instruction>("PseudoVMNOR_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + def : Pat<(mti.Mask (vnot (xor VR:$rs1, VR:$rs2))), + (!cast<Instruction>("PseudoVMXNOR_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + + def : Pat<(mti.Mask (and VR:$rs1, (vnot VR:$rs2))), + (!cast<Instruction>("PseudoVMANDNOT_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; + def : Pat<(mti.Mask (or VR:$rs1, (vnot VR:$rs2))), + (!cast<Instruction>("PseudoVMORNOT_MM_"#mti.LMul.MX) + VR:$rs1, VR:$rs2, mti.AVL, mti.SEW)>; +} + +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + +// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions +defm "" : VPatBinaryFPSDNode_VV_VF<fadd, "PseudoVFADD">; +defm "" : VPatBinaryFPSDNode_VV_VF<fsub, "PseudoVFSUB">; +defm "" : VPatBinaryFPSDNode_R_VF<fsub, "PseudoVFRSUB">; + +// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +defm "" : VPatBinaryFPSDNode_VV_VF<fmul, "PseudoVFMUL">; +defm "" : VPatBinaryFPSDNode_VV_VF<fdiv, "PseudoVFDIV">; +defm "" : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">; + +// 14.11. Vector Floating-Point Compare Instructions +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">; +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOEQ, "PseudoVMFEQ", "PseudoVMFEQ">; + +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETNE, "PseudoVMFNE", "PseudoVMFNE">; +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETUNE, "PseudoVMFNE", "PseudoVMFNE">; + +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLT, "PseudoVMFLT", "PseudoVMFGT">; +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLT, "PseudoVMFLT", "PseudoVMFGT">; + +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETLE, "PseudoVMFLE", "PseudoVMFGE">; +defm "" : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">; + +// Floating-point vselects: +// 12.16. Vector Integer Merge Instructions +// 14.13. 
Vector Floating-Point Merge Instruction +foreach fvti = AllFloatVectors in { + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1, + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) + fvti.RegClass:$rs2, fvti.RegClass:$rs1, VMV0:$vm, + fvti.AVL, fvti.SEW)>; + + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), + (splat_vector fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) + fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + VMV0:$vm, fvti.AVL, fvti.SEW)>; + + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), + (splat_vector (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) + fvti.RegClass:$rs2, 0, VMV0:$vm, fvti.AVL, fvti.SEW)>; +} +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// Vector Splats +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtV] in { +foreach vti = AllIntegerVectors in { + def : Pat<(vti.Vector (splat_vector GPR:$rs1)), + (!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX) + GPR:$rs1, vti.AVL, vti.SEW)>; + def : Pat<(vti.Vector (splat_vector simm5:$rs1)), + (!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX) + simm5:$rs1, vti.AVL, vti.SEW)>; +} + +foreach mti = AllMasks in { + def : Pat<(mti.Mask immAllOnesV), + (!cast<Instruction>("PseudoVMSET_M_"#mti.BX) mti.AVL, mti.SEW)>; + def : Pat<(mti.Mask immAllZerosV), + (!cast<Instruction>("PseudoVMCLR_M_"#mti.BX) mti.AVL, mti.SEW)>; +} +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, IsRV32] in { +foreach vti = AllIntegerVectors in { + if !eq(vti.SEW, 64) then { + def : Pat<(vti.Vector (rv32_splat_i64 GPR:$rs1)), + (!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX) + GPR:$rs1, vti.AVL, vti.SEW)>; + def : Pat<(vti.Vector (rv32_splat_i64 simm5:$rs1)), + (!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX) + simm5:$rs1, vti.AVL, vti.SEW)>; + } +} +} // Predicates = [HasStdExtV, IsRV32] + +let Predicates = [HasStdExtV, HasStdExtF] in { +foreach fvti = AllFloatVectors in { + def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)), + (!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + (fvti.Scalar fvti.ScalarRegClass:$rs1), + fvti.AVL, fvti.SEW)>; + + def : Pat<(fvti.Vector (splat_vector (fvti.Scalar fpimm0))), + (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX) + 0, fvti.AVL, fvti.SEW)>; +} +} // Predicates = [HasStdExtV, HasStdExtF] + +//===----------------------------------------------------------------------===// +// Vector Element Inserts/Extracts +//===----------------------------------------------------------------------===// + +// The built-in TableGen 'extractelt' and 'insertelt' nodes must return the +// same type as the vector element type. On RISC-V, XLenVT is the only legal +// integer type, so for integer inserts/extracts we use a custom node which +// returns XLenVT. 
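The node definitions and multiclass below only match insert/extract at index 0, because any other index is custom-lowered into a slide that first moves the wanted element into position 0. A rough scalar model of that lowering, as a self-contained C++ sketch (a fixed-size array stands in for a scalable vector; this is not the actual SelectionDAG code):

#include <array>
#include <cstddef>
#include <cstdint>

// extractelt v, idx becomes: vslidedown.vx tmp, v, idx ; vmv.x.s x, tmp.
// The final move produces an XLEN-wide scalar, which is why the custom
// nodes defined below return XLenVT instead of the narrow element type.
template <std::size_t N>
int64_t extract_elt(const std::array<int32_t, N> &v, std::size_t idx) {
  std::array<int32_t, N> tmp{};        // vslidedown.vx
  for (std::size_t i = 0; i + idx < N; ++i)
    tmp[i] = v[i + idx];
  return static_cast<int64_t>(tmp[0]); // vmv.x.s, the only case matched here
}

int main() {
  std::array<int32_t, 4> v{10, 20, 30, 40};
  return extract_elt(v, 2) == 30 ? 0 : 1;
}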
+def riscv_insert_vector_elt + : SDNode<"ISD::INSERT_VECTOR_ELT", + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, XLenVT>, + SDTCisPtrTy<3>]>, []>; +def riscv_extract_vector_elt + : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDTypeProfile<1, 2, [SDTCisVT<0, XLenVT>, SDTCisPtrTy<2>]>, []>; + +multiclass VPatInsertExtractElt_XI_Idx<bit IsFloat> { + defvar vtilist = !if(IsFloat, AllFloatVectors, AllIntegerVectors); + defvar insertelt_node = !if(IsFloat, insertelt, riscv_insert_vector_elt); + defvar extractelt_node = !if(IsFloat, extractelt, riscv_extract_vector_elt); + foreach vti = vtilist in { + defvar MX = vti.LMul.MX; + defvar vmv_xf_s_inst = !cast<Instruction>(!strconcat("PseudoV", + !if(IsFloat, "F", ""), + "MV_", + vti.ScalarSuffix, + "_S_", MX)); + defvar vmv_s_xf_inst = !cast<Instruction>(!strconcat("PseudoV", + !if(IsFloat, "F", ""), + "MV_S_", + vti.ScalarSuffix, + "_", MX)); + // Only pattern-match insert/extract-element operations where the index is + // 0. Any other index will have been custom-lowered to slide the vector + // correctly into place (and, in the case of insert, slide it back again + // afterwards). + def : Pat<(vti.Scalar (extractelt_node (vti.Vector vti.RegClass:$rs2), 0)), + (vmv_xf_s_inst vti.RegClass:$rs2, vti.SEW)>; + + def : Pat<(vti.Vector (insertelt_node (vti.Vector vti.RegClass:$merge), + vti.ScalarRegClass:$rs1, 0)), + (vmv_s_xf_inst vti.RegClass:$merge, + (vti.Scalar vti.ScalarRegClass:$rs1), + vti.AVL, vti.SEW)>; + } +} + +let Predicates = [HasStdExtV] in +defm "" : VPatInsertExtractElt_XI_Idx</*IsFloat*/0>; +let Predicates = [HasStdExtV, HasStdExtF] in +defm "" : VPatInsertExtractElt_XI_Idx</*IsFloat*/1>; + +//===----------------------------------------------------------------------===// +// Miscellaneous RISCVISD SDNodes +//===----------------------------------------------------------------------===// + +def riscv_vid + : SDNode<"RISCVISD::VID", SDTypeProfile<1, 0, [SDTCisVec<0>]>, []>; + +def SDTRVVSlide : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT> +]>; + +def riscv_slideup : SDNode<"RISCVISD::VSLIDEUP", SDTRVVSlide, []>; +def riscv_slidedown : SDNode<"RISCVISD::VSLIDEDOWN", SDTRVVSlide, []>; + +let Predicates = [HasStdExtV] in { + +foreach vti = AllIntegerVectors in + def : Pat<(vti.Vector riscv_vid), + (!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX) vti.AVL, vti.SEW)>; + +foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { + def : Pat<(vti.Vector (riscv_slideup (vti.Vector vti.RegClass:$rs3), + (vti.Vector vti.RegClass:$rs1), + uimm5:$rs2)), + (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX) + vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2, + vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (riscv_slideup (vti.Vector vti.RegClass:$rs3), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2)), + (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX) + vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, + vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (riscv_slidedown (vti.Vector vti.RegClass:$rs3), + (vti.Vector vti.RegClass:$rs1), + uimm5:$rs2)), + (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX) + vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2, + vti.AVL, vti.SEW)>; + + def : Pat<(vti.Vector (riscv_slidedown (vti.Vector vti.RegClass:$rs3), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2)), + (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX) + vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, + vti.AVL, vti.SEW)>; +} +} // Predicates = [HasStdExtV] diff --git 
a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td new file mode 100644 index 000000000000..85ebe054499e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -0,0 +1,371 @@ +//===-- RISCVInstrInfoFH.td - RISC-V 'FH' instructions -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard 'Zfh' +// half-precision floating-point extension, version 0.1. +// This version is still experimental as the 'Zfh' extension hasn't been +// ratified yet. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// RISC-V specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDT_RISCVFMV_H_X + : SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, XLenVT>]>; +def SDT_RISCVFMV_X_ANYEXTH + : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, f16>]>; + +def riscv_fmv_h_x + : SDNode<"RISCVISD::FMV_H_X", SDT_RISCVFMV_H_X>; +def riscv_fmv_x_anyexth + : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>; + +//===----------------------------------------------------------------------===// +// Instruction class templates +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPFMAH_rrr_frm<RISCVOpcode opcode, string opcodestr> + : RVInstR4<0b10, opcode, (outs FPR16:$rd), + (ins FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, frmarg:$funct3), + opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">; + +class FPFMAHDynFrmAlias<FPFMAH_rrr_frm Inst, string OpcodeStr> + : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3", + (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPALUH_rr<bits<7> funct7, bits<3> funct3, string opcodestr> + : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR16:$rd), + (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPALUH_rr_frm<bits<7> funct7, string opcodestr> + : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR16:$rd), + (ins FPR16:$rs1, FPR16:$rs2, frmarg:$funct3), opcodestr, + "$rd, $rs1, $rs2, $funct3">; + +class FPALUHDynFrmAlias<FPALUH_rr_frm Inst, string OpcodeStr> + : InstAlias<OpcodeStr#" $rd, $rs1, $rs2", + (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, 0b111)>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPCmpH_rr<bits<3> funct3, string opcodestr> + : RVInstR<0b1010010, funct3, OPC_OP_FP, (outs GPR:$rd), + (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[]>; + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZfh] in { +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd), + (ins GPR:$rs1, simm12:$imm12), + "flh", "$rd, ${imm12}(${rs1})">, + Sched<[]>; + +// Operands for stores are in the order srcreg, base, offset rather than 
+// reflecting the order these fields are specified in the instruction +// encoding. +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def FSH : RVInstS<0b001, OPC_STORE_FP, (outs), + (ins FPR16:$rs2, GPR:$rs1, simm12:$imm12), + "fsh", "$rs2, ${imm12}(${rs1})">, + Sched<[]>; + +def FMADD_H : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">, + Sched<[]>; +def : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">; +def FMSUB_H : FPFMAH_rrr_frm<OPC_MSUB, "fmsub.h">, + Sched<[]>; +def : FPFMAHDynFrmAlias<FMSUB_H, "fmsub.h">; +def FNMSUB_H : FPFMAH_rrr_frm<OPC_NMSUB, "fnmsub.h">, + Sched<[]>; +def : FPFMAHDynFrmAlias<FNMSUB_H, "fnmsub.h">; +def FNMADD_H : FPFMAH_rrr_frm<OPC_NMADD, "fnmadd.h">, + Sched<[]>; +def : FPFMAHDynFrmAlias<FNMADD_H, "fnmadd.h">; + +def FADD_H : FPALUH_rr_frm<0b0000010, "fadd.h">, + Sched<[]>; +def : FPALUHDynFrmAlias<FADD_H, "fadd.h">; +def FSUB_H : FPALUH_rr_frm<0b0000110, "fsub.h">, + Sched<[]>; +def : FPALUHDynFrmAlias<FSUB_H, "fsub.h">; +def FMUL_H : FPALUH_rr_frm<0b0001010, "fmul.h">, + Sched<[]>; +def : FPALUHDynFrmAlias<FMUL_H, "fmul.h">; +def FDIV_H : FPALUH_rr_frm<0b0001110, "fdiv.h">, + Sched<[]>; +def : FPALUHDynFrmAlias<FDIV_H, "fdiv.h">; + +def FSQRT_H : FPUnaryOp_r_frm<0b0101110, FPR16, FPR16, "fsqrt.h">, + Sched<[]> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>; + +def FSGNJ_H : FPALUH_rr<0b0010010, 0b000, "fsgnj.h">, + Sched<[]>; +def FSGNJN_H : FPALUH_rr<0b0010010, 0b001, "fsgnjn.h">, + Sched<[]>; +def FSGNJX_H : FPALUH_rr<0b0010010, 0b010, "fsgnjx.h">, + Sched<[]>; + +def FMIN_H : FPALUH_rr<0b0010110, 0b000, "fmin.h">, + Sched<[]>; +def FMAX_H : FPALUH_rr<0b0010110, 0b001, "fmax.h">, + Sched<[]>; + +def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.w.h">, + Sched<[]> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>; + +def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.wu.h">, + Sched<[]> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>; + +def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.w">, + Sched<[]> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>; + +def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">, + Sched<[]> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>; + +def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">, + Sched<[]> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>; + +def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b000, FPR32, FPR16, "fcvt.s.h">, + Sched<[]> { + let rs2 = 0b00010; +} + +def FMV_X_H : FPUnaryOp_r<0b1110010, 0b000, GPR, FPR16, "fmv.x.h">, + Sched<[]> { + let rs2 = 0b00000; +} + +def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">, + Sched<[]> { + let rs2 = 0b00000; +} + +def FEQ_H : FPCmpH_rr<0b010, "feq.h">; +def FLT_H : FPCmpH_rr<0b001, "flt.h">; +def FLE_H : FPCmpH_rr<0b000, "fle.h">; + +def FCLASS_H : FPUnaryOp_r<0b1110010, 0b001, GPR, FPR16, "fclass.h">, + Sched<[]> { + let rs2 = 0b00000; +} +} // Predicates = [HasStdExtZfh] + +let Predicates = [HasStdExtZfh, IsRV64] in { +def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.l.h">, + Sched<[]> { + let rs2 = 0b00010; +} +def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>; + +def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.lu.h">, + Sched<[]> { + let rs2 = 0b00011; +} +def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>; + +def FCVT_H_L 
: FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.l">, + Sched<[]> { + let rs2 = 0b00010; +} +def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>; + +def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">, + Sched<[]> { + let rs2 = 0b00011; +} +def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>; +} // Predicates = [HasStdExtZfh, IsRV64] + +let Predicates = [HasStdExtZfh, HasStdExtD] in { +def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">, + Sched<[]> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>; + +def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">, + Sched<[]> { + let rs2 = 0b00010; +} +} // Predicates = [HasStdExtZfh, HasStdExtD] + +//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZfh] in { +def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>; +def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>; + +def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; +def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; +def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; + +// fgt.h/fge.h are recognised by the GNU assembler but the canonical +// flt.h/fle.h forms will always be printed. Therefore, set a zero weight. +def : InstAlias<"fgt.h $rd, $rs, $rt", + (FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; +def : InstAlias<"fge.h $rd, $rs, $rt", + (FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; + +def PseudoFLH : PseudoFloatLoad<"flh", FPR16>; +def PseudoFSH : PseudoStore<"fsh", FPR16>; +} // Predicates = [HasStdExtZfh] + +//===----------------------------------------------------------------------===// +// Pseudo-instructions and codegen patterns +//===----------------------------------------------------------------------===// + +/// Generic pattern classes +class PatFpr16Fpr16<SDPatternOperator OpNode, RVInstR Inst> + : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2)>; + +class PatFpr16Fpr16DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst> + : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2, 0b111)>; + +let Predicates = [HasStdExtZfh] in { + +/// Float constants +def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>; + +/// Float conversion operations + +// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so +// are defined later. 
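One detail worth calling out from the patterns above: `(f16 (fpimm0))` is selected to `FMV_H_X X0`, which works because +0.0 in IEEE-754 binary16 is the all-zeros bit pattern, so moving the zero register into a half-precision FPR materializes it exactly. A quick standalone check of that bit-level claim (the half value is modeled as a raw uint16_t):

#include <cassert>
#include <cstdint>

int main() {
  // binary16 layout: 1 sign bit, 5 exponent bits, 10 mantissa bits.
  // fmv.h.x ft0, x0 writes the all-zeros pattern, which decodes as +0.0.
  uint16_t bits = 0;                  // value read from x0
  assert((bits >> 15) == 0);          // sign
  assert(((bits >> 10) & 0x1f) == 0); // exponent
  assert((bits & 0x3ff) == 0);        // mantissa
  return 0;
}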
+ +/// Float arithmetic operations + +def : PatFpr16Fpr16DynFrm<fadd, FADD_H>; +def : PatFpr16Fpr16DynFrm<fsub, FSUB_H>; +def : PatFpr16Fpr16DynFrm<fmul, FMUL_H>; +def : PatFpr16Fpr16DynFrm<fdiv, FDIV_H>; + +def : Pat<(fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>; + +def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>; +def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>; + +def : PatFpr16Fpr16<fcopysign, FSGNJ_H>; +def : Pat<(fcopysign FPR16:$rs1, (fneg FPR16:$rs2)), (FSGNJN_H $rs1, $rs2)>; +def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2), + (FSGNJ_H $rs1, (FCVT_H_S $rs2, 0b111))>; +def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2), + (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>; +def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>; +def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>; + +// fmadd: rs1 * rs2 + rs3 +def : Pat<(fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3), + (FMADD_H $rs1, $rs2, $rs3, 0b111)>; + +// fmsub: rs1 * rs2 - rs3 +def : Pat<(fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)), + (FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>; + +// fnmsub: -rs1 * rs2 + rs3 +def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3), + (FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>; + +// fnmadd: -rs1 * rs2 - rs3 +def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)), + (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>; + +def : PatFpr16Fpr16<fminnum, FMIN_H>; +def : PatFpr16Fpr16<fmaxnum, FMAX_H>; + +/// Setcc + +def : PatFpr16Fpr16<seteq, FEQ_H>; +def : PatFpr16Fpr16<setoeq, FEQ_H>; +def : PatFpr16Fpr16<setlt, FLT_H>; +def : PatFpr16Fpr16<setolt, FLT_H>; +def : PatFpr16Fpr16<setle, FLE_H>; +def : PatFpr16Fpr16<setole, FLE_H>; + +def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>; + +/// Loads + +defm : LdPat<load, FLH>; + +/// Stores + +defm : StPat<store, FSH, FPR16>; + +/// Float conversion operations + +// f32 -> f16, f16 -> f32 +def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; +def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; + +// Moves (no conversion) +def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>; +def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>; +} // Predicates = [HasStdExtZfh] + +let Predicates = [HasStdExtZfh, IsRV32] in { +// float->[u]int. Round-to-zero must be used. +def : Pat<(fp_to_sint FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(fp_to_uint FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; + +// [u]int->float. Match GCC and default to using dynamic rounding mode. +def : Pat<(sint_to_fp GPR:$rs1), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(uint_to_fp GPR:$rs1), (FCVT_H_WU $rs1, 0b111)>; +} // Predicates = [HasStdExtZfh, IsRV32] + +let Predicates = [HasStdExtZfh, IsRV64] in { +// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe +// because fpto[u|s]i produces poison if the value can't fit into the target. +// We match the single case below because fcvt.wu.h sign-extends its result, so +// it is cheaper than fcvt.lu.h+sext.w. +def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR16:$rs1)), i32), + (FCVT_WU_H $rs1, 0b001)>; + +// FP->[u]int64 +def : Pat<(fp_to_sint FPR16:$rs1), (FCVT_L_H $rs1, 0b001)>; +def : Pat<(fp_to_uint FPR16:$rs1), (FCVT_LU_H $rs1, 0b001)>; + +// [u]int->fp. Match GCC and default to using dynamic rounding mode.
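Before the remaining patterns, note the trailing immediates: they encode the 3-bit frm (rounding mode) field from the base F extension, with 0b001 selecting round-toward-zero for the conversions above and 0b111 selecting the dynamic mode held in the frm CSR. A reference sketch of the encodings (values as specified by the RISC-V ISA):

#include <cstdio>

// frm field encodings used as the last operand of the FP patterns here.
enum Frm : unsigned {
  RNE = 0b000, // round to nearest, ties to even
  RTZ = 0b001, // round toward zero (required for fp_to_[su]int)
  RDN = 0b010, // round down (toward -inf)
  RUP = 0b011, // round up (toward +inf)
  RMM = 0b100, // round to nearest, ties to max magnitude
  DYN = 0b111  // dynamic: take the rounding mode from the frm CSR
};

int main() {
  std::printf("fcvt.w.h uses rtz=%u, fadd.h uses dyn=%u\n",
              unsigned(RTZ), unsigned(DYN));
  return 0;
}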
+def : Pat<(sint_to_fp (sexti32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(sint_to_fp GPR:$rs1), (FCVT_H_L $rs1, 0b111)>; +def : Pat<(uint_to_fp GPR:$rs1), (FCVT_H_LU $rs1, 0b111)>; +} // Predicates = [HasStdExtZfh, IsRV64] + +let Predicates = [HasStdExtZfh, HasStdExtD] in { +/// Float conversion operations +// f64 -> f16, f16 -> f64 +def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; +def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp index b1dbcfa7f738..3c38dd1bf64d 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "RISCV.h" +#include "RISCVSubtarget.h" #include "MCTargetDesc/RISCVMCExpr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -121,12 +122,93 @@ bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, case MachineOperand::MO_ConstantPoolIndex: MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); break; + case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + } + return true; +} + +static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI) { + const RISCVVPseudosTable::PseudoInfo *RVV = + RISCVVPseudosTable::getPseudoInfo(MI->getOpcode()); + if (!RVV) + return false; + + OutMI.setOpcode(RVV->BaseInstr); + + const MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "MI expected to be in a basic block"); + const MachineFunction *MF = MBB->getParent(); + assert(MF && "MBB expected to be in a machine function"); + + const TargetRegisterInfo *TRI = + MF->getSubtarget<RISCVSubtarget>().getRegisterInfo(); + assert(TRI && "TargetRegisterInfo expected"); + + uint64_t TSFlags = MI->getDesc().TSFlags; + int NumOps = MI->getNumExplicitOperands(); + + for (const MachineOperand &MO : MI->explicit_operands()) { + int OpNo = (int)MI->getOperandNo(&MO); + assert(OpNo >= 0 && "Operand number doesn't fit in an 'int' type"); + + // Skip VL and SEW operands which are the last two operands if present. + if ((TSFlags & RISCVII::HasVLOpMask) && OpNo == (NumOps - 2)) + continue; + if ((TSFlags & RISCVII::HasSEWOpMask) && OpNo == (NumOps - 1)) + continue; + + // Skip merge op. It should be the first operand after the result. 
+ if ((TSFlags & RISCVII::HasMergeOpMask) && OpNo == 1) { + assert(MI->getNumExplicitDefs() == 1); + continue; + } + + MCOperand MCOp; + switch (MO.getType()) { + default: + llvm_unreachable("Unknown operand type"); + case MachineOperand::MO_Register: { + unsigned Reg = MO.getReg(); + + if (RISCV::VRM2RegClass.contains(Reg) || + RISCV::VRM4RegClass.contains(Reg) || + RISCV::VRM8RegClass.contains(Reg)) { + Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0); + assert(Reg && "Subregister does not exist"); + } else if (RISCV::FPR16RegClass.contains(Reg)) { + Reg = TRI->getMatchingSuperReg(Reg, RISCV::sub_16, &RISCV::FPR32RegClass); + assert(Reg && "Superregister does not exist"); + } else if (RISCV::FPR64RegClass.contains(Reg)) { + Reg = TRI->getSubReg(Reg, RISCV::sub_32); + assert(Reg && "Subregister does not exist"); + } + + MCOp = MCOperand::createReg(Reg); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + } + OutMI.addOperand(MCOp); } + + // Unmasked pseudo instructions need to append a dummy mask operand to + // the V instruction. All V instructions are modeled as the masked version. + if (TSFlags & RISCVII::HasDummyMaskOpMask) + OutMI.addOperand(MCOperand::createReg(RISCV::NoRegister)); + return true; } void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, const AsmPrinter &AP) { + if (lowerRISCVVMachineInstrToMCInst(MI, OutMI)) + return; + OutMI.setOpcode(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { @@ -134,4 +216,20 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP)) OutMI.addOperand(MCOp); } + + if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) { + OutMI.setOpcode(RISCV::CSRRS); + OutMI.addOperand(MCOperand::createImm( + RISCVSysReg::lookupSysRegByName("VLENB")->Encoding)); + OutMI.addOperand(MCOperand::createReg(RISCV::X0)); + return; + } + + if (OutMI.getOpcode() == RISCV::PseudoReadVL) { + OutMI.setOpcode(RISCV::CSRRS); + OutMI.addOperand(MCOperand::createImm( + RISCVSysReg::lookupSysRegByName("VL")->Encoding)); + OutMI.addOperand(MCOperand::createReg(RISCV::X0)); + return; + } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 4c9013aa1e23..87586023caa4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -64,7 +64,7 @@ private: } // end anonymous namespace char RISCVMergeBaseOffsetOpt::ID = 0; -INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset", +INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE, RISCV_MERGE_BASE_OFFSET_NAME, false, false) // Detect the pattern: @@ -216,12 +216,14 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, case RISCV::LHU: case RISCV::LWU: case RISCV::LD: + case RISCV::FLH: case RISCV::FLW: case RISCV::FLD: case RISCV::SB: case RISCV::SH: case RISCV::SW: case RISCV::SD: + case RISCV::FSH: case RISCV::FSW: case RISCV::FSD: { // Transforms the sequence: Into: diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index cb7d55eb0f0c..631077ef83f5 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -29,6 +29,9 @@ using namespace llvm;
static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive"); static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive"); +static_assert(RISCV::F1_H == RISCV::F0_H + 1, "Register list not consecutive"); +static_assert(RISCV::F31_H == RISCV::F0_H + 31, + "Register list not consecutive"); static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive"); static_assert(RISCV::F31_F == RISCV::F0_F + 31, "Register list not consecutive"); @@ -45,6 +48,8 @@ RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode) const MCPhysReg * RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { auto &Subtarget = MF->getSubtarget<RISCVSubtarget>(); + if (MF->getFunction().getCallingConv() == CallingConv::GHC) + return CSR_NoRegs_SaveList; if (MF->getFunction().hasFnAttribute("interrupt")) { if (Subtarget.hasStdExtD()) return CSR_XLEN_F64_Interrupt_SaveList; @@ -89,6 +94,13 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // variable-sized objects at runtime. if (TFI->hasBP(MF)) markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp + + // V registers for code generation. We handle them manually. + markSuperRegs(Reserved, RISCV::VL); + markSuperRegs(Reserved, RISCV::VTYPE); + markSuperRegs(Reserved, RISCV::VXSAT); + markSuperRegs(Reserved, RISCV::VXRM); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } @@ -152,9 +164,10 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); Register FrameReg; - int Offset = - getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg) + - MI.getOperand(FIOperandNum + 1).getImm(); + int Offset = getFrameLowering(MF) + ->getFrameIndexReference(MF, FrameIndex, FrameReg) + .getFixed() + + MI.getOperand(FIOperandNum + 1).getImm(); if (!isInt<32>(Offset)) { report_fatal_error( @@ -190,9 +203,11 @@ Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const uint32_t * RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF, - CallingConv::ID /*CC*/) const { + CallingConv::ID CC) const { auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); + if (CC == CallingConv::GHC) + return CSR_NoRegs_RegMask; switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 7544b4b3b845..e1a11fd9389f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -16,14 +16,23 @@ class RISCVReg<bits<5> Enc, string n, list<string> alt = []> : Register<n> { let AltNames = alt; } -class RISCVReg32<bits<5> Enc, string n, list<string> alt = []> : Register<n> { +class RISCVReg16<bits<5> Enc, string n, list<string> alt = []> : Register<n> { let HWEncoding{4-0} = Enc; let AltNames = alt; } +def sub_16 : SubRegIndex<16>; +class RISCVReg32<RISCVReg16 subreg> : Register<""> { + let HWEncoding{4-0} = subreg.HWEncoding{4-0}; + let SubRegs = [subreg]; + let SubRegIndices = [sub_16]; + let AsmName = subreg.AsmName; + let AltNames = subreg.AltNames; +} + // Because RISCVReg64 register have AsmName and AltNames that alias with their -// 32-bit sub-register, RISCVAsmParser will need to coerce a register number -// from a RISCVReg32 to the equivalent RISCVReg64 when appropriate. 
+// 16/32-bit sub-register, RISCVAsmParser will need to coerce a register number +// from a RISCVReg16/RISCVReg32 to the equivalent RISCVReg64 when appropriate. def sub_32 : SubRegIndex<32>; class RISCVReg64<RISCVReg32 subreg> : Register<""> { let HWEncoding{4-0} = subreg.HWEncoding{4-0}; @@ -42,12 +51,21 @@ class RISCVRegWithSubRegs<bits<5> Enc, string n, list<Register> subregs, def ABIRegAltName : RegAltNameIndex; -def sub_vrm2 : SubRegIndex<64, -1>; -def sub_vrm2_hi : SubRegIndex<64, -1>; -def sub_vrm4 : SubRegIndex<128, -1>; -def sub_vrm4_hi : SubRegIndex<128, -1>; -def sub_vrm8 : SubRegIndex<256, -1>; -def sub_vrm8_hi : SubRegIndex<256, -1>; +def sub_vrm1_0 : SubRegIndex<64, -1>; +def sub_vrm1_1 : SubRegIndex<64, -1>; +def sub_vrm1_2 : SubRegIndex<64, -1>; +def sub_vrm1_3 : SubRegIndex<64, -1>; +def sub_vrm1_4 : SubRegIndex<64, -1>; +def sub_vrm1_5 : SubRegIndex<64, -1>; +def sub_vrm1_6 : SubRegIndex<64, -1>; +def sub_vrm1_7 : SubRegIndex<64, -1>; +def sub_vrm2_0 : SubRegIndex<128, -1>; +def sub_vrm2_1 : SubRegIndex<128, -1>; +def sub_vrm2_2 : SubRegIndex<128, -1>; +def sub_vrm2_3 : SubRegIndex<128, -1>; +def sub_vrm4_0 : SubRegIndex<256, -1>; +def sub_vrm4_1 : SubRegIndex<256, -1>; + } // Namespace = "RISCV" // Integer registers @@ -97,8 +115,8 @@ let RegAltNameIndices = [ABIRegAltName] in { } } -def XLenVT : ValueTypeByHwMode<[RV32, RV64, DefaultMode], - [i32, i64, i32]>; +def XLenVT : ValueTypeByHwMode<[RV32, RV64], + [i32, i64]>; // The order of registers represents the preferred allocation sequence. // Registers are listed in the order caller-save, callee-save, specials. @@ -111,14 +129,14 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 0, 4) )> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } // The order of registers represents the preferred allocation sequence. 
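With the RISCVReg16/RISCVReg32/RISCVReg64 wrappers above, each architectural f-register gains three nested views (F<n>_H inside F<n>_F inside F<n>_D) that share a single hardware encoding and a single DWARF number; only the register class used to select the view differs. A toy C++ model of that invariant (illustrative only, not LLVM's internal representation):

#include <cassert>

// All three views of f<n> (half, single, double width) share the same
// HWEncoding (n) and the same DWARF register number (32 + n), matching
// the DwarfRegAlias/!add(Index, 32) definitions in this file.
struct FpRegView {
  unsigned Enc;
  unsigned DwarfNum;
};

static FpRegView view(unsigned n) {
  assert(n < 32);
  return FpRegView{n, 32 + n}; // identical for F<n>_H, F<n>_F, F<n>_D
}

int main() {
  FpRegView fa0 = view(10); // f10, ABI name fa0, in any width
  assert(fa0.Enc == 10 && fa0.DwarfNum == 42);
  return 0;
}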
@@ -132,8 +150,8 @@ def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 1, 4) )> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add @@ -145,8 +163,8 @@ def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add X1, X3, X4 )> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add @@ -154,8 +172,8 @@ def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 8, 9) )> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } // For indirect tail calls, we can't use callee-saved registers, as they are @@ -167,50 +185,55 @@ def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 28, 31) )> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> { let RegInfos = RegInfoByHwMode< - [RV32, RV64, DefaultMode], - [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; + [RV32, RV64], + [RegInfo<32,32,32>, RegInfo<64,64,64>]>; } // Floating point registers let RegAltNameIndices = [ABIRegAltName] in { - def F0_F : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; - def F1_F : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; - def F2_F : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; - def F3_F : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; - def F4_F : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; - def F5_F : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; - def F6_F : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; - def F7_F : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; - def F8_F : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; - def F9_F : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; - def F10_F : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; - def F11_F : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; - def F12_F : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; - def F13_F : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; - def F14_F : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; - def F15_F : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; - def F16_F : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; - def F17_F : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; - def F18_F : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; - def F19_F : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; - def F20_F : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; - def F21_F : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; - def F22_F : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; - def F23_F : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; - def F24_F : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; - def F25_F : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; - def F26_F : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; - def F27_F : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; - def F28_F : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; - def F29_F : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; - def 
F30_F : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; - def F31_F : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; + def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; + def F1_H : RISCVReg16<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; + def F2_H : RISCVReg16<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; + def F3_H : RISCVReg16<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; + def F4_H : RISCVReg16<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; + def F5_H : RISCVReg16<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; + def F6_H : RISCVReg16<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; + def F7_H : RISCVReg16<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; + def F8_H : RISCVReg16<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; + def F9_H : RISCVReg16<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; + def F10_H : RISCVReg16<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; + def F11_H : RISCVReg16<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; + def F12_H : RISCVReg16<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; + def F13_H : RISCVReg16<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; + def F14_H : RISCVReg16<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; + def F15_H : RISCVReg16<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; + def F16_H : RISCVReg16<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; + def F17_H : RISCVReg16<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; + def F18_H : RISCVReg16<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; + def F19_H : RISCVReg16<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; + def F20_H : RISCVReg16<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; + def F21_H : RISCVReg16<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; + def F22_H : RISCVReg16<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; + def F23_H : RISCVReg16<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; + def F24_H : RISCVReg16<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; + def F25_H : RISCVReg16<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; + def F26_H : RISCVReg16<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; + def F27_H : RISCVReg16<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; + def F28_H : RISCVReg16<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; + def F29_H : RISCVReg16<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; + def F30_H : RISCVReg16<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; + def F31_H : RISCVReg16<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; + + foreach Index = 0-31 in { + def F#Index#_F : RISCVReg32<!cast<RISCVReg16>("F"#Index#"_H")>, + DwarfRegNum<[!add(Index, 32)]>; + } foreach Index = 0-31 in { def F#Index#_D : RISCVReg64<!cast<RISCVReg32>("F"#Index#"_F")>, @@ -220,6 +243,14 @@ let RegAltNameIndices = [ABIRegAltName] in { // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. +def FPR16 : RegisterClass<"RISCV", [f16], 16, (add + (sequence "F%u_H", 0, 7), + (sequence "F%u_H", 10, 17), + (sequence "F%u_H", 28, 31), + (sequence "F%u_H", 8, 9), + (sequence "F%u_H", 18, 27) +)>; + def FPR32 : RegisterClass<"RISCV", [f32], 32, (add (sequence "F%u_F", 0, 7), (sequence "F%u_F", 10, 17), @@ -248,10 +279,139 @@ def FPR64C : RegisterClass<"RISCV", [f64], 64, (add (sequence "F%u_D", 8, 9) )>; +// Vector type mapping to LLVM types. +// +// Though the V extension allows that VLEN be as small as 8, +// this approach assumes that VLEN>=64. +// Additionally, the only supported ELEN values are 32 and 64, +// thus `vscale` can be defined as VLEN/64, +// allowing the same types with either ELEN value. 
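Concretely, with vscale defined as VLEN/64, a scalable type nxv<K>i<EW> occupies K * EW * vscale bits, so for example nxv2i32 (the M1 type for i32) fills exactly one register at every legal VLEN. A small sanity check of that arithmetic as a C++ sketch, ahead of the type table that follows:

#include <cassert>

// vscale = VLEN / 64, so nxv<K>i<EW> spans K * EW * (VLEN / 64) bits.
static unsigned bitsOf(unsigned K, unsigned EW, unsigned VLEN) {
  return K * EW * (VLEN / 64);
}

int main() {
  assert(bitsOf(2, 32, 64) == 64);   // nxv2i32 at VLEN=64: one register (M1)
  assert(bitsOf(2, 32, 128) == 128); // nxv2i32 at VLEN=128: still one register
  assert(bitsOf(1, 32, 64) == 32);   // nxv1i32: half a register, i.e. MF2
  return 0;
}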
+// +// MF8 MF4 MF2 M1 M2 M4 M8 +// i64* N/A N/A N/A nxv1i64 nxv2i64 nxv4i64 nxv8i64 +// i32 N/A N/A nxv1i32 nxv2i32 nxv4i32 nxv8i32 nxv16i32 +// i16 N/A nxv1i16 nxv2i16 nxv4i16 nxv8i16 nxv16i16 nxv32i16 +// i8 nxv1i8 nxv2i8 nxv4i8 nxv8i8 nxv16i8 nxv32i8 nxv64i8 +// double* N/A N/A N/A nxv1f64 nxv2f64 nxv4f64 nxv8f64 +// float N/A N/A nxv1f32 nxv2f32 nxv4f32 nxv8f32 nxv16f32 +// half N/A nxv1f16 nxv2f16 nxv4f16 nxv8f16 nxv16f16 nxv32f16 +// * ELEN=64 + +defvar vint8mf8_t = nxv1i8; +defvar vint8mf4_t = nxv2i8; +defvar vint8mf2_t = nxv4i8; +defvar vint8m1_t = nxv8i8; +defvar vint8m2_t = nxv16i8; +defvar vint8m4_t = nxv32i8; +defvar vint8m8_t = nxv64i8; + +defvar vint16mf4_t = nxv1i16; +defvar vint16mf2_t = nxv2i16; +defvar vint16m1_t = nxv4i16; +defvar vint16m2_t = nxv8i16; +defvar vint16m4_t = nxv16i16; +defvar vint16m8_t = nxv32i16; + +defvar vint32mf2_t = nxv1i32; +defvar vint32m1_t = nxv2i32; +defvar vint32m2_t = nxv4i32; +defvar vint32m4_t = nxv8i32; +defvar vint32m8_t = nxv16i32; + +defvar vint64m1_t = nxv1i64; +defvar vint64m2_t = nxv2i64; +defvar vint64m4_t = nxv4i64; +defvar vint64m8_t = nxv8i64; + +defvar vfloat16mf4_t = nxv1f16; +defvar vfloat16mf2_t = nxv2f16; +defvar vfloat16m1_t = nxv4f16; +defvar vfloat16m2_t = nxv8f16; +defvar vfloat16m4_t = nxv16f16; +defvar vfloat16m8_t = nxv32f16; + +defvar vfloat32mf2_t = nxv1f32; +defvar vfloat32m1_t = nxv2f32; +defvar vfloat32m2_t = nxv4f32; +defvar vfloat32m4_t = nxv8f32; +defvar vfloat32m8_t = nxv16f32; + +defvar vfloat64m1_t = nxv1f64; +defvar vfloat64m2_t = nxv2f64; +defvar vfloat64m4_t = nxv4f64; +defvar vfloat64m8_t = nxv8f64; + +defvar vbool1_t = nxv64i1; +defvar vbool2_t = nxv32i1; +defvar vbool4_t = nxv16i1; +defvar vbool8_t = nxv8i1; +defvar vbool16_t = nxv4i1; +defvar vbool32_t = nxv2i1; +defvar vbool64_t = nxv1i1; + +// There is no need to define register classes for fractional LMUL. +def LMULList { + list<int> m = [1, 2, 4, 8]; +} + +//===----------------------------------------------------------------------===// +// Utility classes for segment load/store. +//===----------------------------------------------------------------------===// +// The set of legal NF for LMUL = lmul. +// LMUL == 1, NF = 2, 3, 4, 5, 6, 7, 8 +// LMUL == 2, NF = 2, 3, 4 +// LMUL == 4, NF = 2 +class NFList<int lmul> { + list<int> L = !cond(!eq(lmul, 1): [2, 3, 4, 5, 6, 7, 8], + !eq(lmul, 2): [2, 3, 4], + !eq(lmul, 4): [2], + !eq(lmul, 8): []); +} + +// Generate [start, end) SubRegIndex list. 
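The SubRegSet/IndexSet/VRegList folds defined next enumerate, for each member of a segment tuple, the register numbers it may occupy. The arithmetic they encode reduces to: a tuple of nf LMUL-m register groups may begin at any multiple of m with start + nf*m <= 32, and member `index` then sits at start + index*m. An equivalent standalone enumeration, useful as a cross-check (hypothetical helper, not part of the backend):

#include <cstdio>

// Print the start register of every legal nf-field tuple of LMUL-m groups,
// mirroring what IndexSet<0, nf, m> computes below.
static void enumerateTuples(int nf, int m) {
  std::printf("nf=%d lmul=%d:", nf, m);
  for (int s = 0; s + nf * m <= 32; s += m)
    std::printf(" v%d", s);
  std::printf("\n");
}

int main() {
  enumerateTuples(2, 1); // VRN2M1: v0, v1, ..., v30
  enumerateTuples(3, 2); // VRN3M2: v0, v2, ..., v26
  enumerateTuples(2, 4); // VRN2M4: v0, v4, ..., v24
  return 0;
}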
+class SubRegSet<list<SubRegIndex> LIn, int start, int nf, int lmul> { + list<SubRegIndex> L = !foldl([]<SubRegIndex>, + [0, 1, 2, 3, 4, 5, 6, 7], + AccList, i, + !listconcat(AccList, + !if(!lt(i, nf), + [!cast<SubRegIndex>("sub_vrm" # lmul # "_" # i)], + []))); +} + +class IndexSet<int index, int nf, int lmul> { + list<int> R = + !foldl([]<int>, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31], + L, i, + !listconcat(L, + !if(!and( + !le(!mul(index, lmul), !mul(i, lmul)), + !le(!mul(i, lmul), + !sub(!add(32, !mul(index, lmul)), !mul(nf, lmul))) + ), [!mul(i, lmul)], []))); +} + +class VRegList<list<dag> LIn, int start, int nf, int lmul> { + list<dag> L = + !if(!ge(start, nf), + LIn, + !listconcat( + [!dag(add, + !foreach(i, IndexSet<start, nf, lmul>.R, + !cast<Register>("V" # i # !cond(!eq(lmul, 2): "M2", + !eq(lmul, 4): "M4", + true: ""))), + !listsplat("", !size(IndexSet<start, nf, lmul>.R)))], + VRegList<LIn, !add(start, 1), nf, lmul>.L)); +} + // Vector registers let RegAltNameIndices = [ABIRegAltName] in { foreach Index = 0-31 in { - def V#Index : RISCVReg<Index, "v"#Index, ["v"#Index]>, DwarfRegNum<[!add(Index, 64)]>; + def V#Index : RISCVReg<Index, "v"#Index, ["v"#Index]>, DwarfRegNum<[!add(Index, 96)]>; } foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, @@ -261,7 +421,7 @@ let RegAltNameIndices = [ABIRegAltName] in { !cast<Register>("V"#!add(Index, 1))], ["v"#Index]>, DwarfRegAlias<!cast<Register>("V"#Index)> { - let SubRegIndices = [sub_vrm2, sub_vrm2_hi]; + let SubRegIndices = [sub_vrm1_0, sub_vrm1_1]; } } @@ -271,7 +431,7 @@ let RegAltNameIndices = [ABIRegAltName] in { !cast<Register>("V"#!add(Index, 2)#"M2")], ["v"#Index]>, DwarfRegAlias<!cast<Register>("V"#Index)> { - let SubRegIndices = [sub_vrm4, sub_vrm4_hi]; + let SubRegIndices = [sub_vrm2_0, sub_vrm2_1]; } } @@ -281,54 +441,91 @@ let RegAltNameIndices = [ABIRegAltName] in { !cast<Register>("V"#!add(Index, 4)#"M4")], ["v"#Index]>, DwarfRegAlias<!cast<Register>("V"#Index)> { - let SubRegIndices = [sub_vrm8, sub_vrm8_hi]; + let SubRegIndices = [sub_vrm4_0, sub_vrm4_1]; } } def VTYPE : RISCVReg<0, "vtype", ["vtype"]>; def VL : RISCVReg<0, "vl", ["vl"]>; + def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>; + def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>; } -class RegisterTypes<list<ValueType> reg_types> { - list<ValueType> types = reg_types; -} - -// The order of registers represents the preferred allocation sequence, -// meaning caller-save regs are listed before callee-save. 
-def VR : RegisterClass<"RISCV", [nxv8i8, nxv4i16, nxv2i32, nxv1i64], - 64, (add - (sequence "V%u", 25, 31), - (sequence "V%u", 8, 24), - (sequence "V%u", 0, 7) - )> { - let Size = 64; -} - -def VRM2 : RegisterClass<"RISCV", [nxv16i8, nxv8i16, nxv4i32, nxv2i64], 64, - (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2, - V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2)> { - let Size = 128; -} - -def VRM4 : RegisterClass<"RISCV", [nxv32i8, nxv16i16, nxv8i32, nxv4i64], 64, - (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4)> { - let Size = 256; +foreach m = [1, 2, 4] in { + foreach n = NFList<m>.L in { + def "VN" # n # "M" # m: RegisterTuples<SubRegSet<[], 0, n, m>.L, + VRegList<[], 0, n, m>.L>; + } } -def VRM8 : RegisterClass<"RISCV", [nxv32i16, nxv16i32, nxv8i64], 64, - (add V8M8, V16M8, V24M8, V0M8)> { - let Size = 512; +class VReg<list<ValueType> regTypes, dag regList, int Vlmul> + : RegisterClass<"RISCV", + regTypes, + 64, // The maximum supported ELEN is 64. + regList> { + int VLMul = Vlmul; + int Size = !mul(Vlmul, 64); } -def VMaskVT : RegisterTypes<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, nxv32i1]>; - -def VM : RegisterClass<"RISCV", VMaskVT.types, 64, (add - (sequence "V%u", 25, 31), - (sequence "V%u", 8, 24), - (sequence "V%u", 0, 7))> { +def VR : VReg<[vint8mf2_t, vint8mf4_t, vint8mf8_t, + vint16mf2_t, vint16mf4_t, vint32mf2_t, + vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t, + vfloat16mf4_t, vfloat16mf2_t, vfloat16m1_t, + vfloat32mf2_t, vfloat32m1_t, vfloat64m1_t, + vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t, + vbool2_t, vbool1_t], + (add (sequence "V%u", 25, 31), + (sequence "V%u", 8, 24), + (sequence "V%u", 0, 7)), 1>; + +def VRNoV0 : VReg<[vint8mf2_t, vint8mf4_t, vint8mf8_t, + vint16mf2_t, vint16mf4_t, vint32mf2_t, + vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t, + vfloat16mf4_t, vfloat16mf2_t, vfloat16m1_t, + vfloat32mf2_t, vfloat32m1_t, vfloat64m1_t, + vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t, + vbool2_t, vbool1_t], + (add (sequence "V%u", 25, 31), + (sequence "V%u", 8, 24), + (sequence "V%u", 1, 7)), 1>; + +def VRM2 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t, + vfloat16m2_t, vfloat32m2_t, vfloat64m2_t], + (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2, + V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2), 2>; + +def VRM2NoV0 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t, + vfloat16m2_t, vfloat32m2_t, vfloat64m2_t], + (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2, + V18M2, V20M2, V22M2, V24M2, V2M2, V4M2, V6M2), 2>; + +def VRM4 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t, + vfloat16m4_t, vfloat32m4_t, vfloat64m4_t], + (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4), 4>; + +def VRM4NoV0 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t, + vfloat16m4_t, vfloat32m4_t, vfloat64m4_t], + (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V4M4), 4>; + +def VRM8 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t, + vfloat16m8_t, vfloat32m8_t, vfloat64m8_t], + (add V8M8, V16M8, V24M8, V0M8), 8>; + +def VRM8NoV0 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t, + vfloat16m8_t, vfloat32m8_t, vfloat64m8_t], + (add V8M8, V16M8, V24M8), 8>; + +defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t, + vbool4_t, vbool2_t, vbool1_t]; + +def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> { let Size = 64; } -def VMV0 : RegisterClass<"RISCV", VMaskVT.types, 64, (add V0)> { - let Size = 64; +foreach m = LMULList.m in { + foreach nf = NFList<m>.L in { + def "VRN" # nf # 
"M" # m : VReg<[untyped], + (add !cast<RegisterTuples>("VN" # nf # "M" # m)), + !mul(nf, m)>; + } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td new file mode 100644 index 000000000000..de2cdf512e87 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -0,0 +1,233 @@ +//==- RISCVSchedRocket.td - Rocket Scheduling Definitions ----*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedule.h for details. + +// Rocket machine model for scheduling and other instruction cost heuristics. +def RocketModel : SchedMachineModel { + let MicroOpBufferSize = 0; // Rocket is in-order. + let IssueWidth = 1; // 1 micro-op is dispatched per cycle. + let LoadLatency = 3; + let MispredictPenalty = 3; + let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since +// Rocket is in-order. + +let BufferSize = 0 in { +def RocketUnitALU : ProcResource<1>; // Int ALU +def RocketUnitIMul : ProcResource<1>; // Int Multiply +def RocketUnitMem : ProcResource<1>; // Load/Store +def RocketUnitB : ProcResource<1>; // Branch + +def RocketUnitFPALU : ProcResource<1>; // FP ALU +} + +let BufferSize = 1 in { +def RocketUnitIDiv : ProcResource<1>; // Int Division +def RocketUnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt +} + +//===----------------------------------------------------------------------===// + +let SchedModel = RocketModel in { + +// Branching +def : WriteRes<WriteJmp, [RocketUnitB]>; +def : WriteRes<WriteJal, [RocketUnitB]>; +def : WriteRes<WriteJalr, [RocketUnitB]>; +def : WriteRes<WriteJmpReg, [RocketUnitB]>; + +// Integer arithmetic and logic +def : WriteRes<WriteIALU32, [RocketUnitALU]>; +def : WriteRes<WriteIALU, [RocketUnitALU]>; +def : WriteRes<WriteShift32, [RocketUnitALU]>; +def : WriteRes<WriteShift, [RocketUnitALU]>; + +// Integer multiplication +let Latency = 4 in { +def : WriteRes<WriteIMul, [RocketUnitIMul]>; +def : WriteRes<WriteIMul32, [RocketUnitIMul]>; +} + +// Integer division +// Worst case latency is used. 
+def : WriteRes<WriteIDiv32, [RocketUnitIDiv]> { + let Latency = 34; + let ResourceCycles = [34]; +} +def : WriteRes<WriteIDiv, [RocketUnitIDiv]> { + let Latency = 33; + let ResourceCycles = [33]; +} + +// Memory +def : WriteRes<WriteSTB, [RocketUnitMem]>; +def : WriteRes<WriteSTH, [RocketUnitMem]>; +def : WriteRes<WriteSTW, [RocketUnitMem]>; +def : WriteRes<WriteSTD, [RocketUnitMem]>; +def : WriteRes<WriteFST32, [RocketUnitMem]>; +def : WriteRes<WriteFST64, [RocketUnitMem]>; + +let Latency = 3 in { +def : WriteRes<WriteLDB, [RocketUnitMem]>; +def : WriteRes<WriteLDH, [RocketUnitMem]>; +} + +let Latency = 2 in { +def : WriteRes<WriteLDW, [RocketUnitMem]>; +def : WriteRes<WriteLDWU, [RocketUnitMem]>; +def : WriteRes<WriteLDD, [RocketUnitMem]>; +def : WriteRes<WriteFLD32, [RocketUnitMem]>; +def : WriteRes<WriteFLD64, [RocketUnitMem]>; + +// Atomic memory +def : WriteRes<WriteAtomicW, [RocketUnitMem]>; +def : WriteRes<WriteAtomicD, [RocketUnitMem]>; + +def : WriteRes<WriteAtomicLDW, [RocketUnitMem]>; +def : WriteRes<WriteAtomicLDD, [RocketUnitMem]>; +} + +def : WriteRes<WriteAtomicSTW, [RocketUnitMem]>; +def : WriteRes<WriteAtomicSTD, [RocketUnitMem]>; + +// Single precision. +let Latency = 4 in { +def : WriteRes<WriteFALU32, [RocketUnitFPALU]>; +def : WriteRes<WriteFSGNJ32, [RocketUnitFPALU]>; +def : WriteRes<WriteFMinMax32, [RocketUnitFPALU]>; +} + +// Double precision +let Latency = 6 in { +def : WriteRes<WriteFALU64, [RocketUnitFPALU]>; +def : WriteRes<WriteFSGNJ64, [RocketUnitFPALU]>; +def : WriteRes<WriteFMinMax64, [RocketUnitFPALU]>; +} + +// Conversions +let Latency = 2 in { +def : WriteRes<WriteFCvtI32ToF32, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtI32ToF64, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtI64ToF32, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtI64ToF64, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtF32ToI32, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtF32ToI64, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtF64ToI32, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtF64ToI64, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtF32ToF64, [RocketUnitFPALU]>; +def : WriteRes<WriteFCvtF64ToF32, [RocketUnitFPALU]>; + +def : WriteRes<WriteFClass32, [RocketUnitFPALU]>; +def : WriteRes<WriteFClass64, [RocketUnitFPALU]>; +def : WriteRes<WriteFCmp32, [RocketUnitFPALU]>; +def : WriteRes<WriteFCmp64, [RocketUnitFPALU]>; +def : WriteRes<WriteFMovF32ToI32, [RocketUnitFPALU]>; +def : WriteRes<WriteFMovI32ToF32, [RocketUnitFPALU]>; +def : WriteRes<WriteFMovF64ToI64, [RocketUnitFPALU]>; +def : WriteRes<WriteFMovI64ToF64, [RocketUnitFPALU]>; +} + +// FP multiplication +let Latency = 5 in { +def : WriteRes<WriteFMul32, [RocketUnitFPALU]>; +def : WriteRes<WriteFMulAdd32, [RocketUnitFPALU]>; +def : WriteRes<WriteFMulSub32, [RocketUnitFPALU]>; +} + +let Latency = 7 in { +def : WriteRes<WriteFMul64, [RocketUnitFPALU]>; +def : WriteRes<WriteFMulAdd64, [RocketUnitFPALU]>; +def : WriteRes<WriteFMulSub64, [RocketUnitFPALU]>; +} + +// FP division +// FP division unit on Rocket is not pipelined, so set resource cycles to latency. +let Latency = 20, ResourceCycles = [20] in { +def : WriteRes<WriteFDiv32, [RocketUnitFPDivSqrt]>; +def : WriteRes<WriteFDiv64, [RocketUnitFPDivSqrt]>; +} + +// FP square root unit on Rocket is not pipelined, so set resource cycles to latency. 
+def : WriteRes<WriteFSqrt32, [RocketUnitFPDivSqrt]> { let Latency = 20; + let ResourceCycles = [20]; } +def : WriteRes<WriteFSqrt64, [RocketUnitFPDivSqrt]> { let Latency = 25; + let ResourceCycles = [25]; } + +// Others +def : WriteRes<WriteCSR, []>; +def : WriteRes<WriteNop, []>; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance<ReadJmp, 0>; +def : ReadAdvance<ReadJalr, 0>; +def : ReadAdvance<ReadCSR, 0>; +def : ReadAdvance<ReadStoreData, 0>; +def : ReadAdvance<ReadMemBase, 0>; +def : ReadAdvance<ReadIALU, 0>; +def : ReadAdvance<ReadIALU32, 0>; +def : ReadAdvance<ReadShift, 0>; +def : ReadAdvance<ReadShift32, 0>; +def : ReadAdvance<ReadIDiv, 0>; +def : ReadAdvance<ReadIDiv32, 0>; +def : ReadAdvance<ReadIMul, 0>; +def : ReadAdvance<ReadIMul32, 0>; +def : ReadAdvance<ReadAtomicWA, 0>; +def : ReadAdvance<ReadAtomicWD, 0>; +def : ReadAdvance<ReadAtomicDA, 0>; +def : ReadAdvance<ReadAtomicDD, 0>; +def : ReadAdvance<ReadAtomicLDW, 0>; +def : ReadAdvance<ReadAtomicLDD, 0>; +def : ReadAdvance<ReadAtomicSTW, 0>; +def : ReadAdvance<ReadAtomicSTD, 0>; +def : ReadAdvance<ReadFMemBase, 0>; +def : ReadAdvance<ReadFALU32, 0>; +def : ReadAdvance<ReadFALU64, 0>; +def : ReadAdvance<ReadFMul32, 0>; +def : ReadAdvance<ReadFMulAdd32, 0>; +def : ReadAdvance<ReadFMulSub32, 0>; +def : ReadAdvance<ReadFMul64, 0>; +def : ReadAdvance<ReadFMulAdd64, 0>; +def : ReadAdvance<ReadFMulSub64, 0>; +def : ReadAdvance<ReadFDiv32, 0>; +def : ReadAdvance<ReadFDiv64, 0>; +def : ReadAdvance<ReadFSqrt32, 0>; +def : ReadAdvance<ReadFSqrt64, 0>; +def : ReadAdvance<ReadFCmp32, 0>; +def : ReadAdvance<ReadFCmp64, 0>; +def : ReadAdvance<ReadFSGNJ32, 0>; +def : ReadAdvance<ReadFSGNJ64, 0>; +def : ReadAdvance<ReadFMinMax32, 0>; +def : ReadAdvance<ReadFMinMax64, 0>; +def : ReadAdvance<ReadFCvtF32ToI32, 0>; +def : ReadAdvance<ReadFCvtF32ToI64, 0>; +def : ReadAdvance<ReadFCvtF64ToI32, 0>; +def : ReadAdvance<ReadFCvtF64ToI64, 0>; +def : ReadAdvance<ReadFCvtI32ToF32, 0>; +def : ReadAdvance<ReadFCvtI32ToF64, 0>; +def : ReadAdvance<ReadFCvtI64ToF32, 0>; +def : ReadAdvance<ReadFCvtI64ToF64, 0>; +def : ReadAdvance<ReadFCvtF32ToF64, 0>; +def : ReadAdvance<ReadFCvtF64ToF32, 0>; +def : ReadAdvance<ReadFMovF32ToI32, 0>; +def : ReadAdvance<ReadFMovI32ToF32, 0>; +def : ReadAdvance<ReadFMovF64ToI64, 0>; +def : ReadAdvance<ReadFMovI64ToF64, 0>; +def : ReadAdvance<ReadFClass32, 0>; +def : ReadAdvance<ReadFClass64, 0>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td deleted file mode 100644 index 305e2b9b5927..000000000000 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td +++ /dev/null @@ -1,227 +0,0 @@ -//==- RISCVSchedRocket32.td - Rocket Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See MCSchedule.h for details. - -// Rocket machine model for scheduling and other instruction cost heuristics. 
-def Rocket32Model : SchedMachineModel { - let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order. - let IssueWidth = 1; // 1 micro-ops are dispatched per cycle. - let LoadLatency = 3; - let MispredictPenalty = 3; - let CompleteModel = 1; - let UnsupportedFeatures = [HasStdExtV]; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the BufferSize = 0 since -// Rocket is in-order. - -let BufferSize = 0 in { -def Rocket32UnitALU : ProcResource<1>; // Int ALU -def Rocket32UnitIMul : ProcResource<1>; // Int Multiply -def Rocket32UnitMem : ProcResource<1>; // Load/Store -def Rocket32UnitB : ProcResource<1>; // Branch - -def Rocket32UnitFPALU : ProcResource<1>; // FP ALU -} - -let BufferSize = 1 in { -def Rocket32UnitIDiv : ProcResource<1>; // Int Division -def Rocket32UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt' -} - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types which both map the ProcResources and -// set the latency. - -let SchedModel = Rocket32Model in { - -def : WriteRes<WriteJmp, [Rocket32UnitB]>; -def : WriteRes<WriteJal, [Rocket32UnitB]>; -def : WriteRes<WriteJalr, [Rocket32UnitB]>; -def : WriteRes<WriteJmpReg, [Rocket32UnitB]>; - -def : WriteRes<WriteIALU, [Rocket32UnitALU]>; -def : WriteRes<WriteShift, [Rocket32UnitALU]>; - -// Multiplies on Rocket differ by implementation; placeholder until -// we can determine how to read from command line -def : WriteRes<WriteIMul, [Rocket32UnitIMul]> { let Latency = 4; } - -// 32-bit divides have worse case latency of 34 cycle -def : WriteRes<WriteIDiv, [Rocket32UnitIDiv]> { - let Latency = 34; - let ResourceCycles = [34]; -} - -// Memory -def : WriteRes<WriteSTB, [Rocket32UnitMem]>; -def : WriteRes<WriteSTH, [Rocket32UnitMem]>; -def : WriteRes<WriteSTW, [Rocket32UnitMem]>; -def : WriteRes<WriteFST32, [Rocket32UnitMem]>; -def : WriteRes<WriteFST64, [Rocket32UnitMem]>; - -let Latency = 3 in { -def : WriteRes<WriteLDB, [Rocket32UnitMem]>; -def : WriteRes<WriteLDH, [Rocket32UnitMem]>; -def : WriteRes<WriteCSR, [Rocket32UnitALU]>; -} - -let Latency = 2 in { -def : WriteRes<WriteLDW, [Rocket32UnitMem]>; -def : WriteRes<WriteFLD32, [Rocket32UnitMem]>; -def : WriteRes<WriteFLD64, [Rocket32UnitMem]>; - -def : WriteRes<WriteAtomicW, [Rocket32UnitMem]>; -def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>; -} - -def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>; - -// Most FP single precision operations are 4 cycles -let Latency = 4 in { -def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFSGNJ32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMinMax32, [Rocket32UnitFPALU]>; -} - -// Most FP double precision operations are 6 cycles -let Latency = 6 in { -def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFSGNJ64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMinMax64, [Rocket32UnitFPALU]>; -} - -let Latency = 2 in { -def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>; - -def : WriteRes<WriteFClass32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFClass64, [Rocket32UnitFPALU]>; -def : 
WriteRes<WriteFCmp32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCmp64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMovF32ToI32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMovI32ToF32, [Rocket32UnitFPALU]>; -} - -let Latency = 5 in { -def : WriteRes<WriteFMul32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMulAdd32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMulSub32, [Rocket32UnitFPALU]>; -} - -let Latency = 7 in { -def : WriteRes<WriteFMul64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMulAdd64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFMulSub64, [Rocket32UnitFPALU]>; -} - -// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency -let Latency = 20, ResourceCycles = [20] in { -def : WriteRes<WriteFDiv32, [Rocket32UnitFPDivSqrt]>; -def : WriteRes<WriteFDiv64, [Rocket32UnitFPDivSqrt]>; -} - -// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency -def : WriteRes<WriteFSqrt32, [Rocket32UnitFPDivSqrt]> { let Latency = 20; - let ResourceCycles = [20];} -def : WriteRes<WriteFSqrt64, [Rocket32UnitFPDivSqrt]> { let Latency = 25; - let ResourceCycles = [25];} - -def : WriteRes<WriteNop, []>; - -def : InstRW<[WriteIALU], (instrs COPY)>; - -let Unsupported = 1 in { -def : WriteRes<WriteIALU32, []>; -def : WriteRes<WriteShift32, []>; -def : WriteRes<WriteIMul32, []>; -def : WriteRes<WriteIDiv32, []>; -def : WriteRes<WriteSTD, []>; -def : WriteRes<WriteLDWU, []>; -def : WriteRes<WriteLDD, []>; -def : WriteRes<WriteAtomicD, []>; -def : WriteRes<WriteAtomicLDD, []>; -def : WriteRes<WriteAtomicSTD, []>; -def : WriteRes<WriteFCvtI64ToF32, []>; -def : WriteRes<WriteFCvtI64ToF64, []>; -def : WriteRes<WriteFCvtF64ToI64, []>; -def : WriteRes<WriteFCvtF32ToI64, []>; -def : WriteRes<WriteFMovI64ToF64, []>; -def : WriteRes<WriteFMovF64ToI64, []>; -} - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types with cycles. -// Dummy definitions for RocketCore. 
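Every ReadAdvance entry that follows uses an advance of 0, meaning no forwarding credit is modeled: consumers wait out the producer's full latency. For contrast, a nonzero advance (purely illustrative; neither Rocket model defines one) would tell the scheduler that consumers reading through a given SchedRead see certain producers' results early, as through a bypass network:

    // Sketch only: consumers reading via ReadIALU see WriteIMul results
    // one cycle before the multiplier's nominal latency expires.
    def : ReadAdvance<ReadIALU, 1, [WriteIMul]>;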
-def : ReadAdvance<ReadJmp, 0>; -def : ReadAdvance<ReadJalr, 0>; -def : ReadAdvance<ReadCSR, 0>; -def : ReadAdvance<ReadStoreData, 0>; -def : ReadAdvance<ReadMemBase, 0>; -def : ReadAdvance<ReadIALU, 0>; -def : ReadAdvance<ReadIALU32, 0>; -def : ReadAdvance<ReadShift, 0>; -def : ReadAdvance<ReadShift32, 0>; -def : ReadAdvance<ReadIDiv, 0>; -def : ReadAdvance<ReadIDiv32, 0>; -def : ReadAdvance<ReadIMul, 0>; -def : ReadAdvance<ReadIMul32, 0>; -def : ReadAdvance<ReadAtomicWA, 0>; -def : ReadAdvance<ReadAtomicWD, 0>; -def : ReadAdvance<ReadAtomicDA, 0>; -def : ReadAdvance<ReadAtomicDD, 0>; -def : ReadAdvance<ReadAtomicLDW, 0>; -def : ReadAdvance<ReadAtomicLDD, 0>; -def : ReadAdvance<ReadAtomicSTW, 0>; -def : ReadAdvance<ReadAtomicSTD, 0>; -def : ReadAdvance<ReadFMemBase, 0>; -def : ReadAdvance<ReadFALU32, 0>; -def : ReadAdvance<ReadFALU64, 0>; -def : ReadAdvance<ReadFMul32, 0>; -def : ReadAdvance<ReadFMulAdd32, 0>; -def : ReadAdvance<ReadFMulSub32, 0>; -def : ReadAdvance<ReadFMul64, 0>; -def : ReadAdvance<ReadFMulAdd64, 0>; -def : ReadAdvance<ReadFMulSub64, 0>; -def : ReadAdvance<ReadFDiv32, 0>; -def : ReadAdvance<ReadFDiv64, 0>; -def : ReadAdvance<ReadFSqrt32, 0>; -def : ReadAdvance<ReadFSqrt64, 0>; -def : ReadAdvance<ReadFCmp32, 0>; -def : ReadAdvance<ReadFCmp64, 0>; -def : ReadAdvance<ReadFSGNJ32, 0>; -def : ReadAdvance<ReadFSGNJ64, 0>; -def : ReadAdvance<ReadFMinMax32, 0>; -def : ReadAdvance<ReadFMinMax64, 0>; -def : ReadAdvance<ReadFCvtF32ToI32, 0>; -def : ReadAdvance<ReadFCvtF32ToI64, 0>; -def : ReadAdvance<ReadFCvtF64ToI32, 0>; -def : ReadAdvance<ReadFCvtF64ToI64, 0>; -def : ReadAdvance<ReadFCvtI32ToF32, 0>; -def : ReadAdvance<ReadFCvtI32ToF64, 0>; -def : ReadAdvance<ReadFCvtI64ToF32, 0>; -def : ReadAdvance<ReadFCvtI64ToF64, 0>; -def : ReadAdvance<ReadFCvtF32ToF64, 0>; -def : ReadAdvance<ReadFCvtF64ToF32, 0>; -def : ReadAdvance<ReadFMovF32ToI32, 0>; -def : ReadAdvance<ReadFMovI32ToF32, 0>; -def : ReadAdvance<ReadFMovF64ToI64, 0>; -def : ReadAdvance<ReadFMovI64ToF64, 0>; -def : ReadAdvance<ReadFClass32, 0>; -def : ReadAdvance<ReadFClass64, 0>; -} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td deleted file mode 100644 index e8514a275c45..000000000000 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td +++ /dev/null @@ -1,228 +0,0 @@ -//==- RISCVSchedRocket64.td - Rocket Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See MCSchedule.h for details. - -// Rocket machine model for scheduling and other instruction cost heuristics. -def Rocket64Model : SchedMachineModel { - let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order. - let IssueWidth = 1; // 1 micro-ops are dispatched per cycle. - let LoadLatency = 3; - let MispredictPenalty = 3; - let UnsupportedFeatures = [HasStdExtV]; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. 
- -// Modeling each pipeline as a ProcResource using the BufferSize = 0 since -// Rocket is in-order. - -let BufferSize = 0 in { -def Rocket64UnitALU : ProcResource<1>; // Int ALU -def Rocket64UnitIMul : ProcResource<1>; // Int Multiply -def Rocket64UnitMem : ProcResource<1>; // Load/Store -def Rocket64UnitB : ProcResource<1>; // Branch - -def Rocket64UnitFPALU : ProcResource<1>; // FP ALU -} - -let BufferSize = 1 in { -def Rocket64UnitIDiv : ProcResource<1>; // Int Division -def Rocket64UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt -} - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types which both map the ProcResources and -// set the latency. - -let SchedModel = Rocket64Model in { - -def : WriteRes<WriteJmp, [Rocket64UnitB]>; -def : WriteRes<WriteJal, [Rocket64UnitB]>; -def : WriteRes<WriteJalr, [Rocket64UnitB]>; -def : WriteRes<WriteJmpReg, [Rocket64UnitB]>; - -def : WriteRes<WriteIALU32, [Rocket64UnitALU]>; -def : WriteRes<WriteIALU, [Rocket64UnitALU]>; -def : WriteRes<WriteShift32, [Rocket64UnitALU]>; -def : WriteRes<WriteShift, [Rocket64UnitALU]>; - -let Latency = 4 in { -def : WriteRes<WriteIMul, [Rocket64UnitIMul]>; -def : WriteRes<WriteIMul32, [Rocket64UnitIMul]>; -} - -// Integer divide varies based on operand magnitude and sign; worse case latency is 34. -def : WriteRes<WriteIDiv32, [Rocket64UnitIDiv]> { - let Latency = 34; - let ResourceCycles = [34]; -} -def : WriteRes<WriteIDiv, [Rocket64UnitIDiv]> { - let Latency = 33; - let ResourceCycles = [33]; -} - -// Memory -def : WriteRes<WriteSTB, [Rocket64UnitMem]>; -def : WriteRes<WriteSTH, [Rocket64UnitMem]>; -def : WriteRes<WriteSTW, [Rocket64UnitMem]>; -def : WriteRes<WriteSTD, [Rocket64UnitMem]>; -def : WriteRes<WriteFST32, [Rocket64UnitMem]>; -def : WriteRes<WriteFST64, [Rocket64UnitMem]>; - -let Latency = 3 in { -def : WriteRes<WriteLDB, [Rocket64UnitMem]>; -def : WriteRes<WriteLDH, [Rocket64UnitMem]>; -def : WriteRes<WriteCSR, [Rocket64UnitALU]>; -} - -let Latency = 2 in { -def : WriteRes<WriteLDW, [Rocket64UnitMem]>; -def : WriteRes<WriteLDWU, [Rocket64UnitMem]>; -def : WriteRes<WriteLDD, [Rocket64UnitMem]>; -def : WriteRes<WriteFLD32, [Rocket64UnitMem]>; -def : WriteRes<WriteFLD64, [Rocket64UnitMem]>; - -def : WriteRes<WriteAtomicW, [Rocket64UnitMem]>; -def : WriteRes<WriteAtomicD, [Rocket64UnitMem]>; - -def : WriteRes<WriteAtomicLDW, [Rocket64UnitMem]>; -def : WriteRes<WriteAtomicLDD, [Rocket64UnitMem]>; -} - -def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>; -def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>; - -// Most FP single precision operations are 4 cycles -let Latency = 4 in { -def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFSGNJ32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMinMax32, [Rocket64UnitFPALU]>; -} - -let Latency = 6 in { -// Most FP double precision operations are 6 cycles -def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFSGNJ64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMinMax64, [Rocket64UnitFPALU]>; -} - -// Conversion instructions -let Latency = 2 in { -def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtI64ToF32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtI64ToF64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF32ToI64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>; -def : 
WriteRes<WriteFCvtF64ToI64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>; -def : WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>; - -def : WriteRes<WriteFClass32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFClass64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFCmp32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFCmp64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMovF32ToI32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMovI32ToF32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMovF64ToI64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMovI64ToF64, [Rocket64UnitFPALU]>; -} - -let Latency = 5 in { -def : WriteRes<WriteFMul32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMulAdd32, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMulSub32, [Rocket64UnitFPALU]>; -} - -let Latency = 7 in { -def : WriteRes<WriteFMul64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMulAdd64, [Rocket64UnitFPALU]>; -def : WriteRes<WriteFMulSub64, [Rocket64UnitFPALU]>; -} - -// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency -let Latency = 20, ResourceCycles = [20] in { -def : WriteRes<WriteFDiv32, [Rocket64UnitFPDivSqrt]>; -def : WriteRes<WriteFDiv64, [Rocket64UnitFPDivSqrt]>; -} - -// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency -def : WriteRes<WriteFSqrt32, [Rocket64UnitFPDivSqrt]> { let Latency = 20; - let ResourceCycles = [20]; } -def : WriteRes<WriteFSqrt64, [Rocket64UnitFPDivSqrt]> { let Latency = 25; - let ResourceCycles = [25]; } - -def : WriteRes<WriteNop, []>; - -def : InstRW<[WriteIALU], (instrs COPY)>; - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types with cycles. -// Dummy definitions for RocketCore. -def : ReadAdvance<ReadJmp, 0>; -def : ReadAdvance<ReadJalr, 0>; -def : ReadAdvance<ReadCSR, 0>; -def : ReadAdvance<ReadStoreData, 0>; -def : ReadAdvance<ReadMemBase, 0>; -def : ReadAdvance<ReadIALU, 0>; -def : ReadAdvance<ReadIALU32, 0>; -def : ReadAdvance<ReadShift, 0>; -def : ReadAdvance<ReadShift32, 0>; -def : ReadAdvance<ReadIDiv, 0>; -def : ReadAdvance<ReadIDiv32, 0>; -def : ReadAdvance<ReadIMul, 0>; -def : ReadAdvance<ReadIMul32, 0>; -def : ReadAdvance<ReadAtomicWA, 0>; -def : ReadAdvance<ReadAtomicWD, 0>; -def : ReadAdvance<ReadAtomicDA, 0>; -def : ReadAdvance<ReadAtomicDD, 0>; -def : ReadAdvance<ReadAtomicLDW, 0>; -def : ReadAdvance<ReadAtomicLDD, 0>; -def : ReadAdvance<ReadAtomicSTW, 0>; -def : ReadAdvance<ReadAtomicSTD, 0>; -def : ReadAdvance<ReadFMemBase, 0>; -def : ReadAdvance<ReadFALU32, 0>; -def : ReadAdvance<ReadFALU64, 0>; -def : ReadAdvance<ReadFMul32, 0>; -def : ReadAdvance<ReadFMulAdd32, 0>; -def : ReadAdvance<ReadFMulSub32, 0>; -def : ReadAdvance<ReadFMul64, 0>; -def : ReadAdvance<ReadFMulAdd64, 0>; -def : ReadAdvance<ReadFMulSub64, 0>; -def : ReadAdvance<ReadFDiv32, 0>; -def : ReadAdvance<ReadFDiv64, 0>; -def : ReadAdvance<ReadFSqrt32, 0>; -def : ReadAdvance<ReadFSqrt64, 0>; -def : ReadAdvance<ReadFCmp32, 0>; -def : ReadAdvance<ReadFCmp64, 0>; -def : ReadAdvance<ReadFSGNJ32, 0>; -def : ReadAdvance<ReadFSGNJ64, 0>; -def : ReadAdvance<ReadFMinMax32, 0>; -def : ReadAdvance<ReadFMinMax64, 0>; -def : ReadAdvance<ReadFCvtF32ToI32, 0>; -def : ReadAdvance<ReadFCvtF32ToI64, 0>; -def : ReadAdvance<ReadFCvtF64ToI32, 0>; -def : ReadAdvance<ReadFCvtF64ToI64, 0>; -def : ReadAdvance<ReadFCvtI32ToF32, 0>; -def : ReadAdvance<ReadFCvtI32ToF64, 0>; -def : ReadAdvance<ReadFCvtI64ToF32, 0>; -def : ReadAdvance<ReadFCvtI64ToF64, 0>; -def : 
ReadAdvance<ReadFCvtF32ToF64, 0>; -def : ReadAdvance<ReadFCvtF64ToF32, 0>; -def : ReadAdvance<ReadFMovF32ToI32, 0>; -def : ReadAdvance<ReadFMovI32ToF32, 0>; -def : ReadAdvance<ReadFMovF64ToI64, 0>; -def : ReadAdvance<ReadFMovI64ToF64, 0>; -def : ReadAdvance<ReadFClass32, 0>; -def : ReadAdvance<ReadFClass64, 0>; -} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td new file mode 100644 index 000000000000..e57ba4f61b98 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -0,0 +1,222 @@ +//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// SiFive7 machine model for scheduling and other instruction cost heuristics. +def SiFive7Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order. + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let LoadLatency = 3; + let MispredictPenalty = 3; + let CompleteModel = 0; + let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg]; +} + +// The SiFive7 microarchitecture has two pipelines: A and B. +// Pipe A can handle memory, integer alu and vector operations. +// Pipe B can handle integer alu, control flow, integer multiply and divide, +// and floating point computation. +let SchedModel = SiFive7Model in { +let BufferSize = 0 in { +def SiFive7PipeA : ProcResource<1>; +def SiFive7PipeB : ProcResource<1>; +} + +let BufferSize = 1 in { +def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division +def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt +} + +def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>; + +// Branching +def : WriteRes<WriteJmp, [SiFive7PipeB]>; +def : WriteRes<WriteJal, [SiFive7PipeB]>; +def : WriteRes<WriteJalr, [SiFive7PipeB]>; +def : WriteRes<WriteJmpReg, [SiFive7PipeB]>; + +// Integer arithmetic and logic +let Latency = 3 in { +def : WriteRes<WriteIALU, [SiFive7PipeAB]>; +def : WriteRes<WriteIALU32, [SiFive7PipeAB]>; +def : WriteRes<WriteShift, [SiFive7PipeAB]>; +def : WriteRes<WriteShift32, [SiFive7PipeAB]>; +} + +// Integer multiplication +let Latency = 3 in { +def : WriteRes<WriteIMul, [SiFive7PipeB]>; +def : WriteRes<WriteIMul32, [SiFive7PipeB]>; +} + +// Integer division +def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> { + let Latency = 16; + let ResourceCycles = [1, 15]; +} +def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> { + let Latency = 16; + let ResourceCycles = [1, 15]; +} + +// Memory +def : WriteRes<WriteSTB, [SiFive7PipeA]>; +def : WriteRes<WriteSTH, [SiFive7PipeA]>; +def : WriteRes<WriteSTW, [SiFive7PipeA]>; +def : WriteRes<WriteSTD, [SiFive7PipeA]>; +def : WriteRes<WriteFST32, [SiFive7PipeA]>; +def : WriteRes<WriteFST64, [SiFive7PipeA]>; + +let Latency = 3 in { +def : WriteRes<WriteLDB, [SiFive7PipeA]>; +def : WriteRes<WriteLDH, [SiFive7PipeA]>; +def : WriteRes<WriteLDW, [SiFive7PipeA]>; +def : WriteRes<WriteLDWU, [SiFive7PipeA]>; +def : WriteRes<WriteLDD, [SiFive7PipeA]>; +} + +let Latency = 2 in { +def : WriteRes<WriteFLD32,
[SiFive7PipeA]>; +def : WriteRes<WriteFLD64, [SiFive7PipeA]>; +} + +// Atomic memory +def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>; +def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>; + +let Latency = 3 in { +def : WriteRes<WriteAtomicW, [SiFive7PipeA]>; +def : WriteRes<WriteAtomicD, [SiFive7PipeA]>; +def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>; +def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>; +} + +// Single precision. +let Latency = 5 in { +def : WriteRes<WriteFALU32, [SiFive7PipeB]>; +def : WriteRes<WriteFMul32, [SiFive7PipeB]>; +def : WriteRes<WriteFMulAdd32, [SiFive7PipeB]>; +def : WriteRes<WriteFMulSub32, [SiFive7PipeB]>; +} +let Latency = 3 in { +def : WriteRes<WriteFSGNJ32, [SiFive7PipeB]>; +def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>; +} + +def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27; + let ResourceCycles = [1, 26]; } +def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27; + let ResourceCycles = [1, 26]; } + +// Double precision +let Latency = 7 in { +def : WriteRes<WriteFALU64, [SiFive7PipeB]>; +def : WriteRes<WriteFMul64, [SiFive7PipeB]>; +def : WriteRes<WriteFMulAdd64, [SiFive7PipeB]>; +def : WriteRes<WriteFMulSub64, [SiFive7PipeB]>; +} +let Latency = 3 in { +def : WriteRes<WriteFSGNJ64, [SiFive7PipeB]>; +def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>; +} + +def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56; + let ResourceCycles = [1, 55]; } +def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56; + let ResourceCycles = [1, 55]; } + +// Conversions +let Latency = 3 in { +def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>; +def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>; + +def : WriteRes<WriteFClass32, [SiFive7PipeB]>; +def : WriteRes<WriteFClass64, [SiFive7PipeB]>; +def : WriteRes<WriteFCmp32, [SiFive7PipeB]>; +def : WriteRes<WriteFCmp64, [SiFive7PipeB]>; +def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>; +def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>; +def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>; +def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>; +} + +// Others +def : WriteRes<WriteCSR, [SiFive7PipeB]>; +def : WriteRes<WriteNop, []>; + +def : InstRW<[WriteIALU], (instrs COPY)>; + + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance<ReadJmp, 0>; +def : ReadAdvance<ReadJalr, 0>; +def : ReadAdvance<ReadCSR, 0>; +def : ReadAdvance<ReadStoreData, 0>; +def : ReadAdvance<ReadMemBase, 0>; +def : ReadAdvance<ReadIALU, 0>; +def : ReadAdvance<ReadIALU32, 0>; +def : ReadAdvance<ReadShift, 0>; +def : ReadAdvance<ReadShift32, 0>; +def : ReadAdvance<ReadIDiv, 0>; +def : ReadAdvance<ReadIDiv32, 0>; +def : ReadAdvance<ReadIMul, 0>; +def : ReadAdvance<ReadIMul32, 0>; +def : ReadAdvance<ReadAtomicWA, 0>; +def : ReadAdvance<ReadAtomicWD, 0>; +def : ReadAdvance<ReadAtomicDA, 0>; +def : ReadAdvance<ReadAtomicDD, 0>; +def : ReadAdvance<ReadAtomicLDW, 0>; +def : ReadAdvance<ReadAtomicLDD, 0>; +def : ReadAdvance<ReadAtomicSTW, 0>; +def : ReadAdvance<ReadAtomicSTD, 0>; +def : ReadAdvance<ReadFMemBase, 
0>; +def : ReadAdvance<ReadFALU32, 0>; +def : ReadAdvance<ReadFALU64, 0>; +def : ReadAdvance<ReadFMul32, 0>; +def : ReadAdvance<ReadFMulAdd32, 0>; +def : ReadAdvance<ReadFMulSub32, 0>; +def : ReadAdvance<ReadFMul64, 0>; +def : ReadAdvance<ReadFMulAdd64, 0>; +def : ReadAdvance<ReadFMulSub64, 0>; +def : ReadAdvance<ReadFDiv32, 0>; +def : ReadAdvance<ReadFDiv64, 0>; +def : ReadAdvance<ReadFSqrt32, 0>; +def : ReadAdvance<ReadFSqrt64, 0>; +def : ReadAdvance<ReadFCmp32, 0>; +def : ReadAdvance<ReadFCmp64, 0>; +def : ReadAdvance<ReadFSGNJ32, 0>; +def : ReadAdvance<ReadFSGNJ64, 0>; +def : ReadAdvance<ReadFMinMax32, 0>; +def : ReadAdvance<ReadFMinMax64, 0>; +def : ReadAdvance<ReadFCvtF32ToI32, 0>; +def : ReadAdvance<ReadFCvtF32ToI64, 0>; +def : ReadAdvance<ReadFCvtF64ToI32, 0>; +def : ReadAdvance<ReadFCvtF64ToI64, 0>; +def : ReadAdvance<ReadFCvtI32ToF32, 0>; +def : ReadAdvance<ReadFCvtI32ToF64, 0>; +def : ReadAdvance<ReadFCvtI64ToF32, 0>; +def : ReadAdvance<ReadFCvtI64ToF64, 0>; +def : ReadAdvance<ReadFCvtF32ToF64, 0>; +def : ReadAdvance<ReadFCvtF64ToF32, 0>; +def : ReadAdvance<ReadFMovF32ToI32, 0>; +def : ReadAdvance<ReadFMovI32ToF32, 0>; +def : ReadAdvance<ReadFMovF64ToI64, 0>; +def : ReadAdvance<ReadFMovI64ToF64, 0>; +def : ReadAdvance<ReadFClass32, 0>; +def : ReadAdvance<ReadFClass64, 0>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td index bbcd03d46236..0806be8a8d87 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -1,4 +1,4 @@ -//===-- RISCVSchedule.td - RISCV Scheduling Definitions -------*- tablegen -*-===// +//===-- RISCVSchedule.td - RISCV Scheduling Definitions ----*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 47a48c820a29..df11d237a16c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -30,13 +30,16 @@ using namespace llvm; void RISCVSubtarget::anchor() {} RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies( - const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName) { + const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, StringRef ABIName) { // Determine default and user-specified characteristics bool Is64Bit = TT.isArch64Bit(); std::string CPUName = std::string(CPU); + std::string TuneCPUName = std::string(TuneCPU); if (CPUName.empty()) CPUName = Is64Bit ? 
"generic-rv64" : "generic-rv32"; - ParseSubtargetFeatures(CPUName, FS); + if (TuneCPUName.empty()) + TuneCPUName = CPUName; + ParseSubtargetFeatures(CPUName, TuneCPUName, FS); if (Is64Bit) { XLenVT = MVT::i64; XLen = 64; @@ -47,11 +50,12 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies( return *this; } -RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS, +RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, + StringRef TuneCPU, StringRef FS, StringRef ABIName, const TargetMachine &TM) - : RISCVGenSubtargetInfo(TT, CPU, FS), + : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS), UserReservedRegister(RISCV::NUM_TARGET_REGS), - FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)), + FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) { CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering())); Legalizer.reset(new RISCVLegalizerInfo(*this)); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h index fe1285f23b15..561b04cc0b44 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -13,10 +13,10 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCVSUBTARGET_H #define LLVM_LIB_TARGET_RISCV_RISCVSUBTARGET_H +#include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCVFrameLowering.h" #include "RISCVISelLowering.h" #include "RISCVInstrInfo.h" -#include "Utils/RISCVBaseInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -40,6 +40,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasStdExtD = false; bool HasStdExtC = false; bool HasStdExtB = false; + bool HasStdExtZba = false; bool HasStdExtZbb = false; bool HasStdExtZbc = false; bool HasStdExtZbe = false; @@ -51,6 +52,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasStdExtZbt = false; bool HasStdExtZbproposedc = false; bool HasStdExtV = false; + bool HasStdExtZvlsseg = false; + bool HasStdExtZvamo = false; + bool HasStdExtZfh = false; bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; @@ -69,17 +73,19 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { /// Initializes using the passed in CPU and feature strings so that we can /// use initializer lists for subtarget initialization. RISCVSubtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef CPU, StringRef FS, + StringRef CPU, + StringRef TuneCPU, + StringRef FS, StringRef ABIName); public: // Initializes the data members to match that of the specified triple. - RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS, - StringRef ABIName, const TargetMachine &TM); + RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, + StringRef FS, StringRef ABIName, const TargetMachine &TM); // Parses features string setting specified subtarget options. The // definition of this function is auto-generated by tblgen. 
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); const RISCVFrameLowering *getFrameLowering() const override { return &FrameLowering; @@ -101,6 +107,7 @@ public: bool hasStdExtD() const { return HasStdExtD; } bool hasStdExtC() const { return HasStdExtC; } bool hasStdExtB() const { return HasStdExtB; } + bool hasStdExtZba() const { return HasStdExtZba; } bool hasStdExtZbb() const { return HasStdExtZbb; } bool hasStdExtZbc() const { return HasStdExtZbc; } bool hasStdExtZbe() const { return HasStdExtZbe; } @@ -112,6 +119,9 @@ public: bool hasStdExtZbt() const { return HasStdExtZbt; } bool hasStdExtZbproposedc() const { return HasStdExtZbproposedc; } bool hasStdExtV() const { return HasStdExtV; } + bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; } + bool hasStdExtZvamo() const { return HasStdExtZvamo; } + bool hasStdExtZfh() const { return HasStdExtZfh; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 8e75647bd4a9..16399fea150e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -310,7 +310,9 @@ def: SysReg<"mhpmcounter31h", 0xB9F>; //===-------------------------- // Machine Counter Setup //===-------------------------- +let AltName = "mucounteren" in // Privileged spec v1.9.1 Name def : SysReg<"mcountinhibit", 0x320>; + def : SysReg<"mhpmevent3", 0x323>; def : SysReg<"mhpmevent4", 0x324>; def : SysReg<"mhpmevent5", 0x325>; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 75683e2fd8e9..32fb7cd6753c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -11,11 +11,11 @@ //===----------------------------------------------------------------------===// #include "RISCVTargetMachine.h" +#include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCV.h" #include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h" -#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" @@ -35,18 +35,18 @@ using namespace llvm; extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target()); RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target()); - auto PR = PassRegistry::getPassRegistry(); + auto *PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); + initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVExpandPseudoPass(*PR); + initializeRISCVCleanupVSETVLIPass(*PR); } static StringRef computeDataLayout(const Triple &TT) { - if (TT.isArch64Bit()) { + if (TT.isArch64Bit()) return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; - } else { - assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); - return "e-m:e-p:32:32-i64:64-n32-S128"; - } + assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); + return "e-m:e-p:32:32-i64:64-n32-S128"; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, 
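The next hunk threads the new "tune-cpu" function attribute through getSubtargetImpl: the tuning CPU falls back to the target CPU when absent, and both feed the subtarget cache key. In isolation, the fallback reads like this (resolveTuneCPU is a hypothetical name; the real logic is inline in the hunk below):

    #include <string>

    // Sketch of the fallback order: an empty tune-cpu means "tune for the
    // target CPU", so otherwise-identical functions still share one cached
    // subtarget even after TuneCPU joins the CPU + TuneCPU + FS key.
    std::string resolveTuneCPU(const std::string &AttrTuneCPU,
                               const std::string &CPU) {
      return AttrTuneCPU.empty() ? CPU : AttrTuneCPU;
    }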
@@ -75,15 +75,16 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, const RISCVSubtarget * RISCVTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute TuneAttr = F.getFnAttribute("tune-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - std::string CPU = !CPUAttr.hasAttribute(Attribute::None) - ? CPUAttr.getValueAsString().str() - : TargetCPU; - std::string FS = !FSAttr.hasAttribute(Attribute::None) - ? FSAttr.getValueAsString().str() - : TargetFS; - std::string Key = CPU + FS; + std::string CPU = + CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string TuneCPU = + TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; + std::string FS = + FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + std::string Key = CPU + TuneCPU + FS; auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any @@ -100,7 +101,7 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { } ABIName = ModuleTargetABI->getString(); } - I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS, ABIName, *this); + I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, TuneCPU, FS, ABIName, *this); } return I.get(); } @@ -110,6 +111,15 @@ RISCVTargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(RISCVTTIImpl(this, F)); } +// A RISC-V hart has a single byte-addressable address space of 2^XLEN bytes +// for all memory accesses, so it is reasonable to assume that an +// implementation has no-op address space casts. If an implementation makes a +// change to this, they can override it here. +bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DstAS) const { + return true; +} + namespace { class RISCVPassConfig : public TargetPassConfig { public: @@ -131,7 +141,7 @@ public: void addPreSched2() override; void addPreRegAlloc() override; }; -} +} // namespace TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { return new RISCVPassConfig(*this, PM); @@ -149,7 +159,7 @@ bool RISCVPassConfig::addInstSelector() { } bool RISCVPassConfig::addIRTranslator() { - addPass(new IRTranslator()); + addPass(new IRTranslator(getOptLevel())); return false; } @@ -181,5 +191,8 @@ void RISCVPassConfig::addPreEmitPass2() { } void RISCVPassConfig::addPreRegAlloc() { - addPass(createRISCVMergeBaseOffsetOptPass()); + if (TM->getOptLevel() != CodeGenOpt::None) { + addPass(createRISCVMergeBaseOffsetOptPass()); + addPass(createRISCVCleanupVSETVLIPass()); + } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h index a4476fa40a7d..3156333f7ee1 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h @@ -43,7 +43,10 @@ public: } TargetTransformInfo getTargetTransformInfo(const Function &F) override; + + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DstAS) const override; }; -} +} // namespace llvm #endif diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index bd78f801c59a..27714cffc989 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -7,7 +7,7 @@ 
//===----------------------------------------------------------------------===// #include "RISCVTargetTransformInfo.h" -#include "Utils/RISCVMatInt.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/TargetLowering.h" @@ -30,8 +30,10 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, getST()->is64Bit()); } -int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind) { +int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 392700707760..8d077e946305 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -42,8 +42,9 @@ public: TLI(ST->getTargetLowering()) {} int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); }; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h deleted file mode 100644 index 4e6cdd8606b1..000000000000 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h +++ /dev/null @@ -1,223 +0,0 @@ -//===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone enum definitions for the RISCV target -// useful for the compiler back-end and the MC libraries. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H -#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H - -#include "RISCVRegisterInfo.h" -#include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/SubtargetFeature.h" - -namespace llvm { - -// RISCVII - This namespace holds all of the target specific flags that -// instruction info tracks. All definitions must match RISCVInstrFormats.td. 
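This header is removed here but not dropped from the tree: the commit moves it (like RISCVMatInt.h, per the include changes above) from Utils/ to MCTargetDesc/. The RISCVII namespace that follows packs each instruction's encoding format into the low bits of its TSFlags, with InstFormatMask = 31 reserving five bits. A standalone sketch of how such a field is read back (getFormat is a hypothetical mirror, not this header's API):

    #include <cstdint>

    // Sketch: the format identifier lives in the low five bits of TSFlags;
    // masking strips any target flags stored above it.
    enum : uint64_t { InstFormatI = 3, InstFormatMask = 31 };

    unsigned getFormat(uint64_t TSFlags) {
      return static_cast<unsigned>(TSFlags & InstFormatMask);
    }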
-namespace RISCVII { -enum { - InstFormatPseudo = 0, - InstFormatR = 1, - InstFormatR4 = 2, - InstFormatI = 3, - InstFormatS = 4, - InstFormatB = 5, - InstFormatU = 6, - InstFormatJ = 7, - InstFormatCR = 8, - InstFormatCI = 9, - InstFormatCSS = 10, - InstFormatCIW = 11, - InstFormatCL = 12, - InstFormatCS = 13, - InstFormatCA = 14, - InstFormatCB = 15, - InstFormatCJ = 16, - InstFormatOther = 17, - - InstFormatMask = 31, -}; - -// RISC-V Specific Machine Operand Flags -enum { - MO_None = 0, - MO_CALL = 1, - MO_PLT = 2, - MO_LO = 3, - MO_HI = 4, - MO_PCREL_LO = 5, - MO_PCREL_HI = 6, - MO_GOT_HI = 7, - MO_TPREL_LO = 8, - MO_TPREL_HI = 9, - MO_TPREL_ADD = 10, - MO_TLS_GOT_HI = 11, - MO_TLS_GD_HI = 12, - - // Used to differentiate between target-specific "direct" flags and "bitmask" - // flags. A machine operand can only have one "direct" flag, but can have - // multiple "bitmask" flags. - MO_DIRECT_FLAG_MASK = 15 -}; -} // namespace RISCVII - -namespace RISCVOp { -enum OperandType : unsigned { - OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET, - OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM, - OPERAND_UIMM5, - OPERAND_UIMM12, - OPERAND_SIMM12, - OPERAND_SIMM13_LSB0, - OPERAND_UIMM20, - OPERAND_SIMM21_LSB0, - OPERAND_UIMMLOG2XLEN, - OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN -}; -} // namespace RISCVOp - -// Describes the predecessor/successor bits used in the FENCE instruction. -namespace RISCVFenceField { -enum FenceField { - I = 8, - O = 4, - R = 2, - W = 1 -}; -} - -// Describes the supported floating point rounding mode encodings. -namespace RISCVFPRndMode { -enum RoundingMode { - RNE = 0, - RTZ = 1, - RDN = 2, - RUP = 3, - RMM = 4, - DYN = 7, - Invalid -}; - -inline static StringRef roundingModeToString(RoundingMode RndMode) { - switch (RndMode) { - default: - llvm_unreachable("Unknown floating point rounding mode"); - case RISCVFPRndMode::RNE: - return "rne"; - case RISCVFPRndMode::RTZ: - return "rtz"; - case RISCVFPRndMode::RDN: - return "rdn"; - case RISCVFPRndMode::RUP: - return "rup"; - case RISCVFPRndMode::RMM: - return "rmm"; - case RISCVFPRndMode::DYN: - return "dyn"; - } -} - -inline static RoundingMode stringToRoundingMode(StringRef Str) { - return StringSwitch<RoundingMode>(Str) - .Case("rne", RISCVFPRndMode::RNE) - .Case("rtz", RISCVFPRndMode::RTZ) - .Case("rdn", RISCVFPRndMode::RDN) - .Case("rup", RISCVFPRndMode::RUP) - .Case("rmm", RISCVFPRndMode::RMM) - .Case("dyn", RISCVFPRndMode::DYN) - .Default(RISCVFPRndMode::Invalid); -} - -inline static bool isValidRoundingMode(unsigned Mode) { - switch (Mode) { - default: - return false; - case RISCVFPRndMode::RNE: - case RISCVFPRndMode::RTZ: - case RISCVFPRndMode::RDN: - case RISCVFPRndMode::RUP: - case RISCVFPRndMode::RMM: - case RISCVFPRndMode::DYN: - return true; - } -} -} // namespace RISCVFPRndMode - -namespace RISCVSysReg { -struct SysReg { - const char *Name; - unsigned Encoding; - const char *AltName; - // FIXME: add these additional fields when needed. - // Privilege Access: Read, Write, Read-Only. - // unsigned ReadWrite; - // Privilege Mode: User, System or Machine. - // unsigned Mode; - // Check field name. - // unsigned Extra; - // Register number without the privilege bits. - // unsigned Number; - FeatureBitset FeaturesRequired; - bool isRV32Only; - - bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const { - // Not in 32-bit mode. - if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit]) - return false; - // No required feature associated with the system register. 
- if (FeaturesRequired.none()) - return true; - return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; - } -}; - -#define GET_SysRegsList_DECL -#include "RISCVGenSystemOperands.inc" -} // end namespace RISCVSysReg - -namespace RISCVABI { - -enum ABI { - ABI_ILP32, - ABI_ILP32F, - ABI_ILP32D, - ABI_ILP32E, - ABI_LP64, - ABI_LP64F, - ABI_LP64D, - ABI_Unknown -}; - -// Returns the target ABI, or else a StringError if the requested ABIName is -// not supported for the given TT and FeatureBits combination. -ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, - StringRef ABIName); - -ABI getTargetABI(StringRef ABIName); - -// Returns the register used to hold the stack pointer after realignment. -Register getBPReg(); - -} // namespace RISCVABI - -namespace RISCVFeatures { - -// Validates if the given combination of features are valid for the target -// triple. Exits with report_fatal_error if not. -void validate(const Triple &TT, const FeatureBitset &FeatureBits); - -} // namespace RISCVFeatures - -} // namespace llvm - -#endif
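The RISCVFPRndMode helpers deleted above likewise live on in MCTargetDesc/RISCVBaseInfo.h. They are small enough to mirror standalone; this sketch swaps llvm::StringSwitch for plain comparisons and shows the round-trip the enum supports (encodings 5 and 6 are unused, so no valid mnemonic maps to them):

    #include <cassert>
    #include <string>

    // Standalone sketch of the deleted stringToRoundingMode helper.
    enum RoundingMode { RNE = 0, RTZ = 1, RDN = 2, RUP = 3, RMM = 4, DYN = 7, Invalid };

    RoundingMode stringToRoundingMode(const std::string &S) {
      if (S == "rne") return RNE;
      if (S == "rtz") return RTZ;
      if (S == "rdn") return RDN;
      if (S == "rup") return RUP;
      if (S == "rmm") return RMM;
      if (S == "dyn") return DYN;
      return Invalid;
    }

    int main() {
      assert(stringToRoundingMode("rup") == RUP);
      assert(stringToRoundingMode("frm") == Invalid); // unknown strings are rejected
      return 0;
    }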