author     Dimitry Andric <dim@FreeBSD.org>    2023-12-18 20:30:12 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2024-04-06 20:11:55 +0000
commit     5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree       1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/RISCV
parent     3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent     312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
Merge llvm-project main llvmorg-18-init-15088-gd14ee76181fb
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project main llvmorg-18-init-15088-gd14ee76181fb.

PR:         276104
MFC after:  1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 126
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 84
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 446
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 1177
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 439
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp | 155
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp | 173
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp | 169
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 442
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 63
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 118
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 44
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 98
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 40
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 79
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 202
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 136
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 70
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp | 103
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp | 93
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 46
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td | 258
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp | 216
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 146
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td | 159
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp | 56
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 484
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4756
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h | 99
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp | 87
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 473
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp | 458
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td | 103
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 981
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h | 44
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td | 371
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 137
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 46
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 65
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 824
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 2114
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 140
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 294
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 665
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 256
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td | 329
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 260
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 135
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 710
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp | 125
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 101
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp | 116
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td | 81
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp | 157
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 74
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 67
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 542
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 218
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 284
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 20
99 files changed, 16003 insertions, 5640 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 046a208921ae..f3ea0f597eec 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -16,7 +16,6 @@
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -108,10 +107,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -204,6 +202,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus parseFRMArg(OperandVector &Operands);
ParseStatus parseFenceArg(OperandVector &Operands);
ParseStatus parseReglist(OperandVector &Operands);
+ ParseStatus parseRegReg(OperandVector &Operands);
ParseStatus parseRetval(OperandVector &Operands);
ParseStatus parseZcmpSpimm(OperandVector &Operands);
@@ -260,6 +259,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
std::unique_ptr<RISCVOperand> defaultMaskRegOp() const;
std::unique_ptr<RISCVOperand> defaultFRMArgOp() const;
+ std::unique_ptr<RISCVOperand> defaultFRMArgLegacyOp() const;
public:
enum RISCVMatchResultTy {
@@ -286,11 +286,11 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
auto ABIName = StringRef(Options.ABIName);
- if (ABIName.endswith("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) {
+ if (ABIName.ends_with("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
- } else if (ABIName.endswith("d") &&
+ } else if (ABIName.ends_with("d") &&
!getSTI().hasFeature(RISCV::FeatureStdExtD)) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
@@ -325,6 +325,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
Fence,
Rlist,
Spimm,
+ RegReg,
} Kind;
struct RegOp {
@@ -369,6 +370,11 @@ struct RISCVOperand final : public MCParsedAsmOperand {
unsigned Val;
};
+ struct RegRegOp {
+ MCRegister Reg1;
+ MCRegister Reg2;
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
@@ -381,6 +387,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
struct FenceOp Fence;
struct RlistOp Rlist;
struct SpimmOp Spimm;
+ struct RegRegOp RegReg;
};
RISCVOperand(KindTy K) : Kind(K) {}
@@ -421,6 +428,9 @@ public:
case KindTy::Spimm:
Spimm = o.Spimm;
break;
+ case KindTy::RegReg:
+ RegReg = o.RegReg;
+ break;
}
}
@@ -445,6 +455,7 @@ public:
bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
+ bool isRegReg() const { return Kind == KindTy::RegReg; }
bool isRlist() const { return Kind == KindTy::Rlist; }
bool isSpimm() const { return Kind == KindTy::Spimm; }
@@ -564,6 +575,7 @@ public:
/// Return true if the operand is a valid floating point rounding mode.
bool isFRMArg() const { return Kind == KindTy::FRM; }
+ bool isFRMArgLegacy() const { return Kind == KindTy::FRM; }
bool isRTZArg() const { return isFRMArg() && FRM.FRM == RISCVFPRndMode::RTZ; }
/// Return true if the operand is a valid fli.s floating-point immediate.
@@ -660,6 +672,7 @@ public:
bool isUImm6() const { return IsUImm<6>(); }
bool isUImm7() const { return IsUImm<7>(); }
bool isUImm8() const { return IsUImm<8>(); }
+ bool isUImm20() const { return IsUImm<20>(); }
bool isUImm8GE32() const {
int64_t Imm;
@@ -1024,6 +1037,10 @@ public:
RISCVZC::printSpimm(Spimm.Val, OS);
OS << '>';
break;
+ case KindTy::RegReg:
+ OS << "<RegReg: Reg1 " << RegName(RegReg.Reg1);
+ OS << " Reg2 " << RegName(RegReg.Reg2);
+ break;
}
}
@@ -1107,6 +1124,16 @@ public:
return Op;
}
+ static std::unique_ptr<RISCVOperand> createRegReg(unsigned Reg1No,
+ unsigned Reg2No, SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::RegReg);
+ Op->RegReg.Reg1 = Reg1No;
+ Op->RegReg.Reg2 = Reg2No;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<RISCVOperand> createSpimm(unsigned Spimm, SMLoc S) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Spimm);
Op->Spimm.Val = Spimm;
@@ -1182,6 +1209,12 @@ public:
Inst.addOperand(MCOperand::createImm(Rlist.Val));
}
+ void addRegRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(RegReg.Reg1));
+ Inst.addOperand(MCOperand::createReg(RegReg.Reg2));
+ }
+
void addSpimmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(Spimm.Val));
@@ -1478,6 +1511,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"operand must be a symbol with "
"%hi/%tprel_hi modifier or an integer in "
"the range");
+ case Match_InvalidUImm20:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1);
case Match_InvalidUImm20AUIPC:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 20) - 1,
@@ -1546,6 +1581,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidRnumArg: {
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
}
+ case Match_InvalidRegReg: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operands must be register and register");
+ }
}
llvm_unreachable("Unknown match type detected!");
@@ -1571,27 +1610,26 @@ static MCRegister matchRegisterNameHelper(bool IsRVE, StringRef Name) {
return Reg;
}
-bool RISCVAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool RISCVAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
return Error(StartLoc, "invalid register name");
return false;
}
-OperandMatchResultTy RISCVAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ParseStatus RISCVAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
StringRef Name = getLexer().getTok().getIdentifier();
- RegNo = matchRegisterNameHelper(isRVE(), Name);
- if (!RegNo)
- return MatchOperand_NoMatch;
+ Reg = matchRegisterNameHelper(isRVE(), Name);
+ if (!Reg)
+ return ParseStatus::NoMatch;
getParser().Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
ParseStatus RISCVAsmParser::parseRegister(OperandVector &Operands,
@@ -2379,6 +2417,37 @@ ParseStatus RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
return ParseStatus::Success;
}
+ParseStatus RISCVAsmParser::parseRegReg(OperandVector &Operands) {
+ // RR : a2(a1)
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return ParseStatus::NoMatch;
+
+ StringRef RegName = getLexer().getTok().getIdentifier();
+ MCRegister Reg = matchRegisterNameHelper(isRVE(), RegName);
+ if (!Reg)
+ return Error(getLoc(), "invalid register");
+ getLexer().Lex();
+
+ if (parseToken(AsmToken::LParen, "expected '(' or invalid operand"))
+ return ParseStatus::Failure;
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return Error(getLoc(), "expected register");
+
+ StringRef Reg2Name = getLexer().getTok().getIdentifier();
+ MCRegister Reg2 = matchRegisterNameHelper(isRVE(), Reg2Name);
+ if (!Reg2)
+ return Error(getLoc(), "invalid register");
+ getLexer().Lex();
+
+ if (parseToken(AsmToken::RParen, "expected ')'"))
+ return ParseStatus::Failure;
+
+ Operands.push_back(RISCVOperand::createRegReg(Reg, Reg2, getLoc()));
+
+ return ParseStatus::Success;
+}
+
ParseStatus RISCVAsmParser::parseReglist(OperandVector &Operands) {
// Rlist: {ra [, s0[-sN]]}
// XRlist: {x1 [, x8[-x9][, x18[-xN]]]}
@@ -2974,8 +3043,7 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCStreamer &Out) {
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, getSTI());
MCRegister SrcReg = RISCV::X0;
for (const RISCVMatInt::Inst &Inst : Seq) {
@@ -3200,7 +3268,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
- .addOperand(Inst.getOperand(4)));
+ .addReg(RISCV::NoRegister));
emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
@@ -3209,8 +3277,8 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
// masked va >= x, any vd
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd,
- // vd, v0; vmor.mm vd, vt, vd
+ // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt;
+ // vmandn.mm vd, vd, v0; vmor.mm vd, vt, vd
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
"The temporary vector register should not be V0.");
emitToStreamer(Out, MCInstBuilder(Opcode)
@@ -3256,6 +3324,11 @@ std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgOp() const {
llvm::SMLoc());
}
+std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgLegacyOp() const {
+ return RISCVOperand::createFRMArg(RISCVFPRndMode::RoundingMode::RNE,
+ llvm::SMLoc());
+}
+
bool RISCVAsmParser::validateInstruction(MCInst &Inst,
OperandVector &Operands) {
unsigned Opcode = Inst.getOpcode();
@@ -3352,16 +3425,21 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
}
unsigned DestReg = Inst.getOperand(0).getReg();
+ unsigned Offset = 0;
+ int TiedOp = MCID.getOperandConstraint(1, MCOI::TIED_TO);
+ if (TiedOp == 0)
+ Offset = 1;
+
// Operands[1] will be the first operand, DestReg.
SMLoc Loc = Operands[1]->getStartLoc();
if (MCID.TSFlags & RISCVII::VS2Constraint) {
- unsigned CheckReg = Inst.getOperand(1).getReg();
+ unsigned CheckReg = Inst.getOperand(Offset + 1).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
}
- if ((MCID.TSFlags & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
- unsigned CheckReg = Inst.getOperand(2).getReg();
+ if ((MCID.TSFlags & RISCVII::VS1Constraint) && Inst.getOperand(Offset + 2).isReg()) {
+ unsigned CheckReg = Inst.getOperand(Offset + 2).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index e6ea6baa72ff..53e2b6b4d94e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -196,10 +196,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo >= 32)
- return MCDisassembler::Fail;
-
- if (RegNo % 2)
+ if (RegNo >= 32 || RegNo % 2)
return MCDisassembler::Fail;
const RISCVDisassembler *Dis =
@@ -216,10 +213,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo >= 32)
- return MCDisassembler::Fail;
-
- if (RegNo % 4)
+ if (RegNo >= 32 || RegNo % 4)
return MCDisassembler::Fail;
const RISCVDisassembler *Dis =
@@ -236,10 +230,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo >= 32)
- return MCDisassembler::Fail;
-
- if (RegNo % 8)
+ if (RegNo >= 32 || RegNo % 8)
return MCDisassembler::Fail;
const RISCVDisassembler *Dis =
@@ -256,16 +247,11 @@ static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- MCRegister Reg = RISCV::NoRegister;
- switch (RegNo) {
- default:
+ if (RegNo > 2) {
return MCDisassembler::Fail;
- case 0:
- Reg = RISCV::V0;
- break;
- case 1:
- break;
}
+ MCRegister Reg = (RegNo == 0) ? RISCV::V0 : RISCV::NoRegister;
+
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -367,6 +353,9 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
+static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+
static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
@@ -464,6 +453,15 @@ static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
+ uint32_t Rs1 = fieldFromInstruction(Insn, 0, 5);
+ uint32_t Rs2 = fieldFromInstruction(Insn, 5, 5);
+ DecodeGPRRegisterClass(Inst, Rs1, Address, Decoder);
+ DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder);
+ return MCDisassembler::Success;
+}
+
// spimm is based on rlist now.
static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder) {
@@ -528,43 +526,65 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32,
"RVZfinx table (Float in Integer)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps,
- DecoderTableVentana32, "Ventana custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableTHeadBa32,
+ DecoderTableXVentana32, "Ventana custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableXTHeadBa32,
"XTHeadBa custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableTHeadBb32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableXTHeadBb32,
"XTHeadBb custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableTHeadBs32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableXTHeadBs32,
"XTHeadBs custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCondMov,
- DecoderTableTHeadCondMov32,
+ DecoderTableXTHeadCondMov32,
"XTHeadCondMov custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableTHeadCmo32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableXTHeadCmo32,
"XTHeadCmo custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadFMemIdx,
- DecoderTableTHeadFMemIdx32,
+ DecoderTableXTHeadFMemIdx32,
"XTHeadFMemIdx custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableTHeadMac32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableXTHeadMac32,
"XTHeadMac custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemIdx,
- DecoderTableTHeadMemIdx32,
+ DecoderTableXTHeadMemIdx32,
"XTHeadMemIdx custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemPair,
- DecoderTableTHeadMemPair32,
+ DecoderTableXTHeadMemPair32,
"XTHeadMemPair custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadSync,
- DecoderTableTHeadSync32,
+ DecoderTableXTHeadSync32,
"XTHeadSync custom opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableTHeadV32,
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableXTHeadVdot32,
"XTHeadVdot custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfvcp, DecoderTableXSfvcp32,
"SiFive VCIX custom opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvqmaccdod, DecoderTableXSfvqmaccdod32,
+ "SiFive Matrix Multiplication (2x8 and 8x2) Instruction opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvqmaccqoq, DecoderTableXSfvqmaccqoq32,
+ "SiFive Matrix Multiplication (4x8 and 8x4) Instruction opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvfwmaccqqq, DecoderTableXSfvfwmaccqqq32,
+ "SiFive Matrix Multiplication Instruction opcode table");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureVendorXSfvfnrclipxfqf, DecoderTableXSfvfnrclipxfqf32,
+ "SiFive FP32-to-int8 Ranged Clip Instructions opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32,
"Sifive CIE custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip,
DecoderTableXCVbitmanip32,
"CORE-V Bit Manipulation custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVelw, DecoderTableXCVelw32,
+ "CORE-V Event load custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmac, DecoderTableXCVmac32,
"CORE-V MAC custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmem, DecoderTableXCVmem32,
+ "CORE-V MEM custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCValu, DecoderTableXCValu32,
+ "CORE-V ALU custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVsimd, DecoderTableXCVsimd32,
+ "CORE-V SIMD extensions custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbi, DecoderTableXCVbi32,
+ "CORE-V Immediate Branching custom opcode table");
TRY_TO_DECODE(true, DecoderTable32, "RISCV32 table");
return MCDisassembler::Fail;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 5505f89a32f2..50ed85acdec0 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -14,9 +14,11 @@
#include "RISCVCallLowering.h"
#include "RISCVISelLowering.h"
+#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
@@ -46,37 +48,118 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
- LocInfo, Flags, State, /*IsFixed=*/true, IsRet,
- Info.Ty, *Subtarget.getTargetLowering(),
- /*FirstMaskArgument=*/std::nullopt);
+ if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
+ LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty,
+ *Subtarget.getTargetLowering(),
+ /*FirstMaskArgument=*/std::nullopt))
+ return true;
+
+ StackSize = State.getStackSize();
+ return false;
}
};
struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
RISCVOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB)
- : OutgoingValueHandler(B, MRI), MIB(MIB) {}
-
- MachineInstrBuilder MIB;
-
+ : OutgoingValueHandler(B, MRI), MIB(MIB),
+ Subtarget(MIRBuilder.getMF().getSubtarget<RISCVSubtarget>()) {}
Register getStackAddress(uint64_t MemSize, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override {
- llvm_unreachable("not implemented");
+ MachineFunction &MF = MIRBuilder.getMF();
+ LLT p0 = LLT::pointer(0, Subtarget.getXLen());
+ LLT sXLen = LLT::scalar(Subtarget.getXLen());
+
+ if (!SPReg)
+ SPReg = MIRBuilder.buildCopy(p0, Register(RISCV::X2)).getReg(0);
+
+ auto OffsetReg = MIRBuilder.buildConstant(sXLen, Offset);
+
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
+
+ MPO = MachinePointerInfo::getStack(MF, Offset);
+ return AddrReg.getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("not implemented");
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+ uint64_t LocMemOffset = VA.getLocMemOffset();
+
+ // TODO: Move StackAlignment to subtarget and share with FrameLowering.
+ auto MMO =
+ MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ commonAlignment(Align(16), LocMemOffset));
+
+ Register ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
+ const CCValAssign &VA) override {
+ // If we're passing an f32 value into an i64, anyextend before copying.
+ if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
+ ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0);
+
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
+
+ unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
+ assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
+ const CCValAssign &VALo = VAs[0];
+ const CCValAssign &VAHi = VAs[1];
+
+ assert(VAHi.needsCustom() && "Value doesn't need custom handling");
+ assert(VALo.getValNo() == VAHi.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
+ VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
+ "unexpected custom value");
+
+ Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+ MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]);
+
+ if (VAHi.isMemLoc()) {
+ LLT MemTy(VAHi.getLocVT());
+
+ MachinePointerInfo MPO;
+ Register StackAddr = getStackAddress(
+ MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);
+
+ assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
+ const_cast<CCValAssign &>(VAHi));
+ }
+
+ auto assignFunc = [=]() {
+ assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
+ if (VAHi.isRegLoc())
+ assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);
+ };
+
+ if (Thunk) {
+ *Thunk = assignFunc;
+ return 1;
+ }
+
+ assignFunc();
+ return 1;
+ }
+
+private:
+ MachineInstrBuilder MIB;
+
+ // Cache the SP register vreg if we need it more than once in this call site.
+ Register SPReg;
+
+ const RISCVSubtarget &Subtarget;
};
struct RISCVIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
@@ -103,33 +186,105 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
- LocInfo, Flags, State, /*IsFixed=*/true, IsRet,
- Info.Ty, *Subtarget.getTargetLowering(),
- /*FirstMaskArgument=*/std::nullopt);
+ if (LocVT.isScalableVector())
+ MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
+
+ if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
+ LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty,
+ *Subtarget.getTargetLowering(),
+ /*FirstMaskArgument=*/std::nullopt))
+ return true;
+
+ StackSize = State.getStackSize();
+ return false;
}
};
struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler {
RISCVIncomingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
- : IncomingValueHandler(B, MRI) {}
+ : IncomingValueHandler(B, MRI),
+ Subtarget(MIRBuilder.getMF().getSubtarget<RISCVSubtarget>()) {}
Register getStackAddress(uint64_t MemSize, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override {
- llvm_unreachable("not implemented");
+ MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
+
+ int FI = MFI.CreateFixedObject(MemSize, Offset, /*Immutable=*/true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ return MIRBuilder.buildFrameIndex(LLT::pointer(0, Subtarget.getXLen()), FI)
+ .getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
- MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("not implemented");
+ const MachinePointerInfo &MPO,
+ const CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
+ inferAlignFromPtrInfo(MF, MPO));
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
- // Copy argument received in physical register to desired VReg.
+ const CCValAssign &VA) override {
+ markPhysRegUsed(PhysReg);
+ IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
+ }
+
+ unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
+ assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
+ const CCValAssign &VALo = VAs[0];
+ const CCValAssign &VAHi = VAs[1];
+
+ assert(VAHi.needsCustom() && "Value doesn't need custom handling");
+ assert(VALo.getValNo() == VAHi.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
+ VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
+ "unexpected custom value");
+
+ Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+
+ if (VAHi.isMemLoc()) {
+ LLT MemTy(VAHi.getLocVT());
+
+ MachinePointerInfo MPO;
+ Register StackAddr = getStackAddress(
+ MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);
+
+ assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
+ const_cast<CCValAssign &>(VAHi));
+ }
+
+ assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
+ if (VAHi.isRegLoc())
+ assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);
+
+ MIRBuilder.buildMergeLikeInstr(Arg.Regs[0], NewRegs);
+
+ return 1;
+ }
+
+ /// How the physical register gets marked varies between formal
+ /// parameters (it's a basic-block live-in), and a call instruction
+ /// (it's an implicit-def of the BL).
+ virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
+
+private:
+ const RISCVSubtarget &Subtarget;
+};
+
+struct RISCVFormalArgHandler : public RISCVIncomingValueHandler {
+ RISCVFormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
+ : RISCVIncomingValueHandler(B, MRI) {}
+
+ void markPhysRegUsed(MCRegister PhysReg) override {
+ MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
- MIRBuilder.buildCopy(ValVReg, PhysReg);
}
};
@@ -138,14 +293,11 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
MachineInstrBuilder &MIB)
: RISCVIncomingValueHandler(B, MRI), MIB(MIB) {}
- MachineInstrBuilder MIB;
-
- void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign VA) override {
- // Copy argument received in physical register to desired VReg.
+ void markPhysRegUsed(MCRegister PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
- MIRBuilder.buildCopy(ValVReg, PhysReg);
}
+
+ MachineInstrBuilder MIB;
};
} // namespace
@@ -153,6 +305,80 @@ struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI)
: CallLowering(&TLI) {}
+/// Return true if scalable vector with ScalarTy is legal for lowering.
+static bool isLegalElementTypeForRVV(Type *EltTy,
+ const RISCVSubtarget &Subtarget) {
+ if (EltTy->isPointerTy())
+ return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
+ if (EltTy->isIntegerTy(1) || EltTy->isIntegerTy(8) ||
+ EltTy->isIntegerTy(16) || EltTy->isIntegerTy(32))
+ return true;
+ if (EltTy->isIntegerTy(64))
+ return Subtarget.hasVInstructionsI64();
+ if (EltTy->isHalfTy())
+ return Subtarget.hasVInstructionsF16();
+ if (EltTy->isBFloatTy())
+ return Subtarget.hasVInstructionsBF16();
+ if (EltTy->isFloatTy())
+ return Subtarget.hasVInstructionsF32();
+ if (EltTy->isDoubleTy())
+ return Subtarget.hasVInstructionsF64();
+ return false;
+}
+
+// TODO: Support all argument types.
+// TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall.
+static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
+ bool IsLowerArgs = false) {
+ // TODO: Integers larger than 2*XLen are passed indirectly which is not
+ // supported yet.
+ if (T->isIntegerTy())
+ return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ if (T->isFloatTy() || T->isDoubleTy())
+ return true;
+ if (T->isPointerTy())
+ return true;
+ // TODO: Support fixed vector types.
+ if (IsLowerArgs && T->isVectorTy() && Subtarget.hasVInstructions() &&
+ T->isScalableTy() &&
+ isLegalElementTypeForRVV(T->getScalarType(), Subtarget))
+ return true;
+ return false;
+}
+
+// TODO: Only integer, pointer and aggregate types are supported now.
+// TODO: Remove IsLowerRetVal argument by adding support for vectors in
+// lowerCall.
+static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget,
+ bool IsLowerRetVal = false) {
+ // TODO: Integers larger than 2*XLen are passed indirectly which is not
+ // supported yet.
+ if (T->isIntegerTy())
+ return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ if (T->isFloatTy() || T->isDoubleTy())
+ return true;
+ if (T->isPointerTy())
+ return true;
+
+ if (T->isArrayTy())
+ return isSupportedReturnType(T->getArrayElementType(), Subtarget);
+
+ if (T->isStructTy()) {
+ auto StructT = cast<StructType>(T);
+ for (unsigned i = 0, e = StructT->getNumElements(); i != e; ++i)
+ if (!isSupportedReturnType(StructT->getElementType(i), Subtarget))
+ return false;
+ return true;
+ }
+
+ if (IsLowerRetVal && T->isVectorTy() && Subtarget.hasVInstructions() &&
+ T->isScalableTy() &&
+ isLegalElementTypeForRVV(T->getScalarType(), Subtarget))
+ return true;
+
+ return false;
+}
+
bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs,
@@ -160,8 +386,9 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!Val)
return true;
- // TODO: Only integer, pointer and aggregate types are supported now.
- if (!Val->getType()->isIntOrPtrTy() && !Val->getType()->isAggregateType())
+ const RISCVSubtarget &Subtarget =
+ MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
+ if (!isSupportedReturnType(Val->getType(), Subtarget, /*IsLowerRetVal=*/true))
return false;
MachineFunction &MF = MIRBuilder.getMF();
@@ -196,25 +423,89 @@ bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return true;
}
+/// If there are varargs that were passed in a0-a7, the data in those registers
+/// must be copied to the varargs save area on the stack.
+void RISCVCallLowering::saveVarArgRegisters(
+ MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
+ IncomingValueAssigner &Assigner, CCState &CCInfo) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ unsigned XLenInBytes = Subtarget.getXLen() / 8;
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+ // Size of the vararg save area. For now, the varargs save area is either
+ // zero or large enough to hold a0-a7.
+ int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+ int FI;
+
+ // If all registers are allocated, then all varargs must be passed on the
+ // stack and we don't need to save any argregs.
+ if (VarArgsSaveSize == 0) {
+ int VaArgOffset = Assigner.StackSize;
+ FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+ } else {
+ int VaArgOffset = -VarArgsSaveSize;
+ FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
+
+ // If saving an odd number of registers then create an extra stack slot to
+ // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
+ // offsets to even-numbered registered remain 2*XLEN-aligned.
+ if (Idx % 2) {
+ MFI.CreateFixedObject(XLenInBytes,
+ VaArgOffset - static_cast<int>(XLenInBytes), true);
+ VarArgsSaveSize += XLenInBytes;
+ }
+
+ const LLT p0 = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(),
+ Subtarget.getXLen());
+ const LLT sXLen = LLT::scalar(Subtarget.getXLen());
+
+ auto FIN = MIRBuilder.buildFrameIndex(p0, FI);
+ auto Offset = MIRBuilder.buildConstant(
+ MRI.createGenericVirtualRegister(sXLen), XLenInBytes);
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ const MVT XLenVT = Subtarget.getXLenVT();
+ for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
+ const Register VReg = MRI.createGenericVirtualRegister(sXLen);
+ Handler.assignValueToReg(
+ VReg, ArgRegs[I],
+ CCValAssign::getReg(I + MF.getFunction().getNumOperands(), XLenVT,
+ ArgRegs[I], XLenVT, CCValAssign::Full));
+ auto MPO =
+ MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes);
+ MIRBuilder.buildStore(VReg, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
+ FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
+ FIN.getReg(0), Offset);
+ }
+ }
+
+ // Record the frame index of the first variable argument which is a value
+ // necessary to G_VASTART.
+ RVFI->setVarArgsFrameIndex(FI);
+ RVFI->setVarArgsSaveSize(VarArgsSaveSize);
+}
+
bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const {
- // Early exit if there are no arguments.
- if (F.arg_empty())
+ // Early exit if there are no arguments. varargs are not part of F.args() but
+ // must be lowered.
+ if (F.arg_empty() && !F.isVarArg())
return true;
- // TODO: Support vararg functions.
- if (F.isVarArg())
- return false;
-
- // TODO: Support all argument types.
+ const RISCVSubtarget &Subtarget =
+ MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
for (auto &Arg : F.args()) {
- if (Arg.getType()->isIntegerTy())
- continue;
- if (Arg.getType()->isPointerTy())
- continue;
- return false;
+ if (!isSupportedArgumentType(Arg.getType(), Subtarget,
+ /*IsLowerArgs=*/true))
+ return false;
}
MachineFunction &MF = MIRBuilder.getMF();
@@ -239,10 +530,18 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
RISCVIncomingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/false);
- RISCVIncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
+ RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());
- return determineAndHandleAssignments(Handler, Assigner, SplitArgInfos,
- MIRBuilder, CC, F.isVarArg());
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, F.isVarArg(), MIRBuilder.getMF(), ArgLocs, F.getContext());
+ if (!determineAssignments(Assigner, SplitArgInfos, CCInfo) ||
+ !handleAssignments(Handler, SplitArgInfos, CCInfo, ArgLocs, MIRBuilder))
+ return false;
+
+ if (F.isVarArg())
+ saveVarArgRegisters(MIRBuilder, Handler, Assigner, CCInfo);
+
+ return true;
}
bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
@@ -252,21 +551,20 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const Function &F = MF.getFunction();
CallingConv::ID CC = F.getCallingConv();
- // TODO: Support vararg functions.
- if (Info.IsVarArg)
- return false;
-
- // TODO: Support all argument types.
+ const RISCVSubtarget &Subtarget =
+ MIRBuilder.getMF().getSubtarget<RISCVSubtarget>();
for (auto &AInfo : Info.OrigArgs) {
- if (AInfo.Ty->isIntegerTy())
- continue;
- if (AInfo.Ty->isPointerTy())
- continue;
- if (AInfo.Ty->isFloatingPointTy())
- continue;
- return false;
+ if (!isSupportedArgumentType(AInfo.Ty, Subtarget))
+ return false;
}
+ if (!Info.OrigRet.Ty->isVoidTy() &&
+ !isSupportedReturnType(Info.OrigRet.Ty, Subtarget))
+ return false;
+
+ MachineInstrBuilder CallSeqStart =
+ MIRBuilder.buildInstr(RISCV::ADJCALLSTACKDOWN);
+
SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<ISD::OutputArg, 8> Outs;
for (auto &AInfo : Info.OrigArgs) {
@@ -279,14 +577,17 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// TODO: Support tail calls.
Info.IsTailCall = false;
+ // Select the recommended relocation type R_RISCV_CALL_PLT.
if (!Info.Callee.isReg())
- Info.Callee.setTargetFlags(RISCVII::MO_CALL);
+ Info.Callee.setTargetFlags(RISCVII::MO_PLT);
MachineInstrBuilder Call =
MIRBuilder
.buildInstrNoInsert(Info.Callee.isReg() ? RISCV::PseudoCALLIndirect
: RISCV::PseudoCALL)
.add(Info.Callee);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
RISCVOutgoingValueAssigner ArgAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
@@ -298,22 +599,26 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder.insertInstr(Call);
+ CallSeqStart.addImm(ArgAssigner.StackSize).addImm(0);
+ MIRBuilder.buildInstr(RISCV::ADJCALLSTACKUP)
+ .addImm(ArgAssigner.StackSize)
+ .addImm(0);
+
+ // If Callee is a reg, since it is used by a target specific
+ // instruction, it must have a register class matching the
+ // constraint of that instruction.
+ if (Call->getOperand(0).isReg())
+ constrainOperandRegClass(MF, *TRI, MF.getRegInfo(),
+ *Subtarget.getInstrInfo(),
+ *Subtarget.getRegBankInfo(), *Call,
+ Call->getDesc(), Call->getOperand(0), 0);
+
if (Info.OrigRet.Ty->isVoidTy())
return true;
- // TODO: Only integer, pointer and aggregate types are supported now.
- if (!Info.OrigRet.Ty->isIntOrPtrTy() && !Info.OrigRet.Ty->isAggregateType())
- return false;
-
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
- // Assignments should be handled *before* the merging of values takes place.
- // To ensure this, the insert point is temporarily adjusted to just after the
- // call instruction.
- MachineBasicBlock::iterator CallInsertPt = Call;
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), std::next(CallInsertPt));
-
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/true);
@@ -322,8 +627,5 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder, CC, Info.IsVarArg))
return false;
- // Readjust insert point to end of basic block.
- MIRBuilder.setMBB(MIRBuilder.getMBB());
-
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
index d80a666f3489..abe704b4a645 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
@@ -42,6 +42,11 @@ public:
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
+
+ void saveVarArgRegisters(MachineIRBuilder &MIRBuilder,
+ CallLowering::IncomingValueHandler &Handler,
+ IncomingValueAssigner &Assigner,
+ CCState &CCInfo) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 691439b3a18b..61bdbfc47d94 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -11,17 +11,23 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVRegisterBankInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "riscv-isel"
using namespace llvm;
+using namespace MIPatternMatch;
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "RISCVGenGlobalISel.inc"
@@ -35,16 +41,86 @@ public:
const RISCVSubtarget &STI,
const RISCVRegisterBankInfo &RBI);
- bool select(MachineInstr &I) override;
+ bool select(MachineInstr &MI) override;
static const char *getName() { return DEBUG_TYPE; }
private:
+ const TargetRegisterClass *
+ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) const;
+
+ bool isRegInGprb(Register Reg, MachineRegisterInfo &MRI) const;
+ bool isRegInFprb(Register Reg, MachineRegisterInfo &MRI) const;
+
+ // tblgen-erated 'select' implementation, used as the initial selector for
+ // the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+ // A lowering phase that runs before any selection attempts.
+ // Returns true if the instruction was modified.
+ void preISelLower(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI);
+
+ bool replacePtrWithInt(MachineOperand &Op, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI);
+
+ // Custom selection methods
+ bool selectCopy(MachineInstr &MI, MachineRegisterInfo &MRI) const;
+ bool selectImplicitDef(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool materializeImm(Register Reg, int64_t Imm, MachineIRBuilder &MIB) const;
+ bool selectAddr(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI, bool IsLocal = true,
+ bool IsExternWeak = false) const;
+ bool selectSExtInreg(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool selectFPCompare(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool selectIntrinsicWithSideEffects(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
+ MachineIRBuilder &MIB) const;
+ bool selectMergeValues(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+ bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+
+ ComplexRendererFns selectShiftMask(MachineOperand &Root) const;
+ ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const;
+
+ ComplexRendererFns selectSHXADDOp(MachineOperand &Root, unsigned ShAmt) const;
+ template <unsigned ShAmt>
+ ComplexRendererFns selectSHXADDOp(MachineOperand &Root) const {
+ return selectSHXADDOp(Root, ShAmt);
+ }
+
+ ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root,
+ unsigned ShAmt) const;
+ template <unsigned ShAmt>
+ ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root) const {
+ return selectSHXADD_UWOp(Root, ShAmt);
+ }
+
+ // Custom renderers for tablegen
+ void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImmSubFromXLen(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImmSubFrom32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImmPlus1(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
+ void renderTrailingZeros(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
const RISCVSubtarget &STI;
const RISCVInstrInfo &TII;
const RISCVRegisterInfo &TRI;
const RISCVRegisterBankInfo &RBI;
+ const RISCVTargetMachine &TM;
// FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel
// uses "STI." in the code generated by TableGen. We need to unify the name of
@@ -70,6 +146,7 @@ RISCVInstructionSelector::RISCVInstructionSelector(
const RISCVTargetMachine &TM, const RISCVSubtarget &STI,
const RISCVRegisterBankInfo &RBI)
: STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI),
+ TM(TM),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "RISCVGenGlobalISel.inc"
@@ -80,19 +157,1111 @@ RISCVInstructionSelector::RISCVInstructionSelector(
{
}
-bool RISCVInstructionSelector::select(MachineInstr &I) {
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const {
+ // TODO: Also check if we are seeing the result of an AND operation which
+ // could be bypassed since we only check the lower log2(xlen) bits.
+ return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root,
+ unsigned ShAmt) const {
+ using namespace llvm::MIPatternMatch;
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (!Root.isReg())
+ return std::nullopt;
+ Register RootReg = Root.getReg();
+
+ const unsigned XLen = STI.getXLen();
+ APInt Mask, C2;
+ Register RegY;
+ std::optional<bool> LeftShift;
+ // (and (shl y, c2), mask)
+ if (mi_match(RootReg, MRI,
+ m_GAnd(m_GShl(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask))))
+ LeftShift = true;
+ // (and (lshr y, c2), mask)
+ else if (mi_match(RootReg, MRI,
+ m_GAnd(m_GLShr(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask))))
+ LeftShift = false;
+
+ if (LeftShift.has_value()) {
+ if (*LeftShift)
+ Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue());
+ else
+ Mask &= maskTrailingOnes<uint64_t>(XLen - C2.getLimitedValue());
+
+ if (Mask.isShiftedMask()) {
+ unsigned Leading = XLen - Mask.getActiveBits();
+ unsigned Trailing = Mask.countr_zero();
+ // Given (and (shl y, c2), mask) in which mask has no leading zeros and
+ // c3 trailing zeros. We can use an SRLI by c3 - c2 followed by a SHXADD.
+ if (*LeftShift && Leading == 0 && C2.ult(Trailing) && Trailing == ShAmt) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SRLI, {DstReg}, {RegY})
+ .addImm(Trailing - C2.getLimitedValue());
+ MIB.addReg(DstReg);
+ }}};
+ }
+
+ // Given (and (lshr y, c2), mask) in which mask has c2 leading zeros and
+ // c3 trailing zeros. We can use an SRLI by c2 + c3 followed by a SHXADD.
+ if (!*LeftShift && Leading == C2 && Trailing == ShAmt) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SRLI, {DstReg}, {RegY})
+ .addImm(Leading + Trailing);
+ MIB.addReg(DstReg);
+ }}};
+ }
+ }
+ }
+
+ LeftShift.reset();
+
+ // (shl (and y, mask), c2)
+ if (mi_match(RootReg, MRI,
+ m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))),
+ m_ICst(C2))))
+ LeftShift = true;
+ // (lshr (and y, mask), c2)
+ else if (mi_match(RootReg, MRI,
+ m_GLShr(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))),
+ m_ICst(C2))))
+ LeftShift = false;
+
+ if (LeftShift.has_value() && Mask.isShiftedMask()) {
+ unsigned Leading = XLen - Mask.getActiveBits();
+ unsigned Trailing = Mask.countr_zero();
+
+ // Given (shl (and y, mask), c2) in which mask has 32 leading zeros and
+ // c3 trailing zeros. If c2 + c3 == ShAmt, we can emit SRLIW + SHXADD.
+ bool Cond = *LeftShift && Leading == 32 && Trailing > 0 &&
+ (Trailing + C2.getLimitedValue()) == ShAmt;
+ if (!Cond)
+ // Given (lshr (and y, mask), c2) in which mask has 32 leading zeros and
+ // c3 trailing zeros. If c3 - c2 == ShAmt, we can emit SRLIW + SHXADD.
+ Cond = !*LeftShift && Leading == 32 && C2.ult(Trailing) &&
+ (Trailing - C2.getLimitedValue()) == ShAmt;
+
+ if (Cond) {
+ Register DstReg = MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SRLIW, {DstReg}, {RegY})
+ .addImm(Trailing);
+ MIB.addReg(DstReg);
+ }}};
+ }
+ }
+
+ return std::nullopt;
+}
+
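+// Match the shifted source operand of a SHXADD_UW (sh1add.uw/sh2add.uw/
+// sh3add.uw) instruction: fold a shift+mask of x into an SLLI so that the
+// zero-extending shift by ShAmt performed by SHXADD_UW reproduces the
+// original value.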
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectSHXADD_UWOp(MachineOperand &Root,
+ unsigned ShAmt) const {
+ using namespace llvm::MIPatternMatch;
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (!Root.isReg())
+ return std::nullopt;
+ Register RootReg = Root.getReg();
+
+ // Given (and (shl x, c2), mask) in which mask is a shifted mask with
+ // 32 - ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
+ // c2 - ShAmt followed by a SHXADD_UW that performs the remaining ShAmt shift.
+ APInt Mask, C2;
+ Register RegX;
+ if (mi_match(
+ RootReg, MRI,
+ m_OneNonDBGUse(m_GAnd(m_OneNonDBGUse(m_GShl(m_Reg(RegX), m_ICst(C2))),
+ m_ICst(Mask))))) {
+ Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue());
+
+ if (Mask.isShiftedMask()) {
+ unsigned Leading = Mask.countl_zero();
+ unsigned Trailing = Mask.countr_zero();
+ if (Leading == 32 - ShAmt && C2 == Trailing && Trailing > ShAmt) {
+ Register DstReg =
+ MRI.createGenericVirtualRegister(MRI.getType(RootReg));
+ return {{[=](MachineInstrBuilder &MIB) {
+ MachineIRBuilder(*MIB.getInstr())
+ .buildInstr(RISCV::SLLI, {DstReg}, {RegX})
+ .addImm(C2.getLimitedValue() - ShAmt);
+ MIB.addReg(DstReg);
+ }}};
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
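+// Select a (base register, simm12 offset) pair for a memory operand, folding
+// frame indices and constant offsets where possible.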
+InstructionSelector::ComplexRendererFns
+RISCVInstructionSelector::selectAddrRegImm(MachineOperand &Root) const {
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (!Root.isReg())
+ return std::nullopt;
+
+ MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+ if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+ }};
+ }
+
+ if (isBaseWithConstantOffset(Root, MRI)) {
+ MachineOperand &LHS = RootDef->getOperand(1);
+ MachineOperand &RHS = RootDef->getOperand(2);
+ MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+ MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+
+ int64_t RHSC = RHSDef->getOperand(1).getCImm()->getSExtValue();
+ if (isInt<12>(RHSC)) {
+ if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
+ }};
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }}};
+ }
+ }
+
+ // TODO: Need to get the immediate from a G_PTR_ADD. Should this be done in
+ // the combiner?
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }}};
+}
+
+/// Returns the RISCVCC::CondCode that corresponds to the CmpInst::Predicate CC.
+/// CC must be an ICMP predicate.
+static RISCVCC::CondCode getRISCVCCFromICmp(CmpInst::Predicate CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Expected ICMP CmpInst::Predicate.");
+ case CmpInst::Predicate::ICMP_EQ:
+ return RISCVCC::COND_EQ;
+ case CmpInst::Predicate::ICMP_NE:
+ return RISCVCC::COND_NE;
+ case CmpInst::Predicate::ICMP_ULT:
+ return RISCVCC::COND_LTU;
+ case CmpInst::Predicate::ICMP_SLT:
+ return RISCVCC::COND_LT;
+ case CmpInst::Predicate::ICMP_UGE:
+ return RISCVCC::COND_GEU;
+ case CmpInst::Predicate::ICMP_SGE:
+ return RISCVCC::COND_GE;
+ }
+}
+
+static void getOperandsForBranch(Register CondReg, MachineRegisterInfo &MRI,
+ RISCVCC::CondCode &CC, Register &LHS,
+ Register &RHS) {
+ // Try to fold an ICmp. If that fails, use a NE compare with X0.
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ if (!mi_match(CondReg, MRI, m_GICmp(m_Pred(Pred), m_Reg(LHS), m_Reg(RHS)))) {
+ LHS = CondReg;
+ RHS = RISCV::X0;
+ CC = RISCVCC::COND_NE;
+ return;
+ }
+
+ // We found an ICmp, do some canonicalizations.
+
+ // Adjust comparisons to use comparison with 0 if possible.
+ if (auto Constant = getIConstantVRegSExtVal(RHS, MRI)) {
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_SGT:
+ // Convert X > -1 to X >= 0
+ if (*Constant == -1) {
+ CC = RISCVCC::COND_GE;
+ RHS = RISCV::X0;
+ return;
+ }
+ break;
+ case CmpInst::Predicate::ICMP_SLT:
+ // Convert X < 1 to 0 >= X
+ if (*Constant == 1) {
+ CC = RISCVCC::COND_GE;
+ RHS = LHS;
+ LHS = RISCV::X0;
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Expected ICMP CmpInst::Predicate.");
+ case CmpInst::Predicate::ICMP_EQ:
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_ULT:
+ case CmpInst::Predicate::ICMP_SLT:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_SGE:
+ // These CCs are supported directly by RISC-V branches.
+ break;
+ case CmpInst::Predicate::ICMP_SGT:
+ case CmpInst::Predicate::ICMP_SLE:
+ case CmpInst::Predicate::ICMP_UGT:
+ case CmpInst::Predicate::ICMP_ULE:
+ // These CCs are not supported directly by RISC-V branches, but they can be
+ // handled by swapping the predicate and the LHS/RHS operands.
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ CC = getRISCVCCFromICmp(Pred);
+ return;
+}
+
+bool RISCVInstructionSelector::select(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineIRBuilder MIB(MI);
+
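+ // Lower pointer-typed G_PTR_ADD/G_PTRMASK into integer G_ADD/G_AND before
+ // attempting selection.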
+ preISelLower(MI, MIB, MRI);
+ const unsigned Opc = MI.getOpcode();
+
+ if (!MI.isPreISelOpcode() || Opc == TargetOpcode::G_PHI) {
+ if (Opc == TargetOpcode::PHI || Opc == TargetOpcode::G_PHI) {
+ const Register DefReg = MI.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+
+ const RegClassOrRegBank &RegClassOrBank =
+ MRI.getRegClassOrRegBank(DefReg);
+
+ const TargetRegisterClass *DefRC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ if (!DefRC) {
+ if (!DefTy.isValid()) {
+ LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+ return false;
+ }
+
+ const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+ DefRC = getRegClassForTypeOnBank(DefTy, RB);
+ if (!DefRC) {
+ LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+ return false;
+ }
+ }
+
+ MI.setDesc(TII.get(TargetOpcode::PHI));
+ return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
+ }
- if (!isPreISelGenericOpcode(I.getOpcode())) {
// Certain non-generic instructions also need some special handling.
+ if (MI.isCopy())
+ return selectCopy(MI, MRI);
+
+ return true;
+ }
+
+ if (selectImpl(MI, *CoverageInfo))
+ return true;
+
+ switch (Opc) {
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_TRUNC:
+ return selectCopy(MI, MRI);
+ case TargetOpcode::G_CONSTANT: {
+ Register DstReg = MI.getOperand(0).getReg();
+ int64_t Imm = MI.getOperand(1).getCImm()->getSExtValue();
+
+ if (!materializeImm(DstReg, Imm, MIB))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ // TODO: Use constant pool for complex constants.
+ // TODO: Optimize +0.0 to use fcvt.d.w for s64 on rv32.
+ Register DstReg = MI.getOperand(0).getReg();
+ const APFloat &FPimm = MI.getOperand(1).getFPImm()->getValueAPF();
+ APInt Imm = FPimm.bitcastToAPInt();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ if (Size == 32 || (Size == 64 && Subtarget->is64Bit())) {
+ Register GPRReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ if (!materializeImm(GPRReg, Imm.getSExtValue(), MIB))
+ return false;
+
+ unsigned Opcode = Size == 64 ? RISCV::FMV_D_X : RISCV::FMV_W_X;
+ auto FMV = MIB.buildInstr(Opcode, {DstReg}, {GPRReg});
+ if (!FMV.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else {
+ assert(Size == 64 && !Subtarget->is64Bit() &&
+ "Unexpected size or subtarget");
+ // Split into two pieces and build through the stack.
+ Register GPRRegHigh = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ Register GPRRegLow = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ if (!materializeImm(GPRRegHigh, Imm.extractBits(32, 32).getSExtValue(),
+ MIB))
+ return false;
+ if (!materializeImm(GPRRegLow, Imm.trunc(32).getSExtValue(), MIB))
+ return false;
+ MachineInstrBuilder PairF64 = MIB.buildInstr(
+ RISCV::BuildPairF64Pseudo, {DstReg}, {GPRRegLow, GPRRegHigh});
+ if (!PairF64.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ auto *GV = MI.getOperand(1).getGlobal();
+ if (GV->isThreadLocal()) {
+ // TODO: implement this case.
+ return false;
+ }
+
+ return selectAddr(MI, MIB, MRI, GV->isDSOLocal(),
+ GV->hasExternalWeakLinkage());
+ }
+ case TargetOpcode::G_JUMP_TABLE:
+ case TargetOpcode::G_CONSTANT_POOL:
+ return selectAddr(MI, MIB, MRI);
+ case TargetOpcode::G_BRCOND: {
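+ // Fold an ICmp feeding the condition into the branch when possible;
+ // otherwise branch on the condition register being non-zero.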
+ Register LHS, RHS;
+ RISCVCC::CondCode CC;
+ getOperandsForBranch(MI.getOperand(0).getReg(), MRI, CC, LHS, RHS);
+
+ auto Bcc = MIB.buildInstr(RISCVCC::getBrCond(CC), {}, {LHS, RHS})
+ .addMBB(MI.getOperand(1).getMBB());
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
+ }
+ case TargetOpcode::G_BRJT: {
+ // FIXME: Move to legalization?
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
+ assert((EntrySize == 4 || (Subtarget->is64Bit() && EntrySize == 8)) &&
+ "Unsupported jump-table entry size");
+ assert(
+ (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_Custom32 ||
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_BlockAddress) &&
+ "Unexpected jump-table entry kind");
+
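+ // Scale the index by the entry size, add the jump-table base, load the
+ // entry, and branch indirectly to the resulting address.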
+ auto SLL =
+ MIB.buildInstr(RISCV::SLLI, {&RISCV::GPRRegClass}, {MI.getOperand(2)})
+ .addImm(Log2_32(EntrySize));
+ if (!SLL.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ // TODO: Use SHXADD. Moving to legalization would fix this automatically.
+ auto ADD = MIB.buildInstr(RISCV::ADD, {&RISCV::GPRRegClass},
+ {MI.getOperand(0), SLL.getReg(0)});
+ if (!ADD.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ unsigned LdOpc = EntrySize == 8 ? RISCV::LD : RISCV::LW;
+ auto Dest =
+ MIB.buildInstr(LdOpc, {&RISCV::GPRRegClass}, {ADD.getReg(0)})
+ .addImm(0)
+ .addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
+ EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout()))));
+ if (!Dest.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ // If the Kind is EK_LabelDifference32, the table stores an offset from
+ // the location of the table. Add the table address to get an absolute
+ // address.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
+ Dest = MIB.buildInstr(RISCV::ADD, {&RISCV::GPRRegClass},
+ {Dest.getReg(0), MI.getOperand(0)});
+ if (!Dest.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ auto Branch =
+ MIB.buildInstr(RISCV::PseudoBRIND, {}, {Dest.getReg(0)}).addImm(0);
+ if (!Branch.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_BRINDIRECT:
+ MI.setDesc(TII.get(RISCV::PseudoBRIND));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ case TargetOpcode::G_SEXT_INREG:
+ return selectSExtInreg(MI, MIB);
+ case TargetOpcode::G_FRAME_INDEX: {
+ // TODO: We may want to replace this code with the SelectionDAG patterns,
+ // which fail to get imported because they use FrameAddrRegImm, which is a
+ // ComplexPattern.
+ MI.setDesc(TII.get(RISCV::ADDI));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+ case TargetOpcode::G_SELECT:
+ return selectSelect(MI, MIB, MRI);
+ case TargetOpcode::G_FCMP:
+ return selectFPCompare(MI, MIB, MRI);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsicWithSideEffects(MI, MIB, MRI);
+ case TargetOpcode::G_FENCE: {
+ AtomicOrdering FenceOrdering =
+ static_cast<AtomicOrdering>(MI.getOperand(0).getImm());
+ SyncScope::ID FenceSSID =
+ static_cast<SyncScope::ID>(MI.getOperand(1).getImm());
+ emitFence(FenceOrdering, FenceSSID, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return selectImplicitDef(MI, MIB, MRI);
+ case TargetOpcode::G_MERGE_VALUES:
+ return selectMergeValues(MI, MIB, MRI);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return selectUnmergeValues(MI, MIB, MRI);
+ default:
+ return false;
+ }
+}
+
+bool RISCVInstructionSelector::selectMergeValues(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_MERGE_VALUES);
+
+ // Build a F64 Pair from operands
+ if (MI.getNumOperands() != 3)
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+ if (!isRegInFprb(Dst, MRI) || !isRegInGprb(Lo, MRI) || !isRegInGprb(Hi, MRI))
+ return false;
+ MI.setDesc(TII.get(RISCV::BuildPairF64Pseudo));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
+
+bool RISCVInstructionSelector::selectUnmergeValues(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+
+ // Split F64 Src into two s32 parts
+ if (MI.getNumOperands() != 3)
+ return false;
+ Register Src = MI.getOperand(2).getReg();
+ Register Lo = MI.getOperand(0).getReg();
+ Register Hi = MI.getOperand(1).getReg();
+ if (!isRegInFprb(Src, MRI) || !isRegInGprb(Lo, MRI) || !isRegInGprb(Hi, MRI))
+ return false;
+ MI.setDesc(TII.get(RISCV::SplitF64Pseudo));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
+
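+// Rewrite a pointer-typed operand to an sXLen integer by inserting (and
+// immediately selecting) a G_PTRTOINT.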
+bool RISCVInstructionSelector::replacePtrWithInt(MachineOperand &Op,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ Register PtrReg = Op.getReg();
+ assert(MRI.getType(PtrReg).isPointer() && "Operand is not a pointer!");
+
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+ auto PtrToInt = MIB.buildPtrToInt(sXLen, PtrReg);
+ MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(RISCV::GPRBRegBankID));
+ Op.setReg(PtrToInt.getReg(0));
+ return select(*PtrToInt);
+}
+
+void RISCVInstructionSelector::preISelLower(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_PTR_ADD: {
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+
+ replacePtrWithInt(MI.getOperand(1), MIB, MRI);
+ MI.setDesc(TII.get(TargetOpcode::G_ADD));
+ MRI.setType(DstReg, sXLen);
+ break;
+ }
+ case TargetOpcode::G_PTRMASK: {
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+ replacePtrWithInt(MI.getOperand(1), MIB, MRI);
+ MI.setDesc(TII.get(TargetOpcode::G_AND));
+ MRI.setType(DstReg, sXLen);
+ }
+ }
+}
+
+void RISCVInstructionSelector::renderNegImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue();
+ MIB.addImm(-CstVal);
+}
+
+void RISCVInstructionSelector::renderImmSubFromXLen(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ uint64_t CstVal = MI.getOperand(1).getCImm()->getZExtValue();
+ MIB.addImm(STI.getXLen() - CstVal);
+}
+
+void RISCVInstructionSelector::renderImmSubFrom32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ uint64_t CstVal = MI.getOperand(1).getCImm()->getZExtValue();
+ MIB.addImm(32 - CstVal);
+}
+
+void RISCVInstructionSelector::renderImmPlus1(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue();
+ MIB.addImm(CstVal + 1);
+}
+
+void RISCVInstructionSelector::renderImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ int64_t CstVal = MI.getOperand(1).getCImm()->getSExtValue();
+ MIB.addImm(CstVal);
+}
+
+void RISCVInstructionSelector::renderTrailingZeros(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ uint64_t C = MI.getOperand(1).getCImm()->getZExtValue();
+ MIB.addImm(llvm::countr_zero(C));
+}
+
+const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank(
+ LLT Ty, const RegisterBank &RB) const {
+ if (RB.getID() == RISCV::GPRBRegBankID) {
+ if (Ty.getSizeInBits() <= 32 || (STI.is64Bit() && Ty.getSizeInBits() == 64))
+ return &RISCV::GPRRegClass;
+ }
+
+ if (RB.getID() == RISCV::FPRBRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &RISCV::FPR32RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &RISCV::FPR64RegClass;
+ }
+
+ // TODO: Non-GPR register classes.
+ return nullptr;
+}
+
+bool RISCVInstructionSelector::isRegInGprb(Register Reg,
+ MachineRegisterInfo &MRI) const {
+ return RBI.getRegBank(Reg, MRI, TRI)->getID() == RISCV::GPRBRegBankID;
+}
+
+bool RISCVInstructionSelector::isRegInFprb(Register Reg,
+ MachineRegisterInfo &MRI) const {
+ return RBI.getRegBank(Reg, MRI, TRI)->getID() == RISCV::FPRBRegBankID;
+}
+
+bool RISCVInstructionSelector::selectCopy(MachineInstr &MI,
+ MachineRegisterInfo &MRI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+
+ if (DstReg.isPhysical())
+ return true;
+
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ MRI.getType(DstReg), *RBI.getRegBank(DstReg, MRI, TRI));
+ assert(DstRC &&
+ "Register class not available for LLT, register bank combination");
+
+ // No need to constrain SrcReg. It will get constrained when
+ // we hit another of its uses or its defs.
+ // Copies do not have constraints.
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(MI.getOpcode())
+ << " operand\n");
+ return false;
+ }
+
+ MI.setDesc(TII.get(RISCV::COPY));
+ return true;
+}
+
+bool RISCVInstructionSelector::selectImplicitDef(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
+
+ const Register DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ MRI.getType(DstReg), *RBI.getRegBank(DstReg, MRI, TRI));
+
+ assert(DstRC &&
+ "Register class not available for LLT, register bank combination");
+
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(MI.getOpcode())
+ << " operand\n");
+ }
+ MI.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+ return true;
+}
+
+bool RISCVInstructionSelector::materializeImm(Register DstReg, int64_t Imm,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ if (Imm == 0) {
+ MIB.buildCopy(DstReg, Register(RISCV::X0));
+ RBI.constrainGenericRegister(DstReg, RISCV::GPRRegClass, MRI);
+ return true;
+ }
+
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, *Subtarget);
+ unsigned NumInsts = Seq.size();
+ Register SrcReg = RISCV::X0;
+
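+ // Build the materialization sequence, chaining each instruction through a
+ // temporary register; the final instruction writes DstReg.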
+ for (unsigned i = 0; i < NumInsts; i++) {
+ Register TmpReg = i < NumInsts - 1
+ ? MRI.createVirtualRegister(&RISCV::GPRRegClass)
+ : DstReg;
+ const RISCVMatInt::Inst &I = Seq[i];
+ MachineInstr *Result;
+
+ switch (I.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ // clang-format off
+ Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, {})
+ .addImm(I.getImm());
+ // clang-format on
+ break;
+ case RISCVMatInt::RegX0:
+ Result = MIB.buildInstr(I.getOpcode(), {TmpReg},
+ {SrcReg, Register(RISCV::X0)});
+ break;
+ case RISCVMatInt::RegReg:
+ Result = MIB.buildInstr(I.getOpcode(), {TmpReg}, {SrcReg, SrcReg});
+ break;
+ case RISCVMatInt::RegImm:
+ Result =
+ MIB.buildInstr(I.getOpcode(), {TmpReg}, {SrcReg}).addImm(I.getImm());
+ break;
+ }
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ SrcReg = TmpReg;
+ }
+
+ return true;
+}
+
+bool RISCVInstructionSelector::selectAddr(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI,
+ bool IsLocal,
+ bool IsExternWeak) const {
+ assert((MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE ||
+ MI.getOpcode() == TargetOpcode::G_JUMP_TABLE ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_POOL) &&
+ "Unexpected opcode");
+
+ const MachineOperand &DispMO = MI.getOperand(1);
+
+ Register DefReg = MI.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+
+ // When HWASAN is used and tagging of global variables is enabled,
+ // they should be accessed via the GOT, since the tagged address of a global
+ // is incompatible with existing code models. This also applies to non-pic
+ // mode.
+ if (TM.isPositionIndependent() || Subtarget->allowTaggedGlobals()) {
+ if (IsLocal && !Subtarget->allowTaggedGlobals()) {
+ // Use PC-relative addressing to access the symbol. This generates the
+ // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
+ // %pcrel_lo(auipc)).
+ MI.setDesc(TII.get(RISCV::PseudoLLA));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ // Use PC-relative addressing to access the GOT for this symbol, then
+ // load the address from the GOT. This generates the pattern (PseudoLGA
+ // sym), which expands to (ld (addi (auipc %got_pcrel_hi(sym))
+ // %pcrel_lo(auipc))).
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ DefTy, Align(DefTy.getSizeInBits() / 8));
+
+ auto Result = MIB.buildInstr(RISCV::PseudoLGA, {DefReg}, {})
+ .addDisp(DispMO, 0)
+ .addMemOperand(MemOp);
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ switch (TM.getCodeModel()) {
+ default: {
+ reportGISelFailure(const_cast<MachineFunction &>(*MF), *TPC, *MORE,
+ getName(), "Unsupported code model for lowering", MI);
+ return false;
+ }
+ case CodeModel::Small: {
+ // Must lie within a single 2 GiB address range and must lie between
+ // absolute addresses -2 GiB and +2 GiB. This generates the pattern (addi
+ // (lui %hi(sym)) %lo(sym)).
+ Register AddrHiDest = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ MachineInstr *AddrHi = MIB.buildInstr(RISCV::LUI, {AddrHiDest}, {})
+ .addDisp(DispMO, 0, RISCVII::MO_HI);
+
+ if (!constrainSelectedInstRegOperands(*AddrHi, TII, TRI, RBI))
+ return false;
+
+ auto Result = MIB.buildInstr(RISCV::ADDI, {DefReg}, {AddrHiDest})
+ .addDisp(DispMO, 0, RISCVII::MO_LO);
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
return true;
}
+ case CodeModel::Medium:
+ // Emit LGA/LLA instead of the sequence it expands to because the pcrel_lo
+ // relocation needs to reference a label that points to the auipc
+ // instruction itself, not the global. This cannot be done inside the
+ // instruction selector.
+ if (IsExternWeak) {
+ // An extern weak symbol may be undefined, i.e. have value 0, which may
+ // not be within 2GiB of PC, so use GOT-indirect addressing to access the
+ // symbol. This generates the pattern (PseudoLGA sym), which expands to
+ // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ DefTy, Align(DefTy.getSizeInBits() / 8));
+
+ auto Result = MIB.buildInstr(RISCV::PseudoLGA, {DefReg}, {})
+ .addDisp(DispMO, 0)
+ .addMemOperand(MemOp);
+
+ if (!constrainSelectedInstRegOperands(*Result, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Generate a sequence for accessing addresses within any 2GiB range
+ // within the address space. This generates the pattern (PseudoLLA sym),
+ // which expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
+ MI.setDesc(TII.get(RISCV::PseudoLLA));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+ }
+
+ return false;
+}
+
+bool RISCVInstructionSelector::selectSExtInreg(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ if (!STI.isRV64())
+ return false;
+
+ const MachineOperand &Size = MI.getOperand(2);
+ // Only Size == 32 (i.e. shift by 32 bits) is acceptable at this point.
+ if (!Size.isImm() || Size.getImm() != 32)
+ return false;
+
+ const MachineOperand &Src = MI.getOperand(1);
+ const MachineOperand &Dst = MI.getOperand(0);
+ // addiw rd, rs, 0 (i.e. sext.w rd, rs)
+ MachineInstr *NewMI =
+ MIB.buildInstr(RISCV::ADDIW, {Dst.getReg()}, {Src.getReg()}).addImm(0U);
+
+ if (!constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool RISCVInstructionSelector::selectSelect(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ auto &SelectMI = cast<GSelect>(MI);
+
+ Register LHS, RHS;
+ RISCVCC::CondCode CC;
+ getOperandsForBranch(SelectMI.getCondReg(), MRI, CC, LHS, RHS);
+
+ Register DstReg = SelectMI.getReg(0);
+
+ unsigned Opc = RISCV::Select_GPR_Using_CC_GPR;
+ if (RBI.getRegBank(DstReg, MRI, TRI)->getID() == RISCV::FPRBRegBankID) {
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ Opc = Size == 32 ? RISCV::Select_FPR32_Using_CC_GPR
+ : RISCV::Select_FPR64_Using_CC_GPR;
+ }
+
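+ // Emit a Select_*_Using_CC_GPR pseudo; it is expanded into branch-based
+ // control flow after instruction selection.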
+ MachineInstr *Result = MIB.buildInstr(Opc)
+ .addDef(DstReg)
+ .addReg(LHS)
+ .addReg(RHS)
+ .addImm(CC)
+ .addReg(SelectMI.getTrueReg())
+ .addReg(SelectMI.getFalseReg());
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Result, TII, TRI, RBI);
+}
+
+// Convert an FCMP predicate to one of the supported F or D instructions.
+static unsigned getFCmpOpcode(CmpInst::Predicate Pred, unsigned Size) {
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unsupported predicate");
+ case CmpInst::FCMP_OLT:
+ return Size == 32 ? RISCV::FLT_S : RISCV::FLT_D;
+ case CmpInst::FCMP_OLE:
+ return Size == 32 ? RISCV::FLE_S : RISCV::FLE_D;
+ case CmpInst::FCMP_OEQ:
+ return Size == 32 ? RISCV::FEQ_S : RISCV::FEQ_D;
+ }
+}
+
+// Try legalizing an FCMP by swapping or inverting the predicate to one that
+// is supported.
+static bool legalizeFCmpPredicate(Register &LHS, Register &RHS,
+ CmpInst::Predicate &Pred, bool &NeedInvert) {
+ auto isLegalFCmpPredicate = [](CmpInst::Predicate Pred) {
+ return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE ||
+ Pred == CmpInst::FCMP_OEQ;
+ };
+
+ assert(!isLegalFCmpPredicate(Pred) && "Predicate already legal?");
- if (selectImpl(I, *CoverageInfo))
+ CmpInst::Predicate InvPred = CmpInst::getSwappedPredicate(Pred);
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ std::swap(LHS, RHS);
return true;
+ }
+
+ InvPred = CmpInst::getInversePredicate(Pred);
+ NeedInvert = true;
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ return true;
+ }
+ InvPred = CmpInst::getSwappedPredicate(InvPred);
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ std::swap(LHS, RHS);
+ return true;
+ }
return false;
}
+// Emit a sequence of instructions to compare LHS and RHS using Pred. Return
+// the result in DstReg.
+// FIXME: Maybe we should expand this earlier.
+bool RISCVInstructionSelector::selectFPCompare(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ auto &CmpMI = cast<GFCmp>(MI);
+ CmpInst::Predicate Pred = CmpMI.getCond();
+
+ Register DstReg = CmpMI.getReg(0);
+ Register LHS = CmpMI.getLHSReg();
+ Register RHS = CmpMI.getRHSReg();
+
+ unsigned Size = MRI.getType(LHS).getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unexpected size");
+
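+ // When the comparison must be inverted, compute it into a temporary
+ // register first and XOR the result with 1 at the end.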
+ Register TmpReg = DstReg;
+
+ bool NeedInvert = false;
+ // First try swapping operands or inverting.
+ if (legalizeFCmpPredicate(LHS, RHS, Pred, NeedInvert)) {
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto Cmp = MIB.buildInstr(getFCmpOpcode(Pred, Size), {TmpReg}, {LHS, RHS});
+ if (!Cmp.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UEQ) {
+ // fcmp one LHS, RHS => (OR (FLT LHS, RHS), (FLT RHS, LHS))
+ NeedInvert = Pred == CmpInst::FCMP_UEQ;
+ auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size),
+ {&RISCV::GPRRegClass}, {LHS, RHS});
+ if (!Cmp1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size),
+ {&RISCV::GPRRegClass}, {RHS, LHS});
+ if (!Cmp2.constrainAllUses(TII, TRI, RBI))
+ return false;
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto Or =
+ MIB.buildInstr(RISCV::OR, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)});
+ if (!Or.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
+ // fcmp ord LHS, RHS => (AND (FEQ LHS, LHS), (FEQ RHS, RHS))
+ // FIXME: If LHS and RHS are the same we can use a single FEQ.
+ NeedInvert = Pred == CmpInst::FCMP_UNO;
+ auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size),
+ {&RISCV::GPRRegClass}, {LHS, LHS});
+ if (!Cmp1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size),
+ {&RISCV::GPRRegClass}, {RHS, RHS});
+ if (!Cmp2.constrainAllUses(TII, TRI, RBI))
+ return false;
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto And =
+ MIB.buildInstr(RISCV::AND, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)});
+ if (!And.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else
+ llvm_unreachable("Unhandled predicate");
+
+ // Emit an XORI to invert the result if needed.
+ if (NeedInvert) {
+ auto Xor = MIB.buildInstr(RISCV::XORI, {DstReg}, {TmpReg}).addImm(1);
+ if (!Xor.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
+ MachineInstr &MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ "Unexpected opcode");
+ // Find the intrinsic ID.
+ unsigned IntrinID = cast<GIntrinsic>(MI).getIntrinsicID();
+
+ // Select the instruction.
+ switch (IntrinID) {
+ default:
+ return false;
+ case Intrinsic::trap:
+ MIB.buildInstr(RISCV::UNIMP, {}, {});
+ break;
+ case Intrinsic::debugtrap:
+ MIB.buildInstr(RISCV::EBREAK, {}, {});
+ break;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
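+// Emit the FENCE (or compiler barrier) required by the given atomic ordering
+// and synchronization scope.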
+void RISCVInstructionSelector::emitFence(AtomicOrdering FenceOrdering,
+ SyncScope::ID FenceSSID,
+ MachineIRBuilder &MIB) const {
+ if (STI.hasStdExtZtso()) {
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+ FenceSSID == SyncScope::System) {
+ // fence rw, rw
+ MIB.buildInstr(RISCV::FENCE, {}, {})
+ .addImm(RISCVFenceField::R | RISCVFenceField::W)
+ .addImm(RISCVFenceField::R | RISCVFenceField::W);
+ return;
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ MIB.buildInstr(TargetOpcode::MEMBARRIER, {}, {});
+ return;
+ }
+
+ // singlethread fences only synchronize with signal handlers on the same
+ // thread and thus only need to preserve instruction order, not actually
+ // enforce memory ordering.
+ if (FenceSSID == SyncScope::SingleThread) {
+ MIB.buildInstr(TargetOpcode::MEMBARRIER, {}, {});
+ return;
+ }
+
+ // Refer to Table A.6 in the version 2.3 draft of the RISC-V Instruction Set
+ // Manual: Volume I.
+ unsigned Pred, Succ;
+ switch (FenceOrdering) {
+ default:
+ llvm_unreachable("Unexpected ordering");
+ case AtomicOrdering::AcquireRelease:
+ // fence acq_rel -> fence.tso
+ MIB.buildInstr(RISCV::FENCE_TSO, {}, {});
+ return;
+ case AtomicOrdering::Acquire:
+ // fence acquire -> fence r, rw
+ Pred = RISCVFenceField::R;
+ Succ = RISCVFenceField::R | RISCVFenceField::W;
+ break;
+ case AtomicOrdering::Release:
+ // fence release -> fence rw, w
+ Pred = RISCVFenceField::R | RISCVFenceField::W;
+ Succ = RISCVFenceField::W;
+ break;
+ case AtomicOrdering::SequentiallyConsistent:
+ // fence seq_cst -> fence rw, rw
+ Pred = RISCVFenceField::R | RISCVFenceField::W;
+ Succ = RISCVFenceField::R | RISCVFenceField::W;
+ break;
+ }
+ MIB.buildInstr(RISCV::FENCE, {}, {}).addImm(Pred).addImm(Succ);
+}
+
namespace llvm {
InstructionSelector *
createRISCVInstructionSelector(const RISCVTargetMachine &TM,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 3f829cc2e677..8f03a7ac41d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -11,23 +11,452 @@
//===----------------------------------------------------------------------===//
#include "RISCVLegalizerInfo.h"
+#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;
+using namespace LegalityPredicates;
+using namespace LegalizeMutations;
-RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) {
- const unsigned XLen = ST.getXLen();
- const LLT XLenLLT = LLT::scalar(XLen);
+// Is this type supported by the scalar FP arithmetic operations available on
+// the current subtarget?
+static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx,
+ const RISCVSubtarget &ST) {
+ return [=, &ST](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isScalar() &&
+ ((ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) ||
+ (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64));
+ };
+}
+
+RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
+ : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
+ const LLT sDoubleXLen = LLT::scalar(2 * XLen);
+ const LLT p0 = LLT::pointer(0, XLen);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
using namespace TargetOpcode;
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
- .legalFor({XLenLLT})
- .clampScalar(0, XLenLLT, XLenLLT);
+ .legalFor({s32, sXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ getActionDefinitionsBuilder(
+ {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
+
+ getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
+
+ auto &ShiftActions = getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL});
+ if (ST.is64Bit())
+ ShiftActions.customFor({{s32, s32}});
+ ShiftActions.legalFor({{s32, s32}, {s32, sXLen}, {sXLen, sXLen}})
+ .widenScalarToNextPow2(0)
+ .clampScalar(1, s32, sXLen)
+ .clampScalar(0, s32, sXLen)
+ .minScalarSameAs(1, 0);
+
+ if (ST.is64Bit()) {
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
+ .legalFor({{sXLen, s32}})
+ .maxScalar(0, sXLen);
+
+ getActionDefinitionsBuilder(G_SEXT_INREG)
+ .customFor({sXLen})
+ .maxScalar(0, sXLen)
+ .lower();
+ } else {
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}).maxScalar(0, sXLen);
+
+ getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower();
+ }
+
+ // Merge/Unmerge
+ for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
+ unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
+ unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
+ auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
+ if (XLen == 32 && ST.hasStdExtD()) {
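+ // On RV32 with the D extension, an s64 may be legally merged from (and
+ // unmerged into) two s32 halves; this is used for f64 values held in GPR
+ // pairs.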
+ LLT IdxZeroTy = Op == G_MERGE_VALUES ? s64 : s32;
+ LLT IdxOneTy = Op == G_MERGE_VALUES ? s32 : s64;
+ MergeUnmergeActions.legalFor({IdxZeroTy, IdxOneTy});
+ }
+ MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
+ .widenScalarToNextPow2(BigTyIdx, XLen)
+ .clampScalar(LitTyIdx, sXLen, sXLen)
+ .clampScalar(BigTyIdx, sXLen, sXLen);
+ }
+
+ getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
+
+ auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR});
+ if (ST.hasStdExtZbb()) {
+ RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}});
+ // Widen s32 rotate amount to s64 so SDAG patterns will match.
+ if (ST.is64Bit())
+ RotateActions.widenScalarIf(all(typeIs(0, s32), typeIs(1, s32)),
+ changeTo(1, sXLen));
+ }
+ RotateActions.lower();
+
+ getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
+
+ auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
+ if (ST.hasStdExtZbb())
+ BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
+ else
+ BSWAPActions.maxScalar(0, sXLen).lower();
+
+ auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
+ auto &CountZerosUndefActions =
+ getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
+ if (ST.hasStdExtZbb()) {
+ CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}})
+ .clampScalar(0, s32, sXLen)
+ .widenScalarToNextPow2(0)
+ .scalarSameSizeAs(1, 0);
+ } else {
+ CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
+ CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
+ }
+ CountZerosUndefActions.lower();
+
+ auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
+ if (ST.hasStdExtZbb()) {
+ CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}})
+ .clampScalar(0, s32, sXLen)
+ .widenScalarToNextPow2(0)
+ .scalarSameSizeAs(1, 0);
+ } else {
+ CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
+ }
+
+ getActionDefinitionsBuilder({G_CONSTANT, G_IMPLICIT_DEF})
+ .legalFor({s32, sXLen, p0})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalFor({{sXLen, sXLen}, {sXLen, p0}})
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, sXLen, sXLen)
+ .clampScalar(0, sXLen, sXLen);
+
+ auto &SelectActions = getActionDefinitionsBuilder(G_SELECT).legalFor(
+ {{s32, sXLen}, {p0, sXLen}});
+ if (XLen == 64 || ST.hasStdExtD())
+ SelectActions.legalFor({{s64, sXLen}});
+ SelectActions.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
+ .clampScalar(1, sXLen, sXLen);
+
+ auto &LoadStoreActions =
+ getActionDefinitionsBuilder({G_LOAD, G_STORE})
+ .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+ {s32, p0, s16, 16},
+ {s32, p0, s32, 32},
+ {p0, p0, sXLen, XLen}});
+ auto &ExtLoadActions =
+ getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
+ .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
+ if (XLen == 64) {
+ LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
+ {s64, p0, s16, 16},
+ {s64, p0, s32, 32},
+ {s64, p0, s64, 64}});
+ ExtLoadActions.legalForTypesWithMemDesc(
+ {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}});
+ } else if (ST.hasStdExtD()) {
+ LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
+ }
+ LoadStoreActions.clampScalar(0, s32, sXLen).lower();
+ ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower();
+
+ getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
+
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalFor({{sXLen, p0}})
+ .clampScalar(0, sXLen, sXLen);
+
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalFor({{p0, sXLen}})
+ .clampScalar(1, sXLen, sXLen);
+
+ getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
+
+ getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, sXLen}});
+
+ getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
+
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({p0, sXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, sXLen, sXLen);
+
+ getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
+ .legalFor({p0});
+
+ if (ST.hasStdExtM() || ST.hasStdExtZmmul()) {
+ getActionDefinitionsBuilder(G_MUL)
+ .legalFor({s32, sXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ // clang-format off
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+ .legalFor({sXLen})
+ .lower();
+ // clang-format on
+
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
+ } else {
+ getActionDefinitionsBuilder(G_MUL)
+ .libcallFor({sXLen, sDoubleXLen})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, sXLen, sDoubleXLen);
+
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
+
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO})
+ .minScalar(0, sXLen)
+ // Widen sXLen to sDoubleXLen so we can use a single libcall to get
+ // the low bits for the mul result and high bits to do the overflow
+ // check.
+ .widenScalarIf(typeIs(0, sXLen),
+ LegalizeMutations::changeTo(0, sDoubleXLen))
+ .lower();
+ }
+
+ if (ST.hasStdExtM()) {
+ getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
+ .legalFor({s32, sXLen})
+ .libcallFor({sDoubleXLen})
+ .clampScalar(0, s32, sDoubleXLen)
+ .widenScalarToNextPow2(0);
+ } else {
+ getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
+ .libcallFor({sXLen, sDoubleXLen})
+ .clampScalar(0, sXLen, sDoubleXLen)
+ .widenScalarToNextPow2(0);
+ }
+
+ auto &AbsActions = getActionDefinitionsBuilder(G_ABS);
+ if (ST.hasStdExtZbb())
+ AbsActions.customFor({s32, sXLen}).minScalar(0, sXLen);
+ AbsActions.lower();
+
+ auto &MinMaxActions =
+ getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN});
+ if (ST.hasStdExtZbb())
+ MinMaxActions.legalFor({sXLen}).minScalar(0, sXLen);
+ MinMaxActions.lower();
+
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+
+ // FP Operations
+
+ getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
+ G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
+ .legalIf(typeIsScalarFPArith(0, ST));
+
+ getActionDefinitionsBuilder(G_FCOPYSIGN)
+ .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));
+
+ getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
+ [=, &ST](const LegalityQuery &Query) -> bool {
+ return (ST.hasStdExtD() && typeIs(0, s32)(Query) &&
+ typeIs(1, s64)(Query));
+ });
+ getActionDefinitionsBuilder(G_FPEXT).legalIf(
+ [=, &ST](const LegalityQuery &Query) -> bool {
+ return (ST.hasStdExtD() && typeIs(0, s64)(Query) &&
+ typeIs(1, s32)(Query));
+ });
+
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
+ .clampScalar(0, sXLen, sXLen);
+
+ // TODO: Support vector version of G_IS_FPCLASS.
+ getActionDefinitionsBuilder(G_IS_FPCLASS)
+ .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));
+
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalIf(typeIsScalarFPArith(0, ST))
+ .lowerFor({s32, s64});
+
+ getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+ .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sXLen);
+
+ getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+ .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, s32, sXLen);
+
+ // FIXME: We can do custom inline expansion like SelectionDAG.
+ // FIXME: Legal with Zfa.
+ getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
+ .libcallFor({s32, s64});
+
+ getActionDefinitionsBuilder(G_VASTART).customFor({p0});
+
+ // va_list must be a pointer, but most sized types are pretty easy to handle
+ // as the destination.
+ getActionDefinitionsBuilder(G_VAARG)
+ // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
+ // outside the [s32, sXLen] range.
+ .clampScalar(0, s32, sXLen)
+ .lowerForCartesianProduct({s32, sXLen, p0}, {p0});
getLegacyLegalizerInfo().computeTables();
}
+
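+// Map an LLT to an equivalent IR type so the DataLayout can be queried for
+// its ABI alignment.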
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
+ switch (IntrinsicID) {
+ default:
+ return false;
+ case Intrinsic::vacopy: {
+ // vacopy arguments must be legal because of the intrinsic signature.
+ // No need to check here.
+
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ MachineFunction &MF = *MI.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+
+ Register DstLst = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(DstLst);
+
+ // Load the source va_list
+ Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
+ auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
+
+ // Store the result in the destination va_list
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
+ MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
+
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+}
+
+bool RISCVLegalizerInfo::legalizeShlAshrLshr(
+ MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_SHL);
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
+ // imported patterns can select it later. Either way, it will be legal.
+ Register AmtReg = MI.getOperand(2).getReg();
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
+ if (!VRegAndVal)
+ return true;
+ // Check the shift amount is in range for an immediate form.
+ uint64_t Amount = VRegAndVal->Value.getZExtValue();
+ if (Amount > 31)
+ return true; // This will have to remain a register variant.
+ auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(ExtCst.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
+ MachineIRBuilder &MIRBuilder) const {
+ // Store the address of the VarArgsFrameIndex slot into the memory location
+ // pointed to by the va_list operand.
+ assert(MI.getOpcode() == TargetOpcode::G_VASTART);
+ MachineFunction *MF = MI.getParent()->getParent();
+ RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
+ int FI = FuncInfo->getVarArgsFrameIndex();
+ LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
+ auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
+ assert(MI.hasOneMemOperand());
+ MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
+ *MI.memoperands()[0]);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ GISelChangeObserver &Observer = Helper.Observer;
+ switch (MI.getOpcode()) {
+ default:
+ // No idea what to do.
+ return false;
+ case TargetOpcode::G_ABS:
+ return Helper.lowerAbsToMaxNeg(MI);
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ return legalizeShlAshrLshr(MI, MIRBuilder, Observer);
+ case TargetOpcode::G_SEXT_INREG: {
+ // Source size of 32 is sext.w.
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+ if (SizeInBits == 32)
+ return true;
+
+ return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
+ LegalizerHelper::Legalized;
+ }
+ case TargetOpcode::G_IS_FPCLASS: {
+ Register GISFPCLASS = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const MachineOperand &ImmOp = MI.getOperand(2);
+ MachineIRBuilder MIB(MI);
+
+ // Convert the LLVM IR floating-point class mask to the RISC-V FCLASS
+ // encoding by rotating the 10-bit immediate right by two bits.
+ APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
+ auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
+ auto ConstZero = MIB.buildConstant(sXLen, 0);
+
+ auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
+ auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
+ MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_VASTART:
+ return legalizeVAStart(MI, MIRBuilder);
+ }
+
+ llvm_unreachable("expected switch to return");
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 960410ead62c..48c36976501f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -17,12 +17,29 @@
namespace llvm {
+class GISelChangeObserver;
+class MachineIRBuilder;
class RISCVSubtarget;
/// This class provides the information for the target register banks.
class RISCVLegalizerInfo : public LegalizerInfo {
+ const RISCVSubtarget &STI;
+ const unsigned XLen;
+ const LLT sXLen;
+
public:
RISCVLegalizerInfo(const RISCVSubtarget &ST);
+
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+
+ bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const override;
+
+private:
+ bool legalizeShlAshrLshr(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
+
+ bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..be77979512e0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVO0PreLegalizerCombiner.cpp
@@ -0,0 +1,155 @@
+//=== RISCVO0PreLegalizerCombiner.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define GET_GICOMBINER_DEPS
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
+#define DEBUG_TYPE "riscv-O0-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+#define GET_GICOMBINER_TYPES
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class RISCVO0PreLegalizerCombinerImpl : public Combiner {
+protected:
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
+ const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig;
+ const RISCVSubtarget &STI;
+
+public:
+ RISCVO0PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI);
+
+ static const char *getName() { return "RISCVO0PreLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const override;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+RISCVO0PreLegalizerCombinerImpl::RISCVO0PreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVO0PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB), RuleConfig(RuleConfig),
+ STI(STI),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "RISCVGenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+// Pass boilerplate
+// ================
+
+class RISCVO0PreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVO0PreLegalizerCombiner();
+
+ StringRef getPassName() const override {
+ return "RISCVO0PreLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ RISCVO0PreLegalizerCombinerImplRuleConfig RuleConfig;
+};
+} // end anonymous namespace
+
+void RISCVO0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+RISCVO0PreLegalizerCombiner::RISCVO0PreLegalizerCombiner()
+ : MachineFunctionPass(ID) {
+ initializeRISCVO0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+}
+
+bool RISCVO0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto &TPC = getAnalysis<TargetPassConfig>();
+
+ const Function &F = MF.getFunction();
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
+ F.hasOptSize(), F.hasMinSize());
+ RISCVO0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
+ /*CSEInfo*/ nullptr, RuleConfig, ST);
+ return Impl.combineMachineInstrs();
+}
+
+char RISCVO0PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVO0PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_END(RISCVO0PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createRISCVO0PreLegalizerCombiner() {
+ return new RISCVO0PreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
new file mode 100644
index 000000000000..9c28944abc76
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerCombiner.cpp
@@ -0,0 +1,173 @@
+//=== RISCVPostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Post-legalization combines on generic MachineInstrs.
+///
+/// The combines here must preserve instruction legality.
+///
+/// Combines which don't rely on instruction legality should go in the
+/// RISCVPreLegalizerCombiner.
+///
+//===----------------------------------------------------------------------===//
+
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define GET_GICOMBINER_DEPS
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
+#define DEBUG_TYPE "riscv-postlegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class RISCVPostLegalizerCombinerImpl : public Combiner {
+protected:
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
+ const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig;
+ const RISCVSubtarget &STI;
+
+public:
+ RISCVPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
+
+ static const char *getName() { return "RISCVPostLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const override;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+RISCVPostLegalizerCombinerImpl::RISCVPostLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "RISCVGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+class RISCVPostLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVPostLegalizerCombiner();
+
+ StringRef getPassName() const override {
+ return "RISCVPostLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ RISCVPostLegalizerCombinerImplRuleConfig RuleConfig;
+};
+} // end anonymous namespace
+
+void RISCVPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+RISCVPostLegalizerCombiner::RISCVPostLegalizerCombiner()
+ : MachineFunctionPass(ID) {
+ initializeRISCVPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+}
+
+bool RISCVPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized) &&
+ "Expected a legalized function?");
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
+
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
+
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ RISCVPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
+ RuleConfig, ST, MDT, LI);
+ return Impl.combineMachineInstrs();
+}
+
+char RISCVPostLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V MachineInstrs after legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(RISCVPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V MachineInstrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createRISCVPostLegalizerCombiner() {
+ return new RISCVPostLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..9a35fffae058
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVPreLegalizerCombiner.cpp
@@ -0,0 +1,169 @@
+//=== RISCVPreLegalizerCombiner.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define GET_GICOMBINER_DEPS
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
+#define DEBUG_TYPE "riscv-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class RISCVPreLegalizerCombinerImpl : public Combiner {
+protected:
+ // TODO: Make CombinerHelper methods const.
+ mutable CombinerHelper Helper;
+ const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig;
+ const RISCVSubtarget &STI;
+
+public:
+ RISCVPreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI);
+
+  static const char *getName() { return "RISCVPreLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const override;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+RISCVPreLegalizerCombinerImpl::RISCVPreLegalizerCombinerImpl(
+ MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
+ GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
+ const RISCVPreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const RISCVSubtarget &STI, MachineDominatorTree *MDT,
+ const LegalizerInfo *LI)
+ : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
+ RuleConfig(RuleConfig), STI(STI),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "RISCVGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+// Pass boilerplate
+// ================
+
+class RISCVPreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVPreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "RISCVPreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ RISCVPreLegalizerCombinerImplRuleConfig RuleConfig;
+};
+} // end anonymous namespace
+
+void RISCVPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+RISCVPreLegalizerCombiner::RISCVPreLegalizerCombiner()
+ : MachineFunctionPass(ID) {
+ initializeRISCVPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+
+ if (!RuleConfig.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+}
+
+bool RISCVPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto &TPC = getAnalysis<TargetPassConfig>();
+
+ // Enable CSE.
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
+
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ const auto *LI = ST.getLegalizerInfo();
+
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+ CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
+ F.hasMinSize());
+ RISCVPreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo, RuleConfig,
+ ST, MDT, LI);
+ return Impl.combineMachineInstrs();
+}
+
+char RISCVPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(RISCVPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_END(RISCVPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine RISC-V machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createRISCVPreLegalizerCombiner() {
+ return new RISCVPreLegalizerCombiner();
+}
+} // end namespace llvm
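The three combiner files above only define the passes; they still have to be scheduled from the target's pass configuration, which is not part of the hunks shown here. The following is only a sketch of how GlobalISel targets typically do that wiring, assuming RISC-V uses the same TargetPassConfig hooks (addPreLegalizeMachineIR / addPreRegBankSelect) as other backends; the real registration lives elsewhere in this commit (RISCVTargetMachine.cpp).

    // Sketch only: hook names and the opt-level split are assumptions based on
    // how other GlobalISel targets register their combiners, not taken from
    // the hunks above.
    void RISCVPassConfig::addPreLegalizeMachineIR() {
      if (getOptLevel() == CodeGenOptLevel::None)
        addPass(createRISCVO0PreLegalizerCombiner());
      else
        addPass(createRISCVPreLegalizerCombiner());
    }
    void RISCVPassConfig::addPreRegBankSelect() {
      if (getOptLevel() != CodeGenOptLevel::None)
        addPass(createRISCVPostLegalizerCombiner());
    }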
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 9b601902ad20..cf0ff63a5e51 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -12,6 +12,7 @@
#include "RISCVRegisterBankInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
@@ -20,7 +21,448 @@
#define GET_TARGET_REGBANK_IMPL
#include "RISCVGenRegisterBank.inc"
+namespace llvm {
+namespace RISCV {
+
+const RegisterBankInfo::PartialMapping PartMappings[] = {
+ {0, 32, GPRBRegBank},
+ {0, 64, GPRBRegBank},
+ {0, 32, FPRBRegBank},
+ {0, 64, FPRBRegBank},
+};
+
+enum PartialMappingIdx {
+ PMI_GPRB32 = 0,
+ PMI_GPRB64 = 1,
+ PMI_FPRB32 = 2,
+ PMI_FPRB64 = 3,
+};
+
+const RegisterBankInfo::ValueMapping ValueMappings[] = {
+ // Invalid value mapping.
+ {nullptr, 0},
+ // Maximum 3 GPR operands; 32 bit.
+ {&PartMappings[PMI_GPRB32], 1},
+ {&PartMappings[PMI_GPRB32], 1},
+ {&PartMappings[PMI_GPRB32], 1},
+ // Maximum 3 GPR operands; 64 bit.
+ {&PartMappings[PMI_GPRB64], 1},
+ {&PartMappings[PMI_GPRB64], 1},
+ {&PartMappings[PMI_GPRB64], 1},
+ // Maximum 3 FPR operands; 32 bit.
+ {&PartMappings[PMI_FPRB32], 1},
+ {&PartMappings[PMI_FPRB32], 1},
+ {&PartMappings[PMI_FPRB32], 1},
+ // Maximum 3 FPR operands; 64 bit.
+ {&PartMappings[PMI_FPRB64], 1},
+ {&PartMappings[PMI_FPRB64], 1},
+ {&PartMappings[PMI_FPRB64], 1},
+};
+
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPRB32Idx = 1,
+ GPRB64Idx = 4,
+ FPRB32Idx = 7,
+ FPRB64Idx = 10,
+};
+} // namespace RISCV
+} // namespace llvm
+
using namespace llvm;
RISCVRegisterBankInfo::RISCVRegisterBankInfo(unsigned HwMode)
: RISCVGenRegisterBankInfo(HwMode) {}
+
+const RegisterBank &
+RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const {
+ switch (RC.getID()) {
+ default:
+ llvm_unreachable("Register class not supported");
+ case RISCV::GPRRegClassID:
+ case RISCV::GPRF16RegClassID:
+ case RISCV::GPRF32RegClassID:
+ case RISCV::GPRNoX0RegClassID:
+ case RISCV::GPRNoX0X2RegClassID:
+ case RISCV::GPRJALRRegClassID:
+ case RISCV::GPRTCRegClassID:
+ case RISCV::GPRC_and_GPRTCRegClassID:
+ case RISCV::GPRCRegClassID:
+ case RISCV::GPRC_and_SR07RegClassID:
+ case RISCV::SR07RegClassID:
+ case RISCV::SPRegClassID:
+ case RISCV::GPRX0RegClassID:
+ return getRegBank(RISCV::GPRBRegBankID);
+ case RISCV::FPR64RegClassID:
+ case RISCV::FPR16RegClassID:
+ case RISCV::FPR32RegClassID:
+ case RISCV::FPR64CRegClassID:
+ case RISCV::FPR32CRegClassID:
+ return getRegBank(RISCV::FPRBRegBankID);
+ case RISCV::VMRegClassID:
+ case RISCV::VRRegClassID:
+ case RISCV::VRNoV0RegClassID:
+ case RISCV::VRM2RegClassID:
+ case RISCV::VRM2NoV0RegClassID:
+ case RISCV::VRM4RegClassID:
+ case RISCV::VRM4NoV0RegClassID:
+ case RISCV::VMV0RegClassID:
+ case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID:
+ case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID:
+ case RISCV::VRM8RegClassID:
+ case RISCV::VRM8NoV0RegClassID:
+ case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID:
+ return getRegBank(RISCV::VRBRegBankID);
+ }
+}
+
+static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
+ assert(Size == 32 || Size == 64);
+ unsigned Idx = Size == 64 ? RISCV::FPRB64Idx : RISCV::FPRB32Idx;
+ return &RISCV::ValueMappings[Idx];
+}
+
+/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
+/// having only floating-point operands.
+/// FIXME: This is copied from the AArch64 target; it should be refactored into
+/// GlobalISel/Utils.cpp.
+static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FCOPYSIGN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUM:
+ return true;
+ }
+ return false;
+}
+
+// TODO: Make this more like AArch64?
+bool RISCVRegisterBankInfo::hasFPConstraints(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ if (isPreISelGenericFloatingPointOpcode(MI.getOpcode()))
+ return true;
+
+ // If we have a copy instruction, we could be feeding floating point
+ // instructions.
+ if (MI.getOpcode() != TargetOpcode::COPY)
+ return false;
+
+ return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) == &RISCV::FPRBRegBank;
+}
+
+bool RISCVRegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ return true;
+ default:
+ break;
+ }
+
+ return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool RISCVRegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ return true;
+ default:
+ break;
+ }
+
+ return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool RISCVRegisterBankInfo::anyUseOnlyUseFP(
+ Register Def, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ return any_of(
+ MRI.use_nodbg_instructions(Def),
+ [&](const MachineInstr &UseMI) { return onlyUsesFP(UseMI, MRI, TRI); });
+}
+
+const RegisterBankInfo::InstructionMapping &
+RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const unsigned Opc = MI.getOpcode();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
+ const InstructionMapping &Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+
+ unsigned GPRSize = getMaximumSize(RISCV::GPRBRegBankID);
+ assert((GPRSize == 32 || GPRSize == 64) && "Unexpected GPR size");
+
+ unsigned NumOperands = MI.getNumOperands();
+ const ValueMapping *GPRValueMapping =
+ &RISCV::ValueMappings[GPRSize == 64 ? RISCV::GPRB64Idx
+ : RISCV::GPRB32Idx];
+
+ switch (Opc) {
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_UMULH:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping,
+ NumOperands);
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1,
+ getFPValueMapping(Ty.getSizeInBits()),
+ NumOperands);
+ }
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Register Dst = MI.getOperand(0).getReg();
+ auto Mapping = GPRValueMapping;
+ // FIXME: May need to do a better job determining when to use FPRB.
+ // For example, the look through COPY case:
+ // %0:_(s32) = G_IMPLICIT_DEF
+ // %1:_(s32) = COPY %0
+ // $f10_d = COPY %1(s32)
+ if (anyUseOnlyUseFP(Dst, MRI, TRI))
+ Mapping = getFPValueMapping(MRI.getType(Dst).getSizeInBits());
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, Mapping,
+ NumOperands);
+ }
+ }
+
+ SmallVector<const ValueMapping *, 4> OpdsMapping(NumOperands);
+
+ switch (Opc) {
+ case TargetOpcode::G_LOAD: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = GPRValueMapping;
+ // Use FPR64 for s64 loads on rv32.
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+    // instead of blindly mapping every scalar to GPR.
+ if (anyUseOnlyUseFP(MI.getOperand(0).getReg(), MRI, TRI))
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+
+ break;
+ }
+ case TargetOpcode::G_STORE: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = GPRValueMapping;
+ // Use FPR64 for s64 stores on rv32.
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+
+ MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
+ if (onlyDefinesFP(*DefMI, MRI, TRI))
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ // Try to minimize the number of copies. If we have more floating point
+ // constrained values than not, then we'll put everything on FPR. Otherwise,
+ // everything has to be on GPR.
+ unsigned NumFP = 0;
+
+ // Use FPR64 for s64 select on rv32.
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ NumFP = 3;
+ } else {
+ // Check if the uses of the result always produce floating point values.
+ //
+ // For example:
+ //
+ // %z = G_SELECT %cond %x %y
+ // fpr = G_FOO %z ...
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI);
+ }))
+ ++NumFP;
+
+ // Check if the defs of the source values always produce floating point
+ // values.
+ //
+ // For example:
+ //
+ // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+ // %z = G_SELECT %cond %x %y
+ //
+ // Also check whether or not the sources have already been decided to be
+ // FPR. Keep track of this.
+ //
+ // This doesn't check the condition, since the condition is always an
+ // integer.
+ for (unsigned Idx = 2; Idx < 4; ++Idx) {
+ Register VReg = MI.getOperand(Idx).getReg();
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (getRegBank(VReg, MRI, TRI) == &RISCV::FPRBRegBank ||
+ onlyDefinesFP(*DefMI, MRI, TRI))
+ ++NumFP;
+ }
+ }
+
+ // Condition operand is always GPR.
+ OpdsMapping[1] = GPRValueMapping;
+
+ const ValueMapping *Mapping = GPRValueMapping;
+ if (NumFP >= 2)
+ Mapping = getFPValueMapping(Ty.getSizeInBits());
+
+ OpdsMapping[0] = OpdsMapping[2] = OpdsMapping[3] = Mapping;
+ break;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case RISCV::G_FCLASS: {
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = getFPValueMapping(Ty.getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ OpdsMapping[1] = GPRValueMapping;
+ break;
+ }
+ case TargetOpcode::G_FCMP: {
+ LLT Ty = MRI.getType(MI.getOperand(2).getReg());
+
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[2] = OpdsMapping[3] = getFPValueMapping(Size);
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ // Use FPR64 for s64 merge on rv32.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = getFPValueMapping(Ty.getSizeInBits());
+ OpdsMapping[1] = GPRValueMapping;
+ OpdsMapping[2] = GPRValueMapping;
+ }
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ // Use FPR64 for s64 unmerge on rv32.
+ LLT Ty = MRI.getType(MI.getOperand(2).getReg());
+ if (GPRSize == 32 && Ty.getSizeInBits() == 64) {
+ assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
+ OpdsMapping[0] = GPRValueMapping;
+ OpdsMapping[1] = GPRValueMapping;
+ OpdsMapping[2] = getFPValueMapping(Ty.getSizeInBits());
+ }
+ break;
+ }
+ default:
+ // By default map all scalars to GPR.
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ auto &MO = MI.getOperand(Idx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ LLT Ty = MRI.getType(MO.getReg());
+ if (!Ty.isValid())
+ continue;
+
+ if (isPreISelGenericFloatingPointOpcode(Opc))
+ OpdsMapping[Idx] = getFPValueMapping(Ty.getSizeInBits());
+ else
+ OpdsMapping[Idx] = GPRValueMapping;
+ }
+ break;
+ }
+
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1,
+ getOperandsMapping(OpdsMapping), NumOperands);
+}
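To make the new PartMappings/ValueMappings tables easier to follow: each register-bank/size pair owns three identical consecutive ValueMapping entries, so an instruction with up to three operands of that kind can reuse a single pointer into the block. A sketch (not part of the patch) of what that means for a 64-bit G_FADD:

    // All three operands of a 64-bit G_FADD resolve to the FPRB 64-bit entry,
    // which is exactly what getFPValueMapping(64) returns, so the mapping
    // built in getInstrMapping() is effectively:
    //   const ValueMapping *FPR64Map = &RISCV::ValueMappings[RISCV::FPRB64Idx];
    //   return getInstructionMapping(DefaultMappingID, /*Cost=*/1, FPR64Map,
    //                                /*NumOperands=*/3);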
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
index ee6d4db27880..abd0837395f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
@@ -32,6 +32,29 @@ protected:
class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo {
public:
RISCVRegisterBankInfo(unsigned HwMode);
+
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const override;
+
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
+
+private:
+ /// \returns true if \p MI only uses and defines FPRs.
+ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// \returns true if \p MI only uses FPRs.
+ bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+  /// \returns true if any use of \p Def only uses FPRs.
+ bool anyUseOnlyUseFP(Register Def, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// \returns true if \p MI only defines FPRs.
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
index b49f8259e382..b1ef815fe373 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
@@ -10,4 +10,11 @@
//===----------------------------------------------------------------------===//
/// General Purpose Registers: X.
-def GPRRegBank : RegisterBank<"GPRB", [GPR]>;
+def GPRBRegBank : RegisterBank<"GPRB", [GPR]>;
+
+/// Floating Point Registers: F.
+def FPRBRegBank : RegisterBank<"FPRB", [FPR64]>;
+
+/// Vector Registers : V.
+def VRBRegBank : RegisterBank<"VRB", [VRM8]>;
+
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 8f8684e30b3a..aba2511959af 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -14,7 +14,6 @@
#include "RISCVCustomBehaviour.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCV.h"
-#include "RISCVInstrInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
@@ -64,9 +63,9 @@ uint8_t RISCVLMULInstrument::getLMUL() const {
.Case("M2", 0b001)
.Case("M4", 0b010)
.Case("M8", 0b011)
- .Case("MF2", 0b101)
+ .Case("MF2", 0b111)
.Case("MF4", 0b110)
- .Case("MF8", 0b111);
+ .Case("MF8", 0b101);
}
const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW";
@@ -186,13 +185,46 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
return SmallVector<UniqueInstrument>();
}
+static std::pair<uint8_t, uint8_t>
+getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
+ uint8_t SEW) {
+ uint8_t EEW;
+ switch (Opcode) {
+ case RISCV::VLM_V:
+ case RISCV::VSM_V:
+ case RISCV::VLE8_V:
+ case RISCV::VSE8_V:
+ EEW = 8;
+ break;
+ case RISCV::VLE16_V:
+ case RISCV::VSE16_V:
+ EEW = 16;
+ break;
+ case RISCV::VLE32_V:
+ case RISCV::VSE32_V:
+ EEW = 32;
+ break;
+ case RISCV::VLE64_V:
+ case RISCV::VSE64_V:
+ EEW = 64;
+ break;
+ default:
+ llvm_unreachable("Opcode is not a vector unit stride load nor store");
+ }
+
+ auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW);
+  if (!EMUL)
+ llvm_unreachable("Invalid SEW or LMUL for new ratio");
+ return std::make_pair(EEW, *EMUL);
+}
+
unsigned RISCVInstrumentManager::getSchedClassID(
const MCInstrInfo &MCII, const MCInst &MCI,
const llvm::SmallVector<Instrument *> &IVec) const {
unsigned short Opcode = MCI.getOpcode();
unsigned SchedClassID = MCII.get(Opcode).getSchedClass();
- // Unpack all possible RISCV instruments from IVec.
+ // Unpack all possible RISC-V instruments from IVec.
RISCVLMULInstrument *LI = nullptr;
RISCVSEWInstrument *SI = nullptr;
for (auto &I : IVec) {
@@ -215,12 +247,23 @@ unsigned RISCVInstrumentManager::getSchedClassID(
// or (Opcode, LMUL, SEW) if SEW instrument is active, and depends on LMUL
// and SEW, or (Opcode, LMUL, 0) if does not depend on SEW.
uint8_t SEW = SI ? SI->getSEW() : 0;
- // Check if it depends on LMUL and SEW
- const RISCVVInversePseudosTable::PseudoInfo *RVV =
- RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
- // Check if it depends only on LMUL
- if (!RVV)
- RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+
+ const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr;
+ if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V ||
+ Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V ||
+ Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V ||
+ Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V ||
+ Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) {
+ RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL);
+ auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW);
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW);
+ } else {
+ // Check if it depends on LMUL and SEW
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
+ // Check if it depends only on LMUL
+ if (!RVV)
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+ }
// Not a RVV instr
if (!RVV) {
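As an illustration of the unit-stride special case added above (illustrative values, not taken from the diff): for VLE64_V instrumented with LMUL=MF2 and SEW=E32, the SEW:LMUL ratio is 64, so the effective element width is EEW=64 and the ratio-preserving EMUL is M1; the scheduling class is then resolved as if the instruction were that EEW/EMUL pseudo.

    // Sketch of the resulting lookup for VLE64_V, LMUL=MF2, SEW=E32:
    //   getEEWAndEMULForUnitStrideLoadStore(RISCV::VLE64_V, LMUL_F2, 32)
    //     -> {EEW = 64, EMUL = LMUL_1}
    //   RVV = RISCVVInversePseudosTable::getBaseInfo(RISCV::VLE64_V,
    //                                                RISCVII::LMUL_1,
    //                                                /*EEW=*/64);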
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 1b890fbe041a..716fb67c5824 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVAsmBackend.cpp - RISCV Assembler Backend ---------------------===//
+//===-- RISCVAsmBackend.cpp - RISC-V Assembler Backend --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -19,6 +19,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -27,6 +28,15 @@
using namespace llvm;
+static cl::opt<bool> RelaxBranches("riscv-asm-relax-branches", cl::init(true),
+ cl::Hidden);
+// Temporary workaround for old linkers that do not support ULEB128 relocations,
+// which are abused by DWARF v5 DW_LLE_offset_pair/DW_RLE_offset_pair
+// implemented in Clang/LLVM.
+static cl::opt<bool> ULEB128Reloc(
+ "riscv-uleb128-reloc", cl::init(true), cl::Hidden,
+ cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate"));
+
std::optional<MCFixupKind> RISCVAsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
unsigned Type;
@@ -76,24 +86,6 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_riscv_relax", 0, 0, 0},
{"fixup_riscv_align", 0, 0, 0},
-
- {"fixup_riscv_set_8", 0, 8, 0},
- {"fixup_riscv_add_8", 0, 8, 0},
- {"fixup_riscv_sub_8", 0, 8, 0},
-
- {"fixup_riscv_set_16", 0, 16, 0},
- {"fixup_riscv_add_16", 0, 16, 0},
- {"fixup_riscv_sub_16", 0, 16, 0},
-
- {"fixup_riscv_set_32", 0, 32, 0},
- {"fixup_riscv_add_32", 0, 32, 0},
- {"fixup_riscv_sub_32", 0, 32, 0},
-
- {"fixup_riscv_add_64", 0, 64, 0},
- {"fixup_riscv_sub_64", 0, 64, 0},
-
- {"fixup_riscv_set_6b", 2, 6, 0},
- {"fixup_riscv_sub_6b", 2, 6, 0},
};
static_assert((std::size(Infos)) == RISCV::NumTargetFixupKinds,
"Not all fixup kinds added to Infos array");
@@ -116,7 +108,8 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// necessary for correctness as offsets may change during relaxation.
bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
- const MCValue &Target) {
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) {
if (Fixup.getKind() >= FirstLiteralRelocationKind)
return true;
switch (Fixup.getTargetKind()) {
@@ -126,6 +119,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
+ case FK_Data_leb128:
if (Target.isAbsolute())
return false;
break;
@@ -135,7 +129,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
return true;
}
- return STI.hasFeature(RISCV::FeatureRelax) || ForceRelocs;
+ return STI->hasFeature(RISCV::FeatureRelax) || ForceRelocs;
}
bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
@@ -144,15 +138,12 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout,
const bool WasForced) const {
+ if (!RelaxBranches)
+ return false;
+
int64_t Offset = int64_t(Value);
unsigned Kind = Fixup.getTargetKind();
- // We only do conditional branch relaxation when the symbol is resolved.
- // For conditional branch, the immediate must be in the range
- // [-4096, 4094].
- if (Kind == RISCV::fixup_riscv_branch)
- return Resolved && !isInt<13>(Offset);
-
// Return true if the symbol is actually unresolved.
// Resolved could be always false when shouldForceRelocation return true.
// We use !WasForced to indicate that the symbol is unresolved and not forced
@@ -171,6 +162,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
// For compressed jump instructions the immediate must be
// in the range [-2048, 2046].
return Offset > 2046 || Offset < -2048;
+ case RISCV::fixup_riscv_branch:
+ // For conditional branch instructions the immediate must be
+    // in the range [-4096, 4094].
+ return !isInt<13>(Offset);
}
}
@@ -251,7 +246,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF,
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
Offset = OS.tell();
Fixup = RISCV::getRelocPairForSize(2);
- support::endian::write<uint16_t>(OS, 0, support::little);
+ support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
}
const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
@@ -301,27 +296,31 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF,
auto AddFixups = [&Fixups, &AddrDelta](unsigned Offset,
std::pair<unsigned, unsigned> Fixup) {
const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(
- Offset, MBE.getLHS(), static_cast<MCFixupKind>(std::get<0>(Fixup))));
- Fixups.push_back(MCFixup::create(
- Offset, MBE.getRHS(), static_cast<MCFixupKind>(std::get<1>(Fixup))));
+ Fixups.push_back(
+ MCFixup::create(Offset, MBE.getLHS(),
+ static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+ std::get<0>(Fixup))));
+ Fixups.push_back(
+ MCFixup::create(Offset, MBE.getRHS(),
+ static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+ std::get<1>(Fixup))));
};
if (isUIntN(6, Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc);
- AddFixups(0, {RISCV::fixup_riscv_set_6b, RISCV::fixup_riscv_sub_6b});
+ AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6});
} else if (isUInt<8>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
- support::endian::write<uint8_t>(OS, 0, support::little);
- AddFixups(1, {RISCV::fixup_riscv_set_8, RISCV::fixup_riscv_sub_8});
+ support::endian::write<uint8_t>(OS, 0, llvm::endianness::little);
+ AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8});
} else if (isUInt<16>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
- support::endian::write<uint16_t>(OS, 0, support::little);
- AddFixups(1, {RISCV::fixup_riscv_set_16, RISCV::fixup_riscv_sub_16});
+ support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+ AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16});
} else if (isUInt<32>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
- support::endian::write<uint32_t>(OS, 0, support::little);
- AddFixups(1, {RISCV::fixup_riscv_set_32, RISCV::fixup_riscv_sub_32});
+ support::endian::write<uint32_t>(OS, 0, llvm::endianness::little);
+ AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32});
} else {
llvm_unreachable("unsupported CFA encoding");
}
@@ -330,6 +329,18 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF,
return true;
}
+bool RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+ int64_t &Value) const {
+ if (LF.isSigned())
+ return false;
+ const MCExpr &Expr = LF.getValue();
+ if (ULEB128Reloc) {
+ LF.getFixups().push_back(
+ MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc()));
+ }
+ return Expr.evaluateKnownAbsolute(Value, Layout);
+}
+
// Given a compressed control flow instruction this function returns
// the expanded instruction.
unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const {
@@ -400,25 +411,12 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case RISCV::fixup_riscv_tls_got_hi20:
case RISCV::fixup_riscv_tls_gd_hi20:
llvm_unreachable("Relocation should be unconditionally forced\n");
- case RISCV::fixup_riscv_set_8:
- case RISCV::fixup_riscv_add_8:
- case RISCV::fixup_riscv_sub_8:
- case RISCV::fixup_riscv_set_16:
- case RISCV::fixup_riscv_add_16:
- case RISCV::fixup_riscv_sub_16:
- case RISCV::fixup_riscv_set_32:
- case RISCV::fixup_riscv_add_32:
- case RISCV::fixup_riscv_sub_32:
- case RISCV::fixup_riscv_add_64:
- case RISCV::fixup_riscv_sub_64:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case FK_Data_6b:
+ case FK_Data_leb128:
return Value;
- case RISCV::fixup_riscv_set_6b:
- return Value & 0x03;
case RISCV::fixup_riscv_lo12_i:
case RISCV::fixup_riscv_pcrel_lo12_i:
case RISCV::fixup_riscv_tprel_lo12_i:
@@ -483,6 +481,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return UpperImm | ((LowerImm << 20) << 32);
}
case RISCV::fixup_riscv_rvc_jump: {
+ if (!isInt<12>(Value))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Need to produce offset[11|4|9:8|10|6|7|3:1|5] from the 11-bit Value.
unsigned Bit11 = (Value >> 11) & 0x1;
unsigned Bit4 = (Value >> 4) & 0x1;
@@ -497,6 +497,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
case RISCV::fixup_riscv_rvc_branch: {
+ if (!isInt<9>(Value))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Need to produce offset[8|4:3], [reg 3 bit], offset[7:6|2:1|5]
unsigned Bit8 = (Value >> 8) & 0x1;
unsigned Bit7_6 = (Value >> 6) & 0x3;
@@ -513,8 +515,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
bool RISCVAsmBackend::evaluateTargetFixup(
const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup,
- const MCFragment *DF, const MCValue &Target, uint64_t &Value,
- bool &WasForced) {
+ const MCFragment *DF, const MCValue &Target, const MCSubtargetInfo *STI,
+ uint64_t &Value, bool &WasForced) {
const MCFixup *AUIPCFixup;
const MCFragment *AUIPCDF;
MCValue AUIPCTarget;
@@ -564,7 +566,7 @@ bool RISCVAsmBackend::evaluateTargetFixup(
Value = Layout.getSymbolOffset(SA) + AUIPCTarget.getConstant();
Value -= Layout.getFragmentOffset(AUIPCDF) + AUIPCFixup->getOffset();
- if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget)) {
+ if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI)) {
WasForced = true;
return false;
}
@@ -596,6 +598,10 @@ bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout,
TA = ELF::R_RISCV_ADD64;
TB = ELF::R_RISCV_SUB64;
break;
+ case llvm::FK_Data_leb128:
+ TA = ELF::R_RISCV_SET_ULEB128;
+ TB = ELF::R_RISCV_SUB_ULEB128;
+ break;
default:
llvm_unreachable("unsupported fixup size");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 0ea1f32e8296..2ad6534ac8bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -31,8 +31,8 @@ class RISCVAsmBackend : public MCAsmBackend {
public:
RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options)
- : MCAsmBackend(support::little, RISCV::fixup_riscv_relax), STI(STI),
- OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
+ : MCAsmBackend(llvm::endianness::little, RISCV::fixup_riscv_relax),
+ STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
}
~RISCVAsmBackend() override = default;
@@ -50,8 +50,8 @@ public:
bool evaluateTargetFixup(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &WasForced) override;
+ const MCValue &Target, const MCSubtargetInfo *STI,
+ uint64_t &Value, bool &WasForced) override;
bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F,
const MCFixup &Fixup, const MCValue &Target,
@@ -66,7 +66,8 @@ public:
createObjectTargetWriter() const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
@@ -99,6 +100,8 @@ public:
bool &WasRelaxed) const override;
bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout,
bool &WasRelaxed) const override;
+ bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+ int64_t &Value) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 0a42c6faee29..66a46a485f53 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -47,11 +47,11 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
errs()
<< "'" << ABIName
<< "' is not a recognized ABI for this target (ignoring target-abi)\n";
- } else if (ABIName.startswith("ilp32") && IsRV64) {
+ } else if (ABIName.starts_with("ilp32") && IsRV64) {
errs() << "32-bit ABIs are not supported for 64-bit targets (ignoring "
"target-abi)\n";
TargetABI = ABI_Unknown;
- } else if (ABIName.startswith("lp64") && !IsRV64) {
+ } else if (ABIName.starts_with("lp64") && !IsRV64) {
errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring "
"target-abi)\n";
TargetABI = ABI_Unknown;
@@ -206,6 +206,17 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
return (SEW * 8) / LMul;
}
+std::optional<RISCVII::VLMUL>
+RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW) {
+ unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL);
+ unsigned EMULFixedPoint = (EEW * 8) / Ratio;
+ bool Fractional = EMULFixedPoint < 8;
+ unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
+ if (!isValidLMUL(EMUL, Fractional))
+ return std::nullopt;
+ return RISCVVType::encodeLMUL(EMUL, Fractional);
+}
+
// Include the auto-generated portion of the compress emitter.
#define GEN_UNCOMPRESS_INSTR
#define GEN_COMPRESS_INSTR
@@ -242,7 +253,7 @@ int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) {
"Unexpected semantics");
// Handle the minimum normalized value which is different for each type.
- if (FPImm.isSmallestNormalized())
+ if (FPImm.isSmallestNormalized() && !FPImm.isNegative())
return 1;
// Convert to single precision to use its lookup table.
@@ -273,7 +284,7 @@ int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) {
if (Sign) {
if (Entry == 16)
return 0;
- return false;
+ return -1;
}
return Entry;
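A worked example of the ratio arithmetic behind the getSameRatioLMUL helper added above; this stand-alone sketch simply replays the computation for SEW=32, LMUL=M1 and EEW=8 (the constants, not the API, are the point here):

    #include <cassert>
    int main() {
      unsigned Ratio = (32 * 8) / 8;             // SEW=32, M1 -> LMul=8 -> ratio 32
      unsigned EMULFixedPoint = (8 * 8) / Ratio; // EEW=8 -> 2
      bool Fractional = EMULFixedPoint < 8;      // true
      unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8; // 4
      assert(Fractional && EMUL == 4);           // i.e. encodeLMUL(4, true) == MF4
      return 0;
    }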
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index f86419319dd3..00b4751905f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -203,6 +203,35 @@ static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) {
return Desc.getNumOperands() - 1;
}
+/// \returns the index to the rounding mode immediate value if any, otherwise
+/// returns -1.
+static inline int getFRMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags))
+ return -1;
+
+ // The operand order
+ // --------------------------------------
+ // | n-1 (if any) | n-2 | n-3 | n-4 |
+ // | policy | sew | vl | frm |
+ // --------------------------------------
+ return getVLOpNum(Desc) - 1;
+}
+
+/// \returns the index to the rounding mode immediate value if any, otherwise
+/// returns -1.
+static inline int getVXRMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ if (!hasRoundModeOp(TSFlags) || !usesVXRM(TSFlags))
+ return -1;
+ // The operand order
+ // --------------------------------------
+ // | n-1 (if any) | n-2 | n-3 | n-4 |
+ // | policy | sew | vl | vxrm |
+ // --------------------------------------
+ return getVLOpNum(Desc) - 1;
+}
+
// Is the first def operand tied to the first use operand. This is true for
// vector pseudo instructions that have a merge operand for tail/mask
// undisturbed. It's also true for vector FMA instructions where one of the
@@ -506,6 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS);
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);
+std::optional<RISCVII::VLMUL>
+getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW);
} // namespace RISCVVType
namespace RISCVRVC {
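Since getFRMOpNum and getVXRMOpNum are new accessors, here is a hypothetical caller (not in the patch) showing the intended usage, assuming the helpers sit in the RISCVII namespace next to the existing operand accessors:

    // Fetch the static rounding-mode immediate of a vector FP pseudo, if any.
    static std::optional<int64_t> getStaticFRM(const MCInst &MI,
                                               const MCInstrDesc &Desc) {
      int FRMOpIdx = RISCVII::getFRMOpNum(Desc);
      if (FRMOpIdx < 0)
        return std::nullopt; // No FRM operand, or the op uses VXRM instead.
      return MI.getOperand(FRMOpIdx).getImm();
    }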
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index db7dc1aed7fc..0799267eaf7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -27,7 +27,7 @@ public:
// Return true if the given relocation must be with a symbol rather than
// section plus offset.
- bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ bool needsRelocateWithSymbol(const MCValue &Val, const MCSymbol &Sym,
unsigned Type) const override {
// TODO: this is very conservative, update once RISC-V psABI requirements
// are clarified.
@@ -89,22 +89,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_CALL_PLT;
case RISCV::fixup_riscv_call_plt:
return ELF::R_RISCV_CALL_PLT;
- case RISCV::fixup_riscv_add_8:
- return ELF::R_RISCV_ADD8;
- case RISCV::fixup_riscv_sub_8:
- return ELF::R_RISCV_SUB8;
- case RISCV::fixup_riscv_add_16:
- return ELF::R_RISCV_ADD16;
- case RISCV::fixup_riscv_sub_16:
- return ELF::R_RISCV_SUB16;
- case RISCV::fixup_riscv_add_32:
- return ELF::R_RISCV_ADD32;
- case RISCV::fixup_riscv_sub_32:
- return ELF::R_RISCV_SUB32;
- case RISCV::fixup_riscv_add_64:
- return ELF::R_RISCV_ADD64;
- case RISCV::fixup_riscv_sub_64:
- return ELF::R_RISCV_SUB64;
}
}
@@ -143,32 +127,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_RELAX;
case RISCV::fixup_riscv_align:
return ELF::R_RISCV_ALIGN;
- case RISCV::fixup_riscv_set_6b:
- return ELF::R_RISCV_SET6;
- case RISCV::fixup_riscv_sub_6b:
- return ELF::R_RISCV_SUB6;
- case RISCV::fixup_riscv_add_8:
- return ELF::R_RISCV_ADD8;
- case RISCV::fixup_riscv_set_8:
- return ELF::R_RISCV_SET8;
- case RISCV::fixup_riscv_sub_8:
- return ELF::R_RISCV_SUB8;
- case RISCV::fixup_riscv_set_16:
- return ELF::R_RISCV_SET16;
- case RISCV::fixup_riscv_add_16:
- return ELF::R_RISCV_ADD16;
- case RISCV::fixup_riscv_sub_16:
- return ELF::R_RISCV_SUB16;
- case RISCV::fixup_riscv_set_32:
- return ELF::R_RISCV_SET32;
- case RISCV::fixup_riscv_add_32:
- return ELF::R_RISCV_ADD32;
- case RISCV::fixup_riscv_sub_32:
- return ELF::R_RISCV_SUB32;
- case RISCV::fixup_riscv_add_64:
- return ELF::R_RISCV_ADD64;
- case RISCV::fixup_riscv_sub_64:
- return ELF::R_RISCV_SUB64;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index e43cb8b40d83..9db5148208b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -125,6 +125,65 @@ void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
void RISCVELFStreamer::reset() {
static_cast<RISCVTargetStreamer *>(getTargetStreamer())->reset();
MCELFStreamer::reset();
+ MappingSymbolCounter = 0;
+ LastMappingSymbols.clear();
+ LastEMS = EMS_None;
+}
+
+void RISCVELFStreamer::emitDataMappingSymbol() {
+ if (LastEMS == EMS_Data)
+ return;
+ emitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+}
+
+void RISCVELFStreamer::emitInstructionsMappingSymbol() {
+ if (LastEMS == EMS_Instructions)
+ return;
+ emitMappingSymbol("$x");
+ LastEMS = EMS_Instructions;
+}
+
+void RISCVELFStreamer::emitMappingSymbol(StringRef Name) {
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ emitLabel(Symbol);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+}
+
+void RISCVELFStreamer::changeSection(MCSection *Section,
+ const MCExpr *Subsection) {
+ // We have to keep track of the mapping symbol state of any sections we
+  // use. Each one should start off as EMS_None, which DenseMap::lookup
+  // returns as the default-constructed value.
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::changeSection(Section, Subsection);
+}
+
+void RISCVELFStreamer::emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ emitInstructionsMappingSymbol();
+ MCELFStreamer::emitInstruction(Inst, STI);
+}
+
+void RISCVELFStreamer::emitBytes(StringRef Data) {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitBytes(Data);
+}
+
+void RISCVELFStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitFill(NumBytes, FillValue, Loc);
+}
+
+void RISCVELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
+ SMLoc Loc) {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitValueImpl(Value, Size, Loc);
}
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index e68f70261146..a6f54bf67b5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -16,12 +16,27 @@ using namespace llvm;
class RISCVELFStreamer : public MCELFStreamer {
void reset() override;
+ void emitDataMappingSymbol();
+ void emitInstructionsMappingSymbol();
+ void emitMappingSymbol(StringRef Name);
+
+ enum ElfMappingSymbol { EMS_None, EMS_Instructions, EMS_Data };
+
+ int64_t MappingSymbolCounter = 0;
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS = EMS_None;
public:
RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> MOW,
std::unique_ptr<MCCodeEmitter> MCE)
: MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
+
+ void changeSection(MCSection *Section, const MCExpr *Subsection) override;
+ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void emitBytes(StringRef Data) override;
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) override;
+ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
};
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index 5727aab3cd4c..74bd9398a9ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -1,4 +1,4 @@
-//===-- RISCVFixupKinds.h - RISCV Specific Fixup Entries --------*- C++ -*-===//
+//===-- RISCVFixupKinds.h - RISC-V Specific Fixup Entries -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCFixup.h"
#include <utility>
@@ -70,42 +71,6 @@ enum Fixups {
// Used to generate an R_RISCV_ALIGN relocation, which indicates the linker
// should fixup the alignment after linker relaxation.
fixup_riscv_align,
- // 8-bit fixup corresponding to R_RISCV_SET8 for local label assignment.
- fixup_riscv_set_8,
- // 8-bit fixup corresponding to R_RISCV_ADD8 for 8-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_8,
- // 8-bit fixup corresponding to R_RISCV_SUB8 for 8-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_8,
- // 16-bit fixup corresponding to R_RISCV_SET16 for local label assignment.
- fixup_riscv_set_16,
- // 16-bit fixup corresponding to R_RISCV_ADD16 for 16-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_16,
- // 16-bit fixup corresponding to R_RISCV_SUB16 for 16-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_16,
- // 32-bit fixup corresponding to R_RISCV_SET32 for local label assignment.
- fixup_riscv_set_32,
- // 32-bit fixup corresponding to R_RISCV_ADD32 for 32-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_32,
- // 32-bit fixup corresponding to R_RISCV_SUB32 for 32-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_32,
- // 64-bit fixup corresponding to R_RISCV_ADD64 for 64-bit symbolic difference
- // paired relocations.
- fixup_riscv_add_64,
- // 64-bit fixup corresponding to R_RISCV_SUB64 for 64-bit symbolic difference
- // paired relocations.
- fixup_riscv_sub_64,
- // 6-bit fixup corresponding to R_RISCV_SET6 for local label assignment in
- // DWARF CFA.
- fixup_riscv_set_6b,
- // 6-bit fixup corresponding to R_RISCV_SUB6 for local label assignment in
- // DWARF CFA.
- fixup_riscv_sub_6b,
// Used as a sentinel, must be the last
fixup_riscv_invalid,
@@ -118,17 +83,21 @@ getRelocPairForSize(unsigned Size) {
default:
llvm_unreachable("unsupported fixup size");
case 1:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_8),
- MCFixupKind(RISCV::fixup_riscv_sub_8));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD8),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB8));
case 2:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_16),
- MCFixupKind(RISCV::fixup_riscv_sub_16));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD16),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB16));
case 4:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_32),
- MCFixupKind(RISCV::fixup_riscv_sub_32));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD32),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB32));
case 8:
- return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_64),
- MCFixupKind(RISCV::fixup_riscv_sub_64));
+ return std::make_pair(
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_ADD64),
+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_RISCV_SUB64));
}
}
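
The switch above now hands back literal ELF relocation types instead of target-specific fixups, but the underlying idea is unchanged: a label difference A - B that cannot be folded at assembly time is stored as a pair of relocations, one adding A and one subtracting B against whatever addend already sits in the field. A rough standalone model of how a consumer resolves such a pair (names and layout invented for illustration):

#include <cassert>
#include <cstdint>

// Toy model of a paired R_RISCV_ADD*/R_RISCV_SUB* resolution: the section
// word initially holds the constant part, the ADD reloc contributes +S(A)
// and the SUB reloc contributes -S(B).
struct Reloc { bool IsAdd; uint64_t SymbolValue; };

uint64_t resolvePair(uint64_t InitialWord, Reloc Add, Reloc Sub) {
  assert(Add.IsAdd && !Sub.IsAdd && "expected one ADD and one SUB reloc");
  return InitialWord + Add.SymbolValue - Sub.SymbolValue;
}

int main() {
  // .word end - start, with start = 0x1000 and end = 0x1034.
  uint64_t Word = resolvePair(/*InitialWord=*/0,
                              {/*IsAdd=*/true, 0x1034},
                              {/*IsAdd=*/false, 0x1000});
  assert(Word == 0x34);
}
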
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 8e98abd65aab..195dda0b8b14 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -75,7 +76,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
- O << getRegisterName(Reg);
+ markup(O, Markup::Register) << getRegisterName(Reg);
}
void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -90,7 +91,7 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
if (MO.isImm()) {
- O << MO.getImm();
+ markup(O, Markup::Immediate) << formatImm(MO.getImm());
return;
}
@@ -110,9 +111,9 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
uint64_t Target = Address + MO.getImm();
if (!STI.hasFeature(RISCV::Feature64Bit))
Target &= 0xffffffff;
- O << formatHex(Target);
+ markup(O, Markup::Target) << formatHex(Target);
} else {
- O << MO.getImm();
+ markup(O, Markup::Target) << formatImm(MO.getImm());
}
}
@@ -123,11 +124,11 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm);
auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits()))
- O << SiFiveReg->Name;
+ markup(O, Markup::Register) << SiFiveReg->Name;
else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
- O << SysReg->Name;
+ markup(O, Markup::Register) << SysReg->Name;
else
- O << Imm;
+ markup(O, Markup::Register) << formatImm(Imm);
}
void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
@@ -157,16 +158,29 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg);
}
+void RISCVInstPrinter::printFRMArgLegacy(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto FRMArg =
+ static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
+ // Never print rounding mode if it's the default 'rne'. This ensures the
+ // output can still be parsed by older tools that erroneously failed to
+ // accept a rounding mode.
+ if (FRMArg == RISCVFPRndMode::RoundingMode::RNE)
+ return;
+ O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg);
+}
+
void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm == 1) {
- O << "min";
+ markup(O, Markup::Immediate) << "min";
} else if (Imm == 30) {
- O << "inf";
+ markup(O, Markup::Immediate) << "inf";
} else if (Imm == 31) {
- O << "nan";
+ markup(O, Markup::Immediate) << "nan";
} else {
float FPVal = RISCVLoadFPImm::getFPImm(Imm);
// If the value is an integer, print a .0 fraction. Otherwise, use %g to
@@ -174,9 +188,9 @@ void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo,
// if it is shorter than printing as a decimal. The smallest value requires
// 12 digits of precision including the decimal.
if (FPVal == (int)(FPVal))
- O << format("%.1f", FPVal);
+ markup(O, Markup::Immediate) << format("%.1f", FPVal);
else
- O << format("%.12g", FPVal);
+ markup(O, Markup::Immediate) << format("%.12g", FPVal);
}
}
@@ -198,7 +212,7 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
// or non-zero in bits 8 and above.
if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED ||
RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) {
- O << Imm;
+ O << formatImm(Imm);
return;
}
// Print the text form.
@@ -211,16 +225,30 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
O << "{";
switch (Imm) {
case RISCVZC::RLISTENCODE::RA:
- O << (ArchRegNames ? "x1" : "ra");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
break;
case RISCVZC::RLISTENCODE::RA_S0:
- O << (ArchRegNames ? "x1, x8" : "ra, s0");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
break;
case RISCVZC::RLISTENCODE::RA_S0_S1:
- O << (ArchRegNames ? "x1, x8-x9" : "ra, s0-s1");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
+ O << '-';
+ markup(O, Markup::Register) << (ArchRegNames ? "x9" : "s1");
break;
case RISCVZC::RLISTENCODE::RA_S0_S2:
- O << (ArchRegNames ? "x1, x8-x9, x18" : "ra, s0-s2");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
+ O << '-';
+ markup(O, Markup::Register) << (ArchRegNames ? "x9" : "s2");
+ if (ArchRegNames) {
+ O << ", ";
+ markup(O, Markup::Register) << "x18";
+ }
break;
case RISCVZC::RLISTENCODE::RA_S0_S3:
case RISCVZC::RLISTENCODE::RA_S0_S4:
@@ -229,11 +257,21 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
case RISCVZC::RLISTENCODE::RA_S0_S7:
case RISCVZC::RLISTENCODE::RA_S0_S8:
case RISCVZC::RLISTENCODE::RA_S0_S9:
- O << (ArchRegNames ? "x1, x8-x9, x18-" : "ra, s0-")
- << getRegisterName(RISCV::X19 + (Imm - RISCVZC::RLISTENCODE::RA_S0_S3));
- break;
case RISCVZC::RLISTENCODE::RA_S0_S11:
- O << (ArchRegNames ? "x1, x8-x9, x18-x27" : "ra, s0-s11");
+ markup(O, Markup::Register) << (ArchRegNames ? "x1" : "ra");
+ O << ", ";
+ markup(O, Markup::Register) << (ArchRegNames ? "x8" : "s0");
+ O << '-';
+ if (ArchRegNames) {
+ markup(O, Markup::Register) << "x9";
+ O << ", ";
+ markup(O, Markup::Register) << "x18";
+ O << '-';
+ }
+ markup(O, Markup::Register) << getRegisterName(
+ RISCV::X19 + (Imm == RISCVZC::RLISTENCODE::RA_S0_S11
+ ? 8
+ : Imm - RISCVZC::RLISTENCODE::RA_S0_S3));
break;
default:
llvm_unreachable("invalid register list");
@@ -241,6 +279,22 @@ void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
O << "}";
}
+void RISCVInstPrinter::printRegReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.isReg() && "printRegReg can only print register operands");
+ if (MO.getReg() == RISCV::NoRegister)
+ return;
+ printRegName(O, MO.getReg());
+
+ O << "(";
+ const MCOperand &MO1 = MI->getOperand(OpNo + 1);
+ assert(MO1.isReg() && "printRegReg can only print register operands");
+ printRegName(O, MO1.getReg());
+ O << ")";
+}
+
void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
@@ -256,6 +310,8 @@ void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo,
if (Opcode == RISCV::CM_PUSH)
Spimm = -Spimm;
+ // RAII guard for ANSI color escape sequences
+ WithMarkup ScopedMarkup = markup(O, Markup::Immediate);
RISCVZC::printSpimm(Spimm, O);
}
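
The markup() calls threaded through this printer wrap each operand in the MC disassembly-markup tags (or terminal colors) when a client requests them, and degrade to plain text otherwise. A hypothetical, self-contained RAII sketch of that pattern follows; it is not the real WithMarkup class, only an illustration of the begin/end bracketing:

#include <iostream>
#include <string>

// Minimal stand-in for an RAII markup guard: prints an opening tag on
// construction and the closing bracket on destruction, or nothing at all
// when markup is disabled.
class ScopedTag {
  std::ostream &OS;
  bool Enabled;
public:
  ScopedTag(std::ostream &OS, const std::string &Kind, bool Enabled)
      : OS(OS), Enabled(Enabled) {
    if (Enabled) OS << '<' << Kind << ':';
  }
  ~ScopedTag() {
    if (Enabled) OS << '>';
  }
  template <typename T> std::ostream &operator<<(const T &V) { return OS << V; }
};

void printOperand(std::ostream &OS, bool UseMarkup) {
  ScopedTag(OS, "reg", UseMarkup) << "a0";
  OS << ", ";
  ScopedTag(OS, "imm", UseMarkup) << 42;
  OS << '\n';
}

int main() {
  printOperand(std::cout, false); // "a0, 42"
  printOperand(std::cout, true);  // "<reg:a0>, <imm:42>"
}
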
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index 20f12af13008..4512bd5f4c4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -40,6 +40,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printFRMArgLegacy(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printFPImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo,
@@ -52,7 +54,8 @@ public:
raw_ostream &O);
void printSpimm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
-
+ void printRegReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index b63a5cea823e..82fed50bce75 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -92,6 +92,10 @@ public:
unsigned getRlistOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
+ unsigned getRegReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
@@ -137,7 +141,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
// Emit AUIPC Ra, Func with R_RISCV_CALL relocation type.
TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
if (MI.getOpcode() == RISCV::PseudoTAIL ||
MI.getOpcode() == RISCV::PseudoJump)
@@ -147,7 +151,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
// Emit JALR Ra, Ra, 0
TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
}
// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
@@ -186,7 +190,7 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI,
.addOperand(SrcReg)
.addOperand(TPReg);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
}
static unsigned getInvertedBranchOp(unsigned BrOp) {
@@ -240,14 +244,14 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
Opcode == RISCV::PseudoLongBNE ? RISCV::C_BEQZ : RISCV::C_BNEZ;
MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addImm(6);
uint16_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write<uint16_t>(CB, Binary, support::little);
+ support::endian::write<uint16_t>(CB, Binary, llvm::endianness::little);
Offset = 2;
} else {
unsigned InvOpc = getInvertedBranchOp(Opcode);
MCInst TmpInst =
MCInstBuilder(InvOpc).addReg(SrcReg1).addReg(SrcReg2).addImm(8);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
Offset = 4;
}
@@ -255,7 +259,7 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
MCInst TmpInst =
MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(CB, Binary, support::little);
+ support::endian::write(CB, Binary, llvm::endianness::little);
Fixups.clear();
if (SrcSymbol.isExpr()) {
@@ -306,12 +310,12 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
llvm_unreachable("Unhandled encodeInstruction length!");
case 2: {
uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint16_t>(CB, Bits, support::little);
+ support::endian::write<uint16_t>(CB, Bits, llvm::endianness::little);
break;
}
case 4: {
uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write(CB, Bits, support::little);
+ support::endian::write(CB, Bits, llvm::endianness::little);
break;
}
}
@@ -442,8 +446,11 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
RelaxCandidate = true;
break;
}
- } else if (Kind == MCExpr::SymbolRef &&
- cast<MCSymbolRefExpr>(Expr)->getKind() == MCSymbolRefExpr::VK_None) {
+ } else if ((Kind == MCExpr::SymbolRef &&
+ cast<MCSymbolRefExpr>(Expr)->getKind() ==
+ MCSymbolRefExpr::VK_None) ||
+ Kind == MCExpr::Binary) {
+ // FIXME: Sub kind binary exprs have chance of underflow.
if (MIFrm == RISCVII::InstFormatJ) {
FixupKind = RISCV::fixup_riscv_jal;
} else if (MIFrm == RISCVII::InstFormatB) {
@@ -503,4 +510,17 @@ unsigned RISCVMCCodeEmitter::getRlistOpValue(const MCInst &MI, unsigned OpNo,
return Imm;
}
+unsigned RISCVMCCodeEmitter::getRegReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ const MCOperand &MO1 = MI.getOperand(OpNo + 1);
+ assert(MO.isReg() && MO1.isReg() && "Expected registers.");
+
+ unsigned Op = Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Op1 = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg());
+
+ return Op | Op1 << 5;
+}
+
#include "RISCVGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 75af5c2de094..79e56a7a6d03 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include <bitset>
#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -114,10 +115,79 @@ static MCTargetStreamer *createRISCVNullTargetStreamer(MCStreamer &S) {
namespace {
class RISCVMCInstrAnalysis : public MCInstrAnalysis {
+ int64_t GPRState[31] = {};
+ std::bitset<31> GPRValidMask;
+
+ static bool isGPR(unsigned Reg) {
+ return Reg >= RISCV::X0 && Reg <= RISCV::X31;
+ }
+
+ static unsigned getRegIndex(unsigned Reg) {
+ assert(isGPR(Reg) && Reg != RISCV::X0 && "Invalid GPR reg");
+ return Reg - RISCV::X1;
+ }
+
+ void setGPRState(unsigned Reg, std::optional<int64_t> Value) {
+ if (Reg == RISCV::X0)
+ return;
+
+ auto Index = getRegIndex(Reg);
+
+ if (Value) {
+ GPRState[Index] = *Value;
+ GPRValidMask.set(Index);
+ } else {
+ GPRValidMask.reset(Index);
+ }
+ }
+
+ std::optional<int64_t> getGPRState(unsigned Reg) const {
+ if (Reg == RISCV::X0)
+ return 0;
+
+ auto Index = getRegIndex(Reg);
+
+ if (GPRValidMask.test(Index))
+ return GPRState[Index];
+ return std::nullopt;
+ }
+
public:
explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
: MCInstrAnalysis(Info) {}
+ void resetState() override { GPRValidMask.reset(); }
+
+ void updateState(const MCInst &Inst, uint64_t Addr) override {
+ // Terminators mark the end of a basic block which means the sequentially
+ // next instruction will be the first of another basic block and the current
+ // state will typically not be valid anymore. For calls, we assume all
+ // registers may be clobbered by the callee (TODO: should we take the
+ // calling convention into account?).
+ if (isTerminator(Inst) || isCall(Inst)) {
+ resetState();
+ return;
+ }
+
+ switch (Inst.getOpcode()) {
+ default: {
+ // Clear the state of all defined registers for instructions that we don't
+ // explicitly support.
+ auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ auto DefReg = Inst.getOperand(I).getReg();
+ if (isGPR(DefReg))
+ setGPRState(DefReg, std::nullopt);
+ }
+ break;
+ }
+ case RISCV::AUIPC:
+ setGPRState(Inst.getOperand(0).getReg(),
+ Addr + (Inst.getOperand(1).getImm() << 12));
+ break;
+ }
+ }
+
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
if (isConditionalBranch(Inst)) {
@@ -140,6 +210,15 @@ public:
return true;
}
+ if (Inst.getOpcode() == RISCV::JALR) {
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+ }
+
return false;
}
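
The value of this bookkeeping is that an auipc/jalr pair can be turned back into an absolute branch target by disassembly clients. A self-contained sketch of the same idea, with the register file reduced to a plain array and invented helper names:

#include <cassert>
#include <cstdint>
#include <optional>

// Track known values of x1..x31; x0 is always zero and never stored.
struct GPRTracker {
  std::optional<int64_t> Regs[32];

  void set(unsigned Reg, std::optional<int64_t> V) {
    if (Reg != 0) Regs[Reg] = V;
  }
  std::optional<int64_t> get(unsigned Reg) const {
    return Reg == 0 ? std::optional<int64_t>(0) : Regs[Reg];
  }
};

int main() {
  GPRTracker T;
  // 0x1000: auipc a0, 0x12345  ->  a0 = 0x1000 + (0x12345 << 12)
  uint64_t Addr = 0x1000;
  int64_t Hi20 = 0x12345;
  T.set(/*a0=*/10, Addr + (Hi20 << 12));
  // later: jalr ra, 0x678(a0)  ->  target resolvable because a0 is known.
  if (auto Base = T.get(10)) {
    uint64_t Target = *Base + 0x678;
    assert(Target == 0x1000 + (0x12345ULL << 12) + 0x678);
  }
}
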
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f659779e9772..4358a5b878e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -45,13 +45,12 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
}
// Recursively generate a sequence for materializing an integer.
-static void generateInstSeqImpl(int64_t Val,
- const FeatureBitset &ActiveFeatures,
+static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
RISCVMatInt::InstSeq &Res) {
- bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
// Use BSETI for a single bit that can't be expressed by a single LUI or ADDI.
- if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val) &&
+ if (STI.hasFeature(RISCV::FeatureStdExtZbs) && isPowerOf2_64(Val) &&
(!isInt<32>(Val) || Val == 0x800)) {
Res.emplace_back(RISCV::BSETI, Log2_64(Val));
return;
@@ -122,7 +121,7 @@ static void generateInstSeqImpl(int64_t Val,
ShiftAmount -= 12;
Val = (uint64_t)Val << 12;
} else if (isUInt<32>((uint64_t)Val << 12) &&
- ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ STI.hasFeature(RISCV::FeatureStdExtZba)) {
// Reduce the shift amount and add zeros to the LSBs so it will match
// LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
ShiftAmount -= 12;
@@ -133,7 +132,7 @@ static void generateInstSeqImpl(int64_t Val,
// Try to use SLLI_UW for Val when it is uint32 but not int32.
if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
- ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ STI.hasFeature(RISCV::FeatureStdExtZba)) {
// Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
// SLLI_UW.
Val = ((uint64_t)Val) | (0xffffffffull << 32);
@@ -141,7 +140,7 @@ static void generateInstSeqImpl(int64_t Val,
}
}
- generateInstSeqImpl(Val, ActiveFeatures, Res);
+ generateInstSeqImpl(Val, STI, Res);
// Skip shift if we were able to use LUI directly.
if (ShiftAmount) {
@@ -171,10 +170,60 @@ static unsigned extractRotateInfo(int64_t Val) {
return 0;
}
+static void generateInstSeqLeadingZeros(int64_t Val, const MCSubtargetInfo &STI,
+ RISCVMatInt::InstSeq &Res) {
+ assert(Val > 0 && "Expected positive val");
+
+ unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
+ uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
+ // Fill in the bits that will be shifted out with 1s. An example where this
+ // helps is trailing one masks with 32 or more ones. This will generate
+ // ADDI -1 and an SRLI.
+ ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
+
+ // Keep the new sequence if it is an improvement or the original is empty.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ Res = TmpSeq;
+ }
+
+ // Some cases can benefit from filling the lower bits with zeros instead.
+ ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
+
+ // Keep the new sequence if it is an improvement or the original is empty.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ Res = TmpSeq;
+ }
+
+ // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+ // the end of the sequence.
+ if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
+ // Try replacing upper bits with 1.
+ uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(LeadingOnesVal, STI, TmpSeq);
+
+ // Keep the new sequence if it is an improvement.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::ADD_UW, 0);
+ Res = TmpSeq;
+ }
+ }
+}
+
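A concrete case helps here. Take Val = 0x00000000ffffffff (32 trailing ones): materializing it directly needs several instructions, but shifting out the 32 leading zeros and back-filling the vacated low bits with ones gives -1, which is a single ADDI, and an SRLI restores the zeros. A small standalone check of that arithmetic, outside the MatInt interface:

#include <cassert>
#include <cstdint>

int main() {
  // Target constant: 32 trailing ones.
  uint64_t Val = 0x00000000ffffffffULL;
  unsigned LeadingZeros = 32;

  // Shift the value up and fill the vacated low bits with ones, as the code
  // above does; the result is all-ones, i.e. the simm12 constant -1.
  uint64_t Shifted = (Val << LeadingZeros) | ((1ULL << LeadingZeros) - 1);
  assert(Shifted == UINT64_MAX); // materialized by a single "addi rd, x0, -1"

  // A final logical shift right restores the original constant.
  assert((Shifted >> LeadingZeros) == Val);
}
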
namespace llvm::RISCVMatInt {
-InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
+InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
RISCVMatInt::InstSeq Res;
- generateInstSeqImpl(Val, ActiveFeatures, Res);
+ generateInstSeqImpl(Val, STI, Res);
// If the low 12 bits are non-zero, the first expansion may end with an ADDI
// or ADDIW. If there are trailing zeros, try generating a sign extended
@@ -187,9 +236,9 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// NOTE: We don't check for C extension to minimize differences in generated
// code.
bool IsShiftedCompressible =
- isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
+ isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion);
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
// Keep the new sequence if it is an improvement.
if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
@@ -203,65 +252,56 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
if (Res.size() <= 2)
return Res;
- assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ assert(STI.hasFeature(RISCV::Feature64Bit) &&
"Expected RV32 to only need 2 instructions");
- // If the constant is positive we might be able to generate a shifted constant
- // with no leading zeros and use a final SRLI to restore them.
- if (Val > 0) {
- assert(Res.size() > 2 && "Expected longer sequence");
- unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
- uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
- // Fill in the bits that will be shifted out with 1s. An example where this
- // helps is trailing one masks with 32 or more ones. This will generate
- // ADDI -1 and an SRLI.
- ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
-
+ // If the lower 13 bits are something like 0x17ff, try to add 1 to change the
+ // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end
+ // of the sequence. Call generateInstSeqImpl on the new constant which may
+ // subtract 0xfffffffffffff800 to create another ADDI. This will leave a
+ // constant with more than 12 trailing zeros for the next recursive step.
+ if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
+ int64_t Imm12 = -(0x800 - (Val & 0xfff));
+ int64_t AdjustedVal = Val - Imm12;
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(AdjustedVal, STI, TmpSeq);
// Keep the new sequence if it is an improvement.
if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ TmpSeq.emplace_back(RISCV::ADDI, Imm12);
Res = TmpSeq;
}
+ }
- // Some cases can benefit from filling the lower bits with zeros instead.
- ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
- TmpSeq.clear();
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
- Res = TmpSeq;
- }
+ // If the constant is positive we might be able to generate a shifted constant
+ // with no leading zeros and use a final SRLI to restore them.
+ if (Val > 0 && Res.size() > 2) {
+ generateInstSeqLeadingZeros(Val, STI, Res);
+ }
- // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
- // the end of the sequence.
- if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Try replacing upper bits with 1.
- uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
- TmpSeq.clear();
- generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+ // If the constant is negative, try inverting and using our trailing zero
+ // optimizations. Use an xori to invert the final value.
+ if (Val < 0 && Res.size() > 3) {
+ uint64_t InvertedVal = ~(uint64_t)Val;
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqLeadingZeros(InvertedVal, STI, TmpSeq);
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::ADD_UW, 0);
- Res = TmpSeq;
- }
+ // Keep it if we found a sequence that is smaller after inverting.
+ if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::XORI, -1);
+ Res = TmpSeq;
}
}
// If the Low and High halves are the same, use pack. The pack instruction
// packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
// lower half and rs2 in the upper half.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbkb]) {
+ if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbkb)) {
int64_t LoVal = SignExtend64<32>(Val);
int64_t HiVal = SignExtend64<32>(Val >> 32);
if (LoVal == HiVal) {
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(LoVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(LoVal, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(RISCV::PACK, 0);
Res = TmpSeq;
@@ -270,7 +310,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
// Perform optimization with BCLRI/BSETI in the Zbs extension.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
+ if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
// 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
// call generateInstSeqImpl with Val|0x80000000 (which is expected be
// an int32), then emit (BCLRI r, 31).
@@ -288,7 +328,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
if (isInt<32>(NewVal)) {
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(NewVal, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(Opc, 31);
Res = TmpSeq;
@@ -302,7 +342,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
uint32_t Hi = Hi_32(Val);
Opc = 0;
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(Lo, STI, TmpSeq);
// Check if it is profitable to use BCLRI/BSETI.
if (Lo > 0 && TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
Opc = RISCV::BSETI;
@@ -323,7 +363,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
// Perform optimization with SH*ADD in the Zba extension.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
int64_t Div = 0;
unsigned Opc = 0;
RISCVMatInt::InstSeq TmpSeq;
@@ -340,7 +380,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
// Build the new instruction sequence.
if (Div > 0) {
- generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(Val / Div, STI, TmpSeq);
if ((TmpSeq.size() + 1) < Res.size()) {
TmpSeq.emplace_back(Opc, 0);
Res = TmpSeq;
@@ -367,7 +407,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
assert(Lo12 != 0 &&
"unexpected instruction sequence for immediate materialisation");
assert(TmpSeq.empty() && "Expected empty TmpSeq");
- generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
+ generateInstSeqImpl(Hi52 / Div, STI, TmpSeq);
if ((TmpSeq.size() + 2) < Res.size()) {
TmpSeq.emplace_back(Opc, 0);
TmpSeq.emplace_back(RISCV::ADDI, Lo12);
@@ -379,14 +419,14 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// Perform optimization with rori in the Zbb and th.srri in the XTheadBb
// extension.
- if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] ||
- ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) {
+ if (Res.size() > 2 && (STI.hasFeature(RISCV::FeatureStdExtZbb) ||
+ STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) {
if (unsigned Rotate = extractRotateInfo(Val)) {
RISCVMatInt::InstSeq TmpSeq;
uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate);
assert(isInt<12>(NegImm12));
TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
- TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb]
+ TmpSeq.emplace_back(STI.hasFeature(RISCV::FeatureStdExtZbb)
? RISCV::RORI
: RISCV::TH_SRRI,
Rotate);
@@ -396,11 +436,44 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
return Res;
}
-int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures, bool CompressionCost) {
- bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
- bool HasRVC = CompressionCost && (ActiveFeatures[RISCV::FeatureStdExtC] ||
- ActiveFeatures[RISCV::FeatureStdExtZca]);
+InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
+ unsigned &ShiftAmt, unsigned &AddOpc) {
+ int64_t LoVal = SignExtend64<32>(Val);
+ if (LoVal == 0)
+ return RISCVMatInt::InstSeq();
+
+ // Subtract the LoVal to emulate the effect of the final ADD.
+ uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;
+ assert(Tmp != 0);
+
+ // Use trailing zero counts to figure how far we need to shift LoVal to line
+ // up with the remaining constant.
+ // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
+ // final constant come from LoVal.
+ unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
+ unsigned TzHi = llvm::countr_zero(Tmp);
+ assert(TzLo < 32 && TzHi >= 32);
+ ShiftAmt = TzHi - TzLo;
+ AddOpc = RISCV::ADD;
+
+ if (Tmp == ((uint64_t)LoVal << ShiftAmt))
+ return RISCVMatInt::generateInstSeq(LoVal, STI);
+
+ // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
+ if (STI.hasFeature(RISCV::FeatureStdExtZba) && Lo_32(Val) == Hi_32(Val)) {
+ ShiftAmt = 32;
+ AddOpc = RISCV::ADD_UW;
+ return RISCVMatInt::generateInstSeq(LoVal, STI);
+ }
+
+ return RISCVMatInt::InstSeq();
+}
+
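generateTwoRegInstSeq targets constants whose upper part is just a shifted copy of the sign-extended low 32 bits, so the value can be rebuilt from one materialized register X as (ADD (SLLI X, C), X), or with Zba as (ADD_UW X, (SLLI X, 32)) when the two 32-bit halves are identical. A small standalone check of the second pattern, using plain integer arithmetic rather than the MC interface:

#include <cassert>
#include <cstdint>

int main() {
  // Constant whose low and high 32-bit halves match.
  uint64_t Val = 0x8765432187654321ULL;

  // X holds the sign-extended low half, as produced by generateInstSeq.
  int64_t X = (int32_t)(Val & 0xffffffff);       // 0xffffffff87654321

  // add.uw rd, rs1, rs2 computes zext32(rs1) + rs2; pair it with slli X, 32.
  uint64_t ZextX = (uint64_t)(uint32_t)X;        // 0x0000000087654321
  uint64_t Shifted = (uint64_t)X << 32;          // 0x8765432100000000
  assert(ZextX + Shifted == Val);                // add.uw X, (slli X, 32)
}
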
+int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
+ bool CompressionCost) {
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ bool HasRVC = CompressionCost && (STI.hasFeature(RISCV::FeatureStdExtC) ||
+ STI.hasFeature(RISCV::FeatureStdExtZca));
int PlatRegSize = IsRV64 ? 64 : 32;
// Split the constant into platform register sized chunks, and calculate cost
@@ -408,7 +481,7 @@ int getIntMatCost(const APInt &Val, unsigned Size,
int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
- InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
+ InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI);
Cost += getInstSeqCost(MatSeq, HasRVC);
}
return std::max(1, Cost);
@@ -429,6 +502,7 @@ OpndKind Inst::getOpndKind() const {
return RISCVMatInt::RegReg;
case RISCV::ADDI:
case RISCV::ADDIW:
+ case RISCV::XORI:
case RISCV::SLLI:
case RISCV::SRLI:
case RISCV::SLLI_UW:
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index ae7b8d402184..780f685463f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -10,7 +10,7 @@
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include <cstdint>
namespace llvm {
@@ -46,7 +46,15 @@ using InstSeq = SmallVector<Inst, 8>;
// simple struct is produced rather than directly emitting the instructions in
// order to allow this helper to be used from both the MC layer and during
// instruction selection.
-InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
+InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI);
+
+// Helper to generate an instruction sequence that can materialize the given
+// immediate value into a register using an additional temporary register. This
+// handles cases where the constant can be generated by (ADD (SLLI X, C), X) or
+// (ADD_UW (SLLI X, C) X). The sequence to generate X is returned. ShiftAmt
+// provides the SLLI shift amount and AddOpc indicates ADD or ADD_UW.
+InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
+ unsigned &ShiftAmt, unsigned &AddOpc);
// Helper to estimate the number of instructions required to materialise the
// given immediate value into a register. This estimate does not account for
@@ -58,8 +66,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
// If CompressionCost is true it will use a different cost calculation if RVC is
// enabled. This should be used to compare two different sequences to determine
// which is more compressible.
-int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures,
+int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
bool CompressionCost = false);
} // namespace RISCVMatInt
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
index 107ca51520b7..9eb18099894b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
@@ -18,13 +18,8 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
-class AsmPrinter;
class FunctionPass;
class InstructionSelector;
-class MCInst;
-class MCOperand;
-class MachineInstr;
-class MachineOperand;
class PassRegistry;
class RISCVRegisterBankInfo;
class RISCVSubtarget;
@@ -33,8 +28,11 @@ class RISCVTargetMachine;
FunctionPass *createRISCVCodeGenPreparePass();
void initializeRISCVCodeGenPreparePass(PassRegistry &);
+FunctionPass *createRISCVDeadRegisterDefinitionsPass();
+void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
+
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+ CodeGenOptLevel OptLevel);
FunctionPass *createRISCVMakeCompressibleOptPass();
void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
@@ -42,6 +40,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
+FunctionPass *createRISCVFoldMasksPass();
+void initializeRISCVFoldMasksPass(PassRegistry &);
+
FunctionPass *createRISCVOptWInstrsPass();
void initializeRISCVOptWInstrsPass(PassRegistry &);
@@ -60,9 +61,14 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
+FunctionPass *createRISCVPostRAExpandPseudoPass();
+void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
void initializeRISCVInsertReadWriteCSRPass(PassRegistry &);
+FunctionPass *createRISCVInsertWriteVXRMPass();
+void initializeRISCVInsertWriteVXRMPass(PassRegistry &);
+
FunctionPass *createRISCVRedundantCopyEliminationPass();
void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
@@ -80,6 +86,15 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);
void initializeRISCVDAGToDAGISelPass(PassRegistry &);
+
+FunctionPass *createRISCVPostLegalizerCombiner();
+void initializeRISCVPostLegalizerCombinerPass(PassRegistry &);
+
+FunctionPass *createRISCVO0PreLegalizerCombiner();
+void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
+
+FunctionPass *createRISCVPreLegalizerCombiner();
+void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index d2520d932ddf..0fd514fa87cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
@@ -46,6 +47,10 @@ using namespace llvm;
STATISTIC(RISCVNumInstrsCompressed,
"Number of RISC-V Compressed instructions emitted");
+namespace llvm {
+extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
+} // namespace llvm
+
namespace {
class RISCVAsmPrinter : public AsmPrinter {
const RISCVSubtarget *STI;
@@ -57,6 +62,15 @@ public:
StringRef getPassName() const override { return "RISC-V Assembly Printer"; }
+ void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+
+ void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+
+ void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+
bool runOnMachineFunction(MachineFunction &MF) override;
void emitInstruction(const MachineInstr *MI) override;
@@ -83,6 +97,7 @@ public:
void emitEndOfAsmFile(Module &M) override;
void emitFunctionEntryLabel() override;
+ bool emitDirectiveOptionArch();
private:
void emitAttributes();
@@ -93,6 +108,78 @@ private:
};
}
+void RISCVAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4;
+ unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+
+ SM.recordStackMap(*MILabel, MI);
+ assert(NumNOPBytes % NOPBytes == 0 &&
+ "Invalid number of NOP bytes requested!");
+
+ // Scan ahead to trim the shadow.
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator MII(MI);
+ ++MII;
+ while (NumNOPBytes > 0) {
+ if (MII == MBB.end() || MII->isCall() ||
+ MII->getOpcode() == RISCV::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MII->getOpcode() == TargetOpcode::STACKMAP)
+ break;
+ ++MII;
+ NumNOPBytes -= 4;
+ }
+
+ // Emit nops.
+ emitNops(NumNOPBytes / NOPBytes);
+}
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void RISCVAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4;
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+ SM.recordPatchPoint(*MILabel, MI);
+
+ PatchPointOpers Opers(&MI);
+
+ unsigned EncodedBytes = 0;
+
+ // Emit padding.
+ unsigned NumBytes = Opers.getNumPatchBytes();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+ assert((NumBytes - EncodedBytes) % NOPBytes == 0 &&
+ "Invalid number of NOP bytes requested!");
+ emitNops((NumBytes - EncodedBytes) / NOPBytes);
+}
+
+void RISCVAsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NOPBytes = STI->getFeatureBits()[RISCV::FeatureStdExtC] ? 2 : 4;
+
+ StatepointOpers SOpers(&MI);
+ if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
+ assert(PatchBytes % NOPBytes == 0 &&
+ "Invalid number of NOP bytes requested!");
+ emitNops(PatchBytes / NOPBytes);
+ }
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+ SM.recordStatepoint(*MILabel, MI);
+}
+
void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = RISCVRVC::compress(CInst, Inst, *STI);
@@ -160,6 +247,12 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
case RISCV::PseudoRVVInitUndefM4:
case RISCV::PseudoRVVInitUndefM8:
return;
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(*OutStreamer, SM, *MI);
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+ case TargetOpcode::STATEPOINT:
+ return LowerSTATEPOINT(*OutStreamer, SM, *MI);
}
MCInst OutInst;
@@ -230,7 +323,7 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand).
if (!AddrReg.isReg())
return true;
- if (!Offset.isImm() && !Offset.isGlobal())
+ if (!Offset.isImm() && !Offset.isGlobal() && !Offset.isBlockAddress())
return true;
MCOperand MCO;
@@ -239,17 +332,49 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
if (Offset.isImm())
OS << MCO.getImm();
- else if (Offset.isGlobal())
+ else if (Offset.isGlobal() || Offset.isBlockAddress())
OS << *MCO.getExpr();
OS << "(" << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")";
return false;
}
+bool RISCVAsmPrinter::emitDirectiveOptionArch() {
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ SmallVector<RISCVOptionArchArg> NeedEmitStdOptionArgs;
+ const MCSubtargetInfo &MCSTI = *TM.getMCSubtargetInfo();
+ for (const auto &Feature : RISCVFeatureKV) {
+ if (STI->hasFeature(Feature.Value) == MCSTI.hasFeature(Feature.Value))
+ continue;
+
+ if (!llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ continue;
+
+ auto Delta = STI->hasFeature(Feature.Value) ? RISCVOptionArchArgType::Plus
+ : RISCVOptionArchArgType::Minus;
+ NeedEmitStdOptionArgs.emplace_back(Delta, Feature.Key);
+ }
+ if (!NeedEmitStdOptionArgs.empty()) {
+ RTS.emitDirectiveOptionPush();
+ RTS.emitDirectiveOptionArch(NeedEmitStdOptionArgs);
+ return true;
+ }
+
+ return false;
+}
+
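The directive emission above compares the per-function subtarget against the module-level one and emits only the differences as .option arch +ext/-ext arguments, bracketed by .option push/.option pop. A standalone sketch of the delta computation over a made-up feature list (the extension names below are illustrative, not the real SubtargetFeatureKV table):

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Compute the +ext/-ext arguments needed to switch the assembler from the
// module-level extension set to the function-level one.
std::vector<std::string> archDelta(const std::set<std::string> &Module,
                                   const std::set<std::string> &Function) {
  std::vector<std::string> Args;
  for (const auto &Ext : Function)
    if (!Module.count(Ext))
      Args.push_back("+" + Ext);
  for (const auto &Ext : Module)
    if (!Function.count(Ext))
      Args.push_back("-" + Ext);
  return Args;
}

int main() {
  std::set<std::string> Module = {"i", "m", "a", "c"};
  std::set<std::string> Function = {"i", "m", "a", "c", "zbb"};
  for (const auto &A : archDelta(Module, Function))
    std::cout << ".option arch, " << A << "\n"; // prints "+zbb" only
}
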
bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
STI = &MF.getSubtarget<RISCVSubtarget>();
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ bool EmittedOptionArch = emitDirectiveOptionArch();
SetupMachineFunction(MF);
emitFunctionBody();
+
+ if (EmittedOptionArch)
+ RTS.emitDirectiveOptionPop();
return false;
}
@@ -738,13 +863,14 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
uint64_t TSFlags = MCID.TSFlags;
unsigned NumOps = MI->getNumExplicitOperands();
- // Skip policy, VL and SEW operands which are the last operands if present.
+ // Skip policy, SEW, VL, VXRM/FRM operands which are the last operands if
+ // present.
if (RISCVII::hasVecPolicyOp(TSFlags))
--NumOps;
- if (RISCVII::hasVLOp(TSFlags))
- --NumOps;
if (RISCVII::hasSEWOp(TSFlags))
--NumOps;
+ if (RISCVII::hasVLOp(TSFlags))
+ --NumOps;
if (RISCVII::hasRoundModeOp(TSFlags))
--NumOps;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 2fcd9a40588a..f9d8401bab7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -28,8 +28,6 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"
-STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt");
-
namespace {
class RISCVCodeGenPrepare : public FunctionPass,
@@ -52,59 +50,14 @@ public:
}
bool visitInstruction(Instruction &I) { return false; }
- bool visitZExtInst(ZExtInst &I);
bool visitAnd(BinaryOperator &BO);
};
} // end anonymous namespace
-bool RISCVCodeGenPrepare::visitZExtInst(ZExtInst &ZExt) {
- if (!ST->is64Bit())
- return false;
-
- Value *Src = ZExt.getOperand(0);
-
- // We only care about ZExt from i32 to i64.
- if (!ZExt.getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
- return false;
-
- // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
- // can determine that the sign bit of X is zero via a dominating condition.
- // This often occurs with widened induction variables.
- if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
- Constant::getNullValue(Src->getType()), &ZExt,
- *DL).value_or(false)) {
- auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt);
- SExt->takeName(&ZExt);
- SExt->setDebugLoc(ZExt.getDebugLoc());
-
- ZExt.replaceAllUsesWith(SExt);
- ZExt.eraseFromParent();
- ++NumZExtToSExt;
- return true;
- }
-
- // Convert (zext (abs(i32 X, i1 1))) -> (sext (abs(i32 X, i1 1))). If abs of
- // INT_MIN is poison, the sign bit is zero.
- using namespace PatternMatch;
- if (match(Src, m_Intrinsic<Intrinsic::abs>(m_Value(), m_One()))) {
- auto *SExt = new SExtInst(Src, ZExt.getType(), "", &ZExt);
- SExt->takeName(&ZExt);
- SExt->setDebugLoc(ZExt.getDebugLoc());
-
- ZExt.replaceAllUsesWith(SExt);
- ZExt.eraseFromParent();
- ++NumZExtToSExt;
- return true;
- }
-
- return false;
-}
-
// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
-// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill
-// the upper 32 bits with ones. A separate transform will turn (zext X) into
-// (sext X) for the same condition.
+// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
+// the upper 32 bits with ones.
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!ST->is64Bit())
return false;
@@ -112,9 +65,17 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!BO.getType()->isIntegerTy(64))
return false;
- // Left hand side should be sext or zext.
+ auto canBeSignExtend = [](Instruction *I) {
+ if (isa<SExtInst>(I))
+ return true;
+ if (isa<ZExtInst>(I))
+ return I->hasNonNeg();
+ return false;
+ };
+
+ // Left hand side should be a sext or zext nneg.
Instruction *LHS = dyn_cast<Instruction>(BO.getOperand(0));
- if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS)))
+ if (!LHS || !canBeSignExtend(LHS))
return false;
Value *LHSSrc = LHS->getOperand(0);
@@ -135,13 +96,6 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
return false;
- // If we can determine the sign bit of the input is 0, we can replace the
- // And mask constant.
- if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc,
- Constant::getNullValue(LHSSrc->getType()),
- LHS, *DL).value_or(false))
- return false;
-
// Sign extend the constant and replace the And operand.
C = SignExtend64<32>(C);
BO.setOperand(1, ConstantInt::get(LHS->getType(), C));
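
The payoff of this mask rewrite is that a 32-bit mask with bit 31 set, which is not a valid simm12, can become one after sign extension whenever the upper 32 bits of the other operand are already known to be zero (sext of a non-negative value, or zext nneg). A quick standalone check of that equivalence for an assumed mask of 0xfffff800:

#include <cassert>
#include <cstdint>

int main() {
  // 32-bit mask with bit 31 set: not a simm12, but its 32->64 sign extension
  // (-2048) is, so the AND can become a single ANDI on RV64.
  uint64_t Mask32 = 0xfffff800ULL;
  uint64_t MaskSext = (uint64_t)(int64_t)(int32_t)Mask32; // 0xfffffffffffff800

  // Any i32 value whose extension leaves the upper 32 bits zero (zext nneg,
  // or sext of a non-negative value) gives the same result with either mask.
  uint64_t Tests[] = {0x0, 0x1234, 0x7fffffff};
  for (uint64_t X : Tests)
    assert((X & Mask32) == (X & MaskSext));
}
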
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td
new file mode 100644
index 000000000000..3a5afb1b075c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -0,0 +1,27 @@
+//=- RISCVCombine.td - Define RISC-V Combine Rules -----------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def RISCVPreLegalizerCombiner: GICombiner<
+ "RISCVPreLegalizerCombinerImpl", [all_combines]> {
+}
+
+def RISCVO0PreLegalizerCombiner: GICombiner<
+ "RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
+}
+
+// Post-legalization combines which are primarily optimizations.
+// TODO: Add more combines.
+def RISCVPostLegalizerCombiner
+ : GICombiner<"RISCVPostLegalizerCombinerImpl",
+ [redundant_and, identity_combines]> {
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
new file mode 100644
index 000000000000..df607236f7d5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -0,0 +1,103 @@
+//===- RISCVDeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass rewrites Rd to x0 for instrs whose return values are unused.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "riscv-dead-defs"
+#define RISCV_DEAD_REG_DEF_NAME "RISC-V Dead register definitions"
+
+STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+
+namespace {
+class RISCVDeadRegisterDefinitions : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVDeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_DEAD_REG_DEF_NAME; }
+};
+} // end anonymous namespace
+
+char RISCVDeadRegisterDefinitions::ID = 0;
+INITIALIZE_PASS(RISCVDeadRegisterDefinitions, DEBUG_TYPE,
+ RISCV_DEAD_REG_DEF_NAME, false, false)
+
+FunctionPass *llvm::createRISCVDeadRegisterDefinitionsPass() {
+ return new RISCVDeadRegisterDefinitions();
+}
+
+bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "***** RISCVDeadRegisterDefinitions *****\n");
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // We only handle non-computational instructions since some NOP encodings
+ // are reserved for HINT instructions.
+ const MCInstrDesc &Desc = MI.getDesc();
+ if (!Desc.mayLoad() && !Desc.mayStore() &&
+ !Desc.hasUnmodeledSideEffects())
+ continue;
+ // For PseudoVSETVLIX0, Rd = X0 has special meaning.
+ if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
+ continue;
+ for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || MO.isEarlyClobber())
+ continue;
+ // Be careful not to change the register if it's a tied operand.
+ if (MI.isRegTiedToUseOperand(I)) {
+ LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
+ continue;
+ }
+ // We should not have any relevant physreg defs that are replaceable by
+ // zero before register allocation. So we just check for dead vreg defs.
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
+ continue;
+ LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
+ MI.print(dbgs()));
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+ if (!(RC && RC->contains(RISCV::X0))) {
+ LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+ continue;
+ }
+ MO.setReg(RISCV::X0);
+ MO.setIsDead();
+ LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ";
+ MI.print(dbgs()));
+ ++NumDeadDefsReplaced;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
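
Condensed to a predicate, the pass rewrites a def to x0 only when all of the checks above line up. A hypothetical standalone version of that decision, with the machine-IR queries reduced to booleans:

#include <cassert>

// Flattened view of the checks performed for each definition operand.
struct DefInfo {
  bool SideEffectingOrMemOp; // mayLoad/mayStore/hasUnmodeledSideEffects
  bool IsVSetVLIX0;          // PseudoVSETVLIX0: rd = x0 has special meaning
  bool TiedToUse;            // tied defs must keep their register
  bool VirtualAndUnused;     // virtual reg that is dead or has no users
  bool ClassContainsX0;      // register class must allow the zero register
};

bool canReplaceWithZero(const DefInfo &D) {
  return D.SideEffectingOrMemOp && !D.IsVSetVLIX0 && !D.TiedToUse &&
         D.VirtualAndUnused && D.ClassContainsX0;
}

int main() {
  // e.g. a vsetvli whose VL result is never read: safe to write x0.
  assert(canReplaceWithZero({true, false, false, true, true}));
  // A dead def in a class without x0 (e.g. an FPR) cannot be redirected.
  assert(!canReplaceWithZero({true, false, false, true, false}));
}
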
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index d10bba26023f..bb772fc5da92 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -30,6 +30,7 @@ namespace {
class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
+ const RISCVSubtarget *STI;
const RISCVInstrInfo *TII;
static char ID;
@@ -72,7 +73,8 @@ private:
char RISCVExpandAtomicPseudo::ID = 0;
bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+ STI = &MF.getSubtarget<RISCVSubtarget>();
+ TII = STI->getInstrInfo();
#ifndef NDEBUG
const unsigned OldSize = getInstSizeInBytes(MF);
@@ -105,8 +107,8 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
- // expanded instructions for each pseudo is correct in the Size field of the
+ // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
+ // expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
case RISCV::PseudoAtomicLoadNand32:
@@ -148,24 +150,30 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
return false;
}
-static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+static unsigned getLRForRMW32(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::LR_W;
case AtomicOrdering::Acquire:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_W;
return RISCV::LR_W_AQ;
case AtomicOrdering::Release:
return RISCV::LR_W;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_W;
return RISCV::LR_W_AQ;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::LR_W_AQ_RL;
}
}
-static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+static unsigned getSCForRMW32(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
@@ -174,32 +182,42 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) {
case AtomicOrdering::Acquire:
return RISCV::SC_W;
case AtomicOrdering::Release:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_W;
return RISCV::SC_W_RL;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_W;
return RISCV::SC_W_RL;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::SC_W_RL;
}
}
-static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+static unsigned getLRForRMW64(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::LR_D;
case AtomicOrdering::Acquire:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_D;
return RISCV::LR_D_AQ;
case AtomicOrdering::Release:
return RISCV::LR_D;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::LR_D;
return RISCV::LR_D_AQ;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::LR_D_AQ_RL;
}
}
-static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+static unsigned getSCForRMW64(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
@@ -208,27 +226,33 @@ static unsigned getSCForRMW64(AtomicOrdering Ordering) {
case AtomicOrdering::Acquire:
return RISCV::SC_D;
case AtomicOrdering::Release:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_D;
return RISCV::SC_D_RL;
case AtomicOrdering::AcquireRelease:
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::SC_D;
return RISCV::SC_D_RL;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::SC_D_RL;
}
}
-static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
+ const RISCVSubtarget *Subtarget) {
if (Width == 32)
- return getLRForRMW32(Ordering);
+ return getLRForRMW32(Ordering, Subtarget);
if (Width == 64)
- return getLRForRMW64(Ordering);
+ return getLRForRMW64(Ordering, Subtarget);
llvm_unreachable("Unexpected LR width\n");
}
-static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
+ const RISCVSubtarget *Subtarget) {
if (Width == 32)
- return getSCForRMW32(Ordering);
+ return getSCForRMW32(Ordering, Subtarget);
if (Width == 64)
- return getSCForRMW64(Ordering);
+ return getSCForRMW64(Ordering, Subtarget);
llvm_unreachable("Unexpected SC width\n");
}
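
Under Ztso (total store order), regular loads and stores are already ordered strongly enough that the .aq/.rl annotations are redundant for acquire, release and acquire-release RMW orderings, so the helpers above fall back to the plain LR/SC opcodes in those cases; only the sequentially consistent case keeps the annotated form. A minimal standalone sketch of the resulting 32-bit LR mapping, with a plain bool standing in for Subtarget->hasStdExtZtso() (illustrative only, not part of the patch):

enum class Ord { Monotonic, Acquire, Release, AcqRel, SeqCst };

// Mirrors getLRForRMW32 above: with Ztso enabled, only seq_cst keeps the
// .aq.rl annotation; everything else uses the plain lr.w.
static const char *lrw32(Ord O, bool HasZtso) {
  switch (O) {
  case Ord::Monotonic: return "lr.w";
  case Ord::Acquire:   return HasZtso ? "lr.w" : "lr.w.aq";
  case Ord::Release:   return "lr.w";
  case Ord::AcqRel:    return HasZtso ? "lr.w" : "lr.w.aq";
  case Ord::SeqCst:    return "lr.w.aqrl";
  }
  return "lr.w";
}
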
@@ -236,7 +260,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB,
- AtomicRMWInst::BinOp BinOp, int Width) {
+ AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register AddrReg = MI.getOperand(2).getReg();
@@ -249,7 +274,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
// binop scratch, dest, val
// sc.[w|d] scratch, scratch, (addr)
// bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
@@ -263,7 +288,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
.addImm(-1);
break;
}
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -294,10 +319,13 @@ static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
.addReg(ScratchReg);
}
-static void doMaskedAtomicBinOpExpansion(
- const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
- MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
- MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
+ MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
@@ -315,7 +343,7 @@ static void doMaskedAtomicBinOpExpansion(
// xor scratch, destreg, scratch
// sc.w scratch, scratch, (alignedaddr)
// bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
@@ -348,7 +376,7 @@ static void doMaskedAtomicBinOpExpansion(
insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
ScratchReg);
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -380,10 +408,11 @@ bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
MBB.addSuccessor(LoopMBB);
if (!IsMasked)
- doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
+ STI);
else
doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
- Width);
+ Width, STI);
NextMBBI = MBB.end();
MI.eraseFromParent();
@@ -455,7 +484,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
// mv scratch1, destreg
// [sext scratch2 if signed min/max]
// ifnochangeneeded scratch2, incr, .looptail
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
.addReg(DestReg)
@@ -507,7 +536,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
// .looptail:
// sc.w scratch1, scratch1, (addr)
// bnez scratch1, loop
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
.addReg(AddrReg)
.addReg(Scratch1Reg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -635,7 +664,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// .loophead:
// lr.[w|d] dest, (addr)
// bne dest, cmpval, done
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
+ DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
.addReg(DestReg)
@@ -644,7 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// .looptail:
// sc.[w|d] scratch, newval, (addr)
// bnez scratch, loophead
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
.addReg(AddrReg)
.addReg(NewValReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -657,7 +688,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// and scratch, dest, mask
// bne scratch, cmpval, done
Register MaskReg = MI.getOperand(5).getReg();
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
+ DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -675,7 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
// bnez scratch, loophead
insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
MaskReg, ScratchReg);
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
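
For reference, the pseudo expanded here typically originates from an ordinary source-level compare-exchange; a minimal example that ends up as the loophead/looptail LR/SC loop sketched in the comments above, assuming a target with the A extension (illustrative only, not part of the patch):

#include <atomic>

// On RV64 with the A extension, a strong seq_cst compare-exchange on a
// 64-bit value is lowered to an lr.d / bne / sc.d / bnez loop of the shape
// shown in the comments above.
bool try_update(std::atomic<long> &v, long expected, long desired) {
  return v.compare_exchange_strong(expected, desired,
                                   std::memory_order_seq_cst);
}
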
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 58896ee1b388..24a13f93af88 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -34,9 +34,7 @@ public:
const RISCVInstrInfo *TII;
static char ID;
- RISCVExpandPseudo() : MachineFunctionPass(ID) {
- initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
- }
+ RISCVExpandPseudo() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -119,6 +117,23 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCXOR:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
+ case RISCV::PseudoCCSLL:
+ case RISCV::PseudoCCSRL:
+ case RISCV::PseudoCCSRA:
+ case RISCV::PseudoCCADDI:
+ case RISCV::PseudoCCSLLI:
+ case RISCV::PseudoCCSRLI:
+ case RISCV::PseudoCCSRAI:
+ case RISCV::PseudoCCANDI:
+ case RISCV::PseudoCCORI:
+ case RISCV::PseudoCCXORI:
+ case RISCV::PseudoCCSLLW:
+ case RISCV::PseudoCCSRLW:
+ case RISCV::PseudoCCSRAW:
+ case RISCV::PseudoCCADDIW:
+ case RISCV::PseudoCCSLLIW:
+ case RISCV::PseudoCCSRLIW:
+ case RISCV::PseudoCCSRAIW:
return expandCCOp(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
case RISCV::PseudoVSETVLIX0:
@@ -188,11 +203,28 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
llvm_unreachable("Unexpected opcode!");
case RISCV::PseudoCCADD: NewOpc = RISCV::ADD; break;
case RISCV::PseudoCCSUB: NewOpc = RISCV::SUB; break;
+ case RISCV::PseudoCCSLL: NewOpc = RISCV::SLL; break;
+ case RISCV::PseudoCCSRL: NewOpc = RISCV::SRL; break;
+ case RISCV::PseudoCCSRA: NewOpc = RISCV::SRA; break;
case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
+ case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
+ case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
+ case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
+ case RISCV::PseudoCCSRAI: NewOpc = RISCV::SRAI; break;
+ case RISCV::PseudoCCANDI: NewOpc = RISCV::ANDI; break;
+ case RISCV::PseudoCCORI: NewOpc = RISCV::ORI; break;
+ case RISCV::PseudoCCXORI: NewOpc = RISCV::XORI; break;
case RISCV::PseudoCCADDW: NewOpc = RISCV::ADDW; break;
case RISCV::PseudoCCSUBW: NewOpc = RISCV::SUBW; break;
+ case RISCV::PseudoCCSLLW: NewOpc = RISCV::SLLW; break;
+ case RISCV::PseudoCCSRLW: NewOpc = RISCV::SRLW; break;
+ case RISCV::PseudoCCSRAW: NewOpc = RISCV::SRAW; break;
+ case RISCV::PseudoCCADDIW: NewOpc = RISCV::ADDIW; break;
+ case RISCV::PseudoCCSLLIW: NewOpc = RISCV::SLLIW; break;
+ case RISCV::PseudoCCSRLIW: NewOpc = RISCV::SRLIW; break;
+ case RISCV::PseudoCCSRAIW: NewOpc = RISCV::SRAIW; break;
}
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
.add(MI.getOperand(5))
@@ -275,8 +307,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
.addReg(MBBI->getOperand(1).getReg())
.add(MBBI->getOperand(2));
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
- // FIXME: Zdinx RV32 can not work on unaligned scalar memory.
- assert(!STI->enableUnalignedScalarMem());
+ // FIXME: Zdinx RV32 can not work on unaligned memory.
+ assert(!STI->hasFastUnalignedAccess());
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
@@ -347,9 +379,7 @@ public:
const RISCVInstrInfo *TII;
static char ID;
- RISCVPreRAExpandPseudo() : MachineFunctionPass(ID) {
- initializeRISCVPreRAExpandPseudoPass(*PassRegistry::getPassRegistry());
- }
+ RISCVPreRAExpandPseudo() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index 4ce9c41eaf5c..294927aecb94 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -73,7 +73,7 @@ def HasStdExtZihintpause : Predicate<"Subtarget->hasStdExtZihintpause()">,
"'Zihintpause' (Pause Hint)">;
def FeatureStdExtZihintntl
- : SubtargetFeature<"experimental-zihintntl", "HasStdExtZihintntl", "true",
+ : SubtargetFeature<"zihintntl", "HasStdExtZihintntl", "true",
"'Zihintntl' (Non-Temporal Locality Hints)">;
def HasStdExtZihintntl : Predicate<"Subtarget->hasStdExtZihintntl()">,
AssemblerPredicate<(all_of FeatureStdExtZihintntl),
@@ -159,7 +159,7 @@ def HasStdExtZhinxOrZhinxmin
"'Zhinxmin' (Half Float in Integer Minimal)">;
def FeatureStdExtZfa
- : SubtargetFeature<"experimental-zfa", "HasStdExtZfa", "true",
+ : SubtargetFeature<"zfa", "HasStdExtZfa", "true",
"'Zfa' (Additional Floating-Point)",
[FeatureStdExtF]>;
def HasStdExtZfa : Predicate<"Subtarget->hasStdExtZfa()">,
@@ -444,8 +444,8 @@ def FeatureStdExtV
def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32x),
- "'V' (Vector Extension for Application Processors), 'Zve32x' or "
- "'Zve64x' (Vector Extensions for Embedded Processors)">;
+ "'V' (Vector Extension for Application Processors), 'Zve32x' "
+ "(Vector Extensions for Embedded Processors)">;
def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
AssemblerPredicate<
(any_of FeatureStdExtZve64x),
@@ -454,17 +454,25 @@ def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
AssemblerPredicate<
(any_of FeatureStdExtZve32f),
- "'V' (Vector Extension for Application Processors), 'Zve32f', "
- "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;
+ "'V' (Vector Extension for Application Processors), 'Zve32f' "
+ "(Vector Extensions for Embedded Processors)">;
def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">;
def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">;
+def FeatureStdExtZfbfmin
+ : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true",
+ "'Zfbfmin' (Scalar BF16 Converts)",
+ [FeatureStdExtF]>;
+def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfbfmin),
+ "'Zfbfmin' (Scalar BF16 Converts)">;
+
def FeatureStdExtZvfbfmin
: SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true",
"'Zvbfmin' (Vector BF16 Converts)",
- [FeatureStdExtZve32f]>;
+ [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>;
def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfmin),
"'Zvfbfmin' (Vector BF16 Converts)">;
@@ -472,18 +480,30 @@ def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
def FeatureStdExtZvfbfwma
: SubtargetFeature<"experimental-zvfbfwma", "HasStdExtZvfbfwma", "true",
"'Zvfbfwma' (Vector BF16 widening mul-add)",
- [FeatureStdExtZve32f]>;
+ [FeatureStdExtZvfbfmin]>;
def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">,
AssemblerPredicate<(all_of FeatureStdExtZvfbfwma),
"'Zvfbfwma' (Vector BF16 widening mul-add)">;
+def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">;
+
def FeatureStdExtZvfh
: SubtargetFeature<"zvfh", "HasStdExtZvfh", "true",
"'Zvfh' (Vector Half-Precision Floating-Point)",
[FeatureStdExtZve32f, FeatureStdExtZfhmin]>;
+def FeatureStdExtZvfhmin
+ : SubtargetFeature<"zvfhmin", "HasStdExtZvfhmin", "true",
+ "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal)",
+ [FeatureStdExtZve32f]>;
+
def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">;
+def HasVInstructionsF16Minimal : Predicate<"Subtarget->hasVInstructionsF16Minimal()">,
+ AssemblerPredicate<(any_of FeatureStdExtZvfhmin, FeatureStdExtZvfh),
+ "'Zvfhmin' (Vector Half-Precision Floating-Point Minimal) or "
+ "'Zvfh' (Vector Half-Precision Floating-Point)">;
+
def HasStdExtZfhOrZvfh
: Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZvfh()">,
AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZvfh),
@@ -529,9 +549,10 @@ def HasStdExtSvinval : Predicate<"Subtarget->hasStdExtSvinval()">,
def FeatureStdExtZtso
: SubtargetFeature<"experimental-ztso", "HasStdExtZtso", "true",
"'Ztso' (Memory Model - Total Store Order)">;
-def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZTso()">,
+def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZtso()">,
AssemblerPredicate<(all_of FeatureStdExtZtso),
"'Ztso' (Memory Model - Total Store Order)">;
+def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">;
def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true",
"'Zawrs' (Wait on Reservation Set)">;
@@ -539,12 +560,20 @@ def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">,
AssemblerPredicate<(all_of FeatureStdExtZawrs),
"'Zawrs' (Wait on Reservation Set)">;
+def FeatureStdExtZvkb
+ : SubtargetFeature<"experimental-zvkb", "HasStdExtZvkb", "true",
+ "'Zvkb' (Vector Bit-manipulation used in Cryptography)">;
+def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvkb),
+ "'Zvkb' (Vector Bit-manipulation used in Cryptography)">;
+
def FeatureStdExtZvbb
: SubtargetFeature<"experimental-zvbb", "HasStdExtZvbb", "true",
- "'Zvbb' (Vector Bit-manipulation used in Cryptography)">;
+ "'Zvbb' (Vector basic bit-manipulation instructions.)",
+ [FeatureStdExtZvkb]>;
def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">,
AssemblerPredicate<(all_of FeatureStdExtZvbb),
- "'Zvbb' (Vector Bit-manipulation used in Cryptography)">;
+ "'Zvbb' (Vector basic bit-manipulation instructions.)">;
def FeatureStdExtZvbc
: SubtargetFeature<"experimental-zvbc", "HasStdExtZvbc", "true",
@@ -560,16 +589,6 @@ def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">,
AssemblerPredicate<(all_of FeatureStdExtZvkg),
"'Zvkg' (Vector GCM instructions for Cryptography)">;
-def FeatureStdExtZvkn
- : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvkned, Zvknhb, Zvbb, Zvbc, and Zvkt.">;
-
-def FeatureStdExtZvknc
- : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvkn and Zvbc.">;
-
def FeatureStdExtZvkned
: SubtargetFeature<"experimental-zvkned", "HasStdExtZvkned", "true",
"'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
@@ -577,32 +596,24 @@ def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">,
AssemblerPredicate<(all_of FeatureStdExtZvkned),
"'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
-def FeatureStdExtZvkng
- : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvkn and Zvkg.">;
-
def FeatureStdExtZvknha
: SubtargetFeature<"experimental-zvknha", "HasStdExtZvknha", "true",
"'Zvknha' (Vector SHA-2 (SHA-256 only))">;
-
-def FeatureStdExtZvknhb
- : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true",
- "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
- [FeatureStdExtZvknha]>;
def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">,
AssemblerPredicate<(all_of FeatureStdExtZvknha),
"'Zvknha' (Vector SHA-2 (SHA-256 only))">;
-def FeatureStdExtZvks
- : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvksed, Zvksh, Zvbb, Zvbc, and Zvkt.">;
+def FeatureStdExtZvknhb
+ : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true",
+ "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
+ [FeatureStdExtZve64x]>;
+def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvknhb),
+ "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))">;
-def FeatureStdExtZvksc
- : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvks and Zvbc.">;
+def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarget->hasStdExtZvknhb()">,
+ AssemblerPredicate<(any_of FeatureStdExtZvknha, FeatureStdExtZvknhb),
+ "'Zvknha' or 'Zvknhb' (Vector SHA-2)">;
def FeatureStdExtZvksed
: SubtargetFeature<"experimental-zvksed", "HasStdExtZvksed", "true",
@@ -611,11 +622,6 @@ def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">,
AssemblerPredicate<(all_of FeatureStdExtZvksed),
"'Zvksed' (SM4 Block Cipher Instructions)">;
-def FeatureStdExtZvksg
- : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true",
- "This extension is shorthand for the following set of "
- "other extensions: Zvks and Zvkg.">;
-
def FeatureStdExtZvksh
: SubtargetFeature<"experimental-zvksh", "HasStdExtZvksh", "true",
"'Zvksh' (SM3 Hash Function Instructions)">;
@@ -627,6 +633,53 @@ def FeatureStdExtZvkt
: SubtargetFeature<"experimental-zvkt", "HasStdExtZvkt", "true",
"'Zvkt' (Vector Data-Independent Execution Latency)">;
+// Zvk short-hand extensions
+
+def FeatureStdExtZvkn
+ : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkned, Zvknhb, Zvkb and Zvkt.",
+ [FeatureStdExtZvkned, FeatureStdExtZvknhb,
+ FeatureStdExtZvkb, FeatureStdExtZvkt]>;
+
+def FeatureStdExtZvknc
+ : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkn and Zvbc.",
+ [FeatureStdExtZvkn, FeatureStdExtZvbc]>;
+
+def FeatureStdExtZvkng
+ : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkn and Zvkg.",
+ [FeatureStdExtZvkn, FeatureStdExtZvkg]>;
+
+def FeatureStdExtZvks
+ : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvksed, Zvksh, Zvkb and Zvkt.",
+ [FeatureStdExtZvksed, FeatureStdExtZvksh,
+ FeatureStdExtZvkb, FeatureStdExtZvkt]>;
+
+def FeatureStdExtZvksc
+ : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvks and Zvbc.",
+ [FeatureStdExtZvks, FeatureStdExtZvbc]>;
+
+def FeatureStdExtZvksg
+ : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvks and Zvkg.",
+ [FeatureStdExtZvks, FeatureStdExtZvkg]>;
+
+def FeatureStdExtZicfilp
+ : SubtargetFeature<"experimental-zicfilp", "HasStdExtZicfilp", "true",
+ "'Zicfilp' (Landing pad)">;
+def HasStdExtZicfilp : Predicate<"Subtarget->hasStdExtZicfilp()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicfilp),
+ "'Zicfilp' (Landing pad)">;
+
def FeatureStdExtZicond
: SubtargetFeature<"experimental-zicond", "HasStdExtZicond", "true",
"'Zicond' (Integer Conditional Operations)">;
@@ -635,34 +688,25 @@ def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">,
"'Zicond' (Integer Conditional Operations)">;
def FeatureStdExtSmaia
- : SubtargetFeature<"experimental-smaia", "HasStdExtSmaia", "true",
+ : SubtargetFeature<"smaia", "HasStdExtSmaia", "true",
"'Smaia' (Smaia encompasses all added CSRs and all "
"modifications to interrupt response behavior that the "
"AIA specifies for a hart, over all privilege levels.)",
[]>;
def FeatureStdExtSsaia
- : SubtargetFeature<"experimental-ssaia", "HasStdExtSsaia", "true",
+ : SubtargetFeature<"ssaia", "HasStdExtSsaia", "true",
"'Ssaia' (Ssaia is essentially the same as Smaia except "
"excluding the machine-level CSRs and behavior not "
"directly visible to supervisor level.)", []>;
-def FeatureStdExtZfbfmin
- : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true",
- "'Zfbfmin' (Scalar BF16 Converts)",
- [FeatureStdExtF]>;
-def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">,
- AssemblerPredicate<(all_of FeatureStdExtZfbfmin),
- "'Zfbfmin' (Scalar BF16 Converts)">;
-
def HasHalfFPLoadStoreMove
: Predicate<"Subtarget->hasHalfFPLoadStoreMove()">,
AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin,
- FeatureStdExtZfbfmin, FeatureStdExtZvfbfwma),
+ FeatureStdExtZfbfmin),
"'Zfh' (Half-Precision Floating-Point) or "
"'Zfhmin' (Half-Precision Floating-Point Minimal) or "
- "'Zfbfmin' (Scalar BF16 Converts) or "
- "'Zvfbfwma' (Vector BF16 widening mul-add)">;
+ "'Zfbfmin' (Scalar BF16 Converts)">;
def FeatureStdExtZacas
: SubtargetFeature<"experimental-zacas", "HasStdExtZacas", "true",
@@ -776,6 +820,45 @@ def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">,
AssemblerPredicate<(all_of FeatureVendorXSfcie),
"'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
+def FeatureVendorXSfvqmaccdod
+ : SubtargetFeature<"xsfvqmaccdod", "HasVendorXSfvqmaccdod", "true",
+ "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))",
+ [FeatureStdExtZve32x]>;
+def HasVendorXSfvqmaccdod : Predicate<"Subtarget->hasVendorXSfvqmaccdod()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod),
+ "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))">;
+
+def FeatureVendorXSfvqmaccqoq
+ : SubtargetFeature<"xsfvqmaccqoq", "HasVendorXSfvqmaccqoq", "true",
+ "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))",
+ [FeatureStdExtZve32x]>;
+def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq),
+ "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))">;
+
+def FeatureVendorXSfvfwmaccqqq
+ : SubtargetFeature<"xsfvfwmaccqqq", "HasVendorXSfvfwmaccqqq", "true",
+                       "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction (4-by-4))",
+ [FeatureStdExtZve32f, FeatureStdExtZvfbfmin]>;
+def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
+                       "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction (4-by-4))">;
+
+def FeatureVendorXSfvfnrclipxfqf
+ : SubtargetFeature<"xsfvfnrclipxfqf", "HasVendorXSfvfnrclipxfqf", "true",
+ "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSfvfnrclipxfqf : Predicate<"Subtarget->hasVendorXSfvfnrclipxfqf()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvfnrclipxfqf),
+ "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)">;
+def FeatureVendorXCVelw
+ : SubtargetFeature<"xcvelw", "HasVendorXCVelw", "true",
+ "'XCVelw' (CORE-V Event Load Word)">;
+def HasVendorXCVelw
+ : Predicate<"Subtarget->hasVendorXCVelw()">,
+ AssemblerPredicate<(any_of FeatureVendorXCVelw),
+ "'XCVelw' (CORE-V Event Load Word)">;
+
def FeatureVendorXCVbitmanip
: SubtargetFeature<"xcvbitmanip", "HasVendorXCVbitmanip", "true",
"'XCVbitmanip' (CORE-V Bit Manipulation)">;
@@ -790,6 +873,36 @@ def HasVendorXCVmac : Predicate<"Subtarget->hasVendorXCVmac()">,
AssemblerPredicate<(all_of FeatureVendorXCVmac),
"'XCVmac' (CORE-V Multiply-Accumulate)">;
+def FeatureVendorXCVmem
+ : SubtargetFeature<"xcvmem", "HasVendorXCVmem", "true",
+ "'XCVmem' (CORE-V Post-incrementing Load & Store)">;
+def HasVendorXCVmem
+ : Predicate<"Subtarget->hasVendorXCVmem()">,
+ AssemblerPredicate<(any_of FeatureVendorXCVmem),
+ "'XCVmem' (CORE-V Post-incrementing Load & Store)">;
+
+def FeatureVendorXCValu
+ : SubtargetFeature<"xcvalu", "HasVendorXCValu", "true",
+ "'XCValu' (CORE-V ALU Operations)">;
+def HasVendorXCValu : Predicate<"Subtarget->hasVendorXCValu()">,
+ AssemblerPredicate<(all_of FeatureVendorXCValu),
+ "'XCValu' (CORE-V ALU Operations)">;
+
+def FeatureVendorXCVsimd
+ : SubtargetFeature<"xcvsimd", "HasVendorXCvsimd", "true",
+ "'XCVsimd' (CORE-V SIMD ALU)">;
+def HasVendorXCVsimd
+ : Predicate<"Subtarget->hasVendorXCVsimd()">,
+ AssemblerPredicate<(any_of FeatureVendorXCVsimd),
+ "'XCVsimd' (CORE-V SIMD ALU)">;
+
+def FeatureVendorXCVbi
+ : SubtargetFeature<"xcvbi", "HasVendorXCVbi", "true",
+ "'XCVbi' (CORE-V Immediate Branching)">;
+def HasVendorXCVbi : Predicate<"Subtarget->hasVendorXCVbi()">,
+ AssemblerPredicate<(all_of FeatureVendorXCVbi),
+ "'XCVbi' (CORE-V Immediate Branching)">;
+
//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//
@@ -833,15 +946,13 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence",
"true",
"Enable trailing fence for seq-cst store.">;
-def FeatureUnalignedScalarMem
- : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
- "true", "Has reasonably performant unaligned scalar "
- "loads and stores">;
+def FeatureFastUnalignedAccess
+ : SubtargetFeature<"fast-unaligned-access", "HasFastUnalignedAccess",
+ "true", "Has reasonably performant unaligned "
+ "loads and stores (both scalar and vector)">;
-def FeatureUnalignedVectorMem
- : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
- "true", "Has reasonably performant unaligned vector "
- "loads and stores">;
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
+ "UsePostRAScheduler", "true", "Schedule again after register allocation">;
def TuneNoOptimizedZeroStrideLoad
: SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
@@ -859,6 +970,16 @@ def TuneLUIADDIFusion
: SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
"true", "Enable LUI+ADDI macrofusion">;
+def TuneAUIPCADDIFusion
+ : SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
+ "true", "Enable AUIPC+ADDI macrofusion">;
+def TuneShiftedZExtFusion
+ : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion",
+ "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">;
+def TuneLDADDFusion
+ : SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
+ "true", "Enable LD+ADD macrofusion.">;
+
def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;
@@ -876,6 +997,13 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
[TuneNoDefaultUnroll,
TuneShortForwardBranchOpt]>;
+def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+ "Ventana Veyron-Series processors",
+ [TuneLUIADDIFusion,
+ TuneAUIPCADDIFusion,
+ TuneShiftedZExtFusion,
+ TuneLDADDFusion]>;
+
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
// is responsible for providing any necessary __sync implementations. Code
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
new file mode 100644
index 000000000000..6ee006525df5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -0,0 +1,216 @@
+//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs various peephole optimisations that fold masks into vector
+// pseudo instructions after instruction selection.
+//
+// Currently it converts
+// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
+// ->
+// PseudoVMV_V_V %false, %true, %vl, %sew
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVISelDAGToDAG.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-fold-masks"
+
+namespace {
+
+class RISCVFoldMasks : public MachineFunctionPass {
+public:
+ static char ID;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ RISCVFoldMasks() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ StringRef getPassName() const override { return "RISC-V Fold Masks"; }
+
+private:
+ bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
+ bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
+
+ bool isAllOnesMask(MachineInstr *MaskDef);
+};
+
+} // namespace
+
+char RISCVFoldMasks::ID = 0;
+
+INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
+
+bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) {
+ if (!MaskDef)
+ return false;
+ assert(MaskDef->isCopy() && MaskDef->getOperand(0).getReg() == RISCV::V0);
+ Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
+ if (!SrcReg.isVirtual())
+ return false;
+ MaskDef = MRI->getVRegDef(SrcReg);
+ if (!MaskDef)
+ return false;
+
+ // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
+ // undefined behaviour if it's the wrong bitwidth, so we could choose to
+ // assume that it's all-ones? Same applies to its VL.
+ switch (MaskDef->getOpcode()) {
+ case RISCV::PseudoVMSET_M_B1:
+ case RISCV::PseudoVMSET_M_B2:
+ case RISCV::PseudoVMSET_M_B4:
+ case RISCV::PseudoVMSET_M_B8:
+ case RISCV::PseudoVMSET_M_B16:
+ case RISCV::PseudoVMSET_M_B32:
+ case RISCV::PseudoVMSET_M_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
+// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
+bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
+#define CASE_VMERGE_TO_VMV(lmul) \
+ case RISCV::PseudoVMERGE_VVM_##lmul: \
+ NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
+ break;
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ CASE_VMERGE_TO_VMV(MF8)
+ CASE_VMERGE_TO_VMV(MF4)
+ CASE_VMERGE_TO_VMV(MF2)
+ CASE_VMERGE_TO_VMV(M1)
+ CASE_VMERGE_TO_VMV(M2)
+ CASE_VMERGE_TO_VMV(M4)
+ CASE_VMERGE_TO_VMV(M8)
+ }
+
+ Register MergeReg = MI.getOperand(1).getReg();
+ Register FalseReg = MI.getOperand(2).getReg();
+ // Check merge == false (or merge == undef)
+ if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) !=
+ TRI->lookThruCopyLike(FalseReg, MRI))
+ return false;
+
+ assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
+ if (!isAllOnesMask(V0Def))
+ return false;
+
+ MI.setDesc(TII->get(NewOpc));
+ MI.removeOperand(1); // Merge operand
+ MI.tieOperands(0, 1); // Tie false to dest
+ MI.removeOperand(3); // Mask operand
+ MI.addOperand(
+ MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
+
+ // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
+ // register class for the destination and merge operands e.g. VRNoV0 -> VR
+ MRI->recomputeRegClass(MI.getOperand(0).getReg());
+ MRI->recomputeRegClass(MI.getOperand(1).getReg());
+ return true;
+}
+
+bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI,
+ MachineInstr *MaskDef) {
+ const RISCV::RISCVMaskedPseudoInfo *I =
+ RISCV::getMaskedPseudoInfo(MI.getOpcode());
+ if (!I)
+ return false;
+
+ if (!isAllOnesMask(MaskDef))
+ return false;
+
+ // There are two classes of pseudos in the table - compares and
+ // everything else. See the comment on RISCVMaskedPseudo for details.
+ const unsigned Opc = I->UnmaskedPseudo;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ const bool HasPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+ const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
+#ifndef NDEBUG
+ const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode());
+ assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
+ RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
+ "Masked and unmasked pseudos are inconsistent");
+ assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure");
+#endif
+ (void)HasPolicyOp;
+
+ MI.setDesc(MCID);
+
+ // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
+ unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
+ MI.removeOperand(MaskOpIdx);
+
+ // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
+ // so try and relax it to vr.
+ MRI->recomputeRegClass(MI.getOperand(0).getReg());
+ unsigned PassthruOpIdx = MI.getNumExplicitDefs();
+ if (HasPassthru) {
+ if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+ MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
+ } else
+ MI.removeOperand(PassthruOpIdx);
+
+ return true;
+}
+
+bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasVInstructions())
+ return false;
+
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ bool Changed = false;
+
+ // Masked pseudos coming out of isel will have their mask operand in the form:
+ //
+ // $v0:vr = COPY %mask:vr
+  //   %x:vr = Pseudo_MASK %a:vr, %b:vr, $v0:vr
+ //
+ // Because $v0 isn't in SSA, keep track of it so we can check the mask operand
+ // on each pseudo.
+ MachineInstr *CurrentV0Def;
+ for (MachineBasicBlock &MBB : MF) {
+ CurrentV0Def = nullptr;
+ for (MachineInstr &MI : MBB) {
+ Changed |= convertToUnmasked(MI, CurrentV0Def);
+ Changed |= convertVMergeToVMv(MI, CurrentV0Def);
+
+ if (MI.definesRegister(RISCV::V0, TRI))
+ CurrentV0Def = &MI;
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f312cc8129dd..8dfea6d38620 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -226,37 +226,38 @@ getRestoreLibCallName(const MachineFunction &MF,
return RestoreLibCalls[LibCallID];
}
-// Return encoded value for PUSH/POP instruction, representing
-// registers to store/load.
-static unsigned getPushPopEncoding(const Register MaxReg) {
+// Return encoded value and register count for PUSH/POP instruction,
+// representing registers to store/load.
+static std::pair<unsigned, unsigned>
+getPushPopEncodingAndNum(const Register MaxReg) {
switch (MaxReg) {
default:
llvm_unreachable("Unexpected Reg for Push/Pop Inst");
case RISCV::X27: /*s11*/
case RISCV::X26: /*s10*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S11;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S11, 13);
case RISCV::X25: /*s9*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S9;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S9, 11);
case RISCV::X24: /*s8*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S8;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S8, 10);
case RISCV::X23: /*s7*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S7;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S7, 9);
case RISCV::X22: /*s6*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S6;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S6, 8);
case RISCV::X21: /*s5*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S5;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S5, 7);
case RISCV::X20: /*s4*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S4;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S4, 6);
case RISCV::X19: /*s3*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S3;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S3, 5);
case RISCV::X18: /*s2*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S2;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S2, 4);
case RISCV::X9: /*s1*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0_S1;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S1, 3);
case RISCV::X8: /*s0*/
- return llvm::RISCVZC::RLISTENCODE::RA_S0;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0, 2);
case RISCV::X1: /*ra*/
- return llvm::RISCVZC::RLISTENCODE::RA;
+ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA, 1);
}
}
@@ -265,9 +266,10 @@ static Register getMaxPushPopReg(const MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI) {
Register MaxPushPopReg = RISCV::NoRegister;
for (auto &CS : CSI) {
- Register Reg = CS.getReg();
- if (RISCV::PGPRRegClass.contains(Reg))
- MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id());
+ // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indices to
+ // registers which can be saved by Zcmp Push.
+ if (CS.getFrameIdx() < 0)
+ MaxPushPopReg = std::max(MaxPushPopReg.id(), CS.getReg().id());
}
// if rlist is {rs, s0-s10}, then s11 will also be included
if (MaxPushPopReg == RISCV::X26)
@@ -275,16 +277,6 @@ static Register getMaxPushPopReg(const MachineFunction &MF,
return MaxPushPopReg;
}
-static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI,
- unsigned RequiredStack, unsigned FreePushStack,
- bool IsPop) {
- if (FreePushStack > RequiredStack)
- RequiredStack = 0;
- unsigned Spimm = std::min(RequiredStack, 48u);
- MBBI->getOperand(1).setImm(Spimm);
- return alignTo(RequiredStack - Spimm, 16);
-}
-
// Return true if the specified function should have a dedicated frame
// pointer register. This is true if frame pointer elimination is
// disabled, if it needs dynamic stack realignment, if the function has
@@ -514,8 +506,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
// investigation. Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- uint64_t RealStackSize =
- StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
+ uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
// Early exit if there is no need to allocate on the stack
@@ -535,13 +526,13 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
RealStackSize = FirstSPAdjustAmount;
}
- if (RVFI->isPushable(MF) && FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) {
+ if (RVFI->isPushable(MF) && FirstFrameSetup != MBB.end() &&
+ FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) {
// Use available stack adjustment in push instruction to allocate additional
// stack space.
- unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
- unsigned SpImmBase = RVFI->getRVPushStackSize();
- StackSize = adjSPInPushPop(FirstFrameSetup, StackSize,
- (SpImmBase - PushStack), true);
+ uint64_t Spimm = std::min(StackSize, (uint64_t)48);
+ FirstFrameSetup->getOperand(1).setImm(Spimm);
+ StackSize -= Spimm;
}
if (StackSize != 0) {
@@ -584,8 +575,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
Offset = FrameIdx * (int64_t)STI.getXLen() / 8;
}
} else {
- Offset = MFI.getObjectOffset(Entry.getFrameIdx()) -
- RVFI->getLibCallStackSize();
+ Offset = MFI.getObjectOffset(FrameIdx) - RVFI->getReservedSpillsSize();
}
Register Reg = Entry.getReg();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
@@ -730,8 +720,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
LastFrameDestroy = std::prev(MBBI, CSI.size());
uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- uint64_t RealStackSize =
- StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
+ uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize();
uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@@ -776,9 +765,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
MBBI->getOpcode() == RISCV::CM_POP) {
// Use available stack adjustment in pop instruction to deallocate stack
// space.
- unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
- unsigned SpImmBase = RVFI->getRVPushStackSize();
- StackSize = adjSPInPushPop(MBBI, StackSize, (SpImmBase - PushStack), true);
+ uint64_t Spimm = std::min(StackSize, (uint64_t)48);
+ MBBI->getOperand(1).setImm(Spimm);
+ StackSize -= Spimm;
}
// Deallocate stack
@@ -882,7 +871,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (FrameReg == getFPReg(STI)) {
Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
if (FI >= 0)
- Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
+ Offset -= StackOffset::getFixed(RVFI->getReservedSpillsSize());
// When using FP to access scalable vector objects, we need to minus
// the frame size.
//
@@ -950,8 +939,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
assert(!RI->hasStackRealignment(MF) &&
"Can't index across variable sized realign");
Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) +
- RVFI->getLibCallStackSize() +
- RVFI->getRVPushStackSize(),
+ RVFI->getReservedSpillsSize(),
RVFI->getRVVStackSize());
} else {
Offset += StackOffset::getFixed(MFI.getStackSize());
@@ -993,11 +981,11 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
RISCV::X5, RISCV::X6, RISCV::X7, /* t0-t2 */
RISCV::X10, RISCV::X11, /* a0-a1, a2-a7 */
RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17,
- RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31, 0 /* t3-t6 */
+ RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 /* t3-t6 */
};
- for (unsigned i = 0; CSRegs[i]; ++i)
- SavedRegs.set(CSRegs[i]);
+ for (auto Reg : CSRegs)
+ SavedRegs.set(Reg);
if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
@@ -1277,7 +1265,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
// We would like to split the SP adjustment to reduce prologue/epilogue
// as following instructions. In this way, the offset of the callee saved
-// register could fit in a single store.
+// register could fit in a single store. Supposed that the first sp adjust
+// amount is 2032.
// add sp,sp,-2032
// sw ra,2028(sp)
// sw s0,2024(sp)
@@ -1295,19 +1284,60 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
// Disable SplitSPAdjust if save-restore libcall is used. The callee-saved
// registers will be pushed by the save-restore libcalls, so we don't have to
// split the SP adjustment in this case.
- if (RVFI->getLibCallStackSize() || RVFI->getRVPushStackSize())
+ if (RVFI->getReservedSpillsSize())
return 0;
// Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
// 12-bit and there exists a callee-saved register needing to be pushed.
if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
- // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
- // cause sp = sp + 2048 in the epilogue to be split into multiple
+ // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because
+ // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple
// instructions. Offsets smaller than 2048 can fit in a single load/store
// instruction, and we have to stick with the stack alignment. 2048 has
// 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
// RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
- return 2048 - getStackAlign().value();
+ const uint64_t StackAlign = getStackAlign().value();
+
+    // An amount of (2048 - StackAlign) would prevent the callee-save and
+    // restore instructions from being compressed, so when the target supports
+    // compressed instructions, try to adjust the amount to the largest offset
+    // that the compressed stack load/store instructions accept.
+ if (STI.hasStdExtCOrZca()) {
+ // The compression extensions may support the following instructions:
+ // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
+ // c.swsp rs2, offset[7:2] => 2^(6 + 2)
+ // c.flwsp rd, offset[7:2] => 2^(6 + 2)
+ // c.fswsp rs2, offset[7:2] => 2^(6 + 2)
+ // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
+ // c.sdsp rs2, offset[8:3] => 2^(6 + 3)
+ // c.fldsp rd, offset[8:3] => 2^(6 + 3)
+ // c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
+ const uint64_t RVCompressLen = STI.getXLen() * 8;
+      // Compared with the amount (2048 - StackAlign), StackSize needs to
+      // satisfy the conditions below so that choosing the compressible amount
+      // does not take more instructions to adjust sp afterwards. For example,
+      // if StackSize satisfies (StackSize <= 2048 + RVCompressLen), either
+      // choice still adjusts sp with just addi + addi:
+      // case1: amount is 2048 - StackAlign: use addi + addi to adjust sp.
+      // case2: amount is RVCompressLen: use addi + addi to adjust sp.
+ auto CanCompress = [&](uint64_t CompressLen) -> bool {
+ if (StackSize <= 2047 + CompressLen ||
+ (StackSize > 2048 * 2 - StackAlign &&
+ StackSize <= 2047 * 2 + CompressLen) ||
+ StackSize > 2048 * 3 - StackAlign)
+ return true;
+
+ return false;
+ };
+      // In the epilogue, addi sp, sp, 496 is used to recover the sp and it
+      // can be compressed (C.ADDI16SP, whose offset range is [-512, 496]), but
+      // addi sp, sp, 512 cannot be compressed. So try to use 496 first.
+ const uint64_t ADDI16SPCompressLen = 496;
+ if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen))
+ return ADDI16SPCompressLen;
+ if (CanCompress(RVCompressLen))
+ return RVCompressLen;
+ }
+ return 2048 - StackAlign;
}
return 0;
}
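
A worked example of the heuristic above, with the check pulled out into a standalone function; the constants follow the comments (on RV64, compressed sp-relative loads and stores reach 512 bytes and C.ADDI16SP can restore at most 496), and the sample stack size is only illustrative:

#include <cstdint>
#include <cstdio>

// Standalone copy of the CanCompress check above (same inequalities).
static bool canCompress(uint64_t StackSize, uint64_t CompressLen,
                        uint64_t StackAlign) {
  return StackSize <= 2047 + CompressLen ||
         (StackSize > 2048 * 2 - StackAlign &&
          StackSize <= 2047 * 2 + CompressLen) ||
         StackSize > 2048 * 3 - StackAlign;
}

int main() {
  const uint64_t StackAlign = 16, RVCompressLen = 512, Addi16SpLen = 496;
  const uint64_t StackSize = 2240; // > 2047, so the SP adjustment is split
  uint64_t First = canCompress(StackSize, Addi16SpLen, StackAlign)
                       ? Addi16SpLen
                       : canCompress(StackSize, RVCompressLen, StackAlign)
                             ? RVCompressLen
                             : 2048 - StackAlign;
  // Prints "first adjust 496, remainder 1744": the callee-save stores stay
  // within the compressible offset range and the remaining 1744 still fits
  // a single 12-bit addi.
  printf("first adjust %llu, remainder %llu\n", (unsigned long long)First,
         (unsigned long long)(StackSize - First));
}
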
@@ -1328,14 +1358,12 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
if (RVFI->isPushable(*MF)) {
Register MaxReg = getMaxPushPopReg(*MF, CSI);
- unsigned PushedRegNum =
- getPushPopEncoding(MaxReg) - llvm::RISCVZC::RLISTENCODE::RA + 1;
- RVFI->setRVPushRegs(PushedRegNum);
- RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
-
if (MaxReg != RISCV::NoRegister) {
+ auto [RegEnc, PushedRegNum] = getPushPopEncodingAndNum(MaxReg);
+ RVFI->setRVPushRegs(PushedRegNum);
+ RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
+
// Use encoded number to represent registers to spill.
- unsigned RegEnc = getPushPopEncoding(MaxReg);
RVFI->setRVPushRlist(RegEnc);
MachineInstrBuilder PushBuilder =
BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH))
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 79adc83e8d65..9bc100981f2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -61,7 +61,7 @@ public:
const TargetRegisterInfo *TRI) const override;
// Get the first stack adjustment amount for SplitSPAdjust.
- // Return 0 if we don't want to to split the SP adjustment in prologue and
+ // Return 0 if we don't want to split the SP adjustment in prologue and
// epilogue.
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td
new file mode 100644
index 000000000000..5f16ffb0a024
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -0,0 +1,159 @@
+//===-- RISCVGIsel.td - RISC-V GlobalISel Patterns ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains patterns that are relevant to GlobalISel, including
+/// GIComplexOperandMatcher definitions for equivalent SelectionDAG
+/// ComplexPatterns.
+//
+//===----------------------------------------------------------------------===//
+
+include "RISCV.td"
+include "RISCVCombine.td"
+
+def simm12Plus1 : ImmLeaf<XLenVT, [{
+ return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+def simm12Plus1i32 : ImmLeaf<i32, [{
+ return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+
+// FIXME: This doesn't check that the G_CONSTANT we're deriving the immediate
+// from is only used once
+def simm12Minus1Nonzero : ImmLeaf<XLenVT, [{
+ return (Imm >= -2049 && Imm < 0) || (Imm > 0 && Imm <= 2046);}]>;
+
+def simm12Minus1NonzeroNonNeg1 : ImmLeaf<XLenVT, [{
+ return (Imm >= -2049 && Imm < -1) || (Imm > 0 && Imm <= 2046);}]>;
+
+// Return an immediate value plus 1.
+def ImmPlus1 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() + 1, SDLoc(N),
+                           N->getValueType(0));}]>;
+
+def GINegImm : GICustomOperandRenderer<"renderNegImm">,
+ GISDNodeXFormEquiv<NegImm>;
+
+def GIImmSubFromXLen : GICustomOperandRenderer<"renderImmSubFromXLen">,
+ GISDNodeXFormEquiv<ImmSubFromXLen>;
+def GIImmSubFrom32 : GICustomOperandRenderer<"renderImmSubFrom32">,
+ GISDNodeXFormEquiv<ImmSubFrom32>;
+
+def GIImmPlus1 :
+ GICustomOperandRenderer<"renderImmPlus1">,
+ GISDNodeXFormEquiv<ImmPlus1>;
+
+def GIAddrRegImm :
+ GIComplexOperandMatcher<s32, "selectAddrRegImm">,
+ GIComplexPatternEquiv<AddrRegImm>;
+
+def gi_as_i64imm : GICustomOperandRenderer<"renderImm">,
+ GISDNodeXFormEquiv<as_i64imm>;
+
+def gi_trailing_zero : GICustomOperandRenderer<"renderTrailingZeros">,
+ GISDNodeXFormEquiv<TrailingZeros>;
+
+// FIXME: This is labelled as handling 's32', however the ComplexPattern it
+// refers to handles both i32 and i64 based on the HwMode. Currently this LLT
+// parameter appears to be ignored so this pattern works for both, however we
+// should add a LowLevelTypeByHwMode, and use that to define our XLenLLT instead
+// here.
+def GIShiftMaskXLen :
+ GIComplexOperandMatcher<s32, "selectShiftMask">,
+ GIComplexPatternEquiv<shiftMaskXLen>;
+def GIShiftMask32 :
+ GIComplexOperandMatcher<s32, "selectShiftMask">,
+ GIComplexPatternEquiv<shiftMask32>;
+
+def gi_sh1add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<1>">,
+ GIComplexPatternEquiv<sh1add_op>;
+def gi_sh2add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<2>">,
+ GIComplexPatternEquiv<sh2add_op>;
+def gi_sh3add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<3>">,
+ GIComplexPatternEquiv<sh3add_op>;
+
+def gi_sh1add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<1>">,
+ GIComplexPatternEquiv<sh1add_uw_op>;
+def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
+ GIComplexPatternEquiv<sh2add_uw_op>;
+def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
+ GIComplexPatternEquiv<sh3add_uw_op>;
+
+// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
+def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
+ (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
+
+let Predicates = [IsRV64] in {
+def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
+ (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
+
+def : Pat<(i32 (shl GPR:$rs1, (i32 GPR:$rs2))), (SLLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (sra GPR:$rs1, (i32 GPR:$rs2))), (SRAW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (srl GPR:$rs1, (i32 GPR:$rs2))), (SRLW GPR:$rs1, GPR:$rs2)>;
+}
+
+// Ptr type used in patterns with GlobalISelEmitter
+def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
+
+// Define pattern expansions for pointer ult/slt conditional codes
+def : Pat<(XLenVT (setult (PtrVT GPR:$rs1), simm12:$imm12)),
+ (SLTIU GPR:$rs1, simm12:$imm12)>;
+def : Pat<(XLenVT (setult (PtrVT GPR:$rs1), (PtrVT GPR:$rs2))),
+ (SLTU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (setlt (PtrVT GPR:$rs1), simm12:$imm12)),
+ (SLTI GPR:$rs1, simm12:$imm12)>;
+def : Pat<(XLenVT (setlt (PtrVT GPR:$rs1), (PtrVT GPR:$rs2))),
+ (SLT GPR:$rs1, GPR:$rs2)>;
+
+// Define pattern expansions for setcc operations that aren't directly
+// handled by a RISC-V instruction.
+foreach Ty = [PtrVT, XLenVT] in {
+def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty 0))), (SLTIU GPR:$rs1, 1)>;
+def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty simm12Plus1:$imm12))),
+ (SLTIU (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm12)), 1)>;
+def : Pat<(XLenVT (seteq (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty 0))), (SLTU (XLenVT X0), GPR:$rs1)>;
+def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty simm12Plus1:$imm12))),
+ (SLTU (XLenVT X0), (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm12)))>;
+def : Pat<(XLenVT (setne (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLTU (XLenVT X0), (XOR GPR:$rs1, GPR:$rs2))>;
+def : Pat<(XLenVT (setugt (Ty GPR:$rs1), (Ty simm12Minus1NonzeroNonNeg1:$imm))),
+ (XORI (SLTIU GPR:$rs1,
+ (ImmPlus1 simm12Minus1NonzeroNonNeg1:$imm)), 1)>;
+def : Pat<(XLenVT (setugt (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLTU GPR:$rs2, GPR:$rs1)>;
+def : Pat<(XLenVT (setgt (Ty GPR:$rs1), (Ty simm12Minus1Nonzero:$imm))),
+ (XORI (SLTI GPR:$rs1, (ImmPlus1 simm12Minus1Nonzero:$imm)), 1)>;
+def : Pat<(XLenVT (setgt (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (SLT GPR:$rs2, GPR:$rs1)>;
+def : Pat<(XLenVT (setuge (XLenVT GPR:$rs1), (Ty simm12:$imm))),
+ (XORI (SLTIU GPR:$rs1, simm12:$imm), 1)>;
+def : Pat<(XLenVT (setuge (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(XLenVT (setge (Ty GPR:$rs1), (Ty simm12:$imm))),
+ (XORI (SLTI GPR:$rs1, simm12:$imm), 1)>;
+def : Pat<(XLenVT (setge (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(XLenVT (setule (Ty GPR:$rs1), (Ty simm12Minus1NonzeroNonNeg1:$imm))),
+ (SLTIU GPR:$rs1, (ImmPlus1 simm12Minus1NonzeroNonNeg1:$imm))>;
+def : Pat<(XLenVT (setule (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
+def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty simm12Minus1Nonzero:$imm))),
+ (SLTI GPR:$rs1, (ImmPlus1 simm12Minus1Nonzero:$imm))>;
+def : Pat<(XLenVT (setle (Ty GPR:$rs1), (Ty GPR:$rs2))),
+ (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
+}
+
+let Predicates = [IsRV32] in {
+def : LdPat<load, LW, PtrVT>;
+def : StPat<store, SW, GPR, PtrVT>;
+}
+
+let Predicates = [IsRV64] in {
+def : LdPat<load, LD, PtrVT>;
+def : StPat<store, SD, GPR, PtrVT>;
+}
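
The setcc expansions above lower comparisons that have no direct RISC-V instruction onto SLT/SLTU/SLTI/SLTIU combined with XORI, or an ADDI of the negated immediate for the equality forms. As a standalone illustration (not part of the patch), the following C++ checks the integer identities those patterns rely on; the values are arbitrary.

#include <cassert>
#include <cstdint>

int main() {
  int64_t a = 37, b = 42;
  // setge a, b   -> xori (slt a, b), 1
  assert((a >= b) == !(a < b));
  // setugt a, b  -> sltu b, a (operands swapped)
  assert(((uint64_t)a > (uint64_t)b) == ((uint64_t)b < (uint64_t)a));
  // setgt a, imm -> xori (slti a, imm+1), 1   (imm+1 must still be a simm12)
  int64_t imm = 40;
  assert((a > imm) == !(a < imm + 1));
  // seteq a, imm -> sltiu (addi a, -imm), 1
  assert((a == imm) == ((uint64_t)(a - imm) < 1));
  return 0;
}
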
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index b9c69a966b4a..5ad1e082344e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -67,7 +67,7 @@ private:
bool tryCreateStridedLoadStore(IntrinsicInst *II, Type *DataType, Value *Ptr,
Value *AlignOp);
- std::pair<Value *, Value *> determineBaseAndStride(GetElementPtrInst *GEP,
+ std::pair<Value *, Value *> determineBaseAndStride(Instruction *Ptr,
IRBuilderBase &Builder);
bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride,
@@ -321,9 +321,19 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
}
std::pair<Value *, Value *>
-RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
+RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
IRBuilderBase &Builder) {
+ // A gather/scatter of a splat is a zero strided load/store.
+ if (auto *BasePtr = getSplatValue(Ptr)) {
+ Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
+ return std::make_pair(BasePtr, ConstantInt::get(IntPtrTy, 0));
+ }
+
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return std::make_pair(nullptr, nullptr);
+
auto I = StridedAddrs.find(GEP);
if (I != StridedAddrs.end())
return I->second;
@@ -331,8 +341,12 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
SmallVector<Value *, 2> Ops(GEP->operands());
// Base pointer needs to be a scalar.
- if (Ops[0]->getType()->isVectorTy())
- return std::make_pair(nullptr, nullptr);
+ Value *ScalarBase = Ops[0];
+ if (ScalarBase->getType()->isVectorTy()) {
+ ScalarBase = getSplatValue(ScalarBase);
+ if (!ScalarBase)
+ return std::make_pair(nullptr, nullptr);
+ }
std::optional<unsigned> VecOperand;
unsigned TypeScale = 0;
@@ -362,11 +376,19 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
// We can't extract the stride if the arithmetic is done at a different size
// than the pointer type. Adding the stride later may not wrap correctly.
// Technically we could handle wider indices, but I don't expect that in
- // practice.
+ // practice. Handle one special case here: constants. This simplifies
+ // writing test cases.
Value *VecIndex = Ops[*VecOperand];
Type *VecIntPtrTy = DL->getIntPtrType(GEP->getType());
- if (VecIndex->getType() != VecIntPtrTy)
- return std::make_pair(nullptr, nullptr);
+ if (VecIndex->getType() != VecIntPtrTy) {
+ auto *VecIndexC = dyn_cast<Constant>(VecIndex);
+ if (!VecIndexC)
+ return std::make_pair(nullptr, nullptr);
+ if (VecIndex->getType()->getScalarSizeInBits() > VecIntPtrTy->getScalarSizeInBits())
+ VecIndex = ConstantFoldCastInstruction(Instruction::Trunc, VecIndexC, VecIntPtrTy);
+ else
+ VecIndex = ConstantFoldCastInstruction(Instruction::SExt, VecIndexC, VecIntPtrTy);
+ }
// Handle the non-recursive case. This is what we see if the vectorizer
// decides to use a scalar IV + vid on demand instead of a vector IV.
@@ -379,7 +401,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
Ops[*VecOperand] = Start;
Type *SourceTy = GEP->getSourceElementType();
Value *BasePtr =
- Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
+ Builder.CreateGEP(SourceTy, ScalarBase, ArrayRef(Ops).drop_front());
// Convert stride to pointer size if needed.
Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
@@ -415,7 +437,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
Ops[*VecOperand] = BasePhi;
Type *SourceTy = GEP->getSourceElementType();
Value *BasePtr =
- Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
+ Builder.CreateGEP(SourceTy, ScalarBase, ArrayRef(Ops).drop_front());
// Final adjustments to stride should go in the start block.
Builder.SetInsertPoint(
@@ -448,17 +470,17 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
if (!TLI->isTypeLegal(DataTypeVT))
return false;
- // Pointer should be a GEP.
- auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP)
+ // Pointer should be an instruction.
+ auto *PtrI = dyn_cast<Instruction>(Ptr);
+ if (!PtrI)
return false;
- LLVMContext &Ctx = GEP->getContext();
+ LLVMContext &Ctx = PtrI->getContext();
IRBuilder<InstSimplifyFolder> Builder(Ctx, *DL);
- Builder.SetInsertPoint(GEP);
+ Builder.SetInsertPoint(PtrI);
Value *BasePtr, *Stride;
- std::tie(BasePtr, Stride) = determineBaseAndStride(GEP, Builder);
+ std::tie(BasePtr, Stride) = determineBaseAndStride(PtrI, Builder);
if (!BasePtr)
return false;
assert(Stride != nullptr);
@@ -481,8 +503,8 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
II->replaceAllUsesWith(Call);
II->eraseFromParent();
- if (GEP->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(GEP);
+ if (PtrI->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(PtrI);
return true;
}
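
The RISCVGatherScatterLowering change above extends determineBaseAndStride to accept a splatted pointer (a gather/scatter of a splat becomes a zero-stride strided access) and a GEP whose scalar base arrives as a vector splat. The toy sketch below mirrors that decision logic only; the Addr struct and helper names are invented for the example and are not LLVM API.

#include <cstdint>
#include <optional>
#include <utility>

// Toy model of a vector of addresses: either a splat of one scalar pointer,
// or a strided form base + i*stride recovered from a GEP.
struct Addr {
  bool IsSplat = false;
  uint64_t SplatBase = 0;   // valid when IsSplat
  bool IsStridedGEP = false;
  uint64_t GEPBase = 0;     // valid when IsStridedGEP
  int64_t GEPStride = 0;    // valid when IsStridedGEP
};

// Mirrors the shape of determineBaseAndStride after the patch: a splat maps
// to (base, stride 0); otherwise only the strided-GEP form is recognized.
std::optional<std::pair<uint64_t, int64_t>> baseAndStride(const Addr &A) {
  if (A.IsSplat)
    return std::make_pair(A.SplatBase, int64_t(0)); // zero-stride access
  if (A.IsStridedGEP)
    return std::make_pair(A.GEPBase, A.GEPStride);
  return std::nullopt; // not handled; keep the plain gather/scatter
}
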
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 901204043b3c..09b3ab96974c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -22,13 +22,18 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <optional>
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
+static cl::opt<bool> UsePseudoMovImm(
+ "riscv-use-rematerializable-movimm", cl::Hidden,
+ cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
+ "constant materialization"),
+ cl::init(false));
+
namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
@@ -61,8 +66,11 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
- Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
- N->getOperand(0), VL);
+ SDValue Src = N->getOperand(0);
+ if (VT.isInteger())
+ Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
+ N->getOperand(0));
+ Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
break;
}
case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
@@ -83,7 +91,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
// Create temporary stack for each expanding node.
SDValue StackSlot =
- CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4));
+ CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
@@ -91,7 +99,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
SDValue OffsetSlot =
- CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+ CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
Align(8));
@@ -142,13 +150,25 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
continue;
MadeChange |= doPeepholeSExtW(N);
- MadeChange |= doPeepholeMaskedRVV(N);
+
+ // FIXME: This is here only because the VMerge transform doesn't
+ // know how to handle masked true inputs. Once that has been moved
+ // to post-ISel, this can be deleted as well.
+ MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
}
CurDAG->setRoot(Dummy.getValue());
MadeChange |= doPeepholeMergeVVMFold();
+ // After we're done with everything else, convert IMPLICIT_DEF
+ // passthru operands to NoRegister. This is required to work around
+ // an optimization deficiency in MachineCSE. This really should
+ // be merged back into each of the patterns (i.e. there's no good
+ // reason not to go directly to NoReg), but is being done this way
+ // to allow easy backporting.
+ MadeChange |= doPeepholeNoRegPassThru();
+
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
@@ -184,28 +204,32 @@ static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
int64_t Imm, const RISCVSubtarget &Subtarget) {
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
- // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
+ // Use a rematerializable pseudo instruction for short sequences if enabled.
+ if (Seq.size() == 2 && UsePseudoMovImm)
+ return SDValue(
+ CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
+ CurDAG->getTargetConstant(Imm, DL, VT)),
+ 0);
+
+ // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
// worst an LUI+ADDIW. This will require an extra register, but avoids a
// constant pool.
+ // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
+ // low and high 32 bits are the same and bits 31 and 63 are set.
if (Seq.size() > 3) {
- int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) < Seq.size()) {
- SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
-
- SDValue SLLI = SDValue(
- CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
- CurDAG->getTargetConstant(32, DL, VT)),
- 0);
- return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI),
- 0);
- }
+ unsigned ShiftAmt, AddOpc;
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
+ if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
+ SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
+
+ SDValue SLLI = SDValue(
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
+ CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
+ 0);
+ return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
}
}
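
selectImm now defers to RISCVMatInt::generateTwoRegInstSeq to build long constants as a short sequence for a value Lo combined with a shifted copy of itself, instead of hard-coding the shift-by-32 ADD case. A minimal check of the arithmetic for the classic "repeated low 32 bits with bit 31 clear" case (illustration only; the constant is arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // Imm has identical low and high 32-bit halves with bit 31 clear, so it can
  // be built as Lo = LUI+ADDIW of the low half, then ADD(Lo, SLLI(Lo, 32)).
  // The bit-31/63-set case needs Zba's ADD.UW, as the comment above notes.
  uint64_t Imm = 0x12345678'12345678ULL;
  uint64_t Lo = Imm & 0xffffffffULL;  // what LUI+ADDIW would produce
  uint64_t Rebuilt = Lo + (Lo << 32); // ADD rd, Lo, (SLLI Lo, 32)
  assert(Rebuilt == Imm);
  return 0;
}
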
@@ -552,6 +576,12 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
SDValue VLOperand;
unsigned Opcode = RISCV::PseudoVSETVLI;
+ if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+ const unsigned VLEN = Subtarget->getRealMinVLen();
+ if (VLEN == Subtarget->getRealMaxVLen())
+ if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
+ VLMax = true;
+ }
if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
Opcode = RISCV::PseudoVSETVLIX0;
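
The new selectVSETVLI check treats a constant AVL as VLMAX when VLEN is known exactly (the real minimum equals the real maximum), since VLMAX = VLEN * LMUL / SEW for the requested SEW/LMUL. A small sketch of that computation, with LMUL carried in eighths so the fractional LMULs stay integral (the helper name is made up for the example):

#include <cassert>
#include <cstdint>

// VLMAX = VLEN * LMUL / SEW. LMUL is passed in eighths (LMUL=1 -> 8,
// LMUL=1/2 -> 4, LMUL=8 -> 64) to keep fractional LMULs exact.
static unsigned vlmax(unsigned VLEN, unsigned SEW, unsigned LMULEighths) {
  return (VLEN * LMULEighths) / (SEW * 8);
}

int main() {
  // VLEN=128, SEW=32, LMUL=1: 4 elements, so a constant AVL of 4 is VLMAX.
  assert(vlmax(128, 32, 8) == 4);
  // VLEN=128, SEW=8, LMUL=1/2: 8 elements.
  assert(vlmax(128, 8, 4) == 8);
  return 0;
}
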
@@ -808,7 +838,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
case ISD::Constant: {
- assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
+ assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
auto *ConstNode = cast<ConstantSDNode>(Node);
if (ConstNode->isZero()) {
SDValue New =
@@ -832,26 +862,34 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::ConstantFP: {
const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
- int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
- APF, VT);
+ auto [FPImm, NeedsFNeg] =
+ static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
+ VT);
if (FPImm >= 0) {
unsigned Opc;
+ unsigned FNegOpc;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected size");
case MVT::f16:
Opc = RISCV::FLI_H;
+ FNegOpc = RISCV::FSGNJN_H;
break;
case MVT::f32:
Opc = RISCV::FLI_S;
+ FNegOpc = RISCV::FSGNJN_S;
break;
case MVT::f64:
Opc = RISCV::FLI_D;
+ FNegOpc = RISCV::FSGNJN_D;
break;
}
-
SDNode *Res = CurDAG->getMachineNode(
Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
+ if (NeedsFNeg)
+ Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
+ SDValue(Res, 0));
+
ReplaceNode(Node, Res);
return;
}
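
With Zfa, getLegalZfaFPImm can now also indicate that the constant is the negation of an FLI-loadable value, in which case the code above emits FLI followed by FSGNJN of the result with itself, the usual fneg idiom. A bit-level illustration of fsgnjn rd, rs, rs for f32 (sketch only):

#include <cassert>
#include <cstdint>
#include <cstring>

// fsgnjn rd, rs1, rs2 copies the *inverted* sign of rs2 onto the magnitude of
// rs1. With rs1 == rs2 it negates the value, which is the fneg.s idiom.
static float fsgnjn(float rs1, float rs2) {
  uint32_t a, b;
  std::memcpy(&a, &rs1, sizeof(a));
  std::memcpy(&b, &rs2, sizeof(b));
  uint32_t r = (a & 0x7fffffffu) | (~b & 0x80000000u);
  float out;
  std::memcpy(&out, &r, sizeof(out));
  return out;
}

int main() {
  assert(fsgnjn(0.5f, 0.5f) == -0.5f); // FLI 0.5 followed by the fneg idiom
  return 0;
}
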
@@ -866,10 +904,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
*Subtarget);
+ bool HasZdinx = Subtarget->hasStdExtZdinx();
+ bool Is64Bit = Subtarget->is64Bit();
unsigned Opc;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected size");
+ case MVT::bf16:
+ assert(Subtarget->hasStdExtZfbfmin());
+ Opc = RISCV::FMV_H_X;
+ break;
case MVT::f16:
Opc =
Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
@@ -881,20 +925,29 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// For RV32, we can't move from a GPR, we need to convert instead. This
// should only happen for +0.0 and -0.0.
assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
- bool HasZdinx = Subtarget->hasStdExtZdinx();
- if (Subtarget->is64Bit())
+ if (Is64Bit)
Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
else
Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
break;
}
- SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
+ SDNode *Res;
+ if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
+ Res = CurDAG->getMachineNode(
+ Opc, DL, VT, Imm,
+ CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
+ else
+ Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
// For f64 -0.0, we need to insert a fneg.d idiom.
- if (NegZeroF64)
- Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
- SDValue(Res, 0));
+ if (NegZeroF64) {
+ Opc = RISCV::FSGNJN_D;
+ if (HasZdinx)
+ Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
+ Res =
+ CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
+ }
ReplaceNode(Node, Res);
return;
@@ -2082,8 +2135,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
break;
- SmallVector<SDValue> Operands =
- {CurDAG->getUNDEF(VT), Ld->getBasePtr()};
+ SmallVector<SDValue> Operands = {
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
+ Ld->getBasePtr()};
if (IsStrided)
Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
@@ -2141,12 +2195,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
// Always produce a register and immediate operand, as expected by
// RISCVAsmPrinter::PrintAsmMemoryOperand.
switch (ConstraintID) {
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_m: {
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::m: {
SDValue Op0, Op1;
bool Found = SelectAddrRegImm(Op, Op0, Op1);
assert(Found && "SelectAddrRegImm should always succeed");
@@ -2155,7 +2210,7 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
OutOps.push_back(Op1);
return false;
}
- case InlineAsm::Constraint_A:
+ case InlineAsm::ConstraintCode::A:
OutOps.push_back(Op);
OutOps.push_back(
CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
@@ -2205,7 +2260,8 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
// Fold constant addresses.
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
const MVT VT, const RISCVSubtarget *Subtarget,
- SDValue Addr, SDValue &Base, SDValue &Offset) {
+ SDValue Addr, SDValue &Base, SDValue &Offset,
+ bool IsPrefetch = false) {
if (!isa<ConstantSDNode>(Addr))
return false;
@@ -2217,6 +2273,9 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
int64_t Lo12 = SignExtend64<12>(CVal);
int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
+ if (IsPrefetch && (Lo12 & 0b11111) != 0)
+ return false;
+
if (Hi) {
int64_t Hi20 = (Hi >> 12) & 0xfffff;
Base = SDValue(
@@ -2231,14 +2290,15 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
}
// Ask how constant materialization would handle this constant.
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
// If the last instruction would be an ADDI, we can fold its immediate and
// emit the rest of the sequence as the base.
if (Seq.back().getOpcode() != RISCV::ADDI)
return false;
Lo12 = Seq.back().getImm();
+ if (IsPrefetch && (Lo12 & 0b11111) != 0)
+ return false;
// Drop the last instruction.
Seq.pop_back();
@@ -2419,14 +2479,85 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
return true;
}
+/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
+/// Offset should be all zeros.
+bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = Addr.getOperand(0);
+
+ // Early-out if not a valid offset.
+ if ((CVal & 0b11111) != 0) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+ }
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+ return true;
+ }
+ }
+
+ // Handle ADD with large immediates.
+ if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ assert(!isInt<12>(CVal) &&
+ "simm12 not already handled?");
+
+ // Handle immediates in the range [-4096, -2049] or [2017, 4065]. We can save
+ // one instruction by folding an adjustment (-2048 or 2016) into the address.
+ if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
+ int64_t Adj = CVal < 0 ? -2048 : 2016;
+ int64_t AdjustedOffset = CVal - Adj;
+ Base = SDValue(CurDAG->getMachineNode(
+ RISCV::ADDI, DL, VT, Addr.getOperand(0),
+ CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
+ 0);
+ Offset = CurDAG->getTargetConstant(Adj, DL, VT);
+ return true;
+ }
+
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
+ Offset, true)) {
+ // Insert an ADD instruction with the materialized Hi52 bits.
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
+ 0);
+ return true;
+ }
+ }
+
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
+ return true;
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+}
+
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
ShAmt = N;
+ // Peek through zext.
+ if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
+ ShAmt = ShAmt.getOperand(0);
+
// Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
// amount. If there is an AND on the shift amount, we can bypass it if it
// doesn't affect any of those bits.
- if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
+ if (ShAmt.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt.getOperand(1))) {
const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
// Since the max shift amount is a power of 2 we can subtract 1 to make a
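
SelectAddrRegImmLsb00000, added above, serves address patterns whose offset must keep bits [4:0] clear (the selectConstantAddr calls pass IsPrefetch=true, which appears to be for the prefetch instructions). Out-of-range constants are split into an ADDI on the base plus a residual offset of -2048 or 2016, both of which end in five zero bits. A quick check of one concrete split (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  // Both folded adjustments have their low five bits clear, so they remain
  // valid offsets for an encoding that requires imm[4:0] == 0.
  assert((2016 & 0b11111) == 0);
  assert((-2048 & 0b11111) == 0);

  // Example split for base + 3000: 3000 is outside the simm12 range
  // [-2048, 2047], so it becomes ADDI base, 984 plus an offset of 2016.
  int64_t CVal = 3000, Adj = 2016;
  int64_t AdjustedOffset = CVal - Adj; // 984, still a simm12
  assert(AdjustedOffset + Adj == CVal);
  assert(AdjustedOffset >= -2048 && AdjustedOffset <= 2047);
  return 0;
}
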
@@ -2729,6 +2860,36 @@ bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
return false;
}
+static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
+ unsigned Bits,
+ const TargetInstrInfo *TII) {
+ unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
+
+ if (!MCOpcode)
+ return false;
+
+ const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
+ const uint64_t TSFlags = MCID.TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+
+ bool HasGlueOp = User->getGluedNode() != nullptr;
+ unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
+ bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
+ bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
+ unsigned VLIdx =
+ User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
+ const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
+
+ if (UserOpNo == VLIdx)
+ return false;
+
+ auto NumDemandedBits =
+ RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
+ return NumDemandedBits && Bits >= *NumDemandedBits;
+}
+
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
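
hasAllNBitUsers, together with the new vectorPseudoHasAllNBitUsers helper for RVV pseudos that only demand a few low scalar bits, justifies forming W instructions even when the root is not a sext_inreg: if every user reads only the low 32 bits, the high bits of the producing add/sub/mul/shl are dead. A standalone check of the underlying arithmetic fact for ADD vs. ADDW (illustration only):

#include <cassert>
#include <cstdint>

// ADDW computes the 32-bit sum and sign-extends it; a user that only reads
// the low 32 bits cannot tell it apart from a full 64-bit ADD.
static uint32_t low32_of_add64(uint64_t a, uint64_t b) {
  return (uint32_t)(a + b);
}
static uint32_t low32_of_addw(uint64_t a, uint64_t b) {
  int32_t w = (int32_t)((uint32_t)a + (uint32_t)b); // what ADDW produces
  return (uint32_t)(int64_t)w;                      // low 32 bits of the result
}

int main() {
  uint64_t a = 0x1234'5678'9abc'def0ULL, b = 0x0fed'cba9'8765'4321ULL;
  assert(low32_of_add64(a, b) == low32_of_addw(a, b));
  return 0;
}
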
@@ -2751,6 +2912,11 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
+ // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
+ // the VT. Ensure the type is scalar to avoid wasting time on vectors.
+ if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
+ return false;
+
for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
// Users of this node should have already been instruction selected
@@ -2760,6 +2926,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
// TODO: Add more opcodes?
switch (User->getMachineOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
+ break;
return false;
case RISCV::ADDW:
case RISCV::ADDIW:
@@ -2937,27 +3105,41 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
return true;
}
+static SDValue findVSplat(SDValue N) {
+ if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ if (!N.getOperand(0).isUndef())
+ return SDValue();
+ N = N.getOperand(1);
+ }
+ SDValue Splat = N;
+ if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
+ Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
+ !Splat.getOperand(0).isUndef())
+ return SDValue();
+ assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
+ return Splat;
+}
+
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
+ SDValue Splat = findVSplat(N);
+ if (!Splat)
return false;
- assert(N.getNumOperands() == 3 && "Unexpected number of operands");
- SplatVal = N.getOperand(1);
+
+ SplatVal = Splat.getOperand(1);
return true;
}
-using ValidateFn = bool (*)(int64_t);
-
-static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
- SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget,
- ValidateFn ValidateImm) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
- !isa<ConstantSDNode>(N.getOperand(1)))
+static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ std::function<bool(int64_t)> ValidateImm) {
+ SDValue Splat = findVSplat(N);
+ if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
return false;
- assert(N.getNumOperands() == 3 && "Unexpected number of operands");
- int64_t SplatImm =
- cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
+ const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
+ assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
+ "Unexpected splat operand type");
// The semantics of RISCVISD::VMV_V_X_VL is that when the operand
// type is wider than the resulting vector element type: an implicit
@@ -2966,34 +3148,31 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
// any zero-extended immediate.
// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
// sign-extending to (XLenVT -1).
- MVT XLenVT = Subtarget.getXLenVT();
- assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
- "Unexpected splat operand type");
- MVT EltVT = N.getSimpleValueType().getVectorElementType();
- if (EltVT.bitsLT(XLenVT))
- SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
+ APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
+
+ int64_t SplatImm = SplatConst.getSExtValue();
if (!ValidateImm(SplatImm))
return false;
- SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
+ SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
return true;
}
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
- return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
- [](int64_t Imm) { return isInt<5>(Imm); });
+ return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
+ [](int64_t Imm) { return isInt<5>(Imm); });
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
- return selectVSplatSimmHelper(
+ return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget,
[](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
SDValue &SplatVal) {
- return selectVSplatSimmHelper(
+ return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
});
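
selectVSplatImmHelper sign-extends (or truncates) the XLen-typed splat constant to the element width before validating it, which is what lets an i8 splat of -1, arriving as the XLenVT value 255, match simm5 as described in the comment above. The same normalization in plain C++ (sketch only):

#include <cassert>
#include <cstdint>

// Sign-extend the low `Bits` bits of V, like APInt::sextOrTrunc followed by
// getSExtValue() for element widths narrower than XLEN.
static int64_t signExtend(uint64_t V, unsigned Bits) {
  unsigned Shift = 64 - Bits;
  return (int64_t)(V << Shift) >> Shift;
}

int main() {
  // An i8 splat of -1 reaches the selector as the XLenVT constant 255.
  int64_t Imm = signExtend(255, 8);
  assert(Imm == -1);
  // -1 is a valid simm5, so the .vi form of the instruction can be used.
  assert(Imm >= -16 && Imm <= 15);
  return 0;
}
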
@@ -3001,29 +3180,34 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
SDValue &SplatVal) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
- !isa<ConstantSDNode>(N.getOperand(1)))
- return false;
-
- int64_t SplatImm =
- cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
-
- if (!isUIntN(Bits, SplatImm))
- return false;
-
- SplatVal =
- CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
-
- return true;
+ return selectVSplatImmHelper(
+ N, SplatVal, *CurDAG, *Subtarget,
+ [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}
-bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) {
- if (N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ZERO_EXTEND) {
- if (!N.hasOneUse())
+bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
+ // Truncates are custom lowered during legalization.
+ auto IsTrunc = [this](SDValue N) {
+ if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
+ return false;
+ SDValue VL;
+ selectVLOp(N->getOperand(2), VL);
+ // Any vmset_vl is ok, since any bits past VL are undefined and we can
+ // assume they are set.
+ return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
+ isa<ConstantSDNode>(VL) &&
+ cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
+ };
+
+ // We can have multiple nested truncates, so unravel them all if needed.
+ while (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
+ if (!N.hasOneUse() ||
+ N.getValueType().getSizeInBits().getKnownMinValue() < 8)
return false;
N = N->getOperand(0);
}
+
return selectVSplat(N, SplatVal);
}
@@ -3038,8 +3222,12 @@ bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
MVT VT = CFP->getSimpleValueType(0);
- if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
- VT) >= 0)
+ // Even if this FPImm requires an additional FNEG (i.e., the second element
+ // of the returned pair is true), we still prefer FLI + FNEG over immediate
+ // materialization, as the latter might generate a longer instruction sequence.
+ if (static_cast<const RISCVTargetLowering *>(TLI)
+ ->getLegalZfaFPImm(APF, VT)
+ .first >= 0)
return false;
MVT XLenVT = Subtarget->getXLenVT();
@@ -3125,6 +3313,9 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::TH_MULAH:
case RISCV::TH_MULSW:
case RISCV::TH_MULSH:
+ if (N0.getValueType() == MVT::i32)
+ break;
+
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -3154,6 +3345,12 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
// Check the instruction defining V0; it needs to be a VMSET pseudo.
SDValue MaskSetter = Glued->getOperand(2);
+ // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
+ // from an extract_subvector or insert_subvector.
+ if (MaskSetter->isMachineOpcode() &&
+ MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
+ MaskSetter = MaskSetter->getOperand(0);
+
const auto IsVMSet = [](unsigned Opc) {
return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
@@ -3183,7 +3380,7 @@ static bool isImplicitDef(SDValue V) {
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
-bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
const RISCV::RISCVMaskedPseudoInfo *I =
RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
if (!I)
@@ -3222,7 +3419,12 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
if (auto *TGlued = Glued->getGluedNode())
Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
- SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+
+ if (!N->memoperands_empty())
+ CurDAG->setNodeMemRefs(Result, N->memoperands());
+
Result->setFlags(N->getFlags());
ReplaceUses(N, Result);
@@ -3230,21 +3432,11 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
}
static bool IsVMerge(SDNode *N) {
- unsigned Opc = N->getMachineOpcode();
- return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
- Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
- Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
- Opc == RISCV::PseudoVMERGE_VVM_M1 ||
- Opc == RISCV::PseudoVMERGE_VVM_M2 ||
- Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
+ return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}
static bool IsVMv(SDNode *N) {
- unsigned Opc = N->getMachineOpcode();
- return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
- Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
- Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
- Opc == RISCV::PseudoVMV_V_V_M8;
+ return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}
static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
@@ -3336,6 +3528,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
if (!Info)
return false;
+ // When Mask is not a true mask, this transformation is illegal for some
+ // operations whose results are affected by the mask, like viota.m.
+ if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
+ return false;
+
if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
// The vmerge instruction must be TU.
// FIXME: This could be relaxed, but we need to handle the policy for the
@@ -3503,10 +3700,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// Add the glue for the CopyToReg of mask->v0.
Ops.push_back(Glue);
- SDNode *Result =
+ MachineSDNode *Result =
CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
Result->setFlags(True->getFlags());
+ if (!cast<MachineSDNode>(True)->memoperands_empty())
+ CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
+
// Replace vmerge.vvm node by Result.
ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
@@ -3514,46 +3714,30 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
- // Try to transform Result to unmasked intrinsic.
- doPeepholeMaskedRVV(Result);
return true;
}
-// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
-// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
-bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
-#define CASE_VMERGE_TO_VMV(lmul) \
- case RISCV::PseudoVMERGE_VVM_##lmul: \
- NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
- break;
- unsigned NewOpc;
- switch (N->getMachineOpcode()) {
- default:
- llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
- CASE_VMERGE_TO_VMV(MF8)
- CASE_VMERGE_TO_VMV(MF4)
- CASE_VMERGE_TO_VMV(MF2)
- CASE_VMERGE_TO_VMV(M1)
- CASE_VMERGE_TO_VMV(M2)
- CASE_VMERGE_TO_VMV(M4)
- CASE_VMERGE_TO_VMV(M8)
- }
+bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+ bool MadeChange = false;
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
- return false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
- SDLoc DL(N);
- SDValue PolicyOp =
- CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
- SDNode *Result = CurDAG->getMachineNode(
- NewOpc, DL, N->getValueType(0),
- {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
- PolicyOp});
- ReplaceUses(N, Result);
- return true;
+ if (IsVMerge(N) || IsVMv(N))
+ MadeChange |= performCombineVMergeAndVOps(N);
+ }
+ return MadeChange;
}
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+/// If our passthru is an implicit_def, use noreg instead. This sidesteps
+/// issues with MachineCSE not being able to CSE expressions with
+/// IMPLICIT_DEF operands while preserving the semantic intent. See
+/// pr64282 for context. Note that this transform is the last peephole
+/// performed during DAG-to-DAG instruction selection.
+bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
bool MadeChange = false;
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -3562,18 +3746,34 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
if (N->use_empty() || !N->isMachineOpcode())
continue;
- if (IsVMerge(N) || IsVMv(N))
- MadeChange |= performCombineVMergeAndVOps(N);
- if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
- MadeChange |= performVMergeToVMv(N);
+ const unsigned Opc = N->getMachineOpcode();
+ if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
+ !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
+ !isImplicitDef(N->getOperand(0)))
+ continue;
+
+ SmallVector<SDValue> Ops;
+ Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
+ for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
+ SDValue Op = N->getOperand(I);
+ Ops.push_back(Op);
+ }
+
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ Result->setFlags(N->getFlags());
+ CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
+ ReplaceUses(N, Result);
+ MadeChange = true;
}
return MadeChange;
}
+
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new RISCVDAGToDAGISel(TM, OptLevel);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 281719c12e70..77e174135a59 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -30,7 +30,7 @@ public:
RISCVDAGToDAGISel() = delete;
explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine,
- CodeGenOpt::Level OptLevel)
+ CodeGenOptLevel OptLevel)
: SelectionDAGISel(ID, TargetMachine, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -43,7 +43,8 @@ public:
void Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -53,6 +54,7 @@ public:
bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) {
return SelectAddrRegImm(Addr, Base, Offset, true);
}
+ bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index, SDValue &Scale);
@@ -134,7 +136,9 @@ public:
}
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal);
- bool selectExtOneUseVSplat(SDValue N, SDValue &SplatVal);
+ // Matches the splat of a value which can be extended or truncated, such that
+ // only the bottom 8 bits are preserved.
+ bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal);
bool selectFPImm(SDValue N, SDValue &Imm);
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
@@ -183,9 +187,9 @@ public:
private:
bool doPeepholeSExtW(SDNode *Node);
- bool doPeepholeMaskedRVV(SDNode *Node);
+ bool doPeepholeMaskedRVV(MachineSDNode *Node);
bool doPeepholeMergeVVMFold();
- bool performVMergeToVMv(SDNode *N);
+ bool doPeepholeNoRegPassThru();
bool performCombineVMergeAndVOps(SDNode *N);
};
@@ -259,6 +263,7 @@ struct RISCVMaskedPseudoInfo {
uint16_t MaskedPseudo;
uint16_t UnmaskedPseudo;
uint8_t MaskOpIdx;
+ uint8_t MaskAffectsResult : 1;
};
#define GET_RISCVVSSEGTable_DECL
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f030982cb815..03e994586d0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,6 +75,10 @@ static cl::opt<int>
"use for creating a floating-point immediate value"),
cl::init(2));
+static cl::opt<bool>
+ RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
+ cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -113,6 +119,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
+ if (Subtarget.is64Bit() && RV64LegalI32)
+ addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtZfhOrZfhmin())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
@@ -145,6 +153,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
+ static const MVT::SimpleValueType BF16VecVTs[] = {
+ MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
+ MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
@@ -154,7 +165,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
auto addRegClassForRVV = [this](MVT VT) {
// Disable the smallest fractional LMUL types if ELEN is less than
// RVVBitsPerBlock.
- unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
if (VT.getVectorMinNumElements() < MinElts)
return;
@@ -183,10 +194,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegClassForRVV(VT);
}
- if (Subtarget.hasVInstructionsF16())
+ if (Subtarget.hasVInstructionsF16Minimal())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
+ if (Subtarget.hasVInstructionsBF16())
+ for (MVT VT : BF16VecVTs)
+ addRegClassForRVV(VT);
+
if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
@@ -228,8 +243,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setCondCodeAction(ISD::SETLE, XLenVT, Expand);
setCondCodeAction(ISD::SETGT, XLenVT, Custom);
@@ -238,6 +257,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::SETCC, MVT::i32, Promote);
+
setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -253,14 +275,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit()) {
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
-
- setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
- MVT::i32, Custom);
-
- setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
- MVT::i32, Custom);
+ if (!RV64LegalI32) {
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
+ MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
+ MVT::i32, Custom);
+ }
} else {
setLibcallName(
{RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -268,19 +290,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::MULO_I64, nullptr);
}
- if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
+ if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
- else if (Subtarget.is64Bit())
- setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
- else
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::MUL, MVT::i32, Promote);
+ } else if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::MUL, MVT::i128, Custom);
+ if (!RV64LegalI32)
+ setOperationAction(ISD::MUL, MVT::i32, Custom);
+ } else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
+ }
- if (!Subtarget.hasStdExtM())
+ if (!Subtarget.hasStdExtM()) {
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
XLenVT, Expand);
- else if (Subtarget.is64Bit())
- setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
- {MVT::i8, MVT::i16, MVT::i32}, Custom);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+ Promote);
+ } else if (Subtarget.is64Bit()) {
+ if (!RV64LegalI32)
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
+ }
+
+ if (RV64LegalI32 && Subtarget.is64Bit()) {
+ setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
+ setOperationAction(
+ {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
+ Expand);
+ }
setOperationAction(
{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
@@ -290,14 +329,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
- if (Subtarget.is64Bit())
+ if (!RV64LegalI32 && Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else if (Subtarget.hasVendorXTHeadBb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
+ } else if (Subtarget.hasVendorXCVbitmanip()) {
+ setOperationAction(ISD::ROTL, XLenVT, Expand);
} else {
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
}
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
@@ -307,37 +350,74 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Subtarget.hasVendorXTHeadBb())
? Legal
: Expand);
- // Zbkb can use rev8+brev8 to implement bitreverse.
- setOperationAction(ISD::BITREVERSE, XLenVT,
- Subtarget.hasStdExtZbkb() ? Custom : Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::BSWAP, MVT::i32,
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+ Subtarget.hasVendorXTHeadBb())
+ ? Promote
+ : Expand);
+
+
+ if (Subtarget.hasVendorXCVbitmanip()) {
+ setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
+ } else {
+ // Zbkb can use rev8+brev8 to implement bitreverse.
+ setOperationAction(ISD::BITREVERSE, XLenVT,
+ Subtarget.hasStdExtZbkb() ? Custom : Expand);
+ }
if (Subtarget.hasStdExtZbb()) {
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
Legal);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
+ Promote);
- if (Subtarget.is64Bit())
- setOperationAction(
- {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
- MVT::i32, Custom);
- } else {
- setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
+ if (Subtarget.is64Bit()) {
+ if (RV64LegalI32)
+ setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+ else
+ setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
+ }
+ } else if (!Subtarget.hasVendorXCVbitmanip()) {
+ setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
}
- if (Subtarget.hasVendorXTHeadBb()) {
- setOperationAction(ISD::CTLZ, XLenVT, Legal);
-
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+ Subtarget.hasVendorXCVbitmanip()) {
// We need the custom lowering to make sure that the resulting sequence
// for the 32bit case is efficient on 64bit targets.
- if (Subtarget.is64Bit())
- setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+ if (Subtarget.is64Bit()) {
+ if (RV64LegalI32) {
+ setOperationAction(ISD::CTLZ, MVT::i32,
+ Subtarget.hasStdExtZbb() ? Legal : Promote);
+ if (!Subtarget.hasStdExtZbb())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
+ } else
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+ }
+ } else {
+ setOperationAction(ISD::CTLZ, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
}
- if (Subtarget.is64Bit())
+ if (!RV64LegalI32 && Subtarget.is64Bit() &&
+ !Subtarget.hasShortForwardBranchOpt())
setOperationAction(ISD::ABS, MVT::i32, Custom);
+ // We can use PseudoCCSUB to implement ABS.
+ if (Subtarget.hasShortForwardBranchOpt())
+ setOperationAction(ISD::ABS, XLenVT, Legal);
+
if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::SELECT, MVT::i32, Promote);
+
static const unsigned FPLegalNodeTypes[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
@@ -361,7 +441,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
-
+
+ static const unsigned ZfhminZfbfminPromoteOps[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
+ ISD::FSUB, ISD::FMUL, ISD::FMA,
+ ISD::FDIV, ISD::FSQRT, ISD::FABS,
+ ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
+ ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
+ ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::SELECT};
+
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
@@ -369,6 +460,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
+ setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
+ setOperationAction(ISD::FREM, MVT::bf16, Promote);
+ // FIXME: Need to promote bf16 FCOPYSIGN to f32, but
+ // DAGCombiner::visitFP_ROUND probably needs improvements first.
+ setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
}
if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
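
The Zfbfmin path above shares the ZfhminZfbfminPromoteOps table and marks most bf16 arithmetic as Promote, i.e. the operation is carried out in f32 and the result is narrowed back to bf16. A sketch of what that means for a single FADD, using the fact that bf16 is the high half of the f32 encoding (the narrowing here truncates; the real lowering rounds):

#include <cstdint>
#include <cstring>

// bf16 <-> f32: bf16 is the high 16 bits of the f32 encoding.
static float bf16_to_f32(uint16_t B) {
  uint32_t U = (uint32_t)B << 16;
  float F;
  std::memcpy(&F, &U, sizeof(F));
  return F;
}
static uint16_t f32_to_bf16(float F) { // truncating narrow for the sketch
  uint32_t U;
  std::memcpy(&U, &F, sizeof(U));
  return (uint16_t)(U >> 16);
}

// "Promote" for bf16 FADD: widen both operands to f32, add, narrow back.
static uint16_t bf16_fadd(uint16_t A, uint16_t B) {
  return f32_to_bf16(bf16_to_f32(A) + bf16_to_f32(B));
}

int main() {
  return bf16_fadd(0x3F80, 0x3F80) == 0x4000 ? 0 : 1; // 1.0 + 1.0 == 2.0
}
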
@@ -379,18 +477,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
} else {
- static const unsigned ZfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
- ISD::FSUB, ISD::FMUL, ISD::FMA,
- ISD::FDIV, ISD::FSQRT, ISD::FABS,
- ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
- ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
- ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
- ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::SELECT};
-
- setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
+ setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
@@ -409,7 +496,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Subtarget.hasStdExtZfa() ? Legal : Promote);
setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
- ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
+ ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10},
MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
@@ -439,6 +527,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
@@ -481,6 +571,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
@@ -504,6 +596,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
XLenVT, Legal);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
+ MVT::i32, Legal);
+
setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
@@ -548,6 +645,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::VSCALE, MVT::i32, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
@@ -576,7 +675,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
- ISD::VP_ABS};
+ ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
@@ -588,7 +687,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
- ISD::VP_FRINT, ISD::VP_FNEARBYINT};
+ ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
+ ISD::EXPERIMENTAL_VP_REVERSE};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -659,9 +759,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
- setTruncStoreAction(OtherVT, VT, Expand);
- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
- VT, Expand);
+ setTruncStoreAction(VT, OtherVT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
+ OtherVT, Expand);
}
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
@@ -673,6 +773,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
@@ -695,8 +797,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
Legal);
- setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
-
// Custom-lower extensions and truncations from/to mask types.
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
VT, Custom);
@@ -712,7 +812,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
-
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
setOperationAction(
{ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
@@ -751,8 +851,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
- VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
+ OtherVT, Expand);
}
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
@@ -761,15 +861,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Splice
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+ if (Subtarget.hasStdExtZvkb()) {
+ setOperationAction(ISD::BSWAP, VT, Legal);
+ setOperationAction(ISD::VP_BSWAP, VT, Custom);
+ } else {
+ setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
+ }
+
if (Subtarget.hasStdExtZvbb()) {
- setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
- setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Legal);
+ setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
VT, Custom);
} else {
- setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
- setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
+ setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
@@ -784,8 +891,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
VT, Custom);
}
-
- setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
}
}
@@ -802,6 +907,27 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
};
+ // TODO: support more ops.
+ static const unsigned ZvfhminPromoteOps[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
+ ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
+ ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+
+ // TODO: support more vp ops.
+ static const unsigned ZvfhminPromoteVPOps[] = {
+ ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
+ ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
+ ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
+ ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
+ ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
+ ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
+ ISD::VP_FNEARBYINT, ISD::VP_SETCC};
+
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -817,6 +943,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setCondCodeAction(VFPCCToExpand, VT, Expand);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
@@ -833,6 +960,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP10, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
@@ -891,6 +1019,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
SetCommonVFPActions(VT);
}
+ } else if (Subtarget.hasVInstructionsF16Minimal()) {
+ for (MVT VT : F16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
+ ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
+ VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ // load/store
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+ // Custom split nxv32f16 since nxv32f32 is not legal.
+ if (VT == MVT::nxv32f16) {
+ setOperationAction(ZvfhminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
+ continue;
+ }
+ // Promote the remaining ops to the equivalent f32 vector type.
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ }
}
if (Subtarget.hasVInstructionsF32()) {
@@ -922,8 +1082,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
- OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
+ OtherVT, Expand);
}
// Custom lower fixed vector undefs to scalable vector undefs to avoid
@@ -986,6 +1146,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
ISD::VP_SETCC, ISD::VP_TRUNCATE},
VT, Custom);
+
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
continue;
}
@@ -1039,13 +1201,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(IntegerVPOps, VT, Custom);
- // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
- // range of f32.
- EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- if (isTypeLegal(FloatVT))
- setOperationAction(
- {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
- Custom);
+ if (Subtarget.hasStdExtZvkb())
+ setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
+
+ if (Subtarget.hasStdExtZvbb()) {
+ setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
+ VT, Custom);
+ } else {
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
+ // range of f32.
+ EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT))
+ setOperationAction(
+ {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
+ }
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1066,6 +1237,34 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);
+ if (VT.getVectorElementType() == MVT::f16 &&
+ !Subtarget.hasVInstructionsF16()) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction(
+ {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
+ ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
+ VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ // Don't promote f16 vector operations to f32 if f32 vector type is
+ // not legal.
+ // TODO: could split the f16 vector into two vectors and do promotion.
+ if (!isTypeLegal(F32VecVT))
+ continue;
+ setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ continue;
+ }
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
@@ -1088,7 +1287,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
- ISD::IS_FPCLASS},
+ ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
@@ -1132,14 +1331,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+ if (Subtarget.hasStdExtA()) {
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
+ if (RV64LegalI32 && Subtarget.is64Bit())
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ }
+
if (Subtarget.hasForcedAtomics()) {
- // Set atomic rmw/cas operations to expand to force __sync libcalls.
+ // Force __sync libcalls to be emitted for atomic rmw/cas operations.
setOperationAction(
{ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
- XLenVT, Expand);
+ XLenVT, LibCall);
}
if (Subtarget.hasVendorXTHeadMemIdx()) {
@@ -1166,11 +1371,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
- setMinimumJumpTableEntries(5);
-
- // Jumps are expensive, compared to logic
- setJumpIsExpensive();
-
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
@@ -1197,7 +1397,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
- ISD::CONCAT_VECTORS});
+ ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+ ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
+ ISD::INSERT_VECTOR_ELT});
if (Subtarget.hasVendorXTHeadMemPair())
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
@@ -1239,7 +1441,7 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
return true;
// Don't allow VF=1 if those types aren't legal.
- if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+ if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
return true;
// VLEN=32 support is incomplete.
@@ -1602,11 +1804,12 @@ bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
}
bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
- return Subtarget.hasStdExtZbb();
+ return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
- return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
+ return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
+ Subtarget.hasVendorXCVbitmanip();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
@@ -1677,7 +1880,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
// replace. If we don't support unaligned scalar mem, prefer the constant
// pool.
// TODO: Can the caller pass down the alignment?
- if (!Subtarget.enableUnalignedScalarMem())
+ if (!Subtarget.hasFastUnalignedAccess())
return true;
// Prefer to keep the load if it would require many instructions.
@@ -1686,8 +1889,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
// TODO: Should we keep the load only when we're definitely going to emit a
// constant pool?
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}
@@ -1844,8 +2046,11 @@ bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
// If the vector op is supported, but the scalar op is not, the transform may
// not be worthwhile.
+ // Permit a vector binary operation to be converted to a scalar binary
+ // operation that is custom lowered for an illegal type.
EVT ScalarVT = VecVT.getScalarType();
- return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+ return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
+ isOperationCustom(Opc, ScalarVT);
}
bool RISCVTargetLowering::isOffsetFoldingLegal(
@@ -1857,11 +2062,17 @@ bool RISCVTargetLowering::isOffsetFoldingLegal(
return false;
}
-// Returns 0-31 if the fli instruction is available for the type and this is
-// legal FP immediate for the type. Returns -1 otherwise.
-int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
+// Return one of the following:
+// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
+// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
+// positive counterpart, which will be materialized from the first returned
+// element. The second returned element indicates that an FNEG should be
+// emitted after the FLI.
+// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
+std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
+ EVT VT) const {
if (!Subtarget.hasStdExtZfa())
- return -1;
+ return std::make_pair(-1, false);
bool IsSupportedVT = false;
if (VT == MVT::f16) {
@@ -1874,9 +2085,14 @@ int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
}
if (!IsSupportedVT)
- return -1;
+ return std::make_pair(-1, false);
- return RISCVLoadFPImm::getLoadFPImm(Imm);
+ int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
+ if (Index < 0 && Imm.isNegative())
+ // Try the combination of its positive counterpart + FNEG.
+ return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
+ else
+ return std::make_pair(Index, false);
}
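
The pair returned by getLegalZfaFPImm above encodes a small trick: a negative immediate whose magnitude is in the fli table can still be materialized as fli of the positive value followed by fneg. A minimal standalone sketch of that selection, where fliIndex is a hypothetical stand-in for RISCVLoadFPImm::getLoadFPImm:

#include <cmath>
#include <functional>
#include <utility>

// Sketch only: prefer a direct fli encoding; otherwise, for a negative
// immediate, try its positive counterpart and request a trailing fneg.
std::pair<int, bool> selectFLI(double Imm,
                               const std::function<int(double)> &fliIndex) {
  int Index = fliIndex(Imm);          // 0-31 if encodable, -1 otherwise
  if (Index < 0 && std::signbit(Imm))
    return {fliIndex(-Imm), true};    // materialize |Imm| with fli, then fneg
  return {Index, false};
}

A caller such as isFPImmLegal only needs the first element of the pair to be non-negative, which is how the check below uses it.
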
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
@@ -1888,11 +2104,13 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
IsLegalVT = Subtarget.hasStdExtFOrZfinx();
else if (VT == MVT::f64)
IsLegalVT = Subtarget.hasStdExtDOrZdinx();
+ else if (VT == MVT::bf16)
+ IsLegalVT = Subtarget.hasStdExtZfbfmin();
if (!IsLegalVT)
return false;
- if (getLegalZfaFPImm(Imm, VT) >= 0)
+ if (getLegalZfaFPImm(Imm, VT).first >= 0)
return true;
// Cannot create a 64 bit floating-point immediate value for rv32.
@@ -1901,14 +2119,17 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
// -0.0 can be created by fmv + fneg.
return Imm.isZero();
}
- // Special case: the cost for -0.0 is 1.
- int Cost = Imm.isNegZero()
- ? 1
- : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
- Subtarget.getXLen(),
- Subtarget.getFeatureBits());
- // If the constantpool data is already in cache, only Cost 1 is cheaper.
- return Cost < FPImmCost;
+
+ // Special case: fmv + fneg
+ if (Imm.isNegZero())
+ return true;
+
+ // Building an integer and then converting requires a fmv at the end of
+ // the integer sequence.
+ const int Cost =
+ 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
+ Subtarget);
+ return Cost <= FPImmCost;
}
// TODO: This is very conservative.
@@ -1953,7 +2174,12 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
!Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
return MVT::f32;
- return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+ MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
+ return MVT::i64;
+
+ return PartVT;
}
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
@@ -1968,6 +2194,21 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
+unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
+ IntermediateVT = MVT::i64;
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
+ RegisterVT = MVT::i64;
+
+ return NumRegs;
+}
+
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
@@ -2010,7 +2251,7 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
break;
case ISD::SETLT:
- // Convert X < 1 to 0 <= X.
+ // Convert X < 1 to 0 >= X.
if (C == 1) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
@@ -2228,7 +2469,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
return false;
break;
case MVT::f16:
- if (!Subtarget.hasVInstructionsF16())
+ if (!Subtarget.hasVInstructionsF16Minimal())
return false;
break;
case MVT::f32:
@@ -2242,7 +2483,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
}
// Reject elements larger than ELEN.
- if (EltVT.getSizeInBits() > Subtarget.getELEN())
+ if (EltVT.getSizeInBits() > Subtarget.getELen())
return false;
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
@@ -2271,7 +2512,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
"Expected legal fixed length vector!");
unsigned MinVLen = Subtarget.getRealMinVLen();
- unsigned MaxELen = Subtarget.getELEN();
+ unsigned MaxELen = Subtarget.getELen();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
@@ -2348,16 +2589,32 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
}
-static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
+ SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
+ // If we know the exact VLEN, our VL is exactly equal to VLMAX, and
+ // we can't encode the AVL as an immediate, use the VLMAX encoding.
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
+ if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
+ return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
+
return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
}
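
The new ContainerVT parameter lets getVLOp compare the requested element count against the VLMAX bounds: when the exact VLEN is known, the count equals VLMAX, and it is too large for the 5-bit immediate of vsetivli (greater than 31), the X0/VLMAX encoding is preferred over materializing the count in a register. A small sketch of that decision; the immediate-versus-register split is really made later during VSETVLI insertion and is shown here only to motivate the NumElts > 31 check:

#include <cstdint>

enum class AVLEncoding { Immediate, Register, VLMax };

// Hypothetical classification mirroring the check above. MinVLMAX/MaxVLMAX
// are the bounds from computeVLMAXBounds for the container type.
AVLEncoding classifyAVL(uint64_t NumElts, uint64_t MinVLMAX,
                        uint64_t MaxVLMAX) {
  if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
    return AVLEncoding::VLMax;     // vsetvli ..., x0 (VL = VLMAX)
  if (NumElts <= 31)
    return AVLEncoding::Immediate; // vsetivli with a 5-bit uimm
  return AVLEncoding::Register;    // AVL held in a GPR
}
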
static std::pair<SDValue, SDValue>
+getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector() && "Expecting a scalable vector");
+ SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
+ SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
+ return {Mask, VL};
+}
+
+static std::pair<SDValue, SDValue>
getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
- SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
+ SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
return {Mask, VL};
}
@@ -2373,18 +2630,7 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
Subtarget);
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
- MVT XLenVT = Subtarget.getXLenVT();
- SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
- SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
- return {Mask, VL};
-}
-
-// As above but assuming the given type is a scalable vector type.
-static std::pair<SDValue, SDValue>
-getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- assert(VecVT.isScalableVector() && "Expecting a scalable vector");
- return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
+ return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
@@ -2394,6 +2640,25 @@ SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
VecVT.getVectorElementCount());
}
+std::pair<unsigned, unsigned>
+RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector() && "Expected scalable vector");
+
+ unsigned EltSize = VecVT.getScalarSizeInBits();
+ unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
+
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ unsigned MinVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+
+ return std::make_pair(MinVLMAX, MaxVLMAX);
+}
+
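
computeVLMAXBounds simply evaluates VLMAX = (VLEN / SEW) * LMUL at the subtarget's minimum and maximum VLEN, with LMUL taken as the type's known-minimum size divided by RVVBitsPerBlock (64). A standalone sketch under that assumption, with a worked example for nxv4i32 (SEW = 32, LMUL = 2) and VLEN in [128, 512]:

#include <cstdint>

constexpr uint64_t RVVBitsPerBlock = 64;

// VLMAX = (VLEN / SEW) * LMUL, with LMUL = MinSizeBits / RVVBitsPerBlock.
// The expression is reassociated so fractional LMUL does not truncate to 0.
constexpr uint64_t computeVLMAX(uint64_t VLenBits, uint64_t EltSizeBits,
                                uint64_t MinSizeBits) {
  return (VLenBits / EltSizeBits) * MinSizeBits / RVVBitsPerBlock;
}

static_assert(computeVLMAX(128, 32, 128) == 8, "min VLMAX for nxv4i32");
static_assert(computeVLMAX(512, 32, 128) == 32, "max VLMAX for nxv4i32");
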
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
@@ -2407,6 +2672,51 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
return false;
}
+InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
+ // TODO: We assume here that the reciprocal throughput is 1 for LMUL_1,
+ // but it is implementation-defined.
+ if (!VT.isVector())
+ return InstructionCost::getInvalid();
+ unsigned DLenFactor = Subtarget.getDLenFactor();
+ unsigned Cost;
+ if (VT.isScalableVector()) {
+ unsigned LMul;
+ bool Fractional;
+ std::tie(LMul, Fractional) =
+ RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
+ if (Fractional)
+ Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
+ else
+ Cost = (LMul * DLenFactor);
+ } else {
+ Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
+ }
+ return Cost;
+}
+
+
+/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
+/// is generally quadratic in the number of vreg implied by LMUL. Note that
+/// operand (index and possibly mask) are handled separately.
+InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
+ return getLMULCost(VT) * getLMULCost(VT);
+}
+
+/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
+/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
+ return getLMULCost(VT);
+}
+
+/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
+/// for the type VT. (This does not cover the vslide1up or vslide1down
+/// variants.) Slides may be linear in the number of vregs implied by LMUL,
+/// or may track the vrgather.vv cost. It is implementation-dependent.
+InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
+ return getLMULCost(VT);
+}
+
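
Taken together, the three helpers above say that a slide is roughly linear in the number of vector registers an operand occupies, while vrgather.vv is roughly quadratic. A standalone sketch of the arithmetic, assuming the decoded (LMul, Fractional) pair from RISCVVType::decodeVLMUL (for fractional LMUL, LMul holds the denominator, e.g. 2 for mf2) and a DLenFactor of 2 for a core whose datapath is half of VLEN:

#include <cstdint>

// Mirrors the scalable-vector branch of getLMULCost above.
constexpr uint64_t lmulCost(uint64_t LMul, bool Fractional,
                            uint64_t DLenFactor) {
  if (Fractional)
    return LMul <= DLenFactor ? DLenFactor / LMul : 1;
  return LMul * DLenFactor;
}

constexpr uint64_t vrgatherVVCost(uint64_t LMul, bool Fractional,
                                  uint64_t DLenFactor) {
  const uint64_t C = lmulCost(LMul, Fractional, DLenFactor);
  return C * C; // quadratic in the implied number of vregs
}

// Example: an LMUL=4 operand on a DLEN = VLEN/2 core.
static_assert(lmulCost(4, false, 2) == 8, "slide-style cost");
static_assert(vrgatherVVCost(4, false, 2) == 64, "gather cost");
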
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// RISC-V FP-to-int conversions saturate to the destination register size, but
@@ -2420,9 +2730,10 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
if (!DstVT.isVector()) {
- // In absense of Zfh, promote f16 to f32, then saturate the result.
- if (Src.getSimpleValueType() == MVT::f16 &&
- !Subtarget.hasStdExtZfhOrZhinx()) {
+ // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
+ // the result.
+ if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
+ Src.getValueType() == MVT::bf16) {
Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
}
@@ -2778,6 +3089,31 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
}
+// Expand vector LRINT and LLRINT by converting to the integer domain.
+static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isVector() && "Unexpected type");
+
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ MVT ContainerVT = VT;
+
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Truncated =
+ DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
+
+ if (!VT.isFixedLengthVector())
+ return Truncated;
+
+ return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
+}
+
static SDValue
getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
@@ -2802,6 +3138,14 @@ getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
}
+static MVT getLMUL1VT(MVT VT) {
+ assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector MVT");
+ return MVT::getScalableVectorVT(
+ VT.getVectorElementType(),
+ RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
+}
+
struct VIDSequence {
int64_t StepNumerator;
unsigned StepDenominator;
@@ -2975,8 +3319,124 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
-static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+
+/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
+/// which constitute a large proportion of the elements. In such cases we can
+/// splat a vector with the dominant element and make up the shortfall with
+/// INSERT_VECTOR_ELTs. Returns SDValue() if not profitable.
+/// Note that this includes vectors of 2 elements by association. The
+/// upper-most element is the "dominant" one, allowing us to use a splat to
+/// "insert" the upper element, and an insert of the lower element at position
+/// 0, which improves codegen.
+static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+ SDLoc DL(Op);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ unsigned NumElts = Op.getNumOperands();
+
+ SDValue DominantValue;
+ unsigned MostCommonCount = 0;
+ DenseMap<SDValue, unsigned> ValueCounts;
+ unsigned NumUndefElts =
+ count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+
+ // Track the number of scalar loads we know we'd be inserting, estimated as
+ // any non-zero floating-point constant. Other kinds of element are either
+ // already in registers or are materialized on demand. The threshold at which
+ // a vector load is more desirable than several scalar materialization and
+ // vector-insertion instructions is not known.
+ unsigned NumScalarLoads = 0;
+
+ for (SDValue V : Op->op_values()) {
+ if (V.isUndef())
+ continue;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+ if (0 == Count)
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
+ NumScalarLoads += !CFP->isExactlyValue(+0.0);
+
+ // Is this value dominant? In case of a tie, prefer the highest element as
+ // it's cheaper to insert near the beginning of a vector than it is at the
+ // end.
+ if (++Count >= MostCommonCount) {
+ DominantValue = V;
+ MostCommonCount = Count;
+ }
+ }
+
+ assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
+ unsigned NumDefElts = NumElts - NumUndefElts;
+ unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
+
+ // Don't perform this optimization when optimizing for size, since
+ // materializing elements and inserting them tends to cause code bloat.
+ if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
+ (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
+ ((MostCommonCount > DominantValueCountThreshold) ||
+ (ValueCounts.size() <= Log2_32(NumDefElts)))) {
+ // Start by splatting the most common element.
+ SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
+
+ DenseSet<SDValue> Processed{DominantValue};
+
+ // We can handle an insert into the last element (of a splat) via
+ // v(f)slide1down. This is slightly better than the vslideup insert
+ // lowering as it avoids the need for a vector group temporary. It
+ // is also better than using vmerge.vx as it avoids the need to
+ // materialize the mask in a vector register.
+ if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
+ !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
+ LastOp != DominantValue) {
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ auto OpCode =
+ VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+ if (!VT.isFloatingPoint())
+ LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
+ Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ LastOp, Mask, VL);
+ Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
+ Processed.insert(LastOp);
+ }
+
+ MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
+ for (const auto &OpIdx : enumerate(Op->ops())) {
+ const SDValue &V = OpIdx.value();
+ if (V.isUndef() || !Processed.insert(V).second)
+ continue;
+ if (ValueCounts[V] == 1) {
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
+ DAG.getConstant(OpIdx.index(), DL, XLenVT));
+ } else {
+ // Blend in all instances of this value using a VSELECT, using a
+ // mask where each bit signals whether that element is the one
+ // we're after.
+ SmallVector<SDValue> Ops;
+ transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
+ return DAG.getConstant(V == V1, DL, XLenVT);
+ });
+ Vec = DAG.getNode(ISD::VSELECT, DL, VT,
+ DAG.getBuildVector(SelMaskTy, DL, Ops),
+ DAG.getSplatBuildVector(VT, DL, V), Vec);
+ }
+ }
+
+ return Vec;
+ }
+
+ return SDValue();
+}
+
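
A scalar model of the strategy implemented above, using plain integers in place of SDValues: pick the most frequent element (ties resolved toward later positions, as in the ++Count >= MostCommonCount update), splat it, and record the positions that still need an explicit insert. Undef handling, the vslide1down special case, and the profitability checks are omitted; planDominantSplat is an illustrative name, not part of the target code:

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

struct DominantPlan {
  int Splat;                                        // value to splat first
  std::vector<std::pair<std::size_t, int>> Inserts; // (index, value) patches
};

DominantPlan planDominantSplat(const std::vector<int> &Elts) {
  std::map<int, unsigned> Counts;
  int Dominant = Elts.front();
  unsigned Best = 0;
  for (int V : Elts)
    if (++Counts[V] >= Best) {
      Dominant = V;
      Best = Counts[V];
    }
  DominantPlan Plan{Dominant, {}};
  for (std::size_t I = 0; I < Elts.size(); ++I)
    if (Elts[I] != Dominant)
      Plan.Inserts.push_back({I, Elts[I]});
  return Plan;
}

// {1, 1, 7, 1, 1, 1, 1, 1} -> splat 1, then a single insert of 7 at index 2.
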
+static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
@@ -3008,94 +3468,68 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
- NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- // If we have to use more than one INSERT_VECTOR_ELT then this
- // optimization is likely to increase code size; avoid peforming it in
- // such a case. We can use a load from a constant pool in this case.
- if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
- return SDValue();
- // Now we can create our integer vector type. Note that it may be larger
- // than the resulting mask type: v4i1 would use v1i8 as its integer type.
- unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
- MVT IntegerViaVecVT =
- MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
- IntegerViaVecElts);
-
- uint64_t Bits = 0;
- unsigned BitPos = 0, IntegerEltIdx = 0;
- SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
-
- for (unsigned I = 0; I < NumElts;) {
- SDValue V = Op.getOperand(I);
- bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
- Bits |= ((uint64_t)BitValue << BitPos);
- ++BitPos;
- ++I;
-
- // Once we accumulate enough bits to fill our scalar type or process the
- // last element, insert into our vector and clear our accumulated data.
- if (I % NumViaIntegerBits == 0 || I == NumElts) {
- if (NumViaIntegerBits <= 32)
- Bits = SignExtend64<32>(Bits);
- SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
- Elts[IntegerEltIdx] = Elt;
- Bits = 0;
- BitPos = 0;
- IntegerEltIdx++;
- }
- }
-
- SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
-
- if (NumElts < NumViaIntegerBits) {
- // If we're producing a smaller vector than our minimum legal integer
- // type, bitcast to the equivalent (known-legal) mask type, and extract
- // our final mask.
- assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
- Vec = DAG.getBitcast(MVT::v8i1, Vec);
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
- DAG.getConstant(0, DL, XLenVT));
- } else {
- // Else we must have produced an integer type with the same size as the
- // mask type; bitcast for the final result.
- assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
- Vec = DAG.getBitcast(VT, Vec);
+ NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
+ // If we have to use more than one INSERT_VECTOR_ELT then this
+ // optimization is likely to increase code size; avoid performing it in
+ // such a case. We can use a load from a constant pool in this case.
+ if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
+ return SDValue();
+ // Now we can create our integer vector type. Note that it may be larger
+ // than the resulting mask type: v4i1 would use v1i8 as its integer type.
+ unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
+ MVT IntegerViaVecVT =
+ MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
+ IntegerViaVecElts);
+
+ uint64_t Bits = 0;
+ unsigned BitPos = 0, IntegerEltIdx = 0;
+ SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
+
+ for (unsigned I = 0; I < NumElts;) {
+ SDValue V = Op.getOperand(I);
+ bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
+ Bits |= ((uint64_t)BitValue << BitPos);
+ ++BitPos;
+ ++I;
+
+ // Once we accumulate enough bits to fill our scalar type or process the
+ // last element, insert into our vector and clear our accumulated data.
+ if (I % NumViaIntegerBits == 0 || I == NumElts) {
+ if (NumViaIntegerBits <= 32)
+ Bits = SignExtend64<32>(Bits);
+ SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
+ Elts[IntegerEltIdx] = Elt;
+ Bits = 0;
+ BitPos = 0;
+ IntegerEltIdx++;
}
-
- return Vec;
}
- // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
- // vector type, we have a legal equivalently-sized i8 type, so we can use
- // that.
- MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
- SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
+ SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
- SDValue WideVec;
- if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
- // For a splat, perform a scalar truncate before creating the wider
- // vector.
- assert(Splat.getValueType() == XLenVT &&
- "Unexpected type for i1 splat value");
- Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
- DAG.getConstant(1, DL, XLenVT));
- WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
+ if (NumElts < NumViaIntegerBits) {
+ // If we're producing a smaller vector than our minimum legal integer
+ // type, bitcast to the equivalent (known-legal) mask type, and extract
+ // our final mask.
+ assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
+ Vec = DAG.getBitcast(MVT::v8i1, Vec);
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
+ DAG.getConstant(0, DL, XLenVT));
} else {
- SmallVector<SDValue, 8> Ops(Op->op_values());
- WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
- SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
- WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
+ // Else we must have produced an integer type with the same size as the
+ // mask type; bitcast for the final result.
+ assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
+ Vec = DAG.getBitcast(VT, Vec);
}
- return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
+ return Vec;
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
- if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
- return Gather;
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
Splat =
DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
@@ -3142,18 +3576,16 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
+ SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
}
if (StepDenominator != 1) {
- SDValue SplatStep = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+ SDValue SplatStep =
+ DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
- SDValue SplatAddend = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
+ SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
VID);
}
@@ -3165,6 +3597,48 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
}
+ // For very small build_vectors, use a single scalar insert of a constant.
+ // TODO: Base this on constant rematerialization cost, not size.
+ const unsigned EltBitSize = VT.getScalarSizeInBits();
+ if (VT.getSizeInBits() <= 32 &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
+ "Unexpected sequence type");
+ // If we can use the original VL with the modified element type, this
+ // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
+ // be moved into InsertVSETVLI?
+ unsigned ViaVecLen =
+ (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
+ MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
+
+ uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
+ uint64_t SplatValue = 0;
+ // Construct the amalgamated value at this larger vector type.
+ for (const auto &OpIdx : enumerate(Op->op_values())) {
+ const auto &SeqV = OpIdx.value();
+ if (!SeqV.isUndef())
+ SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
+ << (OpIdx.index() * EltBitSize));
+ }
+
+ // On RV64, sign-extend from 32 to 64 bits where possible in order to
+ // achieve better constant materialization.
+ if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
+ SplatValue = SignExtend64<32>(SplatValue);
+
+ SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
+ DAG.getUNDEF(ViaVecVT),
+ DAG.getConstant(SplatValue, DL, XLenVT),
+ DAG.getConstant(0, DL, XLenVT));
+ if (ViaVecLen != 1)
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ MVT::getVectorVT(ViaIntVT, 1), Vec,
+ DAG.getConstant(0, DL, XLenVT));
+ return DAG.getBitcast(VT, Vec);
+ }
+
+
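
The loop above amalgamates all constant elements into one scalar of the vector's total width, element 0 in the least significant bits, so a v4i8 or v2i16 constant becomes a single integer insert. On RV64 the 32-bit case is additionally sign-extended to help constant materialization. A standalone sketch of just the packing step (packConstants is an illustrative name):

#include <cstddef>
#include <cstdint>
#include <vector>

// Pack constant elements into one integer, element 0 in the low bits.
// Assumes EltBits <= 32 and the total width fits in 64 bits, as above.
uint64_t packConstants(const std::vector<uint64_t> &Elts, unsigned EltBits) {
  const uint64_t EltMask = (1ull << EltBits) - 1;
  uint64_t Packed = 0;
  for (std::size_t I = 0; I < Elts.size(); ++I)
    Packed |= (Elts[I] & EltMask) << (I * EltBits);
  return Packed;
}

// Example: the v4i8 constant {0x01, 0x02, 0x03, 0x04} packs to 0x04030201.
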
// Attempt to detect "hidden" splats, which only reveal themselves as splats
// when re-interpreted as a vector with a larger element type. For example,
// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
@@ -3173,7 +3647,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// TODO: This optimization could also work on non-constant splats, but it
// would require bit-manipulation instructions to construct the splat value.
SmallVector<SDValue> Sequence;
- unsigned EltBitSize = VT.getScalarSizeInBits();
const auto *BV = cast<BuildVectorSDNode>(Op);
if (VT.isInteger() && EltBitSize < 64 &&
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
@@ -3181,11 +3654,19 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
(Sequence.size() * EltBitSize) <= 64) {
unsigned SeqLen = Sequence.size();
MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
- MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
ViaIntVT == MVT::i64) &&
"Unexpected sequence type");
+ // If we can use the original VL with the modified element type, this
+ // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
+ // be moved into InsertVSETVLI?
+ const unsigned RequiredVL = NumElts / SeqLen;
+ const unsigned ViaVecLen =
+ (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
+ NumElts : RequiredVL;
+ MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
+
unsigned EltIdx = 0;
uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
uint64_t SplatValue = 0;
@@ -3219,94 +3700,171 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getUNDEF(ViaContainerVT),
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
+ if (ViaVecLen != RequiredVL)
+ Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
+ DAG.getConstant(0, DL, XLenVT));
return DAG.getBitcast(VT, Splat);
}
}
- // Try and optimize BUILD_VECTORs with "dominant values" - these are values
- // which constitute a large proportion of the elements. In such cases we can
- // splat a vector with the dominant element and make up the shortfall with
- // INSERT_VECTOR_ELTs.
- // Note that this includes vectors of 2 elements by association. The
- // upper-most element is the "dominant" one, allowing us to use a splat to
- // "insert" the upper element, and an insert of the lower element at position
- // 0, which improves codegen.
- SDValue DominantValue;
- unsigned MostCommonCount = 0;
- DenseMap<SDValue, unsigned> ValueCounts;
- unsigned NumUndefElts =
- count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+ // If the number of sign bits allows, see if we can lower as a <N x i8>.
+ // Our main goal here is to reduce LMUL (and thus work) required to
+ // build the constant, but we will also narrow if the resulting
+ // narrow vector is known to materialize cheaply.
+ // TODO: We really should be costing the smaller vector. There are
+ // profitable cases this misses.
+ if (EltBitSize > 8 && VT.isInteger() &&
+ (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
+ unsigned SignBits = DAG.ComputeNumSignBits(Op);
+ if (EltBitSize - SignBits < 8) {
+ SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
+ DL, Op->ops());
+ Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
+ Source, DAG, Subtarget);
+ SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
+ }
+ }
- // Track the number of scalar loads we know we'd be inserting, estimated as
- // any non-zero floating-point constant. Other kinds of element are either
- // already in registers or are materialized on demand. The threshold at which
- // a vector load is more desirable than several scalar materializion and
- // vector-insertion instructions is not known.
- unsigned NumScalarLoads = 0;
+ if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
+ return Res;
- for (SDValue V : Op->op_values()) {
- if (V.isUndef())
- continue;
+ // For constant vectors, use generic constant pool lowering. Otherwise,
+ // we'd have to materialize constants in GPRs just to move them into the
+ // vector.
+ return SDValue();
+}
- ValueCounts.insert(std::make_pair(V, 0));
- unsigned &Count = ValueCounts[V];
- if (0 == Count)
- if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
- NumScalarLoads += !CFP->isExactlyValue(+0.0);
+static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isFixedLengthVector() && "Unexpected vector!");
- // Is this value dominant? In case of a tie, prefer the highest element as
- // it's cheaper to insert near the beginning of a vector than it is at the
- // end.
- if (++Count >= MostCommonCount) {
- DominantValue = V;
- MostCommonCount = Count;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+
+ SDLoc DL(Op);
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ if (VT.getVectorElementType() == MVT::i1) {
+ // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
+ // vector type, we have a legal equivalently-sized i8 type, so we can use
+ // that.
+ MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
+ SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
+
+ SDValue WideVec;
+ if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ // For a splat, perform a scalar truncate before creating the wider
+ // vector.
+ Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
+ DAG.getConstant(1, DL, Splat.getValueType()));
+ WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
+ } else {
+ SmallVector<SDValue, 8> Ops(Op->op_values());
+ WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
+ SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
+ WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
}
+
+ return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
}
- assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
- unsigned NumDefElts = NumElts - NumUndefElts;
- unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
+ if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
+ return Gather;
+ unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
+ : RISCVISD::VMV_V_X_VL;
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
+ Splat =
+ DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
+ return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+ }
- // Don't perform this optimization when optimizing for size, since
- // materializing elements and inserting them tends to cause code bloat.
- if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
- (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
- ((MostCommonCount > DominantValueCountThreshold) ||
- (ValueCounts.size() <= Log2_32(NumDefElts)))) {
- // Start by splatting the most common element.
- SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
+ if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
+ return Res;
- DenseSet<SDValue> Processed{DominantValue};
- MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
- for (const auto &OpIdx : enumerate(Op->ops())) {
- const SDValue &V = OpIdx.value();
- if (V.isUndef() || !Processed.insert(V).second)
- continue;
- if (ValueCounts[V] == 1) {
- Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
- DAG.getConstant(OpIdx.index(), DL, XLenVT));
- } else {
- // Blend in all instances of this value using a VSELECT, using a
- // mask where each bit signals whether that element is the one
- // we're after.
- SmallVector<SDValue> Ops;
- transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
- return DAG.getConstant(V == V1, DL, XLenVT);
- });
- Vec = DAG.getNode(ISD::VSELECT, DL, VT,
- DAG.getBuildVector(SelMaskTy, DL, Ops),
- DAG.getSplatBuildVector(VT, DL, V), Vec);
- }
+ // If we're compiling for an exact VLEN value, we can split our work per
+ // register in the register group.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
+ MVT ElemVT = VT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
+ MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
+ assert(M1VT == getLMUL1VT(M1VT));
+
+ // The following semantically builds up a fixed length concat_vector
+ // of the component build_vectors. We eagerly lower to scalable and
+ // insert_subvector here to avoid DAG combining it back to a large
+ // build_vector.
+ SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
+ unsigned NumOpElts = M1VT.getVectorMinNumElements();
+ SDValue Vec = DAG.getUNDEF(ContainerVT);
+ for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
+ auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
+ SDValue SubBV =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
+ SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
+ unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
+ DAG.getVectorIdxConstant(InsertIdx, DL));
}
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+ }
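
As a concrete instance of the exact-VLEN split above: with VLEN known to be 128, a v8i32 build_vector covers two vector registers, so it is lowered as two v4i32 build_vectors inserted at scalable element offsets 0 and 2 (NumOpElts is 2 for an i32 LMUL=1 container). A small sketch of the chunking arithmetic only; splitPerVReg is an illustrative name:

#include <vector>

struct Chunk {
  unsigned FirstElt;  // first source element of this sub-build_vector
  unsigned InsertIdx; // INSERT_SUBVECTOR index within the container
};

// Assumes MinVLen == MaxVLen, EltBits divides VLen, and NumOpElts is the
// known-minimum element count of the LMUL=1 container type.
std::vector<Chunk> splitPerVReg(unsigned NumElts, unsigned EltBits,
                                unsigned VLen, unsigned NumOpElts) {
  const unsigned ElemsPerVReg = VLen / EltBits;
  std::vector<Chunk> Chunks;
  for (unsigned I = 0; I < NumElts; I += ElemsPerVReg)
    Chunks.push_back({I, (I / ElemsPerVReg) * NumOpElts});
  return Chunks;
}

// splitPerVReg(8, 32, 128, 2) -> {{0, 0}, {4, 2}}: elements [0,4) and [4,8)
// become separate v4i32 build_vectors.
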
- return Vec;
+ // Cap the cost at a value linear in the number of elements in the vector.
+ // The default lowering is to use the stack. The vector store + scalar loads
+ // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
+ // being (at least) linear in LMUL. As a result, using the vslidedown
+ // lowering for every element ends up being VL*LMUL.
+ // TODO: Should we be directly costing the stack alternative? Doing so might
+ // give us a more accurate upper bound.
+ InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
+
+ // TODO: unify with TTI getSlideCost.
+ InstructionCost PerSlideCost = 1;
+ switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
+ default: break;
+ case RISCVII::VLMUL::LMUL_2:
+ PerSlideCost = 2;
+ break;
+ case RISCVII::VLMUL::LMUL_4:
+ PerSlideCost = 4;
+ break;
+ case RISCVII::VLMUL::LMUL_8:
+ PerSlideCost = 8;
+ break;
}
- // For constant vectors, use generic constant pool lowering. Otherwise,
- // we'd have to materialize constants in GPRs just to move them into the
- // vector.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
- ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ // TODO: Should we be using the build instseq then cost + evaluate scheme
+ // we use for integer constants here?
+ unsigned UndefCount = 0;
+ for (const SDValue &V : Op->ops()) {
+ if (V.isUndef()) {
+ UndefCount++;
+ continue;
+ }
+ if (UndefCount) {
+ LinearBudget -= PerSlideCost;
+ UndefCount = 0;
+ }
+ LinearBudget -= PerSlideCost;
+ }
+ if (UndefCount) {
+ LinearBudget -= PerSlideCost;
+ }
+
+ if (LinearBudget < 0)
return SDValue();
assert((!VT.isFloatingPoint() ||
@@ -3315,13 +3873,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
- SDValue Vec = DAG.getUNDEF(ContainerVT);
- unsigned UndefCount = 0;
- for (const SDValue &V : Op->ops()) {
+ SDValue Vec;
+ UndefCount = 0;
+ for (SDValue V : Op->ops()) {
if (V.isUndef()) {
UndefCount++;
continue;
}
+
+ // Start our sequence with a TA splat in the hopes that hardware is able to
+ // recognize there's no dependency on the prior value of our temporary
+ // register.
+ if (!Vec) {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ UndefCount = 0;
+ continue;
+ }
+
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
@@ -3330,6 +3899,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
auto OpCode =
VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+ if (!VT.isFloatingPoint())
+ V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
V, Mask, VL);
}
@@ -3354,19 +3925,43 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
if ((LoC >> 31) == HiC)
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
- // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
- // vmv.v.x whose EEW = 32 to lower it.
- if (LoC == HiC && isAllOnesConstant(VL)) {
- MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
- // TODO: if vl <= min(VLMAX), we can also do this. But we could not
- // access the subtarget here now.
- auto InterVec = DAG.getNode(
- RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
- DAG.getRegister(RISCV::X0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
+ // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
+ // vlmax vsetvli or vsetivli to change the VL.
+ // FIXME: Support larger constants?
+ // FIXME: Support non-constant VLs by saturating?
+ if (LoC == HiC) {
+ SDValue NewVL;
+ if (isAllOnesConstant(VL) ||
+ (isa<RegisterSDNode>(VL) &&
+ cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
+ NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
+ else if (isa<ConstantSDNode>(VL) &&
+ isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
+ NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
+
+ if (NewVL) {
+ MVT InterVT =
+ MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
+ DAG.getUNDEF(InterVT), Lo,
+ DAG.getRegister(RISCV::X0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ }
}
}
+ // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
+ if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
+ isa<ConstantSDNode>(Hi.getOperand(1)) &&
+ Hi.getConstantOperandVal(1) == 31)
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
+
+ // If the hi bits of the splat are undefined, then it's fine to just splat Lo
+ // even if it might be sign extended.
+ if (Hi.isUndef())
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
+
// Fall back to a stack store and stride x0 vector load.
return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
Hi, VL);
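
The rewritten splatPartsI64WithVL tries hard to avoid the stack fallback when splatting a 64-bit value from two 32-bit halves on RV32: a single SEW=64 vmv.v.x suffices when Hi is the sign-extension of Lo (or undef), and an EEW=32 vmv.v.x on the doubled vector works when Hi equals Lo and the VL can be expressed as VLMAX or doubled as a small immediate. A standalone sketch of the constant-halves classification (classifySplatI64 is an illustrative name; the SRA and undef cases follow the same idea):

#include <cstdint>

enum class SplatI64Lowering {
  SignExtendLo,   // one vmv.v.x of Lo with SEW=64
  SplatLoEEW32,   // vmv.v.x of Lo on the 2x-wide i32 vector
  StackFallback   // SPLAT_VECTOR_SPLIT_I64_VL: store Lo/Hi, stride-0 reload
};

SplatI64Lowering classifySplatI64(uint32_t Lo, uint32_t Hi,
                                  bool VLIsVLMaxOrSmallImm) {
  const uint32_t SignOfLo = (Lo & 0x80000000u) ? 0xFFFFFFFFu : 0u;
  if (Hi == SignOfLo)
    return SplatI64Lowering::SignExtendLo;
  if (Hi == Lo && VLIsVLMaxOrSmallImm)
    return SplatI64Lowering::SplatLoEEW32;
  return SplatI64Lowering::StackFallback;
}
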
@@ -3393,12 +3988,8 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
bool HasPassthru = Passthru && !Passthru.isUndef();
if (!HasPassthru && !Passthru)
Passthru = DAG.getUNDEF(VT);
- if (VT.isFloatingPoint()) {
- // If VL is 1, we could use vfmv.s.f.
- if (isOneConstant(VL))
- return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+ if (VT.isFloatingPoint())
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
- }
MVT XLenVT = Subtarget.getXLenVT();
@@ -3411,12 +4002,6 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
- // If VL is 1 and the scalar value won't benefit from immediate, we could
- // use vmv.s.x.
- if (isOneConstant(VL) &&
- (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
- return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
}
@@ -3431,14 +4016,6 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
-static MVT getLMUL1VT(MVT VT) {
- assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
- "Unexpected vector MVT");
- return MVT::getScalableVectorVT(
- VT.getVectorElementType(),
- RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
-}
-
// This function lowers an insert of a scalar operand Scalar into lane
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
@@ -3446,24 +4023,34 @@ static MVT getLMUL1VT(MVT VT) {
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- const MVT XLenVT = Subtarget.getXLenVT();
+ assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
+ const MVT XLenVT = Subtarget.getXLenVT();
SDValue Passthru = DAG.getUNDEF(VT);
- if (VT.isFloatingPoint()) {
- // TODO: Use vmv.v.i for appropriate constants
- // Use M1 or smaller to avoid over constraining register allocation
- const MVT M1VT = getLMUL1VT(VT);
- auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
- SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
- DAG.getUNDEF(InnerVT), Scalar, VL);
- if (VT != InnerVT)
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- DAG.getUNDEF(VT),
- Result, DAG.getConstant(0, DL, XLenVT));
- return Result;
+
+ if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(Scalar.getOperand(1))) {
+ SDValue ExtractedVal = Scalar.getOperand(0);
+ MVT ExtractedVT = ExtractedVal.getSimpleValueType();
+ MVT ExtractedContainerVT = ExtractedVT;
+ if (ExtractedContainerVT.isFixedLengthVector()) {
+ ExtractedContainerVT = getContainerForFixedLengthVector(
+ DAG, ExtractedContainerVT, Subtarget);
+ ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
+ DAG, Subtarget);
+ }
+ if (ExtractedContainerVT.bitsLE(VT))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
+ DAG.getConstant(0, DL, XLenVT));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
+ DAG.getConstant(0, DL, XLenVT));
}
+ if (VT.isFloatingPoint())
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
+ DAG.getUNDEF(VT), Scalar, VL);
+
// Avoid the tricky legalization cases by falling back to using the
// splat code which already handles it gracefully.
if (!Scalar.getValueType().bitsLE(XLenVT))
@@ -3478,24 +4065,8 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
- // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
- // higher would involve overly constraining the register allocator for
- // no purpose.
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
- if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
- VT.bitsLE(getLMUL1VT(VT)))
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
- }
- // Use M1 or smaller to avoid over constraining register allocation
- const MVT M1VT = getLMUL1VT(VT);
- auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
- SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
- DAG.getUNDEF(InnerVT), Scalar, VL);
- if (VT != InnerVT)
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- DAG.getUNDEF(VT),
- Result, DAG.getConstant(0, DL, XLenVT));
- return Result;
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
+ DAG.getUNDEF(VT), Scalar, VL);
}
// Does this shuffle extract either the even or odd elements of a vector?
@@ -3509,7 +4080,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const RISCVSubtarget &Subtarget) {
// Need to be able to widen the vector.
- if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen())
return false;
// Both inputs must be extracts.
@@ -3553,7 +4124,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
int &OddSrc, const RISCVSubtarget &Subtarget) {
// We need to be able to widen elements to the next larger integer type.
- if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen())
return false;
int Size = Mask.size();
@@ -3882,6 +4453,8 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
auto OpCode = IsVSlidedown ?
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
DAG.getUNDEF(ContainerVT),
convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
@@ -3904,7 +4477,7 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
}
- assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
+ assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
// We're working with a vector of the same size as the resulting
// interleaved vector, but with half the number of elements and
@@ -3925,24 +4498,37 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
- // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
- // vwaddu.vv
- SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
- EvenV, OddV, Passthru, Mask, VL);
-
- // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
- SDValue AllOnesVec = DAG.getSplatVector(
- VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
- SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
- AllOnesVec, Passthru, Mask, VL);
-
- // Add the two together so we get
- // (OddV * 0xff...ff) + (OddV + EvenV)
- // = (OddV * 0x100...00) + EvenV
- // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
- // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
- Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
- OddsMul, Passthru, Mask, VL);
+ SDValue Interleaved;
+ if (Subtarget.hasStdExtZvbb()) {
+ // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
+ SDValue OffsetVec =
+ DAG.getSplatVector(VecContainerVT, DL,
+ DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
+ Subtarget.getXLenVT()));
+ Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
+ OffsetVec, Passthru, Mask, VL);
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
+ Interleaved, EvenV, Passthru, Mask, VL);
+ } else {
+ // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
+ // vwaddu.vv
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
+ OddV, Passthru, Mask, VL);
+
+ // Then multiply OddV by 2^(VecVT.getScalarSizeInBits()) - 1, i.e. all ones
+ SDValue AllOnesVec = DAG.getSplatVector(
+ VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
+ SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
+ OddV, AllOnesVec, Passthru, Mask, VL);
+
+ // Add the two together so we get
+ // (OddV * 0xff...ff) + (OddV + EvenV)
+ // = (OddV * 0x100...00) + EvenV
+ // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
+ // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
+ Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
+ Interleaved, OddsMul, Passthru, Mask, VL);
+ }
// Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
MVT ResultContainerVT = MVT::getVectorVT(
@@ -3961,6 +4547,96 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
return Interleaved;
}
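The non-Zvbb branch above relies on the identity (OddV * (2^n - 1)) + (OddV + EvenV) = OddV * 2^n + EvenV, which places OddV in the high half and EvenV in the low half of each widened lane. A standalone check of that identity for n = 8, exhaustive over all byte pairs (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Even = 0; Even < 256; ++Even) {
    for (unsigned Odd = 0; Odd < 256; ++Odd) {
      // vwaddu.vv: widening unsigned add of the two 8-bit inputs.
      uint16_t Sum = (uint16_t)(Even + Odd);
      // vwmaccu.vx with an all-ones multiplier: accumulate Odd * 0xFF.
      uint16_t Interleaved = (uint16_t)(Sum + Odd * 0xFFu);
      // Expected: Odd in the high byte, Even in the low byte.
      assert(Interleaved == (uint16_t)((Odd << 8) | Even));
    }
  }
  return 0;
}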
+// If we have a vector of bits that we want to reverse, we can use a vbrev on a
+// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
+static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(SVN);
+ MVT VT = SVN->getSimpleValueType(0);
+ SDValue V = SVN->getOperand(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert(VT.getVectorElementType() == MVT::i1);
+
+ if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
+ SVN->getMask().size()) ||
+ !SVN->getOperand(1).isUndef())
+ return SDValue();
+
+ unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
+ EVT ViaVT = EVT::getVectorVT(
+ *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
+ EVT ViaBitVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
+
+ // If we don't have Zvbb or the larger element type is wider than ELEN, the
+ // operation will be illegal.
+ if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
+ ViaVT) ||
+ !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
+ return SDValue();
+
+ // If the bit vector doesn't fit exactly into the larger element type, we need
+ // to insert it into the larger vector and then shift up the reversed bits
+ // afterwards to get rid of the gap introduced.
+ if (ViaEltSize > NumElts)
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
+ V, DAG.getVectorIdxConstant(0, DL));
+
+ SDValue Res =
+ DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
+
+ // Shift up the reversed bits if the vector didn't exactly fit into the larger
+ // element type.
+ if (ViaEltSize > NumElts)
+ Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
+ DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
+
+ Res = DAG.getBitcast(ViaBitVT, Res);
+
+ if (ViaEltSize > NumElts)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getVectorIdxConstant(0, DL));
+ return Res;
+}
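When the i1 vector is narrower than the chosen element type, the reversed bits land in the high part of the wider element, and the SRL above shifts them back down. A scalar model of the same steps, reversing 5 mask bits through an 8-bit bitreverse (plain C++; reverseBits8 is a local helper, not an LLVM API):

#include <cassert>
#include <cstdint>

// Plays the role of the vbrev / ISD::BITREVERSE on the wider element type.
static uint8_t reverseBits8(uint8_t V) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= (uint8_t)(((V >> I) & 1u) << (7 - I));
  return R;
}

int main() {
  const unsigned NumElts = 5, ViaEltSize = 8;
  uint8_t Mask = 0x16;               // five i1 elements, bit i = element i
  uint8_t Rev = reverseBits8(Mask);  // reversed bits occupy the top 5 bits
  Rev >>= (ViaEltSize - NumElts);    // the SRL that removes the gap
  // Element i of the result equals element (NumElts - 1 - i) of the input.
  for (unsigned I = 0; I < NumElts; ++I)
    assert(((Rev >> I) & 1u) == ((Mask >> (NumElts - 1 - I)) & 1u));
  return 0;
}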
+
+// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
+// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
+// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
+static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(SVN);
+
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSubElts, RotateAmt;
+ if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
+ NumElts, NumSubElts, RotateAmt))
+ return SDValue();
+ MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
+ NumElts / NumSubElts);
+
+ // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
+ if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
+ return SDValue();
+
+ SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
+
+ SDValue Rotate;
+ // A rotate of an i16 by 8 bits in either direction is equivalent to a byteswap,
+ // so canonicalize to vrev8.
+ if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
+ Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
+ else
+ Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
+ DAG.getConstant(RotateAmt, DL, RotateVT));
+
+ return DAG.getBitcast(VT, Rotate);
+}
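For the mask <3,0,1,2,7,4,5,6> mentioned above, each group of four bytes is rotated by one byte position; reinterpreted as little-endian i32 lanes, that is an 8-bit bit-rotation of each lane. A standalone check of the equivalence (plain C++):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t rotl32(uint32_t X, unsigned N) {
  return (X << N) | (X >> (32 - N));
}

int main() {
  // v8i8 source and the shuffle mask <3,0,1,2,7,4,5,6>.
  uint8_t Src[8] = {0x10, 0x21, 0x32, 0x43, 0x54, 0x65, 0x76, 0x87};
  int Mask[8] = {3, 0, 1, 2, 7, 4, 5, 6};
  uint8_t Shuffled[8];
  for (int I = 0; I < 8; ++I)
    Shuffled[I] = Src[Mask[I]];

  // Reinterpret as v2i32 and rotate each lane by 8 bits instead.
  uint32_t Lanes[2];
  std::memcpy(Lanes, Src, sizeof(Lanes));
  for (int I = 0; I < 2; ++I)
    Lanes[I] = rotl32(Lanes[I], 8); // little-endian view of the byte rotation

  assert(std::memcmp(Shuffled, Lanes, sizeof(Shuffled)) == 0);
  return 0;
}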
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -3971,8 +4647,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- // Promote i1 shuffle to i8 shuffle.
if (VT.getVectorElementType() == MVT::i1) {
+ // Lower to a vror.vi of a larger element type if possible before we promote
+ // i1s to i8s.
+ if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+ return V;
+ if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
+ return V;
+
+ // Promote i1 shuffle to i8 shuffle.
MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
@@ -4008,8 +4691,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
auto *Ld = cast<LoadSDNode>(V);
Offset *= SVT.getStoreSize();
- SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
- TypeSize::Fixed(Offset), DL);
+ SDValue NewAddr = DAG.getMemBasePlusOffset(
+ Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
// If this is SEW=64 on RV32, use a strided load with a stride of x0.
if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
@@ -4071,6 +4754,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
+ // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
+ // available.
+ if (Subtarget.hasStdExtZvkb())
+ if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+ return V;
+
// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
// be undef which can be handled with a single SLIDEDOWN/UP.
int LoSrc, HiSrc;
@@ -4197,6 +4886,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (IsSelect)
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
+ // We might be able to express the shuffle as a bitrotate. But even if we
+ // don't have Zvkb and have to expand, the expanded sequence of approx. 2
+ // shifts and a vor will have a higher throughput than a vrgather.
+ if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+ return V;
+
if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
// On such a large vector we're unable to use i8 as the index type.
// FIXME: We could promote the index to i16 and use vrgatherei16, but that
@@ -4216,6 +4911,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
}
+ // If the mask allows, we can do all the index computation in 16 bits. This
+ // requires less work and less register pressure at high LMUL, and creates
+ // smaller constants which may be cheaper to materialize.
+ if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
+ (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
+ GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ IndexVT = IndexVT.changeVectorElementType(MVT::i16);
+ }
+
MVT IndexContainerVT =
ContainerVT.changeVectorElementType(IndexVT.getScalarType());
@@ -4490,26 +5194,26 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
if (!Subtarget.useConstantPoolForLargeInts())
return Op;
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
return Op;
- // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
+ // Optimizations below are disabled for opt size. If we're optimizing for
+ // size, use a constant pool.
+ if (DAG.shouldOptForSize())
+ return SDValue();
+
+ // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
+ // do that if it will avoid a constant pool.
// It will require an extra temporary register though.
- if (!DAG.shouldOptForSize()) {
- int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
- return Op;
- }
- }
+ // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
+ // low and high 32 bits are the same and bit 31 and 63 are set.
+ unsigned ShiftAmt, AddOpc;
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
+ if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
+ return Op;
- // Expand to a constant pool using the default expansion code.
return SDValue();
}
@@ -4547,8 +5251,7 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
- auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
- unsigned Check = CNode->getZExtValue();
+ unsigned Check = Op.getConstantOperandVal(1);
unsigned TDCMask = 0;
if (Check & fcSNan)
TDCMask |= RISCV::FPMASK_Signaling_NaN;
@@ -4582,6 +5285,10 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
if (VT.isScalableVector()) {
MVT DstVT = VT0.changeVectorElementTypeToInteger();
auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
+ if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
+ Mask = Op.getOperand(2);
+ VL = Op.getOperand(3);
+ }
SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
VL, Op->getFlags());
if (IsOneBitMask)
@@ -4598,7 +5305,13 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
MVT ContainerVT = getContainerForFixedLengthVector(VT);
MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
-
+ if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
+ Mask = Op.getOperand(2);
+ MVT MaskContainerVT =
+ getContainerForFixedLengthVector(Mask.getSimpleValueType());
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ VL = Op.getOperand(3);
+ }
Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
@@ -4616,7 +5329,7 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
- SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+ SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
@@ -4626,10 +5339,11 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
}
- SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
- SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
- return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
- ISD::CondCode::SETNE);
+ SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
+ SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
+ SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
+ ISD::CondCode::SETNE);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
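The scalar tail above reduces is_fpclass to classify-then-mask: take the classification word, AND it with the mask of requested categories, and test the result against zero. A schematic model of that shape (plain C++; the class bits below are made up for illustration and are not the real RISC-V fclass encoding):

#include <cassert>
#include <cmath>

// Hypothetical classification bits standing in for the fclass result.
enum : unsigned {
  ClassNegInf = 1u << 0,
  ClassNormal = 1u << 1,
  ClassZero   = 1u << 2,
  ClassNaN    = 1u << 3,
  ClassPosInf = 1u << 4,
};

static unsigned classify(double X) {
  if (std::isnan(X))
    return ClassNaN;
  if (std::isinf(X))
    return X < 0 ? ClassNegInf : ClassPosInf;
  if (X == 0.0)
    return ClassZero;
  return ClassNormal;
}

// The lowering shape: setcc ne ((fclass x) & TDCMask), 0.
static bool isFPClass(double X, unsigned TDCMask) {
  return (classify(X) & TDCMask) != 0;
}

int main() {
  assert(isFPClass(NAN, ClassNaN));
  assert(!isFPClass(1.5, ClassNaN | ClassZero));
  assert(isFPClass(-INFINITY, ClassNegInf | ClassPosInf));
  return 0;
}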
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
@@ -4637,38 +5351,88 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDLoc DL(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
- MVT XLenVT = Subtarget.getXLenVT();
+ if (!VT.isVector()) {
+ MVT XLenVT = Subtarget.getXLenVT();
- // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
- // ensures that when one input is a nan, the other will also be a nan allowing
- // the nan to propagate. If both inputs are nan, this will swap the inputs
- // which is harmless.
- // FIXME: Handle nonans FMF and use isKnownNeverNaN.
- SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
- SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
+ // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
+ // ensures that when one input is a nan, the other will also be a nan
+ // allowing the nan to propagate. If both inputs are nan, this will swap the
+ // inputs which is harmless.
- SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
- SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
+ SDValue NewY = Y;
+ if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
+ SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
+ NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
+ }
+
+ SDValue NewX = X;
+ if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
+ SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
+ NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
+ }
+
+ unsigned Opc =
+ Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
+ return DAG.getNode(Opc, DL, VT, NewX, NewY);
+ }
+
+ // Check for known-non-NaN inputs before converting the fixed-length vectors
+ // to scalable ones.
+ bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
+ bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
+ Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ SDValue NewY = Y;
+ if (!XIsNeverNan) {
+ SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
+ {X, X, DAG.getCondCode(ISD::SETOEQ),
+ DAG.getUNDEF(ContainerVT), Mask, VL});
+ NewY =
+ DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL);
+ }
+
+ SDValue NewX = X;
+ if (!YIsNeverNan) {
+ SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
+ {Y, Y, DAG.getCondCode(ISD::SETOEQ),
+ DAG.getUNDEF(ContainerVT), Mask, VL});
+ NewX =
+ DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL);
+ }
unsigned Opc =
- Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
- return DAG.getNode(Opc, DL, VT, NewX, NewY);
+ Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
+ SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
+ DAG.getUNDEF(ContainerVT), Mask, VL);
+ if (VT.isFixedLengthVector())
+ Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+ return Res;
}
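The scalar path builds IEEE fmaximum/fminimum out of min/max instructions that prefer the non-NaN operand: any input that may be a NaN is first copied over the other operand through a self-compare select, so a NaN always reaches the final min/max and propagates. A scalar model of that selection logic (plain C++, with std::fmax standing in for the NaN-discarding fmax instruction):

#include <cassert>
#include <cmath>

// Models the lowering: X == X is the SETOEQ self-compare, false only for NaN.
static double fmaximumModel(double X, double Y) {
  double NewY = (X == X) ? Y : X; // if X is NaN, replace Y with X
  double NewX = (Y == Y) ? X : Y; // if Y is NaN, replace X with Y
  return std::fmax(NewX, NewY);   // stand-in for the non-NaN-preferring fmax
}

int main() {
  assert(fmaximumModel(1.0, 2.0) == 2.0);
  assert(std::isnan(fmaximumModel(NAN, 2.0))); // NaN propagates, unlike fmax
  assert(std::isnan(fmaximumModel(1.0, NAN)));
  assert(std::isnan(fmaximumModel(NAN, NAN)));
  return 0;
}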
-/// Get a RISCV target specified VL op for a given SDNode.
+/// Get the RISC-V target-specific VL op for a given SDNode.
static unsigned getRISCVVLOp(SDValue Op) {
#define OP_CASE(NODE) \
case ISD::NODE: \
return RISCVISD::NODE##_VL;
+#define VP_CASE(NODE) \
+ case ISD::VP_##NODE: \
+ return RISCVISD::NODE##_VL;
+ // clang-format off
switch (Op.getOpcode()) {
default:
llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
- // clang-format off
OP_CASE(ADD)
OP_CASE(SUB)
OP_CASE(MUL)
@@ -4681,6 +5445,13 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SHL)
OP_CASE(SRA)
OP_CASE(SRL)
+ OP_CASE(ROTL)
+ OP_CASE(ROTR)
+ OP_CASE(BSWAP)
+ OP_CASE(CTTZ)
+ OP_CASE(CTLZ)
+ OP_CASE(CTPOP)
+ OP_CASE(BITREVERSE)
OP_CASE(SADDSAT)
OP_CASE(UADDSAT)
OP_CASE(SSUBSAT)
@@ -4696,47 +5467,113 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SMAX)
OP_CASE(UMIN)
OP_CASE(UMAX)
- OP_CASE(FMINNUM)
- OP_CASE(FMAXNUM)
OP_CASE(STRICT_FADD)
OP_CASE(STRICT_FSUB)
OP_CASE(STRICT_FMUL)
OP_CASE(STRICT_FDIV)
OP_CASE(STRICT_FSQRT)
- // clang-format on
-#undef OP_CASE
+ VP_CASE(ADD) // VP_ADD
+ VP_CASE(SUB) // VP_SUB
+ VP_CASE(MUL) // VP_MUL
+ VP_CASE(SDIV) // VP_SDIV
+ VP_CASE(SREM) // VP_SREM
+ VP_CASE(UDIV) // VP_UDIV
+ VP_CASE(UREM) // VP_UREM
+ VP_CASE(SHL) // VP_SHL
+ VP_CASE(FADD) // VP_FADD
+ VP_CASE(FSUB) // VP_FSUB
+ VP_CASE(FMUL) // VP_FMUL
+ VP_CASE(FDIV) // VP_FDIV
+ VP_CASE(FNEG) // VP_FNEG
+ VP_CASE(FABS) // VP_FABS
+ VP_CASE(SMIN) // VP_SMIN
+ VP_CASE(SMAX) // VP_SMAX
+ VP_CASE(UMIN) // VP_UMIN
+ VP_CASE(UMAX) // VP_UMAX
+ VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
+ VP_CASE(SETCC) // VP_SETCC
+ VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
+ VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
+ VP_CASE(BITREVERSE) // VP_BITREVERSE
+ VP_CASE(BSWAP) // VP_BSWAP
+ VP_CASE(CTLZ) // VP_CTLZ
+ VP_CASE(CTTZ) // VP_CTTZ
+ VP_CASE(CTPOP) // VP_CTPOP
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ return RISCVISD::CTLZ_VL;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ return RISCVISD::CTTZ_VL;
case ISD::FMA:
+ case ISD::VP_FMA:
return RISCVISD::VFMADD_VL;
case ISD::STRICT_FMA:
return RISCVISD::STRICT_VFMADD_VL;
case ISD::AND:
+ case ISD::VP_AND:
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMAND_VL;
return RISCVISD::AND_VL;
case ISD::OR:
+ case ISD::VP_OR:
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMOR_VL;
return RISCVISD::OR_VL;
case ISD::XOR:
+ case ISD::VP_XOR:
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
return RISCVISD::VMXOR_VL;
return RISCVISD::XOR_VL;
+ case ISD::VP_SELECT:
+ return RISCVISD::VSELECT_VL;
+ case ISD::VP_MERGE:
+ return RISCVISD::VP_MERGE_VL;
+ case ISD::VP_ASHR:
+ return RISCVISD::SRA_VL;
+ case ISD::VP_LSHR:
+ return RISCVISD::SRL_VL;
+ case ISD::VP_SQRT:
+ return RISCVISD::FSQRT_VL;
+ case ISD::VP_SIGN_EXTEND:
+ return RISCVISD::VSEXT_VL;
+ case ISD::VP_ZERO_EXTEND:
+ return RISCVISD::VZEXT_VL;
+ case ISD::VP_FP_TO_SINT:
+ return RISCVISD::VFCVT_RTZ_X_F_VL;
+ case ISD::VP_FP_TO_UINT:
+ return RISCVISD::VFCVT_RTZ_XU_F_VL;
+ case ISD::FMINNUM:
+ case ISD::VP_FMINNUM:
+ return RISCVISD::VFMIN_VL;
+ case ISD::FMAXNUM:
+ case ISD::VP_FMAXNUM:
+ return RISCVISD::VFMAX_VL;
}
+ // clang-format on
+#undef OP_CASE
+#undef VP_CASE
}
/// Return true if a RISC-V target-specific op has a merge operand.
static bool hasMergeOp(unsigned Opcode) {
assert(Opcode > RISCVISD::FIRST_NUMBER &&
- Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
+ Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
- assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
- "adding target specific op should update this function");
- if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL)
+ static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
+ 125 &&
+ RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
+ ISD::FIRST_TARGET_STRICTFP_OPCODE ==
+ 21 &&
+ "adding target specific op should update this function");
+ if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
return true;
if (Opcode == RISCVISD::FCOPYSIGN_VL)
return true;
if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
return true;
+ if (Opcode == RISCVISD::SETCC_VL)
+ return true;
if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
return true;
return false;
@@ -4745,10 +5582,14 @@ static bool hasMergeOp(unsigned Opcode) {
/// Return true if a RISC-V target-specific op has a mask operand.
static bool hasMaskOp(unsigned Opcode) {
assert(Opcode > RISCVISD::FIRST_NUMBER &&
- Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
+ Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
- assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
- "adding target specific op should update this function");
+ static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
+ 125 &&
+ RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
+ ISD::FIRST_TARGET_STRICTFP_OPCODE ==
+ 21 &&
+ "adding target specific op should update this function");
if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
return true;
if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
@@ -4759,6 +5600,112 @@ static bool hasMaskOp(unsigned Opcode) {
return false;
}
+static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
+}
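SplitVectorOp and the related Split* helpers below all follow one recipe: split every vector operand in half, re-issue the same operation on each half, and concatenate the two results. The same shape on ordinary containers, as a rough analogy only (plain C++, no SelectionDAG types):

#include <cassert>
#include <cstddef>
#include <vector>

// Split-apply-concat, the container analogue of SplitVectorOp for an
// elementwise add.
static std::vector<int> splitAdd(const std::vector<int> &A,
                                 const std::vector<int> &B) {
  std::size_t Half = A.size() / 2;
  std::vector<int> Res;
  for (std::size_t I = 0; I < Half; ++I)        // "LoRes" on the low half
    Res.push_back(A[I] + B[I]);
  for (std::size_t I = Half; I < A.size(); ++I) // "HiRes" on the high half
    Res.push_back(A[I] + B[I]);
  return Res;                                   // CONCAT_VECTORS of the halves
}

int main() {
  std::vector<int> A = {1, 2, 3, 4}, B = {10, 20, 30, 40};
  assert((splitAdd(A, B) == std::vector<int>{11, 22, 33, 44}));
  return 0;
}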
+
+static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
+ assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
+ continue;
+ }
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
+}
+
+static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+
+ auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
+ auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
+ auto [EVLLo, EVLHi] =
+ DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
+
+ SDValue ResLo =
+ DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
+}
+
+static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
+
+ assert(Op->isStrictFPOpcode());
+
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
+
+ SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
+ SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
+
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
+ HiOperands[0] = LoRes.getValue(1);
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
+
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
+ LoRes.getValue(0), HiRes.getValue(0));
+ return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -4796,6 +5743,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerShiftRightParts(Op, DAG, false);
case ISD::ROTL:
case ISD::ROTR:
+ if (Op.getValueType().isFixedLengthVector()) {
+ assert(Subtarget.hasStdExtZvkb());
+ return lowerToScalableOp(Op, DAG);
+ }
assert(Subtarget.hasVendorXTHeadBb() &&
!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
"Unexpected custom legalization");
@@ -4889,6 +5840,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerIS_FPCLASS(Op, DAG);
case ISD::BITREVERSE: {
MVT VT = Op.getSimpleValueType();
+ if (VT.isFixedLengthVector()) {
+ assert(Subtarget.hasStdExtZvbb());
+ return lowerToScalableOp(Op, DAG);
+ }
SDLoc DL(Op);
assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
@@ -4931,6 +5886,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (VT.isFixedLengthVector())
ContainerVT = getContainerForFixedLengthVector(VT);
SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Scalar, VL);
if (VT.isFixedLengthVector())
@@ -4938,9 +5894,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return V;
}
case ISD::VSCALE: {
+ MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
+ SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
// We define our scalable vector types for lmul=1 to use a 64-bit known
// minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
// vscale as VLENB / 8.
@@ -4953,22 +5910,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (isPowerOf2_64(Val)) {
uint64_t Log2 = Log2_64(Val);
if (Log2 < 3)
- return DAG.getNode(ISD::SRL, DL, VT, VLENB,
- DAG.getConstant(3 - Log2, DL, VT));
- if (Log2 > 3)
- return DAG.getNode(ISD::SHL, DL, VT, VLENB,
- DAG.getConstant(Log2 - 3, DL, VT));
- return VLENB;
- }
- // If the multiplier is a multiple of 8, scale it down to avoid needing
- // to shift the VLENB value.
- if ((Val % 8) == 0)
- return DAG.getNode(ISD::MUL, DL, VT, VLENB,
- DAG.getConstant(Val / 8, DL, VT));
-
- SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
- DAG.getConstant(3, DL, VT));
- return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
+ Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+ DAG.getConstant(3 - Log2, DL, VT));
+ else if (Log2 > 3)
+ Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
+ DAG.getConstant(Log2 - 3, DL, XLenVT));
+ } else if ((Val % 8) == 0) {
+ // If the multiplier is a multiple of 8, scale it down to avoid needing
+ // to shift the VLENB value.
+ Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
+ DAG.getConstant(Val / 8, DL, XLenVT));
+ } else {
+ SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+ DAG.getConstant(3, DL, XLenVT));
+ Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
+ DAG.getConstant(Val, DL, XLenVT));
+ }
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
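The VSCALE lowering computes VLENB * Val / 8 and avoids the division where it can: a single shift of VLENB when Val is a power of two, or a folded multiplier when Val is a multiple of 8. A quick check of those identities (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  // VLENB is VLEN in bytes, e.g. a 256-bit vector register gives VLENB = 32.
  const uint64_t VLENBs[] = {16, 32, 64, 128};
  for (uint64_t VLENB : VLENBs) {
    assert(VLENB * 2 / 8 == (VLENB >> (3 - 1)));  // Val = 2:  srl vlenb, 3 - log2
    assert(VLENB * 16 / 8 == (VLENB << (4 - 3))); // Val = 16: sll vlenb, log2 - 3
    assert(VLENB * 24 / 8 == VLENB * 3);          // Val = 24: fold the /8 away
  }
  return 0;
}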
case ISD::FPOWI: {
// Custom promote f16 powi with illegal i32 integer type on RV64. Once
@@ -4986,6 +5944,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::FP_EXTEND: {
SDLoc DL(Op);
@@ -5026,10 +5988,42 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
return lowerStrictFPExtendOrRoundLike(Op, DAG);
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ if (Op.getValueType().isVector() &&
+ Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ // int -> f32
+ SDLoc DL(Op);
+ MVT NVT =
+ MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
+ SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
+ // f32 -> f16
+ return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
+ [[fallthrough]];
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (SDValue Op1 = Op.getOperand(0);
+ Op1.getValueType().isVector() &&
+ Op1.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op1.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ // f16 -> f32
+ SDLoc DL(Op);
+ MVT NVT = MVT::getVectorVT(MVT::f32,
+ Op1.getValueType().getVectorElementCount());
+ SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
+ // f32 -> int
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
+ }
+ [[fallthrough]];
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_SINT_TO_FP:
@@ -5180,7 +6174,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit() && !RV64LegalI32)
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
return DAG.getBitcast(MVT::i32, Res);
}
@@ -5209,7 +6203,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
SDValue Res =
makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit() && !RV64LegalI32)
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
return DAG.getBitcast(MVT::i32, Res);
}
@@ -5236,6 +6230,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FROUND:
case ISD::FROUNDEVEN:
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ return lowerVectorXRINT(Op, DAG, Subtarget);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -5262,6 +6259,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_REDUCE_SEQ_FADD:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAX:
+ if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorReductionOp(Op, DAG);
return lowerVPREDUCE(Op, DAG);
case ISD::VP_REDUCE_AND:
case ISD::VP_REDUCE_OR:
@@ -5291,6 +6292,21 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
+ if (Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ SDLoc DL(Op);
+ SDValue NewScalar =
+ DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+ SDValue NewSplat = DAG.getNode(
+ ISD::SPLAT_VECTOR, DL,
+ MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
+ NewScalar);
+ return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
if (Op.getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskSplat(Op, DAG);
return SDValue();
@@ -5387,6 +6403,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
}
+ if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
+
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
}
case ISD::ADD:
@@ -5401,6 +6422,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SREM:
case ISD::UDIV:
case ISD::UREM:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
return lowerToScalableOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
@@ -5411,10 +6434,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
- case ISD::SADDSAT:
- case ISD::UADDSAT:
- case ISD::SSUBSAT:
- case ISD::USUBSAT:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -5423,23 +6442,40 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FABS:
case ISD::FSQRT:
case ISD::FMA:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
+ [[fallthrough]];
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
return lowerToScalableOp(Op, DAG);
case ISD::ABS:
case ISD::VP_ABS:
return lowerABS(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
+ if (Subtarget.hasStdExtZvbb())
+ return lowerToScalableOp(Op, DAG);
+ assert(Op.getOpcode() != ISD::CTTZ);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
@@ -5447,6 +6483,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitStrictFPVectorOp(Op, DAG);
return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
@@ -5472,106 +6512,115 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::EH_DWARF_CFA:
return lowerEH_DWARF_CFA(Op, DAG);
case ISD::VP_SELECT:
- return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
case ISD::VP_MERGE:
- return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
case ISD::VP_ADD:
- return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
case ISD::VP_SUB:
- return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
case ISD::VP_MUL:
- return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
case ISD::VP_SDIV:
- return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
case ISD::VP_UDIV:
- return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
case ISD::VP_SREM:
- return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
case ISD::VP_UREM:
- return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
case ISD::VP_AND:
- return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
case ISD::VP_OR:
- return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
case ISD::VP_XOR:
- return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
- case ISD::VP_ASHR:
- return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
- case ISD::VP_LSHR:
- return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
- case ISD::VP_SHL:
- return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
+ return lowerLogicVPOp(Op, DAG);
case ISD::VP_FADD:
- return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
case ISD::VP_FSUB:
- return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
case ISD::VP_FMUL:
- return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
case ISD::VP_FDIV:
- return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
case ISD::VP_FNEG:
- return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
case ISD::VP_FABS:
- return lowerVPOp(Op, DAG, RISCVISD::FABS_VL);
case ISD::VP_SQRT:
- return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::VP_FMA:
- return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
case ISD::VP_FMINNUM:
- return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true);
case ISD::VP_FMAXNUM:
- return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true);
case ISD::VP_FCOPYSIGN:
- return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true);
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
+ [[fallthrough]];
+ case ISD::VP_ASHR:
+ case ISD::VP_LSHR:
+ case ISD::VP_SHL:
+ return lowerVPOp(Op, DAG);
+ case ISD::VP_IS_FPCLASS:
+ return LowerIS_FPCLASS(Op, DAG);
case ISD::VP_SIGN_EXTEND:
case ISD::VP_ZERO_EXTEND:
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPExtMaskOp(Op, DAG);
- return lowerVPOp(Op, DAG,
- Op.getOpcode() == ISD::VP_SIGN_EXTEND
- ? RISCVISD::VSEXT_VL
- : RISCVISD::VZEXT_VL);
+ return lowerVPOp(Op, DAG);
case ISD::VP_TRUNCATE:
return lowerVectorTruncLike(Op, DAG);
case ISD::VP_FP_EXTEND:
case ISD::VP_FP_ROUND:
return lowerVectorFPExtendOrRoundLike(Op, DAG);
- case ISD::VP_FP_TO_SINT:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL);
- case ISD::VP_FP_TO_UINT:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL);
case ISD::VP_SINT_TO_FP:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
case ISD::VP_UINT_TO_FP:
- return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
+ if (Op.getValueType().isVector() &&
+ Op.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op.getValueType() == MVT::nxv32f16)
+ return SplitVPOp(Op, DAG);
+ // int -> f32
+ SDLoc DL(Op);
+ MVT NVT =
+ MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
+ auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
+ // f32 -> f16
+ return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
+ [[fallthrough]];
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
+ if (SDValue Op1 = Op.getOperand(0);
+ Op1.getValueType().isVector() &&
+ Op1.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op1.getValueType() == MVT::nxv32f16)
+ return SplitVPOp(Op, DAG);
+ // f16 -> f32
+ SDLoc DL(Op);
+ MVT NVT = MVT::getVectorVT(MVT::f32,
+ Op1.getValueType().getVectorElementCount());
+ SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
+ // f32 -> int
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {WidenVec, Op.getOperand(1), Op.getOperand(2)});
+ }
+ return lowerVPFPIntConvOp(Op, DAG);
case ISD::VP_SETCC:
+ if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPSetCCMaskOp(Op, DAG);
- return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
+ [[fallthrough]];
case ISD::VP_SMIN:
- return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
case ISD::VP_SMAX:
- return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
case ISD::VP_UMIN:
- return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
case ISD::VP_UMAX:
- return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
case ISD::VP_BITREVERSE:
- return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
case ISD::VP_BSWAP:
- return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
case ISD::VP_CTLZ:
case ISD::VP_CTLZ_ZERO_UNDEF:
if (Subtarget.hasStdExtZvbb())
- return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF:
if (Subtarget.hasStdExtZvbb())
- return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VP_CTPOP:
- return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
+ return lowerVPOp(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
return lowerVPStridedLoad(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
@@ -5583,7 +6632,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::VP_FROUNDTOZERO:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::EXPERIMENTAL_VP_REVERSE:
+ return lowerVPReverseExperimental(Op, DAG);
}
}
@@ -5630,15 +6685,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLGA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
- SDValue Load =
- DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other),
- {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
return Load;
}
@@ -5660,16 +6715,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// not be within 2GiB of PC, so use GOT-indirect addressing to access the
// symbol. This generates the pattern (PseudoLGA sym), which expands to
// (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
- SDValue Load =
- DAG.getMemIntrinsicNode(RISCVISD::LGA, DL,
- DAG.getVTList(Ty, MVT::Other),
- {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
return Load;
}
@@ -5724,15 +6778,15 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
// the pattern (PseudoLA_TLS_IE sym), which expands to
// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
- SDValue Load = DAG.getMemIntrinsicNode(
- RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
- {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
@@ -5768,7 +6822,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
- SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
// Prepare argument list to generate call.
ArgListTy Args;
@@ -5904,56 +6959,6 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
-/// check for equality with 0. This function emits nodes that convert the
-/// seteq/setne into something that can be compared with 0.
-/// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce
-/// target-independent SelectionDAG nodes rather than machine nodes.
-static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
- SelectionDAG &DAG) {
- assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
- "Unexpected condition code!");
-
- // We're looking for a setcc.
- if (N->getOpcode() != ISD::SETCC)
- return SDValue();
-
- // Must be an equality comparison.
- ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
- if (CCVal != ExpectedCCVal)
- return SDValue();
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- if (!LHS.getValueType().isScalarInteger())
- return SDValue();
-
- // If the RHS side is 0, we don't need any extra instructions, return the LHS.
- if (isNullConstant(RHS))
- return LHS;
-
- SDLoc DL(N);
-
- if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
- int64_t CVal = C->getSExtValue();
- // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
- // non-zero otherwise.
- if (CVal == -2048)
- return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS,
- DAG.getConstant(CVal, DL, N->getValueType(0)));
- // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
- // LHS is equal to the RHS and non-zero otherwise.
- if (isInt<12>(CVal) || CVal == 2048)
- return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS,
- DAG.getConstant(-CVal, DL, N->getValueType(0)));
- }
-
- // If nothing else we can XOR the LHS and RHS to produce zero if they are
- // equal and a non-zero value if they aren't.
- return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS);
-}
-
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
@@ -6041,35 +7046,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// sequence or RISCVISD::SELECT_CC node (branch-based select).
if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
VT.isScalarInteger()) {
- if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) {
- // (select (riscv_setne c), t, 0) -> (czero_eqz t, c)
- if (isNullConstant(FalseV))
- return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV);
- // (select (riscv_setne c), 0, f) -> (czero_nez f, c)
- if (isNullConstant(TrueV))
- return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV);
- // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f,
- // c)
- return DAG.getNode(
- ISD::OR, DL, VT,
- DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV));
- }
- if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) {
- // (select (riscv_seteq c), t, 0) -> (czero_nez t, c)
- if (isNullConstant(FalseV))
- return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV);
- // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c)
- if (isNullConstant(TrueV))
- return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV);
- // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t,
- // c)
- return DAG.getNode(
- ISD::OR, DL, VT,
- DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV));
- }
-
// (select c, t, 0) -> (czero_eqz t, c)
if (isNullConstant(FalseV))
return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
@@ -6090,10 +7066,17 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
ISD::OR, DL, VT, FalseV,
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
+ // Try some other optimizations before falling back to generic lowering.
+ if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
+ return V;
+
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
- return DAG.getNode(ISD::OR, DL, VT,
- DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ // Unless we have the short forward branch optimization.
+ if (!Subtarget.hasShortForwardBranchOpt())
+ return DAG.getNode(
+ ISD::OR, DL, VT,
+ DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
}
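The czero.eqz/czero.nez pair works because each instruction zeroes exactly one arm of the select, so the OR recombines them into the selected value. A scalar model of that identity (plain C++; czeroEqz/czeroNez encode the Zicond semantics of producing zero when the condition register is zero or non-zero, respectively):

#include <cassert>
#include <cstdint>

// czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
static uint64_t czeroEqz(uint64_t Val, uint64_t Cond) { return Cond == 0 ? 0 : Val; }
// czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1
static uint64_t czeroNez(uint64_t Val, uint64_t Cond) { return Cond != 0 ? 0 : Val; }

int main() {
  const uint64_t Conds[] = {0, 1, 42};
  const uint64_t T = 0xAAAA, F = 0x5555;
  for (uint64_t Cond : Conds) {
    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    uint64_t Sel = czeroEqz(T, Cond) | czeroNez(F, Cond);
    assert(Sel == (Cond ? T : F));
  }
  return 0;
}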
if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
@@ -6297,7 +7280,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = Lo << Shamt
- // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
// else:
// Lo = 0
// Hi = Lo << (Shamt-XLEN)
@@ -6336,7 +7319,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// SRA expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-XLEN);
@@ -6344,7 +7327,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
//
// SRL expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-XLEN);
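Editor's note: the comment edits in the three expansion sketches above are cosmetic rather than functional. For 0 <= Shamt < XLEN with XLEN a power of two, (XLEN-1) ^ Shamt equals (XLEN-1) - Shamt, because Shamt's set bits are always a subset of the all-ones mask XLEN-1. A minimal standalone check, purely illustrative:

#include <cassert>
#include <initializer_list>

int main() {
  for (unsigned XLen : {32u, 64u})
    for (unsigned Shamt = 0; Shamt < XLen; ++Shamt)
      // XOR with the low-bit mask XLen-1 behaves like subtraction from it,
      // since Shamt never has bits outside that mask.
      assert(((XLen - 1) ^ Shamt) == ((XLen - 1) - Shamt));
  return 0;
}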
@@ -6394,12 +7377,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
}
- MVT XLenVT = Subtarget.getXLenVT();
- assert(SplatVal.getValueType() == XLenVT &&
- "Unexpected type for i1 splat value");
MVT InterVT = VT.changeVectorElementType(MVT::i8);
- SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
- DAG.getConstant(1, DL, XLenVT));
+ SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
+ DAG.getConstant(1, DL, SplatVal.getValueType()));
SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
SDValue Zero = DAG.getConstant(0, DL, InterVT);
return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
@@ -6420,37 +7400,19 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
- if (VecVT.isFixedLengthVector()) {
- MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
- SDLoc DL(Op);
- auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector())
+ ContainerVT = getContainerForFixedLengthVector(VecVT);
- SDValue Res =
- splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
- return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
- }
+ auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
- if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
- int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
- int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
- // If Hi constant is all the same sign bit as Lo, lower this as a custom
- // node in order to try and match RVV vector/scalar instructions.
- if ((LoC >> 31) == HiC)
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
- Lo, DAG.getRegister(RISCV::X0, MVT::i32));
- }
+ SDValue Res =
+ splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
- // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
- if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
- isa<ConstantSDNode>(Hi.getOperand(1)) &&
- Hi.getConstantOperandVal(1) == 31)
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
- DAG.getRegister(RISCV::X0, MVT::i32));
+ if (VecVT.isFixedLengthVector())
+ Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
- // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
- DAG.getUNDEF(VecVT), Lo, Hi,
- DAG.getRegister(RISCV::X0, MVT::i32));
+ return Res;
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
@@ -6754,6 +7716,32 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
return Result;
}
+// Given a scalable vector type and an index into it, returns the type for the
+// smallest subvector that the index fits in. This can be used to reduce LMUL
+// for operations like vslidedown.
+//
+// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
+static std::optional<MVT>
+getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector());
+ const unsigned EltSize = VecVT.getScalarSizeInBits();
+ const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ const unsigned MinVLMAX = VectorBitsMin / EltSize;
+ MVT SmallerVT;
+ if (MaxIdx < MinVLMAX)
+ SmallerVT = getLMUL1VT(VecVT);
+ else if (MaxIdx < MinVLMAX * 2)
+ SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
+ else if (MaxIdx < MinVLMAX * 4)
+ SmallerVT = getLMUL1VT(VecVT)
+ .getDoubleNumVectorElementsVT()
+ .getDoubleNumVectorElementsVT();
+ if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
+ return std::nullopt;
+ return SmallerVT;
+}
+
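Editor's note: a rough standalone model of the index-to-LMUL shrinking performed by getSmallestVTForIndex above, under the assumption that an LMUL1 register type carries RVVBitsPerBlock = 64 known-min bits (so the LMUL1 type for i32 elements is nxv2i32, as in the comment's example). With Zvl128b and 32-bit elements, MinVLMAX is 128/32 = 4, so index 3 stays inside the first LMUL1 register while index 4 does not. All names here are hypothetical stand-ins, not LLVM APIs.

#include <cassert>
#include <optional>

// Returns a shrunken LMUL (1, 2 or 4) guaranteed to contain element MaxIdx,
// or nullopt if no profitable shrinking is possible.
static std::optional<unsigned> smallestLMULForIndex(unsigned CurLMUL,
                                                    unsigned EltBits,
                                                    unsigned MinVLen,
                                                    unsigned MaxIdx) {
  // Worst-case number of elements held by a single vector register.
  unsigned MinVLMAX = MinVLen / EltBits;
  unsigned NewLMUL;
  if (MaxIdx < MinVLMAX)
    NewLMUL = 1;
  else if (MaxIdx < MinVLMAX * 2)
    NewLMUL = 2;
  else if (MaxIdx < MinVLMAX * 4)
    NewLMUL = 4;
  else
    return std::nullopt;
  return NewLMUL < CurLMUL ? std::optional<unsigned>(NewLMUL) : std::nullopt;
}

int main() {
  // nxv4i32 is LMUL2 when RVVBitsPerBlock is 64 (4 * 32 / 64 = 2).
  assert(smallestLMULForIndex(/*CurLMUL=*/2, /*EltBits=*/32,
                              /*MinVLen=*/128, /*MaxIdx=*/3) == 1u);
  // Index 4 no longer fits in the first LMUL1 register, so no shrinking.
  assert(!smallestLMULForIndex(2, 32, 128, 4).has_value());
  return 0;
}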
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
@@ -6784,6 +7772,43 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+ // If we know the index we're going to insert at, we can shrink Vec so that
+ // we're performing the scalar inserts and slideup on a smaller LMUL.
+ MVT OrigContainerVT = ContainerVT;
+ SDValue OrigVec = Vec;
+ SDValue AlignedIdx;
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
+ const unsigned OrigIdx = IdxC->getZExtValue();
+ // Do we know an upper bound on LMUL?
+ if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
+ DL, DAG, Subtarget)) {
+ ContainerVT = *ShrunkVT;
+ AlignedIdx = DAG.getVectorIdxConstant(0, DL);
+ }
+
+ // If we're compiling for an exact VLEN value, we can always perform
+ // the insert in m1 as we can determine the register corresponding to
+ // the index in the register group.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ const MVT M1VT = getLMUL1VT(ContainerVT);
+ if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) {
+ EVT ElemVT = VecVT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ unsigned RemIdx = OrigIdx % ElemsPerVReg;
+ unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
+ unsigned ExtractIdx =
+ SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
+ AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
+ Idx = DAG.getVectorIdxConstant(RemIdx, DL);
+ ContainerVT = M1VT;
+ }
+
+ if (AlignedIdx)
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ AlignedIdx);
+ }
+
MVT XLenVT = Subtarget.getXLenVT();
bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
@@ -6807,7 +7832,13 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
unsigned Opc =
VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
if (isNullConstant(Idx)) {
+ if (!VecVT.isFloatingPoint())
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
+
+ if (AlignedIdx)
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Vec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -6840,6 +7871,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
+ if (AlignedIdx)
+ ValInVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ ValInVec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return ValInVec;
return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
@@ -6870,6 +7905,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Policy = RISCVII::TAIL_AGNOSTIC;
SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
Idx, Mask, InsertVL, Policy);
+
+ if (AlignedIdx)
+ Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Slideup, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Slideup;
return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
@@ -6899,8 +7938,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SDValue Vfirst =
DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
- return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
- ISD::SETEQ);
+ SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
+ DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
}
if (VecVT.isFixedLengthVector()) {
unsigned NumElts = VecVT.getVectorNumElements();
@@ -6909,7 +7949,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
unsigned WidenVecLen;
SDValue ExtractElementIdx;
SDValue ExtractBitIdx;
- unsigned MaxEEW = Subtarget.getELEN();
+ unsigned MaxEEW = Subtarget.getELen();
MVT LargestEltVT = MVT::getIntegerVT(
std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
if (NumElts <= LargestEltVT.getSizeInBits()) {
@@ -6938,8 +7978,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
// Extract the bit from GPR.
SDValue ShiftRight =
DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
- return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
- DAG.getConstant(1, DL, XLenVT));
+ SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+ DAG.getConstant(1, DL, XLenVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
}
}
// Otherwise, promote to an i8 vector and extract from that.
@@ -6955,6 +7996,61 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+ // If we're compiling for an exact VLEN value and we have a known
+ // constant index, we can always perform the extract in m1 (or
+ // smaller) as we can determine the register corresponding to
+ // the index in the register group.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+ IdxC && MinVLen == MaxVLen &&
+ VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
+ MVT M1VT = getLMUL1VT(ContainerVT);
+ unsigned OrigIdx = IdxC->getZExtValue();
+ EVT ElemVT = VecVT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ unsigned RemIdx = OrigIdx % ElemsPerVReg;
+ unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
+ unsigned ExtractIdx =
+ SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
+ DAG.getVectorIdxConstant(ExtractIdx, DL));
+ Idx = DAG.getVectorIdxConstant(RemIdx, DL);
+ ContainerVT = M1VT;
+ }
+
+ // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
+ // contains our index.
+ std::optional<uint64_t> MaxIdx;
+ if (VecVT.isFixedLengthVector())
+ MaxIdx = VecVT.getVectorNumElements() - 1;
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
+ MaxIdx = IdxC->getZExtValue();
+ if (MaxIdx) {
+ if (auto SmallerVT =
+ getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
+ ContainerVT = *SmallerVT;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ DAG.getConstant(0, DL, XLenVT));
+ }
+ }
+
+ // If after narrowing, the required slide is still greater than LMUL2,
+  // fall back to generic expansion and go through the stack. This is done
+ // for a subtle reason: extracting *all* elements out of a vector is
+ // widely expected to be linear in vector size, but because vslidedown
+ // is linear in LMUL, performing N extracts using vslidedown becomes
+ // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
+ // seems to have the same problem (the store is linear in LMUL), but the
+ // generic expansion *memoizes* the store, and thus for many extracts of
+ // the same vector we end up with one store and a bunch of loads.
+ // TODO: We don't have the same code for insert_vector_elt because we
+ // have BUILD_VECTOR and handle the degenerate case there. Should we
+ // consider adding an inverse BUILD_VECTOR node?
+ MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
+ if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
+ return SDValue();
+
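Editor's note: the comment above is easier to see with a crude cost model, which is purely illustrative and not taken from the source. Each vslidedown-based extract touches the whole LMUL-wide register group, so extracting every element that way is roughly NumElts * LMUL units of work, while the stack fallback pays one (memoized) whole-group store plus a cheap scalar load per element.

#include <cassert>

static unsigned slideCost(unsigned NumElts, unsigned LMUL) {
  return NumElts * LMUL; // one LMUL-wide slide per extracted element
}
static unsigned stackCost(unsigned NumElts, unsigned LMUL) {
  return LMUL + NumElts; // one memoized store plus N scalar loads
}

int main() {
  // For a 64-element LMUL8 fixed vector, sliding is roughly 7x more work.
  assert(slideCost(64, 8) == 512 && stackCost(64, 8) == 72);
  return 0;
}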
// If the index is 0, the vector is already in the right position.
if (!isNullConstant(Idx)) {
// Use a VL of 1 to avoid processing more elements than we need.
@@ -7062,16 +8158,8 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
// Optimize for constant AVL
if (isa<ConstantSDNode>(AVL)) {
- unsigned EltSize = VT.getScalarSizeInBits();
- unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
-
- unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
- unsigned MaxVLMAX =
- RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
-
- unsigned VectorBitsMin = Subtarget.getRealMinVLen();
- unsigned MinVLMAX =
- RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
if (AVLInt <= MinVLMAX) {
@@ -7182,7 +8270,7 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
// Determine the VF that corresponds to LMUL 1 for ElementWidth.
unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
// We don't support VF==1 with ELEN==32.
- unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
unsigned VF = N->getConstantOperandVal(2);
assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
@@ -7202,7 +8290,39 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+ SDValue Res =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
+}
+
+static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
+ SmallVector<SDValue> &Ops) {
+ SDLoc DL(Op);
+
+ const RISCVSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+ for (const SDValue &V : Op->op_values()) {
+ EVT ValType = V.getValueType();
+ if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
+ MVT InterimIVT =
+ MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
+ ValType.getVectorElementCount());
+ Ops.push_back(DAG.getBitcast(InterimIVT, V));
+ } else if (ValType.isFixedLengthVector()) {
+ MVT OpContainerVT = getContainerForFixedLengthVector(
+ DAG, V.getSimpleValueType(), Subtarget);
+ Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
+ } else
+ Ops.push_back(V);
+ }
+}
+
+// LMUL * VLEN should be greater than or equal to EGS * SEW
+static inline bool isValidEGW(int EGS, EVT VT,
+ const RISCVSubtarget &Subtarget) {
+ return (Subtarget.getRealMinVLen() *
+ VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
+ EGS * VT.getScalarSizeInBits();
}
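Editor's note: the EGW check added above reduces to LMUL * VLEN >= EGS * SEW, where LMUL is recovered as the type's known-min bit count divided by RVVBitsPerBlock. A standalone restatement of that arithmetic, assuming RVVBitsPerBlock is 64; the helper name is invented:

#include <cassert>

// Effective group width (VLEN * LMUL) must cover one element group of
// EGS elements of SEW bits.
static bool isValidEGWModel(unsigned EGS, unsigned KnownMinBits,
                            unsigned SEW, unsigned MinVLen) {
  return (MinVLen * KnownMinBits) / 64 >= EGS * SEW;
}

int main() {
  // Zvl128b, nxv4i32 (known-min 128 bits, LMUL2): EGW = 256 >= 4 * 32.
  assert(isValidEGWModel(/*EGS=*/4, /*KnownMinBits=*/128, /*SEW=*/32,
                         /*MinVLen=*/128));
  // Zvl128b, nxv1i32 (known-min 32 bits, fractional LMUL): EGW = 64 < 128.
  assert(!isValidEGWModel(4, 32, 32, 128));
  return 0;
}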
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -7238,12 +8358,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
}
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
+
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_sm4ks:
case Intrinsic::riscv_sm4ed: {
unsigned Opc =
IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
+
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+ SDValue Res =
+ DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
+
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
@@ -7254,20 +8392,43 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_clmul:
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+ SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::riscv_clmulh:
- return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
- Op.getOperand(2));
- case Intrinsic::riscv_clmulr:
- return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
- Op.getOperand(2));
+ case Intrinsic::riscv_clmulr: {
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
+ if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
+ NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+ DAG.getConstant(32, DL, MVT::i64));
+ NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+ DAG.getConstant(32, DL, MVT::i64));
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
+ DAG.getConstant(32, DL, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
+ }
+
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
+ }
case Intrinsic::experimental_get_vector_length:
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
- case Intrinsic::riscv_vmv_x_s:
- assert(Op.getValueType() == XLenVT && "Unexpected VT!");
- return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
- Op.getOperand(1));
+ case Intrinsic::riscv_vmv_x_s: {
+ SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
+ }
case Intrinsic::riscv_vfmv_f_s:
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
@@ -7325,6 +8486,86 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
+ // EGS * EEW >= 128 bits
+ case Intrinsic::riscv_vaesdf_vv:
+ case Intrinsic::riscv_vaesdf_vs:
+ case Intrinsic::riscv_vaesdm_vv:
+ case Intrinsic::riscv_vaesdm_vs:
+ case Intrinsic::riscv_vaesef_vv:
+ case Intrinsic::riscv_vaesef_vs:
+ case Intrinsic::riscv_vaesem_vv:
+ case Intrinsic::riscv_vaesem_vs:
+ case Intrinsic::riscv_vaeskf1:
+ case Intrinsic::riscv_vaeskf2:
+ case Intrinsic::riscv_vaesz_vs:
+ case Intrinsic::riscv_vsm4k:
+ case Intrinsic::riscv_vsm4r_vv:
+ case Intrinsic::riscv_vsm4r_vs: {
+ if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
+ report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
+ return Op;
+ }
+ // EGS * EEW >= 256 bits
+ case Intrinsic::riscv_vsm3c:
+ case Intrinsic::riscv_vsm3me: {
+ if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
+ !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
+ report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
+ return Op;
+ }
+ // zvknha(SEW=32)/zvknhb(SEW=[32|64])
+ case Intrinsic::riscv_vsha2ch:
+ case Intrinsic::riscv_vsha2cl:
+ case Intrinsic::riscv_vsha2ms: {
+ if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
+ !Subtarget.hasStdExtZvknhb())
+ report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
+ if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
+ !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
+ report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
+ return Op;
+ }
+ case Intrinsic::riscv_sf_vc_v_x:
+ case Intrinsic::riscv_sf_vc_v_i:
+ case Intrinsic::riscv_sf_vc_v_xv:
+ case Intrinsic::riscv_sf_vc_v_iv:
+ case Intrinsic::riscv_sf_vc_v_vv:
+ case Intrinsic::riscv_sf_vc_v_fv:
+ case Intrinsic::riscv_sf_vc_v_xvv:
+ case Intrinsic::riscv_sf_vc_v_ivv:
+ case Intrinsic::riscv_sf_vc_v_vvv:
+ case Intrinsic::riscv_sf_vc_v_fvv:
+ case Intrinsic::riscv_sf_vc_v_xvw:
+ case Intrinsic::riscv_sf_vc_v_ivw:
+ case Intrinsic::riscv_sf_vc_v_vvw:
+ case Intrinsic::riscv_sf_vc_v_fvw: {
+ MVT VT = Op.getSimpleValueType();
+
+ SmallVector<SDValue> Ops;
+ getVCIXOperands(Op, DAG, Ops);
+
+ MVT RetVT = VT;
+ if (VT.isFixedLengthVector())
+ RetVT = getContainerForFixedLengthVector(VT);
+ else if (VT.isFloatingPoint())
+ RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
+ VT.getVectorElementCount());
+
+ SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
+
+ if (VT.isFixedLengthVector())
+ NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
+ else if (VT.isFloatingPoint())
+ NewNode = DAG.getBitcast(VT, NewNode);
+
+ if (Op == NewNode)
+ break;
+
+ return NewNode;
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -7425,7 +8666,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
MVT VT = Op->getSimpleValueType(0);
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
auto *Load = cast<MemIntrinsicSDNode>(Op);
SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
@@ -7445,6 +8687,49 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Results.push_back(Result.getValue(NF));
return DAG.getMergeValues(Results, DL);
}
+ case Intrinsic::riscv_sf_vc_v_x_se:
+ case Intrinsic::riscv_sf_vc_v_i_se:
+ case Intrinsic::riscv_sf_vc_v_xv_se:
+ case Intrinsic::riscv_sf_vc_v_iv_se:
+ case Intrinsic::riscv_sf_vc_v_vv_se:
+ case Intrinsic::riscv_sf_vc_v_fv_se:
+ case Intrinsic::riscv_sf_vc_v_xvv_se:
+ case Intrinsic::riscv_sf_vc_v_ivv_se:
+ case Intrinsic::riscv_sf_vc_v_vvv_se:
+ case Intrinsic::riscv_sf_vc_v_fvv_se:
+ case Intrinsic::riscv_sf_vc_v_xvw_se:
+ case Intrinsic::riscv_sf_vc_v_ivw_se:
+ case Intrinsic::riscv_sf_vc_v_vvw_se:
+ case Intrinsic::riscv_sf_vc_v_fvw_se: {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SmallVector<SDValue> Ops;
+ getVCIXOperands(Op, DAG, Ops);
+
+ MVT RetVT = VT;
+ if (VT.isFixedLengthVector())
+ RetVT = getContainerForFixedLengthVector(VT);
+ else if (VT.isFloatingPoint())
+ RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
+ RetVT.getVectorElementCount());
+
+ SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
+ SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
+
+ if (VT.isFixedLengthVector()) {
+ SDValue FixedVector =
+ convertFromScalableVector(VT, NewNode, DAG, Subtarget);
+ NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
+ } else if (VT.isFloatingPoint()) {
+ SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
+ NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
+ }
+
+ if (Op == NewNode)
+ break;
+
+ return NewNode;
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -7517,7 +8802,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MVT VT = Op->getOperand(2).getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
SDValue Ptr = Op->getOperand(NF + 2);
@@ -7532,6 +8818,73 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
}
+ case Intrinsic::riscv_sf_vc_x_se_e8mf8:
+ case Intrinsic::riscv_sf_vc_x_se_e8mf4:
+ case Intrinsic::riscv_sf_vc_x_se_e8mf2:
+ case Intrinsic::riscv_sf_vc_x_se_e8m1:
+ case Intrinsic::riscv_sf_vc_x_se_e8m2:
+ case Intrinsic::riscv_sf_vc_x_se_e8m4:
+ case Intrinsic::riscv_sf_vc_x_se_e8m8:
+ case Intrinsic::riscv_sf_vc_x_se_e16mf4:
+ case Intrinsic::riscv_sf_vc_x_se_e16mf2:
+ case Intrinsic::riscv_sf_vc_x_se_e16m1:
+ case Intrinsic::riscv_sf_vc_x_se_e16m2:
+ case Intrinsic::riscv_sf_vc_x_se_e16m4:
+ case Intrinsic::riscv_sf_vc_x_se_e16m8:
+ case Intrinsic::riscv_sf_vc_x_se_e32mf2:
+ case Intrinsic::riscv_sf_vc_x_se_e32m1:
+ case Intrinsic::riscv_sf_vc_x_se_e32m2:
+ case Intrinsic::riscv_sf_vc_x_se_e32m4:
+ case Intrinsic::riscv_sf_vc_x_se_e32m8:
+ case Intrinsic::riscv_sf_vc_x_se_e64m1:
+ case Intrinsic::riscv_sf_vc_x_se_e64m2:
+ case Intrinsic::riscv_sf_vc_x_se_e64m4:
+ case Intrinsic::riscv_sf_vc_x_se_e64m8:
+ case Intrinsic::riscv_sf_vc_i_se_e8mf8:
+ case Intrinsic::riscv_sf_vc_i_se_e8mf4:
+ case Intrinsic::riscv_sf_vc_i_se_e8mf2:
+ case Intrinsic::riscv_sf_vc_i_se_e8m1:
+ case Intrinsic::riscv_sf_vc_i_se_e8m2:
+ case Intrinsic::riscv_sf_vc_i_se_e8m4:
+ case Intrinsic::riscv_sf_vc_i_se_e8m8:
+ case Intrinsic::riscv_sf_vc_i_se_e16mf4:
+ case Intrinsic::riscv_sf_vc_i_se_e16mf2:
+ case Intrinsic::riscv_sf_vc_i_se_e16m1:
+ case Intrinsic::riscv_sf_vc_i_se_e16m2:
+ case Intrinsic::riscv_sf_vc_i_se_e16m4:
+ case Intrinsic::riscv_sf_vc_i_se_e16m8:
+ case Intrinsic::riscv_sf_vc_i_se_e32mf2:
+ case Intrinsic::riscv_sf_vc_i_se_e32m1:
+ case Intrinsic::riscv_sf_vc_i_se_e32m2:
+ case Intrinsic::riscv_sf_vc_i_se_e32m4:
+ case Intrinsic::riscv_sf_vc_i_se_e32m8:
+ case Intrinsic::riscv_sf_vc_i_se_e64m1:
+ case Intrinsic::riscv_sf_vc_i_se_e64m2:
+ case Intrinsic::riscv_sf_vc_i_se_e64m4:
+ case Intrinsic::riscv_sf_vc_i_se_e64m8:
+ case Intrinsic::riscv_sf_vc_xv_se:
+ case Intrinsic::riscv_sf_vc_iv_se:
+ case Intrinsic::riscv_sf_vc_vv_se:
+ case Intrinsic::riscv_sf_vc_fv_se:
+ case Intrinsic::riscv_sf_vc_xvv_se:
+ case Intrinsic::riscv_sf_vc_ivv_se:
+ case Intrinsic::riscv_sf_vc_vvv_se:
+ case Intrinsic::riscv_sf_vc_fvv_se:
+ case Intrinsic::riscv_sf_vc_xvw_se:
+ case Intrinsic::riscv_sf_vc_ivw_se:
+ case Intrinsic::riscv_sf_vc_vvw_se:
+ case Intrinsic::riscv_sf_vc_fvw_se: {
+ SmallVector<SDValue> Ops;
+ getVCIXOperands(Op, DAG, Ops);
+
+ SDValue NewNode =
+ DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
+
+ if (Op == NewNode)
+ break;
+
+ return NewNode;
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -7541,23 +8894,40 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) {
switch (ISDOpcode) {
default:
llvm_unreachable("Unhandled reduction");
+ case ISD::VP_REDUCE_ADD:
case ISD::VECREDUCE_ADD:
return RISCVISD::VECREDUCE_ADD_VL;
+ case ISD::VP_REDUCE_UMAX:
case ISD::VECREDUCE_UMAX:
return RISCVISD::VECREDUCE_UMAX_VL;
+ case ISD::VP_REDUCE_SMAX:
case ISD::VECREDUCE_SMAX:
return RISCVISD::VECREDUCE_SMAX_VL;
+ case ISD::VP_REDUCE_UMIN:
case ISD::VECREDUCE_UMIN:
return RISCVISD::VECREDUCE_UMIN_VL;
+ case ISD::VP_REDUCE_SMIN:
case ISD::VECREDUCE_SMIN:
return RISCVISD::VECREDUCE_SMIN_VL;
+ case ISD::VP_REDUCE_AND:
case ISD::VECREDUCE_AND:
return RISCVISD::VECREDUCE_AND_VL;
+ case ISD::VP_REDUCE_OR:
case ISD::VECREDUCE_OR:
return RISCVISD::VECREDUCE_OR_VL;
+ case ISD::VP_REDUCE_XOR:
case ISD::VECREDUCE_XOR:
return RISCVISD::VECREDUCE_XOR_VL;
+ case ISD::VP_REDUCE_FADD:
+ return RISCVISD::VECREDUCE_FADD_VL;
+ case ISD::VP_REDUCE_SEQ_FADD:
+ return RISCVISD::VECREDUCE_SEQ_FADD_VL;
+ case ISD::VP_REDUCE_FMAX:
+ return RISCVISD::VECREDUCE_FMAX_VL;
+ case ISD::VP_REDUCE_FMIN:
+ return RISCVISD::VECREDUCE_FMIN_VL;
}
+
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
@@ -7575,8 +8945,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
"Unexpected reduction lowering");
MVT XLenVT = Subtarget.getXLenVT();
- assert(Op.getValueType() == XLenVT &&
- "Expected reduction output to be legalized to XLenVT");
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
@@ -7630,6 +8998,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
}
SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
+ SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
if (!IsVP)
return SetCC;
@@ -7640,7 +9009,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
// 0 for an inactive vector, and so we've already received the neutral value:
// AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
// can simply include the start value.
- return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
+ return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}
static bool isNonZeroAVL(SDValue AVL) {
@@ -7716,9 +9085,19 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SDValue NeutralElem =
- DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
- return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec,
+ SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
+ switch (BaseOpc) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ MVT XLenVT = Subtarget.getXLenVT();
+ StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
+ DAG.getConstant(0, DL, XLenVT));
+ }
+ return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
Mask, VL, DL, DAG, Subtarget);
}
@@ -7726,11 +9105,11 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
-getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
+getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
+ const RISCVSubtarget &Subtarget) {
SDLoc DL(Op);
auto Flags = Op->getFlags();
unsigned Opcode = Op.getOpcode();
- unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
switch (Opcode) {
default:
llvm_unreachable("Unhandled reduction");
@@ -7744,11 +9123,16 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
Op.getOperand(0));
case ISD::VECREDUCE_FMIN:
- return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
- DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
- case ISD::VECREDUCE_FMAX:
- return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
- DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
+ case ISD::VECREDUCE_FMAX: {
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Front =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
+ DAG.getConstant(0, DL, XLenVT));
+ unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN)
+ ? RISCVISD::VECREDUCE_FMIN_VL
+ : RISCVISD::VECREDUCE_FMAX_VL;
+ return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
+ }
}
}
@@ -7760,7 +9144,7 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
unsigned RVVOpcode;
SDValue VectorVal, ScalarVal;
std::tie(RVVOpcode, VectorVal, ScalarVal) =
- getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
+ getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
MVT VecVT = VectorVal.getSimpleValueType();
MVT ContainerVT = VecVT;
@@ -7774,37 +9158,6 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
VectorVal, Mask, VL, DL, DAG, Subtarget);
}
-static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
- switch (ISDOpcode) {
- default:
- llvm_unreachable("Unhandled reduction");
- case ISD::VP_REDUCE_ADD:
- return RISCVISD::VECREDUCE_ADD_VL;
- case ISD::VP_REDUCE_UMAX:
- return RISCVISD::VECREDUCE_UMAX_VL;
- case ISD::VP_REDUCE_SMAX:
- return RISCVISD::VECREDUCE_SMAX_VL;
- case ISD::VP_REDUCE_UMIN:
- return RISCVISD::VECREDUCE_UMIN_VL;
- case ISD::VP_REDUCE_SMIN:
- return RISCVISD::VECREDUCE_SMIN_VL;
- case ISD::VP_REDUCE_AND:
- return RISCVISD::VECREDUCE_AND_VL;
- case ISD::VP_REDUCE_OR:
- return RISCVISD::VECREDUCE_OR_VL;
- case ISD::VP_REDUCE_XOR:
- return RISCVISD::VECREDUCE_XOR_VL;
- case ISD::VP_REDUCE_FADD:
- return RISCVISD::VECREDUCE_FADD_VL;
- case ISD::VP_REDUCE_SEQ_FADD:
- return RISCVISD::VECREDUCE_SEQ_FADD_VL;
- case ISD::VP_REDUCE_FMAX:
- return RISCVISD::VECREDUCE_FMAX_VL;
- case ISD::VP_REDUCE_FMIN:
- return RISCVISD::VECREDUCE_FMIN_VL;
- }
-}
-
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -7817,7 +9170,7 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
return SDValue();
MVT VecVT = VecEVT.getSimpleVT();
- unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
+ unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
if (VecVT.isFixedLengthVector()) {
auto ContainerVT = getContainerForFixedLengthVector(VecVT);
@@ -7892,19 +9245,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), SubVec,
- DAG.getConstant(0, DL, XLenVT));
+
if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SubVec,
+ DAG.getConstant(0, DL, XLenVT));
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
return DAG.getBitcast(Op.getValueType(), SubVec);
}
+
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SubVec,
+ DAG.getConstant(0, DL, XLenVT));
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
// that for slideup this includes the offset.
unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
- SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
+ SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
// Use tail agnostic policy if we're inserting over Vec's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
@@ -8051,26 +9409,38 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
}
}
+ // With an index of 0 this is a cast-like subvector, which can be performed
+ // with subregister operations.
+ if (OrigIdx == 0)
+ return Op;
+
// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group down the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
- // With an index of 0 this is a cast-like subvector, which can be performed
- // with subregister operations.
- if (OrigIdx == 0)
- return Op;
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+
+ // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
+ unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
+ if (auto ShrunkVT =
+ getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
+ ContainerVT = *ShrunkVT;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. This
// avoids sliding down elements we're going to discard straight away.
- SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
SDValue Slidedown =
getVSlidedown(DAG, Subtarget, DL, ContainerVT,
@@ -8092,17 +9462,18 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
if (RemIdx == 0)
return Op;
- // Else we must shift our vector register directly to extract the subvector.
- // Do this using VSLIDEDOWN.
+ // Else SubVecVT is a fractional LMUL and may need to be slid down.
+ assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second);
// If the vector type is an LMUL-group type, extract a subvector equal to the
- // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
- // instruction.
+ // nearest full vector register type.
MVT InterSubVT = VecVT;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
+ // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
+ // we should have successfully decomposed the extract into a subregister.
+ assert(SubRegIdx != RISCV::NoSubRegister);
InterSubVT = getLMUL1VT(VecVT);
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
- DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
+ Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
}
// Slide this vector register down by the desired number of elements in order
@@ -8200,7 +9571,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
// We can deinterleave through vnsrl.wi if the element type is smaller than
// ELEN
- if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
+ if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
SDValue Even =
getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
SDValue Odd =
@@ -8269,7 +9640,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
// If the element type is smaller than ELEN, then we can interleave with
// vwaddu.vv and vwmaccu.vx
- if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
+ if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
DAG, Subtarget);
} else {
@@ -8476,7 +9847,20 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ // If we know the exact VLEN and our fixed length vector completely fills
+ // the container, use a whole register load instead.
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
+ if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
+ getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
+ SDValue NewLoad =
+ DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
+ Load->getMemOperand());
+ SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
+ return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
+ }
+
+  SDValue VL =
+      getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
SDValue IntID = DAG.getTargetConstant(
@@ -8520,11 +9904,22 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
-
SDValue NewValue =
convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
+
+ // If we know the exact VLEN and our fixed length vector completely fills
+ // the container, use a whole register store instead.
+ const auto [MinVLMAX, MaxVLMAX] =
+ RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
+ if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
+ getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
+ return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
+ Store->getMemOperand());
+
+ SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
+ Subtarget);
+
bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
SDValue IntID = DAG.getTargetConstant(
IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
@@ -8902,9 +10297,10 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
// types.
-SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
- unsigned RISCVISDOpc,
- bool HasMergeOp) const {
+SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
+ unsigned RISCVISDOpc = getRISCVVLOp(Op);
+ bool HasMergeOp = hasMergeOp(RISCVISDOpc);
+
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SmallVector<SDValue, 4> Ops;
@@ -9053,13 +10449,14 @@ SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
}
// Lower Floating-Point/Integer Type-Convert VP SDNodes
-SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
- unsigned RISCVISDOpc) const {
+SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
SDValue Mask = Op.getOperand(1);
SDValue VL = Op.getOperand(2);
+ unsigned RISCVISDOpc = getRISCVVLOp(Op);
MVT DstVT = Op.getSimpleValueType();
MVT SrcVT = Src.getSimpleValueType();
@@ -9185,12 +10582,132 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
- unsigned MaskOpc,
- unsigned VecOpc) const {
+SDValue
+RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ SDValue Op1 = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue EVL = Op.getOperand(2);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ MVT GatherVT = ContainerVT;
+ MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
+ // Check if we are working with mask vectors
+ bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
+ if (IsMaskVector) {
+ GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
+
+ // Expand input operand
+ SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT),
+ DAG.getConstant(1, DL, XLenVT), EVL);
+ SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT),
+ DAG.getConstant(0, DL, XLenVT), EVL);
+ Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne,
+ SplatZero, EVL);
+ }
+
+ unsigned EltSize = GatherVT.getScalarSizeInBits();
+ unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
+ // If this is SEW=8 and VLMAX is unknown or more than 256, we need
+ // to use vrgatherei16.vv.
+ // TODO: It's also possible to use vrgatherei16.vv for other types to
+ // decrease register width for the index calculation.
+ // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
+ if (MaxVLMAX > 256 && EltSize == 8) {
+ // If this is LMUL=8, we have to split before using vrgatherei16.vv.
+ // Split the vector in half and reverse each half using a full register
+ // reverse.
+ // Swap the halves and concatenate them.
+ // Slide the concatenated result by (VLMax - VL).
+ if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
+ auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
+
+ SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
+ SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
+
+ // Reassemble the low and high pieces reversed.
+ // NOTE: this Result is unmasked (because we do not need masks for
+ // shuffles). If in the future this has to change, we can use a SELECT_VL
+ // between Result and UNDEF using the mask originally passed to VP_REVERSE
+ SDValue Result =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
+
+ // Slide off any elements from past EVL that were reversed into the low
+ // elements.
+ unsigned MinElts = GatherVT.getVectorMinNumElements();
+ SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
+ DAG.getConstant(MinElts, DL, XLenVT));
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
+
+ Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
+ DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector
+ Result =
+ DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
+ {Result, DAG.getConstant(0, DL, GatherVT),
+ DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+ }
+
+ // Just promote the int type to i16 which will double the LMUL.
+ IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
+ GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ }
+
+ SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
+ SDValue VecLen =
+ DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
+ SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT), VecLen, EVL);
+ SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
+ DAG.getUNDEF(IndicesVT), Mask, EVL);
+ SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
+ DAG.getUNDEF(GatherVT), Mask, EVL);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector
+ Result = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, ContainerVT,
+ {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
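Editor's note: the 256-element threshold in the reverse lowering above comes from the gather index width. vrgather.vv reads its indices with the same SEW as the data, so with SEW=8 an index can only encode 0..255; once VLMAX may exceed 256 the code has to switch to vrgatherei16.vv (16-bit indices), splitting first if that would push LMUL past 8. A small sketch of that decision, with an invented helper name:

#include <cassert>

// For a byte-element gather, indices share SEW=8 and max out at 255.
static bool needsEI16Indices(unsigned EltSizeBits, unsigned MaxVLMAX) {
  unsigned MaxEncodableIdx = (1u << EltSizeBits) - 1; // 255 for SEW=8
  return MaxVLMAX - 1 > MaxEncodableIdx;
}

int main() {
  // VLEN=256, LMUL=8, SEW=8 -> VLMAX = 256: plain vrgather.vv still works.
  assert(!needsEI16Indices(8, 256));
  // VLEN=512, LMUL=8, SEW=8 -> VLMAX = 512: index 511 does not fit in a byte.
  assert(needsEI16Indices(8, 512));
  return 0;
}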
+SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
+ SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() != MVT::i1)
- return lowerVPOp(Op, DAG, VecOpc, true);
+ return lowerVPOp(Op, DAG);
// It is safe to drop mask parameter as masked-off elements are undef.
SDValue Op1 = Op->getOperand(0);
@@ -9206,7 +10723,7 @@ SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
}
SDLoc DL(Op);
- SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
+ SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
if (!IsFixed)
return Val;
return convertFromScalableVector(VT, Val, DAG, Subtarget);
@@ -9366,10 +10883,7 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
- SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
- VL);
- Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
- TrueMask, VL);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
}
unsigned IntID =
@@ -9468,10 +10982,7 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
- SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
- VL);
- Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
- TrueMask, VL);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
}
unsigned IntID =
@@ -9539,6 +11050,8 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
(RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
(RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
+ RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
+
SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
DAG.getConstant(2, DL, XLenVT));
SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
@@ -9653,8 +11166,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res.getValue(1));
return;
}
- // In absense of Zfh, promote f16 to f32, then convert.
- if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
+    // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
+ // convert.
+ if ((Op0.getValueType() == MVT::f16 &&
+ !Subtarget.hasStdExtZfhOrZhinx()) ||
+ Op0.getValueType() == MVT::bf16)
Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
@@ -10281,6 +11797,136 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
+/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
+/// which corresponds to it.
+static unsigned getVecReduceOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+    llvm_unreachable("Unhandled binary to transform reduction");
+ case ISD::ADD:
+ return ISD::VECREDUCE_ADD;
+ case ISD::UMAX:
+ return ISD::VECREDUCE_UMAX;
+ case ISD::SMAX:
+ return ISD::VECREDUCE_SMAX;
+ case ISD::UMIN:
+ return ISD::VECREDUCE_UMIN;
+ case ISD::SMIN:
+ return ISD::VECREDUCE_SMIN;
+ case ISD::AND:
+ return ISD::VECREDUCE_AND;
+ case ISD::OR:
+ return ISD::VECREDUCE_OR;
+ case ISD::XOR:
+ return ISD::VECREDUCE_XOR;
+ case ISD::FADD:
+ // Note: This is the associative form of the generic reduction opcode.
+ return ISD::VECREDUCE_FADD;
+ }
+}
+
+/// Perform two related transforms whose purpose is to incrementally recognize
+/// an explode_vector followed by scalar reduction as a vector reduction node.
+/// This exists to recover from a deficiency in SLP which can't handle
+/// forests with multiple roots sharing common nodes. In some cases, one
+/// of the trees will be vectorized, and the other will remain (unprofitably)
+/// scalarized.
+static SDValue
+combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+
+  // This transform needs to run before all integer types have been legalized
+  // to i64 (so that the vector element type matches the add type), and while
+  // it's safe to introduce odd-sized vector types.
+ if (DAG.NewNodesMustHaveLegalTypes)
+ return SDValue();
+
+ // Without V, this transform isn't useful. We could form the (illegal)
+ // operations and let them be scalarized again, but there's really no point.
+ if (!Subtarget.hasVInstructions())
+ return SDValue();
+
+ const SDLoc DL(N);
+ const EVT VT = N->getValueType(0);
+ const unsigned Opc = N->getOpcode();
+
+ // For FADD, we only handle the case with reassociation allowed. We
+ // could handle strict reduction order, but at the moment, there's no
+ // known reason to, and the complexity isn't worth it.
+ // TODO: Handle fminnum and fmaxnum here
+ if (!VT.isInteger() &&
+ (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
+ return SDValue();
+
+ const unsigned ReduceOpc = getVecReduceOpcode(Opc);
+ assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
+ "Inconsistent mappings");
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (!LHS.hasOneUse() || !RHS.hasOneUse())
+ return SDValue();
+
+ if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ std::swap(LHS, RHS);
+
+ if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(RHS.getOperand(1)))
+ return SDValue();
+
+ uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
+ SDValue SrcVec = RHS.getOperand(0);
+ EVT SrcVecVT = SrcVec.getValueType();
+ assert(SrcVecVT.getVectorElementType() == VT);
+ if (SrcVecVT.isScalableVector())
+ return SDValue();
+
+ if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
+ return SDValue();
+
+ // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
+ // reduce_op (extract_subvector [2 x VT] from V). This will form the
+ // root of our reduction tree. TODO: We could extend this to any two
+ // adjacent aligned constant indices if desired.
+ if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
+ uint64_t LHSIdx =
+ cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
+ if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
+ EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
+ SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
+ DAG.getVectorIdxConstant(0, DL));
+ return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
+ }
+ }
+
+ // Match (binop (reduce (extract_subvector V, 0),
+ // (extract_vector_elt V, sizeof(SubVec))))
+ // into a reduction of one more element from the original vector V.
+ if (LHS.getOpcode() != ReduceOpc)
+ return SDValue();
+
+ SDValue ReduceVec = LHS.getOperand(0);
+ if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
+ isNullConstant(ReduceVec.getOperand(1)) &&
+ ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
+ // For illegal types (e.g. 3xi32), most will be combined again into a
+ // wider (hopefully legal) type. If this is a terminal state, we are
+ // relying on type legalization here to produce something reasonable
+ // and this lowering quality could probably be improved. (TODO)
+ EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
+ SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
+ DAG.getVectorIdxConstant(0, DL));
+ auto Flags = ReduceVec->getFlags();
+ Flags.intersectWith(N->getFlags());
+ return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
+ }
+
+ return SDValue();
+}
+
+
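Editor's note: a scalar illustration (not LLVM IR) of the two patterns the combine above matches. It first folds binop (extract v, 0), (extract v, 1) into a 2-element reduce, then repeatedly widens binop (reduce v[0..k)), (extract v, k) into a reduce over k+1 elements. The helper below is a hypothetical stand-in for the VECREDUCE node:

#include <cassert>
#include <numeric>
#include <vector>

// Stand-in for ISD::VECREDUCE_ADD over the first Len elements of V.
static int reduceAdd(const std::vector<int> &V, unsigned Len) {
  return std::accumulate(V.begin(), V.begin() + Len, 0);
}

int main() {
  std::vector<int> V = {3, 1, 4, 1, 5, 9};
  // Step 1: add (extract V,0), (extract V,1)  ==  vecreduce_add V[0..2).
  assert(V[0] + V[1] == reduceAdd(V, 2));
  // Step 2, repeated: add (vecreduce_add V[0..k)), (extract V,k)
  //                   ==  vecreduce_add V[0..k+1).
  for (unsigned K = 2; K < V.size(); ++K)
    assert(reduceAdd(V, K) + V[K] == reduceAdd(V, K + 1));
  return 0;
}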
// Try to fold (<bop> x, (reduction.<bop> vec, start))
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@@ -10453,8 +12099,23 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
if (VT.isVector())
return SDValue();
- if (!Subtarget.hasShortForwardBranchOpt() ||
- (Slct.getOpcode() != ISD::SELECT &&
+ if (!Subtarget.hasShortForwardBranchOpt()) {
+ // (select cond, x, (and x, c)) has custom lowering with Zicond.
+ if ((!Subtarget.hasStdExtZicond() &&
+ !Subtarget.hasVendorXVentanaCondOps()) ||
+ N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ // Maybe harmful when condition code has multiple use.
+ if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
+ return SDValue();
+
+ // Maybe harmful when VT is wider than XLen.
+ if (VT.getSizeInBits() > Subtarget.getXLen())
+ return SDValue();
+ }
+
+ if ((Slct.getOpcode() != ISD::SELECT &&
Slct.getOpcode() != RISCVISD::SELECT_CC) ||
!Slct.hasOneUse())
return SDValue();
@@ -10573,7 +12234,7 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}
-// Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)).
+// Try to turn (add (xor bool, 1), -1) into (neg bool).
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10584,9 +12245,13 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
if (!isAllOnesConstant(N1))
return SDValue();
- // Look for an (xor (setcc X, Y), 1).
- if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) ||
- N0.getOperand(0).getOpcode() != ISD::SETCC)
+ // Look for (xor X, 1).
+ if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
+ return SDValue();
+
+ // First xor input should be 0 or 1.
+ APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
+ if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
return SDValue();
// Emit a negate of the setcc.
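Editor's note: the generalized combine above only needs the first xor input to be a known 0/1 value; the identity it relies on is add(xor(b, 1), -1) == -b for b in {0, 1}. A one-line check, illustrative only:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (int64_t B : {0, 1})
    // (add (xor b, 1), -1) folds to (neg b) whenever b is a 0/1 boolean.
    assert(((B ^ 1) + -1) == -B);
  return 0;
}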
@@ -10604,6 +12269,9 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
return V;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
+
// fold (add (select lhs, rhs, cc, 0, y), x) ->
// (select lhs, rhs, cc, x, (add x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
@@ -10732,7 +12400,7 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
// shift amounts larger than 31 would produce poison. If we wait until
// type legalization, we'll create RISCVISD::SRLW and we can't recover it
// to use a BEXT instruction.
- if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
!isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
SDLoc DL(N0);
@@ -10759,7 +12427,7 @@ static SDValue performANDCombine(SDNode *N,
// shift amounts larger than 31 would produce poison. If we wait until
// type legalization, we'll create RISCVISD::SRLW and we can't recover it
// to use a BEXT instruction.
- if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.hasOneUse()) {
@@ -10774,6 +12442,8 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
@@ -10784,17 +12454,64 @@ static SDValue performANDCombine(SDNode *N,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}
+// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
+// FIXME: Generalize to other binary operators with same operand.
+static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+
+ if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
+ N1.getOpcode() != RISCVISD::CZERO_NEZ ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Should have the same condition.
+ SDValue Cond = N0.getOperand(1);
+ if (Cond != N1.getOperand(1))
+ return SDValue();
+
+ SDValue TrueV = N0.getOperand(0);
+ SDValue FalseV = N1.getOperand(0);
+
+ if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
+ TrueV.getOperand(1) != FalseV.getOperand(1) ||
+ !isOneConstant(TrueV.getOperand(1)) ||
+ !TrueV.hasOneUse() || !FalseV.hasOneUse())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
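+  // (or (czero_eqz (xor a, 1), cond), (czero_nez (xor b, 1), cond))
+  //   -> (xor (or (czero_eqz a, cond), (czero_nez b, cond)), 1)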
+ SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
+ Cond);
+ SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
+ Cond);
+ SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
+ return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
+}
+
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
if (DCI.isAfterLegalizeDAG())
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
return V;
+  // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the
+  // select idiom. We may be able to pull a common operation out of the true
+  // and false values.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
+ return V;
+ if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
+ return V;
+
// fold (or (select cond, 0, y), x) ->
// (select cond, x, (or x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
@@ -10805,6 +12522,21 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
+  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
+  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
+ if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
+ N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
+ N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
+ !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
+ SDLoc DL(N);
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
+ SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
+ }
+
// fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
// NOTE: Assumes ROL being legal means ROLW is legal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -10817,7 +12549,7 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
}
// Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) {
+ if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
if (ConstN00 && CC == ISD::SETLT) {
@@ -10832,32 +12564,102 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
+
// fold (xor (select cond, 0, y), x) ->
// (select cond, x, (xor x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
-// According to the property that indexed load/store instructions
-// zero-extended their indices, \p narrowIndex tries to narrow the type of index
-// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
-// bits(ty).
-static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
- if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
return SDValue();
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue MulOper;
+ unsigned AddSubOpc;
+
+ // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
+  //        (mul x, (add y, 1)) -> (add x, (mul x, y))
+ // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
+ // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
+ auto IsAddSubWith1 = [&](SDValue V) -> bool {
+ AddSubOpc = V->getOpcode();
+ if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
+ SDValue Opnd = V->getOperand(1);
+ MulOper = V->getOperand(0);
+ if (AddSubOpc == ISD::SUB)
+ std::swap(Opnd, MulOper);
+ if (isOneOrOneSplat(Opnd))
+ return true;
+ }
+ return false;
+ };
+
+ if (IsAddSubWith1(N0)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
+ }
+
+ if (IsAddSubWith1(N1)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
+ }
+
+ return SDValue();
+}
+
+/// According to the property that indexed load/store instructions zero-extend
+/// their indices, try to narrow the type of index operand.
+static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType,
+                        SelectionDAG &DAG) {
+ if (isIndexTypeSigned(IndexType))
+ return false;
+
+ if (!N->hasOneUse())
+ return false;
+
+ EVT VT = N.getValueType();
+ SDLoc DL(N);
+
+ // In general, what we're doing here is seeing if we can sink a truncate to
+ // a smaller element type into the expression tree building our index.
+ // TODO: We can generalize this and handle a bunch more cases if useful.
+
+ // Narrow a buildvector to the narrowest element type. This requires less
+ // work and less register pressure at high LMUL, and creates smaller constants
+ // which may be cheaper to materialize.
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
+ KnownBits Known = DAG.computeKnownBits(N);
+ unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
+ LLVMContext &C = *DAG.getContext();
+ EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
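+    // ActiveBits is clamped to at least 8 and rounded up to a power of two,
+    // so the narrowed build_vector uses a conventional element width
+    // (i8/i16/i32/...).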
+ if (ResultVT.bitsLT(VT.getVectorElementType())) {
+ N = DAG.getNode(ISD::TRUNCATE, DL,
+ VT.changeVectorElementType(ResultVT), N);
+ return true;
+ }
+ }
+
+ // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
+ if (N.getOpcode() != ISD::SHL)
+ return false;
+
SDValue N0 = N.getOperand(0);
if (N0.getOpcode() != ISD::ZERO_EXTEND &&
N0.getOpcode() != RISCVISD::VZEXT_VL)
- return SDValue();
+    return false;
if (!N0->hasOneUse())
- return SDValue();
+    return false;
APInt ShAmt;
SDValue N1 = N.getOperand(1);
if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
- return SDValue();
+    return false;
- SDLoc DL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned SrcElen = SrcVT.getScalarSizeInBits();
@@ -10867,14 +12669,15 @@ static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
// Skip if NewElen is not narrower than the original extended type.
if (NewElen >= N0.getValueType().getScalarSizeInBits())
- return SDValue();
+ return false;
EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
- return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+ N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+ return true;
}
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
@@ -11949,10 +13752,18 @@ static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
VL);
}
-static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
return V;
+ if (N->getValueType(0).isScalableVector() &&
+ N->getValueType(0).getVectorElementType() == MVT::f32 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ return SDValue();
+ }
+
// FIXME: Ignore strict opcodes for now.
if (N->isTargetStrictFPOpcode())
return SDValue();
@@ -12003,7 +13814,15 @@ static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(2), Mask, VL);
}
-static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (N->getValueType(0).isScalableVector() &&
+ N->getValueType(0).getVectorElementType() == MVT::f32 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ return SDValue();
+ }
+
// FIXME: Ignore strict opcodes for now.
assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
@@ -12036,7 +13855,15 @@ static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
Op1, Merge, Mask, VL);
}
-static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (N->getValueType(0).isScalableVector() &&
+ N->getValueType(0).getVectorElementType() == MVT::f32 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ return SDValue();
+ }
+
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Merge = N->getOperand(2);
@@ -12267,12 +14094,10 @@ static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
// shift can be omitted.
// Fold setlt (sra X, N), 0 -> setlt X, 0 and
// setge (sra X, N), 0 -> setge X, 0
- if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) {
- if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
- LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) {
- LHS = LHS.getOperand(0);
- return true;
- }
+ if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
+ LHS.getOpcode() == ISD::SRA) {
+ LHS = LHS.getOperand(0);
+ return true;
}
if (!ISD::isIntEqualitySetCC(CCVal))
@@ -12358,9 +14183,13 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
SDValue TrueVal, SDValue FalseVal,
bool Swapped) {
bool Commutative = true;
- switch (TrueVal.getOpcode()) {
+ unsigned Opc = TrueVal.getOpcode();
+ switch (Opc) {
default:
return SDValue();
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
case ISD::SUB:
Commutative = false;
break;
@@ -12383,12 +14212,18 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
+ EVT OtherOpVT = OtherOp->getValueType(0);
+ SDValue IdentityOperand =
+ DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
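+  // For the non-commutative cases (SUB and the shifts) use 0 explicitly:
+  // NewSel becomes the RHS of the rebuilt operation below, and x - 0,
+  // x << 0 and x >> 0 all leave x unchanged.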
+ if (!Commutative)
+ IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
+ assert(IdentityOperand && "No identity operand!");
if (Swapped)
- std::swap(OtherOp, Zero);
- SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
+ std::swap(OtherOp, IdentityOperand);
+ SDValue NewSel =
+ DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
}
@@ -12453,11 +14288,45 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}
+static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Cond = N->getOperand(0);
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
+ return SDValue();
+
+  // Replace (setcc eq (and x, C), 0) with (setcc ne (and x, C), 0) to
+  // generate BEXTI, where C is a power of 2.
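+  // Inverting the condition and swapping the select arms lets the single-bit
+  // test feed the conditional-zero instructions directly, without an extra
+  // inversion of the setcc result.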
+ if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
+ (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
+ uint64_t MaskVal = LHS.getConstantOperandVal(1);
+ if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal))
+ return DAG.getSelect(DL, VT,
+ DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
+ False, True);
+ }
+ }
+ return SDValue();
+}
+
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
return Folded;
+ if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
+ return V;
+
if (Subtarget.hasShortForwardBranchOpt())
return SDValue();
@@ -12468,6 +14337,132 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
+/// If we have a build_vector where each lane is binop X, C, where C
+/// is a constant (but not necessarily the same constant on all lanes),
+/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
+/// We assume that materializing a constant build vector will be no more
+/// expensive than performing O(n) binops.
+static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(!VT.isScalableVector() && "unexpected build vector");
+
+ if (VT.getVectorNumElements() == 1)
+ return SDValue();
+
+ const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
+ if (!TLI.isBinOp(Opcode))
+ return SDValue();
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SmallVector<SDValue> LHSOps;
+ SmallVector<SDValue> RHSOps;
+ for (SDValue Op : N->ops()) {
+ if (Op.isUndef()) {
+ // We can't form a divide or remainder from undef.
+ if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+ return SDValue();
+
+ LHSOps.push_back(Op);
+ RHSOps.push_back(Op);
+ continue;
+ }
+
+    // TODO: We can handle operations which have a neutral rhs value
+ // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
+ // of profit in a more explicit manner.
+ if (Op.getOpcode() != Opcode || !Op.hasOneUse())
+ return SDValue();
+
+ LHSOps.push_back(Op.getOperand(0));
+ if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
+ !isa<ConstantFPSDNode>(Op.getOperand(1)))
+ return SDValue();
+ // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
+ // have different LHS and RHS types.
+ if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
+ return SDValue();
+ RHSOps.push_back(Op.getOperand(1));
+ }
+
+ return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
+ DAG.getBuildVector(VT, DL, RHSOps));
+}
+
+static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ SDLoc DL(N);
+
+ EVT VT = InVec.getValueType();
+ if (VT.isScalableVector())
+ return SDValue();
+
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
+ // move the insert_vector_elts into the arms of the binop. Note that
+ // the new RHS must be a constant.
+ const unsigned InVecOpcode = InVec->getOpcode();
+ if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
+ InVal.hasOneUse()) {
+ SDValue InVecLHS = InVec->getOperand(0);
+ SDValue InVecRHS = InVec->getOperand(1);
+ SDValue InValLHS = InVal->getOperand(0);
+ SDValue InValRHS = InVal->getOperand(1);
+
+ if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
+ return SDValue();
+ if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
+ return SDValue();
+ // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
+ // have different LHS and RHS types.
+    if (InVec.getOperand(0).getValueType() !=
+        InVec.getOperand(1).getValueType())
+ return SDValue();
+ SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ InVecLHS, InValLHS, EltNo);
+ SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ InVecRHS, InValRHS, EltNo);
+ return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
+ }
+
+ // Given insert_vector_elt (concat_vectors ...), InVal, Elt
+ // move the insert_vector_elt to the source operand of the concat_vector.
+ if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
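+  // For example, an insert into lane 6 of a concat of two 4-element vectors
+  // becomes an insert into lane 2 of the second concat operand.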
+
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+ if (!IndexC)
+ return SDValue();
+ unsigned Elt = IndexC->getZExtValue();
+
+ EVT ConcatVT = InVec.getOperand(0).getValueType();
+ if (ConcatVT.getVectorElementType() != InVal.getValueType())
+ return SDValue();
+ unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
+ SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
+ EltNo.getValueType());
+
+ unsigned ConcatOpIdx = Elt / ConcatNumElts;
+ SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
+ ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
+ ConcatOp, InVal, NewIdx);
+
+ SmallVector<SDValue> ConcatOps;
+ ConcatOps.append(InVec->op_begin(), InVec->op_end());
+ ConcatOps[ConcatOpIdx] = ConcatOp;
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+}
+
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
@@ -12492,13 +14487,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT BaseLdVT = BaseLd->getValueType(0);
- SDValue BasePtr = BaseLd->getBasePtr();
// Go through the loads and check that they're strided
- SDValue CurPtr = BasePtr;
- SDValue Stride;
+ SmallVector<LoadSDNode *> Lds;
+ Lds.push_back(BaseLd);
Align Align = BaseLd->getAlign();
-
for (SDValue Op : N->ops().drop_front()) {
auto *Ld = dyn_cast<LoadSDNode>(Op);
if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
@@ -12506,42 +14499,46 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
Ld->getValueType(0) != BaseLdVT)
return SDValue();
- SDValue Ptr = Ld->getBasePtr();
- // Check that each load's pointer is (add CurPtr, Stride)
- if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr)
- return SDValue();
- SDValue Offset = Ptr.getOperand(1);
- if (!Stride)
- Stride = Offset;
- else if (Offset != Stride)
- return SDValue();
+ Lds.push_back(Ld);
// The common alignment is the most restrictive (smallest) of all the loads
Align = std::min(Align, Ld->getAlign());
-
- CurPtr = Ptr;
}
- // A special case is if the stride is exactly the width of one of the loads,
- // in which case it's contiguous and can be combined into a regular vle
- // without changing the element size
- if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
- ConstStride &&
- ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
- VT.getStoreSize(), Align);
- // Can't do the combine if the load isn't naturally aligned with the element
- // type
- if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
- DAG.getDataLayout(), VT, *MMO))
+ using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
+ auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
+ LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+ // If the load ptrs can be decomposed into a common (Base + Index) with a
+ // common constant stride, then return the constant stride.
+ BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
+ BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
+ if (BIO1.equalBaseIndex(BIO2, DAG))
+ return {{BIO2.getOffset() - BIO1.getOffset(), false}};
+
+ // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
+ SDValue P1 = Ld1->getBasePtr();
+ SDValue P2 = Ld2->getBasePtr();
+ if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
+ return {{P2.getOperand(1), false}};
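+    // If the pointers are related the other way round (Ld1 = Ld2 + Stride),
+    // record that the stride must be negated.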
+ if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
+ return {{P1.getOperand(1), true}};
+
+ return std::nullopt;
+ };
+
+ // Get the distance between the first and second loads
+ auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
+ if (!BaseDiff)
+ return SDValue();
+
+ // Check all the loads are the same distance apart
+ for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
+ if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
return SDValue();
- SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
- for (SDValue Ld : N->ops())
- DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
- return WideLoad;
- }
+ // TODO: At this point, we've successfully matched a generalized gather
+ // load. Maybe we should emit that, and then move the specialized
+ // matchers above and below into a DAG combine?
// Get the widened scalar type, e.g. v4i8 -> i64
unsigned WideScalarBitWidth =
@@ -12557,21 +14554,29 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
return SDValue();
- MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
- SDValue VL =
- getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
- SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
+ auto [StrideVariant, MustNegateStride] = *BaseDiff;
+ SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
+ ? std::get<SDValue>(StrideVariant)
+ : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
+ Lds[0]->getOffset().getValueType());
+ if (MustNegateStride)
+ Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
+
+ SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
SDValue IntID =
- DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
- SDValue Ops[] = {BaseLd->getChain(),
- IntID,
- DAG.getUNDEF(ContainerVT),
- BasePtr,
- Stride,
- VL};
+ DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
+ Subtarget.getXLenVT());
+
+ SDValue AllOneMask =
+ DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
+ DAG.getConstant(1, DL, MVT::i1));
+
+ SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
+ BaseLd->getBasePtr(), Stride, AllOneMask};
uint64_t MemSize;
- if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride))
+ if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
+ ConstStride && ConstStride->getSExtValue() >= 0)
// total size = (elsize * n) + (stride - elsize) * (n-1)
// = elsize + stride * (n-1)
MemSize = WideScalarVT.getSizeInBits() +
@@ -12589,11 +14594,7 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
for (SDValue Ld : N->ops())
DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
- // Note: Perform the bitcast before the convertFromScalableVector so we have
- // balanced pairs of convertFromScalable/convertToScalable
- SDValue Res = DAG.getBitcast(
- TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
- return convertFromScalableVector(VT, Res, DAG, Subtarget);
+ return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
@@ -12653,9 +14654,121 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opc, DL, VT, Ops);
}
+static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
+ ISD::MemIndexType &IndexType,
+ RISCVTargetLowering::DAGCombinerInfo &DCI) {
+ if (!DCI.isBeforeLegalize())
+ return false;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const MVT XLenVT =
+ DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
+
+ const EVT IndexVT = Index.getValueType();
+
+ // RISC-V indexed loads only support the "unsigned unscaled" addressing
+ // mode, so anything else must be manually legalized.
+ if (!isIndexTypeSigned(IndexType))
+ return false;
+
+ if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
+ // Any index legalization should first promote to XLenVT, so we don't lose
+ // bits when scaling. This may create an illegal index type so we let
+ // LLVM's legalization take care of the splitting.
+ // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
+ IndexVT.changeVectorElementType(XLenVT), Index);
+ }
+ IndexType = ISD::UNSIGNED_SCALED;
+ return true;
+}
+
+/// Match the index vector of a scatter or gather node as the shuffle mask
+/// which performs the rearrangement if possible. Will only match if
+/// all lanes are touched, and thus replacing the scatter or gather with
+/// a unit strided access and shuffle is legal.
+static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
+ SmallVector<int> &ShuffleMask) {
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return false;
+ if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
+ return false;
+
+ const unsigned ElementSize = VT.getScalarStoreSize();
+ const unsigned NumElems = VT.getVectorNumElements();
+
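+  // For example, with i32 elements a byte-index vector <4, 0, 12, 8> touches
+  // every lane exactly once and corresponds to the shuffle mask <1, 0, 3, 2>.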
+ // Create the shuffle mask and check all bits active
+ assert(ShuffleMask.empty());
+ BitVector ActiveLanes(NumElems);
+ for (unsigned i = 0; i < Index->getNumOperands(); i++) {
+ // TODO: We've found an active bit of UB, and could be
+ // more aggressive here if desired.
+ if (Index->getOperand(i)->isUndef())
+ return false;
+ uint64_t C = Index->getConstantOperandVal(i);
+ if (C % ElementSize != 0)
+ return false;
+ C = C / ElementSize;
+ if (C >= NumElems)
+ return false;
+ ShuffleMask.push_back(C);
+ ActiveLanes.set(C);
+ }
+ return ActiveLanes.all();
+}
+
+/// Match the index of a gather or scatter operation as an operation
+/// with twice the element width and half the number of elements. This is
+/// generally profitable (if legal) because these operations are linear
+/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
+/// come out ahead.
+static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
+ Align BaseAlign, const RISCVSubtarget &ST) {
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return false;
+ if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
+ return false;
+
+  // Attempt a doubling. If we can use an element type 4x or 8x the size,
+  // this will happen via multiple iterations of the transform.
+ const unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems % 2 != 0)
+ return false;
+
+ const unsigned ElementSize = VT.getScalarStoreSize();
+ const unsigned WiderElementSize = ElementSize * 2;
+ if (WiderElementSize > ST.getELen()/8)
+ return false;
+
+ if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
+ return false;
+
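+  // For example, with i32 elements an index vector <0, 4, 8, 12> pairs up as
+  // (0,4) and (8,12): each even-position index is a multiple of 8 and each
+  // odd-position index is its predecessor plus 4, so the access can instead
+  // use i64 elements with indices <0, 8>.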
+ for (unsigned i = 0; i < Index->getNumOperands(); i++) {
+ // TODO: We've found an active bit of UB, and could be
+ // more aggressive here if desired.
+ if (Index->getOperand(i)->isUndef())
+ return false;
+ // TODO: This offset check is too strict if we support fully
+ // misaligned memory operations.
+ uint64_t C = Index->getConstantOperandVal(i);
+ if (i % 2 == 0) {
+ if (C % WiderElementSize != 0)
+ return false;
+ continue;
+ }
+ uint64_t Last = Index->getConstantOperandVal(i-1);
+ if (C != Last + ElementSize)
+ return false;
+ }
+ return true;
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ const MVT XLenVT = Subtarget.getXLenVT();
+ SDLoc DL(N);
// Helper to call SimplifyDemandedBits on an operand of N where only some low
// bits are demanded. N will be added to the Worklist if it was not deleted.
@@ -12687,8 +14800,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DCI.CombineTo(N, Lo, Hi);
}
- SDLoc DL(N);
-
// It's cheaper to materialise two 32-bit integers than to load a double
// from the constant pool and transfer it to integer registers through the
// stack.
@@ -12795,14 +14906,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget);
case ISD::XOR:
return performXORCombine(N, DAG, Subtarget);
+ case ISD::MUL:
+ return performMULCombine(N, DAG);
case ISD::FADD:
case ISD::UMAX:
case ISD::UMIN:
case ISD::SMAX:
case ISD::SMIN:
case ISD::FMAXNUM:
- case ISD::FMINNUM:
- return combineBinOpToReduce(N, DAG, Subtarget);
+ case ISD::FMINNUM: {
+ if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
+ return V;
+ if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
+ return V;
+ return SDValue();
+ }
case ISD::SETCC:
return performSETCCCombine(N, DAG, Subtarget);
case ISD::SIGN_EXTEND_INREG:
@@ -12829,6 +14947,56 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
}
return SDValue();
+ case RISCVISD::TRUNCATE_VECTOR_VL: {
+    // trunc (sra (sext X), (zext Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
+    // This benefits the cases where X and Y are both low-precision vectors of
+    // the same value type. Since the truncate would be lowered into n levels
+    // of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
+    // such a pattern would be expanded into a series of "vsetvli" and "vnsrl"
+    // instructions later on.
+ auto IsTruncNode = [](SDValue V) {
+ if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
+ return false;
+ SDValue VL = V.getOperand(2);
+ auto *C = dyn_cast<ConstantSDNode>(VL);
+ // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
+ bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
+ (isa<RegisterSDNode>(VL) &&
+ cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
+ return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
+ IsVLMAXForVMSET;
+ };
+
+ SDValue Op = N->getOperand(0);
+
+    // We first need to find the innermost TRUNCATE_VECTOR_VL node in order
+    // to recognize such a pattern.
+ while (IsTruncNode(Op)) {
+ if (!Op.hasOneUse())
+ return SDValue();
+ Op = Op.getOperand(0);
+ }
+
+ if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+ if (N00.getValueType().isVector() &&
+ N00.getValueType() == N10.getValueType() &&
+ N->getValueType(0) == N10.getValueType()) {
+ unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
+ SDValue SMin = DAG.getNode(
+ ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
+ DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
+ return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
+ }
+ }
+ }
+ break;
+ }
case ISD::TRUNCATE:
return performTRUNCATECombine(N, DAG, Subtarget);
case ISD::SELECT:
@@ -12939,6 +15107,19 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
}
+ // If both true/false are an xor with 1, pull through the select.
+ // This can occur after op legalization if both operands are setccs that
+ // require an xor to invert.
+ // FIXME: Generalize to other binary ops with identical operand?
+ if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
+ TrueV.getOperand(1) == FalseV.getOperand(1) &&
+ isOneConstant(TrueV.getOperand(1)) &&
+ TrueV.hasOneUse() && FalseV.hasOneUse()) {
+ SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
+ TrueV.getOperand(0), FalseV.getOperand(0));
+ return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
+ }
+
return SDValue();
}
case RISCVISD::BR_CC: {
@@ -12985,75 +15166,187 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
- case ISD::MGATHER:
- case ISD::MSCATTER:
- case ISD::VP_GATHER:
- case ISD::VP_SCATTER: {
- if (!DCI.isBeforeLegalize())
- break;
- SDValue Index, ScaleOp;
- bool IsIndexSigned = false;
- if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
- Index = VPGSN->getIndex();
- ScaleOp = VPGSN->getScale();
- IsIndexSigned = VPGSN->isIndexSigned();
- assert(!VPGSN->isIndexScaled() &&
- "Scaled gather/scatter should not be formed");
- } else {
- const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
- Index = MGSN->getIndex();
- ScaleOp = MGSN->getScale();
- IsIndexSigned = MGSN->isIndexSigned();
- assert(!MGSN->isIndexScaled() &&
- "Scaled gather/scatter should not be formed");
+ case ISD::MGATHER: {
+    const auto *MGN = cast<MaskedGatherSDNode>(N);
+ const EVT VT = N->getValueType(0);
+ SDValue Index = MGN->getIndex();
+ SDValue ScaleOp = MGN->getScale();
+ ISD::MemIndexType IndexType = MGN->getIndexType();
+ assert(!MGN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
+ return DAG.getMaskedGather(
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, MGN->getExtensionType());
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getMaskedGather(
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, MGN->getExtensionType());
+
+ if (Index.getOpcode() == ISD::BUILD_VECTOR &&
+ MGN->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
+ SimpleVID && SimpleVID->StepDenominator == 1) {
+ const int64_t StepNumerator = SimpleVID->StepNumerator;
+ const int64_t Addend = SimpleVID->Addend;
+
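+        // The indices form the arithmetic sequence Addend + i * StepNumerator,
+        // so the gather is equivalent to a strided load from
+        // (BasePtr + Addend) with stride StepNumerator.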
+ // Note: We don't need to check alignment here since (by assumption
+        // from the existence of the gather), our offsets must be sufficiently
+ // aligned.
+
+ const EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
+ assert(IndexType == ISD::UNSIGNED_SCALED);
+ SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
+ DAG.getConstant(Addend, DL, PtrVT));
+
+ SDVTList VTs = DAG.getVTList({VT, MVT::Other});
+ SDValue IntID =
+ DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
+ XLenVT);
+ SDValue Ops[] =
+ {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
+ DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
+ Ops, VT, MGN->getMemOperand());
+ }
}
- EVT IndexVT = Index.getValueType();
- MVT XLenVT = Subtarget.getXLenVT();
- // RISC-V indexed loads only support the "unsigned unscaled" addressing
- // mode, so anything else must be manually legalized.
- bool NeedsIdxLegalization =
- (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
- if (!NeedsIdxLegalization)
- break;
- SDLoc DL(N);
+ SmallVector<int> ShuffleMask;
+ if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
+ matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
+ SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
+ MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
+ MGN->getMask(), DAG.getUNDEF(VT),
+ MGN->getMemoryVT(), MGN->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD);
+ SDValue Shuffle =
+ DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
+ return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
+ }
- // Any index legalization should first promote to XLenVT, so we don't lose
- // bits when scaling. This may create an illegal index type so we let
- // LLVM's legalization take care of the splitting.
- // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
- if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
- IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- DL, IndexVT, Index);
+ if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
+ matchIndexAsWiderOp(VT, Index, MGN->getMask(),
+ MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
+ SmallVector<SDValue> NewIndices;
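+      // Re-express the gather with half as many elements of twice the width:
+      // keep every other (even position) index, bitcast the passthru to the
+      // wide type, and bitcast the gathered result back at the end.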
+ for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
+ NewIndices.push_back(Index.getOperand(i));
+ EVT IndexVT = Index.getValueType()
+ .getHalfNumVectorElementsVT(*DAG.getContext());
+ Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
+
+ unsigned ElementSize = VT.getScalarStoreSize();
+ EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
+ auto EltCnt = VT.getVectorElementCount();
+ assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
+ EltCnt.divideCoefficientBy(2));
+ SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ EltCnt.divideCoefficientBy(2));
+ SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
+
+ SDValue Gather =
+ DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
+ {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
+ Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
+ SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
+ return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
+ }
+ break;
+ }
+  case ISD::MSCATTER: {
+    const auto *MSN = cast<MaskedScatterSDNode>(N);
+ SDValue Index = MSN->getIndex();
+ SDValue ScaleOp = MSN->getScale();
+ ISD::MemIndexType IndexType = MSN->getIndexType();
+ assert(!MSN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
+ return DAG.getMaskedScatter(
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, ScaleOp},
+ MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getMaskedScatter(
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, ScaleOp},
+ MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
+
+ EVT VT = MSN->getValue()->getValueType(0);
+ SmallVector<int> ShuffleMask;
+ if (!MSN->isTruncatingStore() &&
+ matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
+ SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
+ DAG.getUNDEF(VT), ShuffleMask);
+ return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
+ DAG.getUNDEF(XLenVT), MSN->getMask(),
+ MSN->getMemoryVT(), MSN->getMemOperand(),
+ ISD::UNINDEXED, false);
}
+ break;
+ }
+ case ISD::VP_GATHER: {
+    const auto *VPGN = cast<VPGatherSDNode>(N);
+ SDValue Index = VPGN->getIndex();
+ SDValue ScaleOp = VPGN->getScale();
+ ISD::MemIndexType IndexType = VPGN->getIndexType();
+ assert(!VPGN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
+ return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
+ {VPGN->getChain(), VPGN->getBasePtr(), Index,
+ ScaleOp, VPGN->getMask(),
+ VPGN->getVectorLength()},
+ VPGN->getMemOperand(), IndexType);
- ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
- if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
+ if (narrowIndex(Index, IndexType, DAG))
return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
{VPGN->getChain(), VPGN->getBasePtr(), Index,
ScaleOp, VPGN->getMask(),
VPGN->getVectorLength()},
- VPGN->getMemOperand(), NewIndexTy);
- if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
+ VPGN->getMemOperand(), IndexType);
+
+ break;
+ }
+ case ISD::VP_SCATTER: {
+    const auto *VPSN = cast<VPScatterSDNode>(N);
+ SDValue Index = VPSN->getIndex();
+ SDValue ScaleOp = VPSN->getScale();
+ ISD::MemIndexType IndexType = VPSN->getIndexType();
+ assert(!VPSN->isIndexScaled() &&
+ "Scaled gather/scatter should not be formed");
+
+ SDLoc DL(N);
+ if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
{VPSN->getChain(), VPSN->getValue(),
VPSN->getBasePtr(), Index, ScaleOp,
VPSN->getMask(), VPSN->getVectorLength()},
- VPSN->getMemOperand(), NewIndexTy);
- if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
- return DAG.getMaskedGather(
- N->getVTList(), MGN->getMemoryVT(), DL,
- {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
- MGN->getBasePtr(), Index, ScaleOp},
- MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
- const auto *MSN = cast<MaskedScatterSDNode>(N);
- return DAG.getMaskedScatter(
- N->getVTList(), MSN->getMemoryVT(), DL,
- {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
- Index, ScaleOp},
- MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
+ VPSN->getMemOperand(), IndexType);
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
+ {VPSN->getChain(), VPSN->getValue(),
+ VPSN->getBasePtr(), Index, ScaleOp,
+ VPSN->getMask(), VPSN->getVectorLength()},
+ VPSN->getMemOperand(), IndexType);
+ break;
}
case RISCVISD::SRA_VL:
case RISCVISD::SRL_VL:
@@ -13062,7 +15355,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
SDLoc DL(N);
- SDValue VL = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
EVT VT = N->getValueType(0);
ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
ShAmt.getOperand(1), VL);
@@ -13108,12 +15401,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::STRICT_VFNMADD_VL:
case RISCVISD::STRICT_VFMSUB_VL:
case RISCVISD::STRICT_VFNMSUB_VL:
- return performVFMADD_VLCombine(N, DAG);
+ return performVFMADD_VLCombine(N, DAG, Subtarget);
case RISCVISD::FMUL_VL:
- return performVFMUL_VLCombine(N, DAG);
+ return performVFMUL_VLCombine(N, DAG, Subtarget);
case RISCVISD::FADD_VL:
case RISCVISD::FSUB_VL:
- return performFADDSUB_VLCombine(N, DAG);
+ return performFADDSUB_VLCombine(N, DAG, Subtarget);
case ISD::LOAD:
case ISD::STORE: {
if (DCI.isAfterLegalizeDAG())
@@ -13149,16 +15442,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
// Get the constant vector bits
APInt NewC(Val.getValueSizeInBits(), 0);
+ uint64_t EltSize = Val.getScalarValueSizeInBits();
for (unsigned i = 0; i < Val.getNumOperands(); i++) {
if (Val.getOperand(i).isUndef())
continue;
- NewC.insertBits(Val.getConstantOperandAPInt(i),
- i * Val.getScalarValueSizeInBits());
+ NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
+ i * EltSize);
}
MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
- if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
- Subtarget.getFeatureBits(), true) <= 2 &&
+ if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
+ true) <= 2 &&
allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
NewVT, *Store->getMemOperand())) {
SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
@@ -13201,7 +15495,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Src = Val.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
// VecVT should be scalable and memory VT should match the element type.
- if (VecVT.isScalableVector() &&
+ if (!Store->isIndexed() && VecVT.isScalableVector() &&
MemVT == VecVT.getVectorElementType()) {
SDLoc DL(N);
MVT MaskVT = getMaskTypeFor(VecVT);
@@ -13226,19 +15520,51 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Gather;
break;
}
+ case ISD::BUILD_VECTOR:
+ if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
+ return V;
+ break;
case ISD::CONCAT_VECTORS:
if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
return V;
break;
+ case ISD::INSERT_VECTOR_ELT:
+ if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
+ return V;
+ break;
+ case RISCVISD::VFMV_V_F_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // If VL is 1, we can use vfmv.s.f.
+ if (isOneConstant(VL))
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+ break;
+ }
case RISCVISD::VMV_V_X_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
// Tail agnostic VMV.V.X only demands the vector element bitwidth from the
// scalar input.
- unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
- unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
- if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
+ unsigned ScalarSize = Scalar.getValueSizeInBits();
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ if (ScalarSize > EltWidth && Passthru.isUndef())
if (SimplifyDemandedLowBitsHelper(1, EltWidth))
return SDValue(N, 0);
+ // If VL is 1 and the scalar value won't benefit from immediate, we can
+ // use vmv.s.x.
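+    // (A non-zero constant that already fits in simm5 is left alone, since
+    // VMV_V_X_VL with a small immediate can be selected as a single vmv.v.i.)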
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+ if (isOneConstant(VL) &&
+ (!Const || Const->isZero() ||
+ !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+
break;
}
case RISCVISD::VFMV_S_F_VL: {
@@ -13258,6 +15584,35 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Src.getOperand(0);
// TODO: Use insert_subvector/extract_subvector to change widen/narrow?
}
+ [[fallthrough]];
+ }
+ case RISCVISD::VMV_S_X_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // Use M1 or smaller to avoid over constraining register allocation
+ const MVT M1VT = getLMUL1VT(VT);
+ if (M1VT.bitsLT(VT)) {
+ SDValue M1Passthru =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue Result =
+ DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
+ DAG.getConstant(0, DL, XLenVT));
+ return Result;
+ }
+
+ // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
+ // higher would involve overly constraining the register allocator for
+ // no purpose.
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+ Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
+ VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
+
break;
}
case ISD::INTRINSIC_VOID:
@@ -13269,6 +15624,43 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// By default we do not combine any intrinsic.
default:
return SDValue();
+ case Intrinsic::riscv_masked_strided_load: {
+ MVT VT = N->getSimpleValueType(0);
+ auto *Load = cast<MemIntrinsicSDNode>(N);
+ SDValue PassThru = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue Stride = N->getOperand(4);
+ SDValue Mask = N->getOperand(5);
+
+ // If the stride is equal to the element size in bytes, we can use
+ // a masked.load.
+ const unsigned ElementSize = VT.getScalarStoreSize();
+ if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+ StrideC && StrideC->getZExtValue() == ElementSize)
+ return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
+ DAG.getUNDEF(XLenVT), Mask, PassThru,
+ Load->getMemoryVT(), Load->getMemOperand(),
+ ISD::UNINDEXED, ISD::NON_EXTLOAD);
+ return SDValue();
+ }
+ case Intrinsic::riscv_masked_strided_store: {
+ auto *Store = cast<MemIntrinsicSDNode>(N);
+ SDValue Value = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue Stride = N->getOperand(4);
+ SDValue Mask = N->getOperand(5);
+
+ // If the stride is equal to the element size in bytes, we can use
+ // a masked.store.
+ const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
+ if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+ StrideC && StrideC->getZExtValue() == ElementSize)
+ return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
+ DAG.getUNDEF(XLenVT), Mask,
+ Store->getMemoryVT(), Store->getMemOperand(),
+ ISD::UNINDEXED, false);
+ return SDValue();
+ }
case Intrinsic::riscv_vcpop:
case Intrinsic::riscv_vcpop_mask:
case Intrinsic::riscv_vfirst:
@@ -13287,23 +15679,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getConstant(-1, DL, VT);
return DAG.getConstant(0, DL, VT);
}
- case Intrinsic::riscv_vloxei:
- case Intrinsic::riscv_vloxei_mask:
- case Intrinsic::riscv_vluxei:
- case Intrinsic::riscv_vluxei_mask:
- case Intrinsic::riscv_vsoxei:
- case Intrinsic::riscv_vsoxei_mask:
- case Intrinsic::riscv_vsuxei:
- case Intrinsic::riscv_vsuxei_mask:
- if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
- SmallVector<SDValue, 8> Ops(N->ops());
- Ops[4] = V;
- const auto *MemSD = cast<MemIntrinsicSDNode>(N);
- return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
- Ops, MemSD->getMemoryVT(),
- MemSD->getMemOperand());
- }
- return SDValue();
}
}
case ISD::BITCAST: {
@@ -13386,12 +15761,12 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
// Neither constant will fit into an immediate, so find materialisation
// costs.
- int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
- Subtarget.getFeatureBits(),
- /*CompressionCost*/true);
+ int C1Cost =
+ RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
+ /*CompressionCost*/ true);
int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
- ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
- /*CompressionCost*/true);
+ ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
+ /*CompressionCost*/ true);
// Materialising `c1` is cheaper than materialising `c1 << c2`, so the
// combine should be prevented.
@@ -13562,6 +15937,15 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = Known.sext(BitWidth);
break;
}
+ case RISCVISD::SLLW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
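+    // SLLW only reads the low 5 bits of the shift amount, hence the trunc(5)
+    // on the shift-amount known bits.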
+ Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case RISCVISD::CTZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
@@ -13600,7 +15984,7 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One.setBit(Log2_32(MinVLenB));
break;
}
- case RISCVISD::FPCLASS: {
+ case RISCVISD::FCLASS: {
// fclass will only set one of the low 10 bits.
Known.Zero.setBitsFrom(10);
break;
@@ -13615,7 +15999,7 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
- // Assume that VL output is >= 65536.
+ // Assume that VL output is <= 65536.
// TODO: Take SEW and LMUL into account.
if (BitWidth > 17)
Known.Zero.setBitsFrom(17);
@@ -13705,6 +16089,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
assert(Subtarget.hasStdExtA());
return 33;
}
+ break;
}
}
@@ -14187,47 +16572,6 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
-static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
- unsigned Opcode) {
- DebugLoc DL = MI.getDebugLoc();
-
- const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-
- assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
- unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
-
- // Update FRM and save the old value.
- BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
- .addImm(MI.getOperand(FRMIdx).getImm());
-
- // Emit an VFCVT with the FRM == DYN
- auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
-
- for (unsigned I = 0; I < MI.getNumOperands(); I++)
- if (I != FRMIdx)
- MIB = MIB.add(MI.getOperand(I));
- else
- MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
-
- MIB.add(MachineOperand::CreateReg(RISCV::FRM,
- /*IsDef*/ false,
- /*IsImp*/ true));
-
- if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
- MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
-
- // Restore FRM.
- BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
- .addReg(SavedFRM, RegState::Kill);
-
- // Erase the pseudoinstruction.
- MI.eraseFromParent();
- return BB;
-}
-
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
MachineBasicBlock *BB,
unsigned CVTXOpc,
@@ -14472,43 +16816,6 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
Subtarget);
-#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
- case RISCV::RMOpc##_##LMUL: \
- return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
- case RISCV::RMOpc##_##LMUL##_MASK: \
- return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
-
-#define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
-
-#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
- PseudoVFCVT_RM_CASE(RMOpc, Opc) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
-
-#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
- PseudoVFCVT_RM_CASE(RMOpc, Opc) \
- PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
-
- // VFCVT
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
- PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
-
- // VFWCVT
- PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
- PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
-
- // VFNCVT
- PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
- PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
- PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
- PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
-
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -14535,41 +16842,26 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case RISCV::PseudoFROUND_D_INX:
case RISCV::PseudoFROUND_D_IN32X:
return emitFROUND(MI, BB, Subtarget);
+ case TargetOpcode::STATEPOINT:
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ if (!Subtarget.is64Bit())
+ report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
+ "supported on 64-bit targets");
+ return emitPatchPoint(MI, BB);
}
}
-// Returns the index to the rounding mode immediate value if any, otherwise the
-// function will return None.
-static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
- uint64_t TSFlags = MI.getDesc().TSFlags;
- if (!RISCVII::hasRoundModeOp(TSFlags))
- return std::nullopt;
-
- // The operand order
- // -------------------------------------
- // | n-1 (if any) | n-2 | n-3 | n-4 |
- // | policy | sew | vl | rm |
- // -------------------------------------
- return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
-}
-
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
- // Add FRM dependency to vector floating-point instructions with dynamic
- // rounding mode.
- if (auto RoundModeIdx = getRoundModeIdx(MI)) {
- unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
- if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) {
- MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
- /*isImp*/ true));
- }
- }
-
// Add FRM dependency to any instructions with dynamic rounding mode.
- unsigned Opc = MI.getOpcode();
- auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
- if (Idx < 0)
- return;
+ int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
+ if (Idx < 0) {
+ // Vector pseudos have FRM index indicated by TSFlags.
+ Idx = RISCVII::getFRMOpNum(MI.getDesc());
+ if (Idx < 0)
+ return;
+ }
if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
return;
// If the instruction already reads FRM, don't add another read.
@@ -14604,10 +16896,6 @@ void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// register-size fields in the same situations they would be for fixed
// arguments.
-static const MCPhysReg ArgGPRs[] = {
- RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
- RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
-};
static const MCPhysReg ArgFPR16s[] = {
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
@@ -14632,6 +16920,14 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
+ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
+ static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15,
+ RISCV::X16, RISCV::X17};
+
+ return ArrayRef(ArgGPRs);
+}
+
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -14639,6 +16935,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
MVT ValVT2, MVT LocVT2,
ISD::ArgFlagsTy ArgFlags2) {
unsigned XLenInBytes = XLen / 8;
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
if (Register Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
@@ -14759,6 +17056,8 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
LocInfo = CCValAssign::BCvt;
}
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
+
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
// alignment (RV32) or 16-byte alignment (RV64). An aligned register should
@@ -14785,23 +17084,29 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// Handle passing f64 on RV32D with a soft float ABI or when floating point
// registers are exhausted.
if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
- assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
- "Can't lower f64 if it is split");
+ assert(PendingLocs.empty() && "Can't lower f64 if it is split");
// Depending on available argument GPRS, f64 may be passed in a pair of
// GPRs, split between a GPR and the stack, or passed completely on the
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
// cases.
Register Reg = State.AllocateReg(ArgGPRs);
- LocVT = MVT::i32;
if (!Reg) {
unsigned StackOffset = State.AllocateStack(8, Align(8));
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
- if (!State.AllocateReg(ArgGPRs))
- State.AllocateStack(4, Align(4));
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ LocVT = MVT::i32;
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ Register HiReg = State.AllocateReg(ArgGPRs);
+ if (HiReg) {
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
+ } else {
+ unsigned StackOffset = State.AllocateStack(4, Align(4));
+ State.addLoc(
+ CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ }
return false;
}
@@ -15002,12 +17307,18 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
break;
case CCValAssign::BCvt:
if (VA.getLocVT().isInteger() &&
- (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
- else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
- Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
- else
+ } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+ if (RV64LegalI32) {
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
+ } else {
+ Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+ }
+ } else {
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ }
break;
}
return Val;
@@ -15061,13 +17372,19 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
- if (VA.getLocVT().isInteger() &&
- (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
- Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
- else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
- Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
- else
+ if (LocVT.isInteger() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
+ Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
+ } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
+ if (RV64LegalI32) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
+ } else {
+ Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+ }
+ } else {
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+ }
break;
}
return Val;
@@ -15110,38 +17427,32 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
- const CCValAssign &VA, const SDLoc &DL) {
+ const CCValAssign &VA,
+ const CCValAssign &HiVA,
+ const SDLoc &DL) {
assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
"Unexpected VA");
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- if (VA.isMemLoc()) {
- // f64 is passed on the stack.
- int FI =
- MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- return DAG.getLoad(MVT::f64, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI));
- }
-
assert(VA.isRegLoc() && "Expected register VA assignment");
Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
SDValue Hi;
- if (VA.getLocReg() == RISCV::X17) {
+ if (HiVA.isMemLoc()) {
// Second half of f64 is passed on the stack.
- int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
+ int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
+ /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
} else {
// Second half of f64 is passed in another GPR.
Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
- RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
+ RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
@@ -15346,6 +17657,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::GRAAL:
break;
case CallingConv::GHC:
if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
@@ -15384,15 +17697,16 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
: RISCV::CC_RISCV);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
- if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
- ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
- else if (VA.isRegLoc())
- ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.needsCustom());
+ ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
+ } else if (VA.isRegLoc())
+ ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -15404,12 +17718,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// stores are relative to that.
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
- unsigned ArgIndex = Ins[i].OrigArgIndex;
- unsigned ArgPartOffset = Ins[i].PartOffset;
+ unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
- while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
+ while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
- unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
+ unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
if (PartVA.getValVT().isScalableVector())
Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
@@ -15417,6 +17731,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
+ ++InsIdx;
}
continue;
}
@@ -15428,57 +17743,56 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
if (IsVarArg) {
- ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- // Offset of the first variable argument from stack pointer, and size of
- // the vararg save area. For now, the varargs save area is either zero or
- // large enough to hold a0-a7.
- int VaArgOffset, VarArgsSaveSize;
+ // Size of the vararg save area. For now, the varargs save area is either
+ // zero or large enough to hold a0-a7.
+ int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+ int FI;
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
- if (ArgRegs.size() == Idx) {
- VaArgOffset = CCInfo.getStackSize();
- VarArgsSaveSize = 0;
+ if (VarArgsSaveSize == 0) {
+ int VaArgOffset = CCInfo.getStackSize();
+ FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
} else {
- VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
- VaArgOffset = -VarArgsSaveSize;
+ int VaArgOffset = -VarArgsSaveSize;
+ FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
+
+ // If saving an odd number of registers then create an extra stack slot to
+ // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
+ // offsets to even-numbered registers remain 2*XLEN-aligned.
+ if (Idx % 2) {
+ MFI.CreateFixedObject(
+ XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
+ VarArgsSaveSize += XLenInBytes;
+ }
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
+ const Register Reg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(ArgRegs[I], Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
+ SDValue Store = DAG.getStore(
+ Chain, DL, ArgValue, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
+ OutChains.push_back(Store);
+ FIN =
+ DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
+ }
}
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
RVFI->setVarArgsFrameIndex(FI);
-
- // If saving an odd number of registers then create an extra stack slot to
- // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
- // offsets to even-numbered registered remain 2*XLEN-aligned.
- if (Idx % 2) {
- MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
- VarArgsSaveSize += XLenInBytes;
- }
-
- // Copy the integer registers that may have been used for passing varargs
- // to the vararg save area.
- for (unsigned I = Idx; I < ArgRegs.size();
- ++I, VaArgOffset += XLenInBytes) {
- const Register Reg = RegInfo.createVirtualRegister(RC);
- RegInfo.addLiveIn(ArgRegs[I], Reg);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
- FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
- SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
- MachinePointerInfo::getFixedStack(MF, FI));
- cast<StoreSDNode>(Store.getNode())
- ->getMemOperand()
- ->setValue((Value *)nullptr);
- OutChains.push_back(Store);
- }
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
}
@@ -15632,15 +17946,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
- for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
+ ++i, ++OutIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue ArgValue = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue ArgValue = OutVals[OutIdx];
+ ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
// Handle passing f64 on RV32D with a soft float ABI as a special case.
- bool IsF64OnRV32DSoftABI =
- VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
- if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.isRegLoc() && "Expected register VA assignment");
+ assert(VA.needsCustom());
SDValue SplitF64 = DAG.getNode(
RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
SDValue Lo = SplitF64.getValue(0);
@@ -15649,32 +17964,33 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
- if (RegLo == RISCV::X17) {
+ // Get the CCValAssign for the Hi part.
+ CCValAssign &HiVA = ArgLocs[++i];
+
+ if (HiVA.isMemLoc()) {
// Second half of f64 is passed on the stack.
- // Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
// Emit the store.
MemOpChains.push_back(
- DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
+ DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
} else {
// Second half of f64 is passed in another GPR.
- assert(RegLo < RISCV::X31 && "Invalid register pair");
- Register RegHigh = RegLo + 1;
+ Register RegHigh = HiVA.getLocReg();
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
}
continue;
}
- // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
- // as any other MemLoc.
-
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
Align StackAlign =
- std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+ std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
getPrefTypeAlign(ArgValue.getValueType(), DAG));
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split (e.g. i128), we need
@@ -15682,16 +17998,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Vectors may be partly split to registers and partly to the stack, in
// which case the base address is partly offset and subsequent stores are
// relative to that.
- unsigned ArgIndex = Outs[i].OrigArgIndex;
- unsigned ArgPartOffset = Outs[i].PartOffset;
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
SmallVector<std::pair<SDValue, SDValue>> Parts;
- while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
- SDValue PartValue = OutVals[i + 1];
- unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
+ SDValue PartValue = OutVals[OutIdx + 1];
+ unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
if (PartVT.isScalableVector())
@@ -15700,6 +18016,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
Parts.push_back(std::make_pair(PartValue, Offset));
++i;
+ ++OutIdx;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -15841,7 +18158,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
// Copy all of the result registers out of their specified physreg.
- for (auto &VA : RVLocs) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ auto &VA = RVLocs[i];
// Copy the value out
SDValue RetValue =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
@@ -15850,9 +18168,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Glue = RetValue.getValue(2);
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
- assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
- SDValue RetValue2 =
- DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
+ assert(VA.needsCustom());
+ SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
+ MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
@@ -15915,21 +18233,21 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
- SDValue Val = OutVals[i];
+ for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
+ SDValue Val = OutVals[OutIdx];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
// Handle returning f64 on RV32D with a soft float ABI.
assert(VA.isRegLoc() && "Expected return via registers");
+ assert(VA.needsCustom());
SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
Register RegLo = VA.getLocReg();
- assert(RegLo < RISCV::X31 && "Invalid register pair");
- Register RegHi = RegLo + 1;
+ Register RegHi = RVLocs[++i].getLocReg();
if (STI.isRegisterReservedByUser(RegLo) ||
STI.isRegisterReservedByUser(RegHi))
@@ -16067,10 +18385,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ADD_LO)
NODE_NAME_CASE(HI)
NODE_NAME_CASE(LLA)
- NODE_NAME_CASE(LGA)
NODE_NAME_CASE(ADD_TPREL)
- NODE_NAME_CASE(LA_TLS_IE)
- NODE_NAME_CASE(LA_TLS_GD)
NODE_NAME_CASE(MULHSU)
NODE_NAME_CASE(SLLW)
NODE_NAME_CASE(SRAW)
@@ -16097,7 +18412,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FP_ROUND_BF16)
NODE_NAME_CASE(FP_EXTEND_BF16)
NODE_NAME_CASE(FROUND)
- NODE_NAME_CASE(FPCLASS)
+ NODE_NAME_CASE(FCLASS)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(READ_CYCLE_WIDE)
@@ -16159,6 +18474,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SREM_VL)
NODE_NAME_CASE(SRA_VL)
NODE_NAME_CASE(SRL_VL)
+ NODE_NAME_CASE(ROTL_VL)
+ NODE_NAME_CASE(ROTR_VL)
NODE_NAME_CASE(SUB_VL)
NODE_NAME_CASE(UDIV_VL)
NODE_NAME_CASE(UREM_VL)
@@ -16193,8 +18510,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CTLZ_VL)
NODE_NAME_CASE(CTTZ_VL)
NODE_NAME_CASE(CTPOP_VL)
- NODE_NAME_CASE(FMINNUM_VL)
- NODE_NAME_CASE(FMAXNUM_VL)
+ NODE_NAME_CASE(VFMIN_VL)
+ NODE_NAME_CASE(VFMAX_VL)
NODE_NAME_CASE(MULHS_VL)
NODE_NAME_CASE(MULHU_VL)
NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
@@ -16241,6 +18558,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VWADDU_W_VL)
NODE_NAME_CASE(VWSUB_W_VL)
NODE_NAME_CASE(VWSUBU_W_VL)
+ NODE_NAME_CASE(VWSLL_VL)
NODE_NAME_CASE(VFWMUL_VL)
NODE_NAME_CASE(VFWADD_VL)
NODE_NAME_CASE(VFWSUB_VL)
@@ -16314,6 +18632,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// TODO: Support fixed vectors up to XLen for P extension?
if (VT.isVector())
break;
+ if (VT == MVT::f16 && Subtarget.hasStdExtZhinxOrZhinxmin())
+ return std::make_pair(0U, &RISCV::GPRF16RegClass);
+ if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
+ return std::make_pair(0U, &RISCV::GPRF32RegClass);
+ if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
+ return std::make_pair(0U, &RISCV::GPRPF64RegClass);
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
@@ -16501,13 +18825,13 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return Res;
}
-unsigned
+InlineAsm::ConstraintCode
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
// Currently only support length 1 constraints.
if (ConstraintCode.size() == 1) {
switch (ConstraintCode[0]) {
case 'A':
- return InlineAsm::Constraint_A;
+ return InlineAsm::ConstraintCode::A;
default:
break;
}
@@ -16517,10 +18841,10 @@ RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
}
void RISCVTargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Currently only support length 1 constraints.
- if (Constraint.length() == 1) {
+ if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'I':
// Validate & create a 12-bit signed immediate operand.
@@ -16581,8 +18905,11 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
- if (Subtarget.hasStdExtZtso())
+ if (Subtarget.hasStdExtZtso()) {
+ if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+ return Builder.CreateFence(Ord);
return nullptr;
+ }
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Acquire);
@@ -16666,6 +18993,22 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
+ // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
+ // mask, as this produces better code than the LR/SC loop emitted by
+ // int_riscv_masked_atomicrmw_xchg.
+ if (AI->getOperation() == AtomicRMWInst::Xchg &&
+ isa<ConstantInt>(AI->getValOperand())) {
+ ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
+ if (CVal->isZero())
+ return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
+ Builder.CreateNot(Mask, "Inv_Mask"),
+ AI->getAlign(), Ord);
+ if (CVal->isMinusOne())
+ return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
+ AI->getAlign(), Ord);
+ }
+
unsigned XLen = Subtarget.getXLen();
Value *Ordering =
Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
@@ -16741,9 +19084,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
-bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
EVT DataVT) const {
- return false;
+ // We have indexed loads for all legal index types. Indices are always
+ // zero-extended.
+ return Extend.getOpcode() == ISD::ZERO_EXTEND &&
+ isTypeLegal(Extend.getValueType()) &&
+ isTypeLegal(Extend.getOperand(0).getValueType());
}
bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
@@ -16999,8 +19346,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
- *Fast = Subtarget.enableUnalignedScalarMem();
- return Subtarget.enableUnalignedScalarMem();
+ *Fast = Subtarget.hasFastUnalignedAccess();
+ return Subtarget.hasFastUnalignedAccess();
}
// All vector implementations must support element alignment
@@ -17016,8 +19363,51 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
// misaligned accesses. TODO: Work through the codegen implications of
// allowing such accesses to be formed, and considered fast.
if (Fast)
- *Fast = Subtarget.enableUnalignedVectorMem();
- return Subtarget.enableUnalignedVectorMem();
+ *Fast = Subtarget.hasFastUnalignedAccess();
+ return Subtarget.hasFastUnalignedAccess();
+}
+
+
+EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
+ if (!Subtarget.hasVInstructions())
+ return MVT::Other;
+
+ if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
+ return MVT::Other;
+
+ // We use LMUL1 memory operations here for a non-obvious reason. Our caller
+ // has an expansion threshold, and we want the number of hardware memory
+ // operations to correspond roughly to that threshold. LMUL>1 operations
+ // are typically expanded linearly internally, and thus correspond to more
+ // than one actual memory operation. Note that store merging and load
+ // combining will typically form larger LMUL operations from the LMUL1
+ // operations emitted here, and that's okay because combining isn't
+ // introducing new memory operations; it's just merging existing ones.
+ const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
+ if (Op.size() < MinVLenInBytes)
+ // TODO: Figure out short memops. For the moment, do the default thing
+ // which ends up using scalar sequences.
+ return MVT::Other;
+
+ // Prefer i8 for non-zero memset as it allows us to avoid materializing
+ // a large scalar constant and instead use vmv.v.x/i to do the
+ // broadcast. For everything else, prefer ELenVT to minimize VL and thus
+ // maximize the chance we can encode the size in the vsetvli.
+ MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
+ MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
+
+ // Do we have sufficient alignment for our preferred VT? If not, revert
+ // to the largest size allowed by our alignment criteria.
+ if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
+ Align RequiredAlign(PreferredVT.getStoreSize());
+ if (Op.isFixedDstAlign())
+ RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
+ if (Op.isMemcpy())
+ RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
+ PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
+ }
+ return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
}
bool RISCVTargetLowering::splitValueIntoRegisterParts(
@@ -17142,10 +19532,8 @@ static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
- return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
- IRB.CreateCall(ThreadPointerFunc), Offset),
- IRB.getInt8PtrTy()->getPointerTo(0));
+ return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+ IRB.CreateCall(ThreadPointerFunc), Offset);
}
Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
@@ -17203,7 +19591,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
if (!isLegalElementTypeForRVV(ScalarType))
return false;
- if (!Subtarget.enableUnalignedVectorMem() &&
+ if (!Subtarget.hasFastUnalignedAccess() &&
Alignment < ScalarType.getStoreSize())
return false;
@@ -17503,6 +19891,72 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}
+bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
+ if (VT.isScalableVector())
+ return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
+ if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
+ return true;
+ return Subtarget.hasStdExtZbb() &&
+ (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+}
+
+unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
+ ISD::CondCode Cond) const {
+ return isCtpopFast(VT) ? 0 : 1;
+}
+
+bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
+ // At the moment, the only scalable instruction GISel knows how to lower is
+ // ret with scalable argument.
+
+ if (Inst.getType()->isScalableTy())
+ return true;
+
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i)->getType()->isScalableTy() &&
+ !isa<ReturnInst>(&Inst))
+ return true;
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+ if (AI->getAllocatedType()->isScalableTy())
+ return true;
+ }
+
+ return false;
+}
+
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+
+ // Only perform this transform if short forward branch opt is supported.
+ if (!Subtarget.hasShortForwardBranchOpt())
+ return SDValue();
+ EVT VT = N->getValueType(0);
+ if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
+ return SDValue();
+
+ // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
+ if (Divisor.sgt(2048) || Divisor.slt(-2048))
+ return SDValue();
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
+}
+
+bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
+ EVT VT, const APInt &AndMask) const {
+ if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
+ return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
+ return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
+}
+
+unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
+ return Subtarget.getMinimumJumpTableEntries();
+}
+
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 164ded95a1b5..41a2dc5771c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -22,9 +22,12 @@
#include <optional>
namespace llvm {
+class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
+
namespace RISCVISD {
+// clang-format off
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
RET_GLUE,
@@ -54,9 +57,6 @@ enum NodeType : unsigned {
// Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
ADD_TPREL,
- // Load address.
- LA_TLS_GD,
-
// Multiply high for signedxunsigned.
MULHSU,
// RV64I shifts, directly matching the semantics of the named RISC-V
@@ -121,7 +121,7 @@ enum NodeType : unsigned {
// inserter.
FROUND,
- FPCLASS,
+ FCLASS,
// Floating point fmax and fmin matching the RISC-V instruction semantics.
FMAX, FMIN,
@@ -143,10 +143,11 @@ enum NodeType : unsigned {
SM3P0, SM3P1,
// Vector Extension
+ FIRST_VL_VECTOR_OP,
// VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
// for the VL value to be used for the operation. The first operand is
// passthru operand.
- VMV_V_V_VL,
+ VMV_V_V_VL = FIRST_VL_VECTOR_OP,
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
// for the VL value to be used for the operation. The first operand is
// passthru operand.
@@ -166,15 +167,13 @@ enum NodeType : unsigned {
// expanded late to two scalar stores and a stride 0 vector load.
// The first operand is passthru operand.
SPLAT_VECTOR_SPLIT_I64_VL,
- // Read VLENB CSR
- READ_VLENB,
// Truncates a RVV integer vector by one power-of-two. Carries both an extra
// mask and VL operand.
TRUNCATE_VECTOR_VL,
// Matches the semantics of vslideup/vslidedown. The first operand is the
- // pass-thru operand, the second is the source vector, the third is the
- // XLenVT index (either constant or non-constant), the fourth is the mask
- // and the fifth the VL.
+ // pass-thru operand, the second is the source vector, the third is the XLenVT
+ // index (either constant or non-constant), the fourth is the mask, the fifth
+ // is the VL and the sixth is the policy.
VSLIDEUP_VL,
VSLIDEDOWN_VL,
// Matches the semantics of vslide1up/slide1down. The first operand is
@@ -232,6 +231,8 @@ enum NodeType : unsigned {
SREM_VL,
SRA_VL,
SRL_VL,
+ ROTL_VL,
+ ROTR_VL,
SUB_VL,
UDIV_VL,
UREM_VL,
@@ -258,8 +259,8 @@ enum NodeType : unsigned {
FSUB_VL,
FMUL_VL,
FDIV_VL,
- FMINNUM_VL,
- FMAXNUM_VL,
+ VFMIN_VL,
+ VFMAX_VL,
// Vector unary ops with a mask as a second operand and VL as a third operand.
FNEG_VL,
@@ -307,6 +308,7 @@ enum NodeType : unsigned {
VWADDU_W_VL,
VWSUB_W_VL,
VWSUBU_W_VL,
+ VWSLL_VL,
VFWMUL_VL,
VFWADD_VL,
@@ -360,6 +362,10 @@ enum NodeType : unsigned {
// vfirst.m with additional mask and VL operands.
VFIRST_VL,
+ LAST_VL_VECTOR_OP = VFIRST_VL,
+
+ // Read VLENB CSR
+ READ_VLENB,
// Reads value of CSR.
// The first operand is a chain pointer. The second specifies address of the
// required CSR. Two results are produced, the read value and the new chain
@@ -405,22 +411,19 @@ enum NodeType : unsigned {
STRICT_FSETCC_VL,
STRICT_FSETCCS_VL,
STRICT_VFROUND_NOEXCEPT_VL,
+ LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
- // Represents an AUIPC+L[WD] pair. Selected to PseudoLGA.
- LGA = ISD::FIRST_TARGET_MEMORY_OPCODE,
- // Load initial exec thread-local address.
- LA_TLS_IE,
-
- TH_LWD,
+ TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
TH_LWUD,
TH_LDD,
TH_SWD,
TH_SDD,
};
+// clang-format on
} // namespace RISCVISD
class RISCVTargetLowering : public TargetLowering {
@@ -464,7 +467,7 @@ public:
SmallVectorImpl<Use *> &Ops) const override;
bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
+ std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
@@ -487,6 +490,12 @@ public:
CallingConv::ID CC,
EVT VT) const override;
+ unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const override;
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
EVT VT) const override;
@@ -514,6 +523,13 @@ public:
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
+ /// Return the cost of LMUL for linear operations.
+ InstructionCost getLMULCost(MVT VT) const;
+
+ InstructionCost getVRGatherVVCost(MVT VT) const;
+ InstructionCost getVRGatherVICost(MVT VT) const;
+ InstructionCost getVSlideCost(MVT VT) const;
+
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
@@ -552,13 +568,14 @@ public:
ConstraintType getConstraintType(StringRef Constraint) const override;
- unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+ InlineAsm::ConstraintCode
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
@@ -592,6 +609,10 @@ public:
}
bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
+ bool isCtpopFast(EVT VT) const override;
+
+ unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;
+
bool preferZeroCompareBranch() const override { return true; }
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
@@ -698,6 +719,9 @@ public:
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
+
bool splitValueIntoRegisterParts(
SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
@@ -720,7 +744,13 @@ public:
// The following equations have been reordered to prevent loss of precision
// when calculating fractional LMUL.
return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
- };
+ }
+
+ // Return inclusive (low, high) bounds on the value of VLMAX for the
+ // given scalable container type given known bounds on VLEN.
+ static std::pair<unsigned, unsigned>
+ computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);
+
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
static unsigned getRegClassIDForVecVT(MVT VT);
@@ -730,7 +760,7 @@ public:
const RISCVRegisterInfo *TRI);
MVT getContainerForFixedLengthVector(MVT VT) const;
- bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
+ bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
bool isLegalElementTypeForRVV(EVT ScalarTy) const;
@@ -777,6 +807,8 @@ public:
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
+ bool fallBackToDAGISel(const Instruction &Inst) const override;
+
bool lowerInterleavedLoad(LoadInst *LI,
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
@@ -874,14 +906,12 @@ private:
SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc,
- bool HasMergeOp = false) const;
- SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
- unsigned VecOpc) const;
+ SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
- unsigned RISCVISDOpc) const;
+ SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
@@ -935,6 +965,14 @@ private:
/// For available scheduling models FDIV + two independent FMULs are much
/// faster than two FDIVs.
unsigned combineRepeatedFPDivisors() const override;
+
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
+ bool shouldFoldSelectWithSingleBitTest(EVT VT,
+ const APInt &AndMask) const override;
+
+ unsigned getMinimumJumpTableEntries() const override;
};
namespace RISCV {
@@ -954,6 +992,9 @@ bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
+
+ArrayRef<MCPhysReg> getArgGPRs();
+
} // end namespace RISCV
namespace RISCVVIntrinsicsTable {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
index 4b26c27bb4f8..b807abcc5681 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -8,8 +8,9 @@
// This file implements the machine function pass to insert read/write of CSR-s
// of the RISC-V instructions.
//
-// Currently the pass implements naive insertion of a write to vxrm before an
-// RVV fixed-point instruction.
+// Currently the pass implements:
+// -Writing and saving frm before an RVV floating-point instruction with a
+// static rounding mode, and restoring the value afterwards.
//
//===----------------------------------------------------------------------===//
@@ -30,9 +31,7 @@ class RISCVInsertReadWriteCSR : public MachineFunctionPass {
public:
static char ID;
- RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) {
- initializeRISCVInsertReadWriteCSRPass(*PassRegistry::getPassRegistry());
- }
+ RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -56,60 +55,36 @@ char RISCVInsertReadWriteCSR::ID = 0;
INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE,
RISCV_INSERT_READ_WRITE_CSR_NAME, false, false)
-// Returns the index to the rounding mode immediate value if any, otherwise the
-// function will return None.
-static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
- uint64_t TSFlags = MI.getDesc().TSFlags;
- if (!RISCVII::hasRoundModeOp(TSFlags))
- return std::nullopt;
-
- // The operand order
- // -------------------------------------
- // | n-1 (if any) | n-2 | n-3 | n-4 |
- // | policy | sew | vl | rm |
- // -------------------------------------
- return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
-}
-
-// This function inserts a write to vxrm when encountering an RVV fixed-point
-// instruction.
+// This function swaps frm and restores it when encountering an RVV
+// floating-point instruction with a static rounding mode.
bool RISCVInsertReadWriteCSR::emitWriteRoundingMode(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineInstr &MI : MBB) {
- if (auto RoundModeIdx = getRoundModeIdx(MI)) {
- if (RISCVII::usesVXRM(MI.getDesc().TSFlags)) {
- unsigned VXRMImm = MI.getOperand(*RoundModeIdx).getImm();
-
- Changed = true;
-
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
- .addImm(VXRMImm);
- MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
- /*IsImp*/ true));
- } else { // FRM
- unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
-
- // The value is a hint to this pass to not alter the frm value.
- if (FRMImm == RISCVFPRndMode::DYN)
- continue;
-
- Changed = true;
-
- // Save
- MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
- Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
- SavedFRM)
- .addImm(FRMImm);
- MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
- /*IsImp*/ true));
- // Restore
- MachineInstrBuilder MIB =
- BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
- .addReg(SavedFRM);
- MBB.insertAfter(MI, MIB);
- }
- }
+ int FRMIdx = RISCVII::getFRMOpNum(MI.getDesc());
+ if (FRMIdx < 0)
+ continue;
+
+ unsigned FRMImm = MI.getOperand(FRMIdx).getImm();
+
+ // The value is a hint to this pass to not alter the frm value.
+ if (FRMImm == RISCVFPRndMode::DYN)
+ continue;
+
+ Changed = true;
+
+ // Save
+ MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
+ Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
+ SavedFRM)
+ .addImm(FRMImm);
+ MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ // Restore
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
+ .addReg(SavedFRM);
+ MBB.insertAfter(MI, MIB);
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index f1ebe63cfa14..3400b24e0abb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -67,16 +67,28 @@ static bool isVLPreservingConfig(const MachineInstr &MI) {
return RISCV::X0 == MI.getOperand(0).getReg();
}
-static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
- const RISCVVPseudosTable::PseudoInfo *RVV =
- RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
- if (!RVV)
- return 0;
- return RVV->BaseInstr;
+static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VFMV_S_F:
+ case RISCV::VFMV_V_F:
+ return true;
+ }
+}
+
+static bool isScalarExtractInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VMV_X_S:
+ case RISCV::VFMV_F_S:
+ return true;
+ }
}
-static bool isScalarMoveInstr(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+static bool isScalarInsertInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_S_X:
@@ -86,7 +98,7 @@ static bool isScalarMoveInstr(const MachineInstr &MI) {
}
static bool isScalarSplatInstr(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_V_I:
@@ -97,7 +109,7 @@ static bool isScalarSplatInstr(const MachineInstr &MI) {
}
static bool isVSlideInstr(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VSLIDEDOWN_VX:
@@ -111,7 +123,7 @@ static bool isVSlideInstr(const MachineInstr &MI) {
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
- switch (getRVVMCOpcode(MI.getOpcode())) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return std::nullopt;
case RISCV::VLE8_V:
@@ -137,6 +149,13 @@ static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
}
}
+static bool isNonZeroLoadImmediate(MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::ADDI &&
+ MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
+ MI.getOperand(1).getReg() == RISCV::X0 &&
+ MI.getOperand(2).getImm() != 0;
+}
+
/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
@@ -160,9 +179,13 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
// lanes are undefined.
return true;
- // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands
- // are solely IMPLICIT_DEFS), the pass through lanes are undefined.
+ // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
+ // operands are solely IMPLICIT_DEFS, then the pass through lanes are
+ // undefined.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (UseMO.getReg() == RISCV::NoRegister)
+ return true;
+
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
@@ -188,10 +211,14 @@ struct DemandedFields {
bool VLZeroness = false;
// What properties of SEW we need to preserve.
enum : uint8_t {
- SEWEqual = 2, // The exact value of SEW needs to be preserved.
- SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
+ SEWEqual = 3, // The exact value of SEW needs to be preserved.
+ SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
// than or equal to the original value.
- SEWNone = 0 // We don't need to preserve SEW at all.
+ SEWGreaterThanOrEqualAndLessThan64 =
+ 1, // SEW can be changed as long as it's greater
+ // than or equal to the original value, but must be less
+ // than 64.
+ SEWNone = 0 // We don't need to preserve SEW at all.
} SEW = SEWNone;
bool LMUL = false;
bool SEWLMULRatio = false;
@@ -243,6 +270,9 @@ struct DemandedFields {
case SEWGreaterThanOrEqual:
OS << "SEWGreaterThanOrEqual";
break;
+ case SEWGreaterThanOrEqualAndLessThan64:
+ OS << "SEWGreaterThanOrEqualAndLessThan64";
+ break;
case SEWNone:
OS << "SEWNone";
break;
@@ -270,13 +300,23 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
const DemandedFields &Used) {
- if (Used.SEW == DemandedFields::SEWEqual &&
- RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
- return false;
-
- if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
- RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
- return false;
+ switch (Used.SEW) {
+ case DemandedFields::SEWNone:
+ break;
+ case DemandedFields::SEWEqual:
+ if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqual:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
+ RISCVVType::getSEW(NewVType) >= 64)
+ return false;
+ break;
+ }
if (Used.LMUL &&
RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
@@ -302,7 +342,8 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
- const MachineRegisterInfo *MRI) {
+ const MachineRegisterInfo *MRI,
+ const RISCVSubtarget *ST) {
// Warning: This function has to work on both the lowered (i.e. post
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
@@ -354,7 +395,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
}
// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
- if (isScalarMoveInstr(MI)) {
+ if (isScalarInsertInstr(MI)) {
Res.LMUL = false;
Res.SEWLMULRatio = false;
Res.VLAny = false;
@@ -365,11 +406,23 @@ DemandedFields getDemanded(const MachineInstr &MI,
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
if (hasUndefinedMergeOp(MI, *MRI)) {
- Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
Res.TailPolicy = false;
}
}
+ // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
+ if (isScalarExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ Res.LMUL = false;
+ Res.SEWLMULRatio = false;
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
return Res;
}
@@ -431,8 +484,22 @@ public:
return AVLImm;
}
+ void setAVL(VSETVLIInfo Info) {
+ assert(Info.isValid());
+ if (Info.isUnknown())
+ setUnknown();
+ else if (Info.hasAVLReg())
+ setAVLReg(Info.getAVLReg());
+ else {
+ assert(Info.hasAVLImm());
+ setAVLImm(Info.getAVLImm());
+ }
+ }
+
unsigned getSEW() const { return SEW; }
RISCVII::VLMUL getVLMUL() const { return VLMul; }
+ bool getTailAgnostic() const { return TailAgnostic; }
+ bool getMaskAgnostic() const { return MaskAgnostic; }
bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
if (hasAVLImm())
@@ -441,10 +508,7 @@ public:
if (getAVLReg() == RISCV::X0)
return true;
if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
- MI && MI->getOpcode() == RISCV::ADDI &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
- MI->getOperand(1).getReg() == RISCV::X0 &&
- MI->getOperand(2).getImm() != 0)
+ MI && isNonZeroLoadImmediate(*MI))
return true;
return false;
}
@@ -485,6 +549,8 @@ public:
MaskAgnostic = MA;
}
+ void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
+
unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
@@ -545,12 +611,6 @@ public:
if (SEWLMULRatioOnly)
return false;
- // If the instruction doesn't need an AVLReg and the SEW matches, consider
- // it compatible.
- if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
- if (SEW == Require.SEW)
- return true;
-
if (Used.VLAny && !hasSameAVL(Require))
return false;
@@ -661,10 +721,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
#endif
struct BlockData {
- // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
- // made by this block. Calculated in Phase 1.
- VSETVLIInfo Change;
-
// The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
// block. Calculated in Phase 2.
VSETVLIInfo Exit;
@@ -680,6 +736,7 @@ struct BlockData {
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
+ const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
@@ -689,9 +746,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
public:
static char ID;
- RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
- initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
- }
+ RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -712,9 +767,10 @@ private:
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
- void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
- void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
- bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
+ void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
+ void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
+ bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doLocalPostpass(MachineBasicBlock &MBB);
@@ -729,6 +785,25 @@ char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
+// Return a VSETVLIInfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction.
+static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+ VSETVLIInfo NewInfo;
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
+ Register AVLReg = MI.getOperand(1).getReg();
+ assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
+ "Can't handle X0, X0 vsetvli yet");
+ NewInfo.setAVLReg(AVLReg);
+ }
+ NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+ return NewInfo;
+}
+
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
@@ -779,6 +854,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
InstrInfo.setAVLReg(VLOp.getReg());
}
} else {
+ assert(isScalarExtractInstr(MI));
InstrInfo.setAVLReg(RISCV::NoRegister);
}
#ifndef NDEBUG
@@ -788,6 +864,21 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
#endif
InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
+ // AVL operand with the AVL of the defining vsetvli. We avoid general
+ // register AVLs to avoid extending live ranges without being sure we can
+ // kill the original source reg entirely.
+ if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
+ MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
+ if (DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
+ if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
+ (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
+ InstrInfo.setAVL(DefInstrInfo);
+ }
+ }
+ }
+
return InstrInfo;
}
@@ -798,25 +889,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}
-// Return a VSETVLIInfo representing the changes made by this VSETVLI or
-// VSETIVLI instruction.
-static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
- VSETVLIInfo NewInfo;
- if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- NewInfo.setAVLImm(MI.getOperand(1).getImm());
- } else {
- assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETVLIX0);
- Register AVLReg = MI.getOperand(1).getReg();
- assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
- "Can't handle X0, X0 vsetvli yet");
- NewInfo.setAVLReg(AVLReg);
- }
- NewInfo.setVTYPE(MI.getOperand(2).getImm());
-
- return NewInfo;
-}
-
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
@@ -875,10 +947,10 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(RISCV::VL, RegState::Implicit);
return;
}
- // Otherwise use an AVL of 0 to avoid depending on previous vl.
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(0)
+ .addImm(1)
.addImm(Info.encodeVTYPE());
return;
}
@@ -916,7 +988,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
- DemandedFields Used = getDemanded(MI, MRI);
+ DemandedFields Used = getDemanded(MI, MRI, ST);
// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
@@ -944,7 +1016,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
Used.LMUL = false;
Used.SEWLMULRatio = false;
Used.VLAny = false;
- Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
+ Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
Used.TailPolicy = false;
}
@@ -969,67 +1044,82 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
return true;
}
-// Given an incoming state reaching MI, modifies that state so that it is minimally
-// compatible with MI. The resulting state is guaranteed to be semantically legal
-// for MI, but may not be the state requested by MI.
-void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
+// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+// places.
+static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
+ DemandedFields &Demanded) {
+ VSETVLIInfo Info = NewInfo;
+
+ if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
+ !PrevInfo.isUnknown()) {
+ if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
+ PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
+ Info.setVLMul(*NewVLMul);
+ Demanded.LMUL = true;
+ }
+
+ return Info;
+}
+
+// Given an incoming state reaching MI, minimally modifies that state so that it
+// is compatible with MI. The resulting state is guaranteed to be semantically
+// legal for MI, but may not be the state requested by MI.
+void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
uint64_t TSFlags = MI.getDesc().TSFlags;
if (!RISCVII::hasSEWOp(TSFlags))
return;
const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+ assert(NewInfo.isValid() && !NewInfo.isUnknown());
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
return;
const VSETVLIInfo PrevInfo = Info;
- Info = NewInfo;
-
- if (!RISCVII::hasVLOp(TSFlags))
- return;
-
- // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
- // VL > 0. We can discard the user requested AVL and just use the last
- // one if we can prove it equally zero. This removes a vsetvli entirely
- // if the types match or allows use of cheaper avl preserving variant
- // if VLMAX doesn't change. If VLMAX might change, we couldn't use
- // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
- // prevent extending live range of an avl register operand.
+ if (!Info.isValid() || Info.isUnknown())
+ Info = NewInfo;
+
+ DemandedFields Demanded = getDemanded(MI, MRI, ST);
+ const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
+
+ // If MI only demands that VL has the same zeroness, we only need to set the
+ // AVL if the zeroness differs. This removes a vsetvli entirely if the types
+ // match or allows use of cheaper avl preserving variant if VLMAX doesn't
+  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
+ // variant, so we avoid the transform to prevent extending live range of an
+ // avl register operand.
// TODO: We can probably relax this for immediates.
- if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
- PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
- Info.hasSameVLMAX(PrevInfo)) {
- if (PrevInfo.hasAVLImm())
- Info.setAVLImm(PrevInfo.getAVLImm());
- else
- Info.setAVLReg(PrevInfo.getAVLReg());
- return;
- }
-
- // If AVL is defined by a vsetvli with the same VLMAX, we can
- // replace the AVL operand with the AVL of the defining vsetvli.
- // We avoid general register AVLs to avoid extending live ranges
- // without being sure we can kill the original source reg entirely.
- if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
- return;
- MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
- if (!DefMI || !isVectorConfigInstr(*DefMI))
- return;
-
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
- if (DefInfo.hasSameVLMAX(Info) &&
- (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
- if (DefInfo.hasAVLImm())
- Info.setAVLImm(DefInfo.getAVLImm());
- else
- Info.setAVLReg(DefInfo.getAVLReg());
- return;
+ bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
+ IncomingInfo.hasSameVLMAX(PrevInfo);
+ if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
+ Info.setAVL(IncomingInfo);
+
+ Info.setVTYPE(
+ ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
+ .getVLMUL(),
+ ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
+ // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
+ // if needed.
+ (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
+ IncomingInfo.getTailAgnostic(),
+ (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
+ IncomingInfo.getMaskAgnostic());
+
+ // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
+ // the AVL.
+ if (Info.hasSEWLMULRatioOnly()) {
+ VSETVLIInfo RatiolessInfo = IncomingInfo;
+ RatiolessInfo.setAVL(Info);
+ Info = RatiolessInfo;
}
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
-void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
+void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
if (isVectorConfigInstr(MI)) {
Info = getInfoForVSETVLI(MI);
return;
@@ -1048,18 +1138,18 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI
Info = VSETVLIInfo::getUnknown();
}
-bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
+bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const {
bool HadVectorOp = false;
- BlockData &BBInfo = BlockInfo[MBB.getNumber()];
- BBInfo.Change = BBInfo.Pred;
+ Info = BlockInfo[MBB.getNumber()].Pred;
for (const MachineInstr &MI : MBB) {
- transferBefore(BBInfo.Change, MI);
+ transferBefore(Info, MI);
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
HadVectorOp = true;
- transferAfter(BBInfo.Change, MI);
+ transferAfter(Info, MI);
}
return HadVectorOp;
@@ -1098,8 +1188,8 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
// compatibility checks performed a blocks output state can change based on
// the input state. To cache, we'd have to add logic for finding
// never-compatible state changes.
- computeVLVTYPEChanges(MBB);
- VSETVLIInfo TmpStatus = BBInfo.Change;
+ VSETVLIInfo TmpStatus;
+ computeVLVTYPEChanges(MBB, TmpStatus);
// If the new exit value matches the old exit value, we don't need to revisit
// any blocks.
@@ -1205,9 +1295,20 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (RISCVII::hasVLOp(TSFlags)) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
+ Register Reg = VLOp.getReg();
+ MachineInstr *VLOpDef = MRI->getVRegDef(Reg);
+
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
VLOp.setIsKill(false);
+
+ // If the AVL was an immediate > 31, then it would have been emitted
+ // as an ADDI. However, the ADDI might not have been used in the
+ // vsetvli, or a vsetvli might not have been emitted, so it may be
+ // dead now.
+ if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
+ MRI->use_nodbg_empty(Reg))
+ VLOpDef->eraseFromParent();
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
/*isImp*/ true));
@@ -1251,36 +1352,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
}
}
-/// Return true if the VL value configured must be equal to the requested one.
-static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
- if (!Info.hasAVLImm())
- // VLMAX is always the same value.
- // TODO: Could extend to other registers by looking at the associated vreg
- // def placement.
- return RISCV::X0 == Info.getAVLReg();
-
- unsigned AVL = Info.getAVLImm();
- unsigned SEW = Info.getSEW();
- unsigned AVLInBits = AVL * SEW;
-
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
-
- if (Fractional)
- return ST.getRealMinVLen() / LMul >= AVLInBits;
- return ST.getRealMinVLen() * LMul >= AVLInBits;
-}
-
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
- const MachineFunction &MF = *MBB.getParent();
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
-
if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
return;
@@ -1308,9 +1385,21 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
if (UnavailablePred->succ_size() != 1)
return;
- // If VL can be less than AVL, then we can't reduce the frequency of exec.
- if (!hasFixedResult(AvailableInfo, ST))
- return;
+ // If the AVL value is a register (other than our VLMAX sentinel),
+ // we need to prove the value is available at the point we're going
+ // to insert the vsetvli at.
+ if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
+ MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
+ if (!AVLDefMI)
+ return;
+ // This is an inline dominance check which covers the case of
+ // UnavailablePred being the preheader of a loop.
+ if (AVLDefMI->getParent() != UnavailablePred)
+ return;
+ for (auto &TermMI : UnavailablePred->terminators())
+ if (&TermMI == AVLDefMI)
+ return;
+ }
// Model the effect of changing the input state of the block MBB to
// AvailableInfo. We're looking for two issues here; one legality,
@@ -1370,9 +1459,16 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
A.MaskPolicy |= B.MaskPolicy;
}
-static bool isNonZeroAVL(const MachineOperand &MO) {
- if (MO.isReg())
- return RISCV::X0 == MO.getReg();
+static bool isNonZeroAVL(const MachineOperand &MO,
+ const MachineRegisterInfo &MRI) {
+ if (MO.isReg()) {
+ if (MO.getReg() == RISCV::X0)
+ return true;
+ if (MachineInstr *MI = MRI.getVRegDef(MO.getReg());
+ MI && isNonZeroLoadImmediate(*MI))
+ return true;
+ return false;
+ }
assert(MO.isImm());
return 0 != MO.getImm();
}
@@ -1381,7 +1477,8 @@ static bool isNonZeroAVL(const MachineOperand &MO) {
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
const MachineInstr &MI,
- const DemandedFields &Used) {
+ const DemandedFields &Used,
+ const MachineRegisterInfo &MRI) {
// If the VL values aren't equal, return false if either a) the former is
// demanded, or b) we can't rewrite the former to be the later for
// implementation reasons.
@@ -1389,29 +1486,21 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
if (Used.VLAny)
return false;
- // TODO: Requires more care in the mutation...
- if (isVLPreservingConfig(PrevMI))
- return false;
-
// We don't bother to handle the equally zero case here as it's largely
// uninteresting.
- if (Used.VLZeroness &&
- (!isNonZeroAVL(MI.getOperand(1)) ||
- !isNonZeroAVL(PrevMI.getOperand(1))))
- return false;
+ if (Used.VLZeroness) {
+ if (isVLPreservingConfig(PrevMI))
+ return false;
+ if (!isNonZeroAVL(MI.getOperand(1), MRI) ||
+ !isNonZeroAVL(PrevMI.getOperand(1), MRI))
+ return false;
+ }
// TODO: Track whether the register is defined between
// PrevMI and MI.
if (MI.getOperand(1).isReg() &&
RISCV::X0 != MI.getOperand(1).getReg())
return false;
-
- // TODO: We need to change the result register to allow this rewrite
- // without the result forming a vl preserving vsetvli which is not
- // a correct state merge.
- if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
- MI.getOperand(1).isReg())
- return false;
}
if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
@@ -1433,7 +1522,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (!isVectorConfigInstr(MI)) {
- doUnion(Used, getDemanded(MI, MRI));
+ doUnion(Used, getDemanded(MI, MRI, ST));
continue;
}
@@ -1447,25 +1536,32 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
continue;
- } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
+ } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
+ MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
+ MI.getOperand(0).setIsDead(false);
+ Register OldVLReg;
+ if (MI.getOperand(1).isReg())
+ OldVLReg = MI.getOperand(1).getReg();
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
+ if (OldVLReg) {
+ MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
+ if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
+ MRI->use_nodbg_empty(OldVLReg))
+ VLOpDef->eraseFromParent();
+ }
MI.setDesc(NextMI->getDesc());
}
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
- // Don't delete a vsetvli if its result might be used.
- Register NextVRefDef = NextMI->getOperand(0).getReg();
- if (NextVRefDef == RISCV::X0 ||
- (NextVRefDef.isVirtual() && MRI->use_nodbg_empty(NextVRefDef)))
- ToDelete.push_back(NextMI);
+ ToDelete.push_back(NextMI);
// fallthrough
}
}
NextMI = &MI;
- Used = getDemanded(MI, MRI);
+ Used = getDemanded(MI, MRI, ST);
}
for (auto *MI : ToDelete)
@@ -1488,13 +1584,13 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- if (!ST.hasVInstructions())
+ ST = &MF.getSubtarget<RISCVSubtarget>();
+ if (!ST->hasVInstructions())
return false;
LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
- TII = ST.getInstrInfo();
+ TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
assert(BlockInfo.empty() && "Expect empty block infos");
@@ -1504,10 +1600,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Phase 1 - determine how VL/VTYPE are affected by each block.
for (const MachineBasicBlock &MBB : MF) {
- HaveVectorOp |= computeVLVTYPEChanges(MBB);
+ VSETVLIInfo TmpStatus;
+ HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
// Initial exit state is whatever change we found in the block.
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
- BBInfo.Exit = BBInfo.Change;
+ BBInfo.Exit = TmpStatus;
LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
<< " is " << BBInfo.Exit << "\n");
@@ -1552,22 +1649,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
doLocalPostpass(MBB);
- // Once we're fully done rewriting all the instructions, do a final pass
- // through to check for VSETVLIs which write to an unused destination.
- // For the non X0, X0 variant, we can replace the destination register
- // with X0 to reduce register pressure. This is really a generic
- // optimization which can be applied to any dead def (TODO: generalize).
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- Register VRegDef = MI.getOperand(0).getReg();
- if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
- MI.getOperand(0).setReg(RISCV::X0);
- }
- }
- }
-
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
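The vsetvli changes above let LMUL float when neither LMUL nor the SEW/LMUL ratio is demanded, as long as the ratio (and therefore VLMAX) stays the same as the incoming state, which is what allows a toggle to be skipped or cheapened. A standalone C++ sketch of that arithmetic follows; it assumes an LMUL-in-eighths encoding chosen only for the example and is not the RISCVVType::getSameRatioLMUL helper itself.

// Standalone sketch (not the LLVM helper): given the previous SEW/LMUL pair
// and a new SEW, pick the LMUL that keeps SEW/LMUL constant, so VLMAX (and
// therefore VL computed from the same AVL) is unchanged.
// LMUL is encoded in eighths: mf8=1, mf4=2, mf2=4, m1=8, m2=16, m4=32, m8=64.
#include <cstdint>
#include <optional>

std::optional<unsigned> sameRatioLMULInEighths(unsigned PrevSEW,
                                               unsigned PrevLMULEighths,
                                               unsigned NewSEW) {
  if (PrevSEW == 0)
    return std::nullopt;
  // Keep SEW/LMUL constant: NewLMUL = PrevLMUL * NewSEW / PrevSEW.
  uint64_t Scaled = (uint64_t)PrevLMULEighths * NewSEW;
  if (Scaled % PrevSEW != 0)
    return std::nullopt; // Ratio not representable.
  uint64_t NewLMULEighths = Scaled / PrevSEW;
  // Only the power-of-two values mf8..m8 are legal LMULs.
  if (NewLMULEighths < 1 || NewLMULEighths > 64 ||
      (NewLMULEighths & (NewLMULEighths - 1)) != 0)
    return std::nullopt;
  return (unsigned)NewLMULEighths;
}

// Example: a previous vtype of e32,m2 (SEW=32, LMUL=2 -> 16 eighths) and a new
// SEW of 64 gives LMUL=m4 (32 eighths); SEW/LMUL stays 16 in both configs.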
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
new file mode 100644
index 000000000000..de2227f82192
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -0,0 +1,458 @@
+//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts writes to the VXRM CSR as needed by vector instructions.
+// Each instruction that uses VXRM carries an operand that contains its required
+// VXRM value. This pass tries to optimize placement to avoid redundant writes
+// to VXRM.
+//
+// This is done using two dataflow analyses. The first is a forward dataflow
+// to calculate where a VXRM value is available. The second is a backward
+// dataflow to determine where a VXRM value is anticipated.
+//
+// Finally, we use the results of these two dataflows to insert VXRM writes
+// where a value is anticipated, but not available.
+//
+// FIXME: This pass does not split critical edges, so there can still be some
+// redundancy.
+//
+// FIXME: If we are willing to have writes that aren't always needed, we could
+// reduce the number of VXRM writes in some cases.
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-write-vxrm"
+#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
+
+namespace {
+
+class VXRMInfo {
+ uint8_t VXRMImm = 0;
+
+ enum : uint8_t {
+ Uninitialized,
+ Static,
+ Unknown,
+ } State = Uninitialized;
+
+public:
+ VXRMInfo() {}
+
+ static VXRMInfo getUnknown() {
+ VXRMInfo Info;
+ Info.setUnknown();
+ return Info;
+ }
+
+ bool isValid() const { return State != Uninitialized; }
+ void setUnknown() { State = Unknown; }
+ bool isUnknown() const { return State == Unknown; }
+
+ bool isStatic() const { return State == Static; }
+
+ void setVXRMImm(unsigned Imm) {
+ assert(Imm <= 3 && "Unexpected VXRM value");
+ VXRMImm = Imm;
+ State = Static;
+ }
+ unsigned getVXRMImm() const {
+ assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
+ return VXRMImm;
+ }
+
+ bool operator==(const VXRMInfo &Other) const {
+ // Uninitialized is only equal to another Uninitialized.
+ if (State != Other.State)
+ return false;
+
+ if (isStatic())
+ return VXRMImm == Other.VXRMImm;
+
+ assert((isValid() || isUnknown()) && "Unexpected state");
+ return true;
+ }
+
+ bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
+
+ // Calculate the VXRMInfo visible to a block assuming this and Other are
+ // both predecessors.
+ VXRMInfo intersect(const VXRMInfo &Other) const {
+ // If the new value isn't valid, ignore it.
+ if (!Other.isValid())
+ return *this;
+
+ // If this value isn't valid, this must be the first predecessor, use it.
+ if (!isValid())
+ return Other;
+
+ // If either is unknown, the result is unknown.
+ if (isUnknown() || Other.isUnknown())
+ return VXRMInfo::getUnknown();
+
+ // If we have an exact match, return this.
+ if (*this == Other)
+ return *this;
+
+ // Otherwise the result is unknown.
+ return VXRMInfo::getUnknown();
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Support for debugging, callable in GDB: V->dump()
+ LLVM_DUMP_METHOD void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << '{';
+ if (!isValid())
+ OS << "Uninitialized";
+ else if (isUnknown())
+ OS << "Unknown";
+ else
+ OS << getVXRMImm();
+ OS << '}';
+ }
+#endif
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
+ V.print(OS);
+ return OS;
+}
+#endif
+
+struct BlockData {
+ // Indicates if the block uses VXRM. Uninitialized means no use.
+ VXRMInfo VXRMUse;
+
+  // Indicates the VXRM output from the block. Uninitialized means transparent.
+ VXRMInfo VXRMOut;
+
+  // Keeps track of the available VXRM value at the start of the basic block.
+ VXRMInfo AvailableIn;
+
+ // Keeps track of the available VXRM value at the end of the basic block.
+ VXRMInfo AvailableOut;
+
+ // Keeps track of what VXRM is anticipated at the start of the basic block.
+ VXRMInfo AnticipatedIn;
+
+ // Keeps track of what VXRM is anticipated at the end of the basic block.
+ VXRMInfo AnticipatedOut;
+
+ // Keeps track of whether the block is already in the queue.
+ bool InQueue;
+
+ BlockData() = default;
+};
+
+class RISCVInsertWriteVXRM : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+
+ std::vector<BlockData> BlockInfo;
+ std::queue<const MachineBasicBlock *> WorkList;
+
+public:
+ static char ID;
+
+ RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return RISCV_INSERT_WRITE_VXRM_NAME;
+ }
+
+private:
+ bool computeVXRMChanges(const MachineBasicBlock &MBB);
+ void computeAvailable(const MachineBasicBlock &MBB);
+ void computeAnticipated(const MachineBasicBlock &MBB);
+ void emitWriteVXRM(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertWriteVXRM::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
+ false, false)
+
+bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ bool NeedVXRMWrite = false;
+ for (const MachineInstr &MI : MBB) {
+ int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+ if (VXRMIdx >= 0) {
+ unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+ if (!BBInfo.VXRMUse.isValid())
+ BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
+
+ BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
+ NeedVXRMWrite = true;
+ continue;
+ }
+
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) {
+ if (!BBInfo.VXRMUse.isValid())
+ BBInfo.VXRMUse.setUnknown();
+
+ BBInfo.VXRMOut.setUnknown();
+ }
+ }
+
+ return NeedVXRMWrite;
+}
+
+void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ BBInfo.InQueue = false;
+
+ VXRMInfo Available;
+ if (MBB.pred_empty()) {
+ Available.setUnknown();
+ } else {
+ for (const MachineBasicBlock *P : MBB.predecessors())
+ Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
+ }
+
+ // If we don't have any valid available info, wait until we do.
+ if (!Available.isValid())
+ return;
+
+ if (Available != BBInfo.AvailableIn) {
+ BBInfo.AvailableIn = Available;
+ LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AvailableIn << "\n");
+ }
+
+ if (BBInfo.VXRMOut.isValid())
+ Available = BBInfo.VXRMOut;
+
+ if (Available == BBInfo.AvailableOut)
+ return;
+
+ BBInfo.AvailableOut = Available;
+ LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AvailableOut << "\n");
+
+ // Add the successors to the work list so that we can propagate.
+ for (MachineBasicBlock *S : MBB.successors()) {
+ if (!BlockInfo[S->getNumber()].InQueue) {
+ BlockInfo[S->getNumber()].InQueue = true;
+ WorkList.push(S);
+ }
+ }
+}
+
+void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) {
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ BBInfo.InQueue = false;
+
+ VXRMInfo Anticipated;
+ if (MBB.succ_empty()) {
+ Anticipated.setUnknown();
+ } else {
+ for (const MachineBasicBlock *S : MBB.successors())
+ Anticipated =
+ Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
+ }
+
+ // If we don't have any valid anticipated info, wait until we do.
+ if (!Anticipated.isValid())
+ return;
+
+ if (Anticipated != BBInfo.AnticipatedOut) {
+ BBInfo.AnticipatedOut = Anticipated;
+ LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AnticipatedOut << "\n");
+ }
+
+ // If this block reads VXRM, copy it.
+ if (BBInfo.VXRMUse.isValid())
+ Anticipated = BBInfo.VXRMUse;
+
+ if (Anticipated == BBInfo.AnticipatedIn)
+ return;
+
+ BBInfo.AnticipatedIn = Anticipated;
+ LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.AnticipatedIn << "\n");
+
+ // Add the predecessors to the work list so that we can propagate.
+ for (MachineBasicBlock *P : MBB.predecessors()) {
+ if (!BlockInfo[P->getNumber()].InQueue) {
+ BlockInfo[P->getNumber()].InQueue = true;
+ WorkList.push(P);
+ }
+ }
+}
+
+void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
+ const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ VXRMInfo Info = BBInfo.AvailableIn;
+
+  // Flag to indicate that we need to insert a VXRM write. We want to delay it
+  // as late as possible in this block.
+ // late as possible in this block.
+ bool PendingInsert = false;
+
+ // Insert VXRM write if anticipated and not available.
+ if (BBInfo.AnticipatedIn.isStatic()) {
+ // If this is the entry block and the value is anticipated, insert.
+ if (MBB.isEntryBlock()) {
+ PendingInsert = true;
+ } else {
+ // Search for any predecessors that wouldn't satisfy our requirement and
+      // insert a VXRM write if needed.
+ // NOTE: If one predecessor is able to provide the requirement, but
+ // another isn't, it means we have a critical edge. The better placement
+ // would be to split the critical edge.
+ for (MachineBasicBlock *P : MBB.predecessors()) {
+ const BlockData &PInfo = BlockInfo[P->getNumber()];
+ // If it's available out of the predecessor, then we're ok.
+ if (PInfo.AvailableOut.isStatic() &&
+ PInfo.AvailableOut.getVXRMImm() ==
+ BBInfo.AnticipatedIn.getVXRMImm())
+ continue;
+        // If the predecessor anticipates this value for all its successors,
+        // then a write to VXRM would have already occurred before this block is
+ // executed.
+ if (PInfo.AnticipatedOut.isStatic() &&
+ PInfo.AnticipatedOut.getVXRMImm() ==
+ BBInfo.AnticipatedIn.getVXRMImm())
+ continue;
+ PendingInsert = true;
+ break;
+ }
+ }
+
+ Info = BBInfo.AnticipatedIn;
+ }
+
+ for (MachineInstr &MI : MBB) {
+ int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+ if (VXRMIdx >= 0) {
+ unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+ if (PendingInsert || !Info.isStatic() ||
+ Info.getVXRMImm() != NewVXRMImm) {
+ assert((!PendingInsert ||
+ (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
+ "Pending VXRM insertion mismatch");
+ LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
+ .addImm(NewVXRMImm);
+ PendingInsert = false;
+ }
+
+ MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ Info.setVXRMImm(NewVXRMImm);
+ continue;
+ }
+
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM))
+ Info.setUnknown();
+ }
+
+ // If all our successors anticipate a value, do the insert.
+ // NOTE: It's possible that not all predecessors of our successor provide the
+ // correct value. This can occur on critical edges. If we don't split the
+  // critical edge, we'll also have a VXRM write in the successor that is
+ // redundant with this one.
+ if (PendingInsert ||
+ (BBInfo.AnticipatedOut.isStatic() &&
+ (!Info.isStatic() ||
+ Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
+ assert((!PendingInsert ||
+ (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
+ Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
+ "Pending VXRM insertion mismatch");
+ LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
+ << " changing to " << BBInfo.AnticipatedOut << "\n");
+ BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
+ TII->get(RISCV::WriteVXRMImm))
+ .addImm(BBInfo.AnticipatedOut.getVXRMImm());
+ }
+}
+
+bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasVInstructions())
+ return false;
+
+ TII = ST.getInstrInfo();
+
+ assert(BlockInfo.empty() && "Expect empty block infos");
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Phase 1 - collect block information.
+ bool NeedVXRMChange = false;
+ for (const MachineBasicBlock &MBB : MF)
+ NeedVXRMChange |= computeVXRMChanges(MBB);
+
+ if (!NeedVXRMChange) {
+ BlockInfo.clear();
+ return false;
+ }
+
+ // Phase 2 - Compute available VXRM using a forward walk.
+ for (const MachineBasicBlock &MBB : MF) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeAvailable(MBB);
+ }
+
+ // Phase 3 - Compute anticipated VXRM using a backwards walk.
+ for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeAnticipated(MBB);
+ }
+
+ // Phase 4 - Emit VXRM writes at the earliest place possible.
+ for (MachineBasicBlock &MBB : MF)
+ emitWriteVXRM(MBB);
+
+ BlockInfo.clear();
+
+ return true;
+}
+
+FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
+ return new RISCVInsertWriteVXRM();
+}
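The two dataflow problems in this new pass (available and anticipated VXRM) meet their inputs over a three-level lattice. Below is a minimal standalone sketch of that meet, assuming only the standard 2-bit VXRM immediates; it mirrors the idea of VXRMInfo::intersect but is illustrative, not the pass's class.

// Three-level lattice: Uninitialized (no information yet), Static(imm) (a
// known rounding mode), and Unknown (conflicting or clobbered).
#include <cassert>

struct VXRMLattice {
  enum class State { Uninitialized, Static, Unknown } St = State::Uninitialized;
  unsigned Imm = 0; // Valid only when St == State::Static; VXRM holds 0..3.

  static VXRMLattice makeStatic(unsigned Imm) {
    assert(Imm <= 3 && "VXRM is a 2-bit CSR field");
    return {State::Static, Imm};
  }

  // Meet of two predecessor (or successor) states.
  VXRMLattice intersect(const VXRMLattice &Other) const {
    if (St == State::Uninitialized)
      return Other; // No info yet; take the other input.
    if (Other.St == State::Uninitialized)
      return *this; // Ignore inputs with no info.
    if (St == State::Static && Other.St == State::Static && Imm == Other.Imm)
      return *this; // Agreeing constants stay static.
    return {State::Unknown, 0}; // Any disagreement or Unknown poisons the meet.
  }
};

// Example: two predecessors that both exit with VXRM immediate 2 meet to
// Static(2); if one of them made a call (Unknown), the meet is Unknown and a
// fresh VXRM write is needed before the rounding-mode-sensitive instruction.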
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 504952b6bd2f..e80ba26800a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -241,8 +241,8 @@ class PseudoQuietFCMP<DAGOperand Ty>
}
// Pseudo load instructions.
-class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
- : Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
+class PseudoLoad<string opcodestr>
+ : Pseudo<(outs GPR:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
@@ -250,7 +250,7 @@ class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
let isAsmParserOnly = 1;
}
-class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
+class PseudoFloatLoad<string opcodestr, RegisterClass rdty>
: Pseudo<(outs GPR:$tmp, rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
let hasSideEffects = 0;
let mayLoad = 1;
@@ -270,20 +270,51 @@ class PseudoStore<string opcodestr, RegisterClass rsty = GPR>
}
// Instruction formats are listed in the order they appear in the RISC-V
-// instruction set manual (R, I, S, B, U, J) with sub-formats (e.g. RVInstR4,
-// RVInstRAtomic) sorted alphabetically.
+// instruction set manual (R, R4, I, S, B, U, J).
+
+// Common base class for R format instructions. Bits {31-25} should be set by
+// the subclasses.
+class RVInstRBase<bits<3> funct3, RISCVOpcode opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = opcode.Value;
+}
class RVInstR<bits<7> funct7, bits<3> funct3, RISCVOpcode opcode, dag outs,
dag ins, string opcodestr, string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-25} = funct7;
+}
+
+class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
+ RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
+ string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-27} = funct5;
+ let Inst{26} = aq;
+ let Inst{25} = rl;
+}
+
+class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs2;
bits<5> rs1;
+ bits<3> frm;
bits<5> rd;
let Inst{31-25} = funct7;
let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
+ let Inst{14-12} = frm;
let Inst{11-7} = rd;
let Inst{6-0} = opcode.Value;
}
@@ -323,83 +354,51 @@ class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins,
let Inst{6-0} = opcode.Value;
}
-class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
- RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
- string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
- bits<5> rs2;
+// Common base class for I format instructions. Bits {31-20} should be set by
+// the subclasses.
+class RVInstIBase<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
bits<5> rs1;
bits<5> rd;
- let Inst{31-27} = funct5;
- let Inst{26} = aq;
- let Inst{25} = rl;
- let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
let Inst{6-0} = opcode.Value;
}
-class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
- string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
- bits<5> rs2;
- bits<5> rs1;
- bits<3> frm;
- bits<5> rd;
-
- let Inst{31-25} = funct7;
- let Inst{24-20} = rs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = frm;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
-}
-
class RVInstI<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
bits<12> imm12;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-20} = imm12;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
}
class RVInstIShift<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
dag outs, dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
bits<6> shamt;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-27} = imm11_7;
let Inst{26} = 0;
let Inst{25-20} = shamt;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
}
class RVInstIShiftW<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode,
dag outs, dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
bits<5> shamt;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-25} = imm11_5;
let Inst{24-20} = shamt;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = opcode.Value;
+}
+
+class RVInstIUnary<bits<12> imm12, bits<3> funct3, RISCVOpcode opcode,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-20} = imm12;
}
class RVInstS<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
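The refactor above hoists the operand fields shared by every R-format (and, in the following hunk, I-format) instruction into RVInstRBase/RVInstIBase, so subclasses only fill in the distinguishing high bits (funct7, funct5/aq/rl, or funct7 with frm replacing funct3). As a reference for the layout those classes encode, here is a standalone sketch of the plain R-type packing; it follows the standard RISC-V encoding and is not generated TableGen output.

#include <cstdint>

// R-type word: funct7[31:25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7]
// opcode[6:0], matching the Inst{...} assignments in RVInstRBase/RVInstR.
uint32_t encodeRType(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                     uint32_t funct3, uint32_t rd, uint32_t opcode) {
  return ((funct7 & 0x7f) << 25) | ((rs2 & 0x1f) << 20) |
         ((rs1 & 0x1f) << 15) | ((funct3 & 0x7) << 12) |
         ((rd & 0x1f) << 7) | (opcode & 0x7f);
}

// Example: add a0, a1, a2 -> funct7=0, rs2=x12, rs1=x11, funct3=0, rd=x10,
// opcode=0x33 (OP), which encodes to 0x00c58533.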
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td
new file mode 100644
index 000000000000..ede8c9809833
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -0,0 +1,26 @@
+//===-- RISCVInstrGISel.td - RISC-V GISel target pseudos ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+// RISC-V GlobalISel target pseudo instruction definitions. This file is kept
+// separate from the other tablegen files for organizational purposes, but
+// shares the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+class RISCVGenericInstruction : GenericInstruction {
+ let Namespace = "RISCV";
+}
+
+// Pseudo equivalent to a RISCVISD::FCLASS.
+def G_FCLASS : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_FCLASS, riscv_fclass>;
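G_FCLASS gives GlobalISel a generic opcode for the same classification that the FCLASS instructions perform. As a behavioral sketch only, the ten result bits follow the RISC-V F/D specification; this is illustrative host code, not target or lowering code.

#include <cmath>
#include <cstdint>
#include <cstring>

// Returns the fclass-style mask for a double: exactly one bit is set.
// bit 0: -inf, 1: -normal, 2: -subnormal, 3: -0, 4: +0, 5: +subnormal,
// bit 6: +normal, 7: +inf, 8: signaling NaN, 9: quiet NaN.
uint32_t fclassDouble(double X) {
  bool Neg = std::signbit(X);
  switch (std::fpclassify(X)) {
  case FP_INFINITE:  return Neg ? (1u << 0) : (1u << 7);
  case FP_NORMAL:    return Neg ? (1u << 1) : (1u << 6);
  case FP_SUBNORMAL: return Neg ? (1u << 2) : (1u << 5);
  case FP_ZERO:      return Neg ? (1u << 3) : (1u << 4);
  default:           break; // NaN falls through.
  }
  // Quiet NaNs have the most significant mantissa bit (bit 51) set.
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  return ((Bits >> 51) & 1) ? (1u << 9) : (1u << 8);
}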
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index c1065f73000f..1dcff7eb563e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
@@ -27,6 +28,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
@@ -293,6 +295,112 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
return false;
}
+void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, MCRegister DstReg,
+ MCRegister SrcReg, bool KillSrc,
+ unsigned Opc, unsigned NF) const {
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ RISCVII::VLMUL LMul;
+ unsigned SubRegIdx;
+ unsigned VVOpc, VIOpc;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Impossible LMUL for vector register copy.");
+ case RISCV::VMV1R_V:
+ LMul = RISCVII::LMUL_1;
+ SubRegIdx = RISCV::sub_vrm1_0;
+ VVOpc = RISCV::PseudoVMV_V_V_M1;
+ VIOpc = RISCV::PseudoVMV_V_I_M1;
+ break;
+ case RISCV::VMV2R_V:
+ LMul = RISCVII::LMUL_2;
+ SubRegIdx = RISCV::sub_vrm2_0;
+ VVOpc = RISCV::PseudoVMV_V_V_M2;
+ VIOpc = RISCV::PseudoVMV_V_I_M2;
+ break;
+ case RISCV::VMV4R_V:
+ LMul = RISCVII::LMUL_4;
+ SubRegIdx = RISCV::sub_vrm4_0;
+ VVOpc = RISCV::PseudoVMV_V_V_M4;
+ VIOpc = RISCV::PseudoVMV_V_I_M4;
+ break;
+ case RISCV::VMV8R_V:
+ assert(NF == 1);
+ LMul = RISCVII::LMUL_8;
+ SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
+ VVOpc = RISCV::PseudoVMV_V_V_M8;
+ VIOpc = RISCV::PseudoVMV_V_I_M8;
+ break;
+ }
+
+ bool UseVMV_V_V = false;
+ bool UseVMV_V_I = false;
+ MachineBasicBlock::const_iterator DefMBBI;
+ if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
+ UseVMV_V_V = true;
+ Opc = VVOpc;
+
+ if (DefMBBI->getOpcode() == VIOpc) {
+ UseVMV_V_I = true;
+ Opc = VIOpc;
+ }
+ }
+
+ if (NF == 1) {
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+ if (UseVMV_V_V)
+ MIB.addReg(DstReg, RegState::Undef);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(2));
+ else
+ MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ const MCInstrDesc &Desc = DefMBBI->getDesc();
+ MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
+ MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
+ return;
+ }
+
+ int I = 0, End = NF, Incr = 1;
+ unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
+ unsigned DstEncoding = TRI->getEncodingValue(DstReg);
+ unsigned LMulVal;
+ bool Fractional;
+ std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
+ assert(!Fractional && "It is impossible be fractional lmul here.");
+ if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
+ I = NF - 1;
+ End = -1;
+ Incr = -1;
+ }
+
+ for (; I != End; I += Incr) {
+ auto MIB =
+ BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I));
+ if (UseVMV_V_V)
+ MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(2));
+ else
+ MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
+ getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ const MCInstrDesc &Desc = DefMBBI->getDesc();
+ MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
+ MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
+ }
+}
+
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
@@ -329,195 +437,159 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // FPR->FPR copies and VR->VR copies.
- unsigned Opc;
- bool IsScalableVector = true;
- unsigned NF = 1;
- RISCVII::VLMUL LMul = RISCVII::LMUL_1;
- unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
+ unsigned Opc;
if (STI.hasStdExtZfh()) {
Opc = RISCV::FSGNJ_H;
} else {
- assert(STI.hasStdExtF() && STI.hasStdExtZfhmin() &&
+ assert(STI.hasStdExtF() &&
+ (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
"Unexpected extensions");
- // Zfhmin subset doesn't have FSGNJ_H, replaces FSGNJ_H with FSGNJ_S.
+      // Zfhmin/Zfbfmin don't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
Opc = RISCV::FSGNJ_S;
}
- IsScalableVector = false;
- } else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::FSGNJ_S;
- IsScalableVector = false;
- } else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::FSGNJ_D;
- IsScalableVector = false;
- } else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV4R_V;
- LMul = RISCVII::LMUL_4;
- } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV8R_V;
- LMul = RISCVII::LMUL_8;
- } else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 2;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- NF = 2;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV4R_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- NF = 2;
- LMul = RISCVII::LMUL_4;
- } else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 3;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- NF = 3;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 4;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- NF = 4;
- LMul = RISCVII::LMUL_2;
- } else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 5;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 6;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 7;
- LMul = RISCVII::LMUL_1;
- } else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- NF = 8;
- LMul = RISCVII::LMUL_1;
- } else {
- llvm_unreachable("Impossible reg-to-reg copy");
+ BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- if (IsScalableVector) {
- bool UseVMV_V_V = false;
- bool UseVMV_V_I = false;
- MachineBasicBlock::const_iterator DefMBBI;
- if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
- UseVMV_V_V = true;
- // We only need to handle LMUL = 1/2/4/8 here because we only define
- // vector register classes for LMUL = 1/2/4/8.
- unsigned VIOpc;
- switch (LMul) {
- default:
- llvm_unreachable("Impossible LMUL for vector register copy.");
- case RISCVII::LMUL_1:
- Opc = RISCV::PseudoVMV_V_V_M1;
- VIOpc = RISCV::PseudoVMV_V_I_M1;
- break;
- case RISCVII::LMUL_2:
- Opc = RISCV::PseudoVMV_V_V_M2;
- VIOpc = RISCV::PseudoVMV_V_I_M2;
- break;
- case RISCVII::LMUL_4:
- Opc = RISCV::PseudoVMV_V_V_M4;
- VIOpc = RISCV::PseudoVMV_V_I_M4;
- break;
- case RISCVII::LMUL_8:
- Opc = RISCV::PseudoVMV_V_V_M8;
- VIOpc = RISCV::PseudoVMV_V_I_M8;
- break;
- }
+ if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (DefMBBI->getOpcode() == VIOpc) {
- UseVMV_V_I = true;
- Opc = VIOpc;
- }
- }
+ if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (NF == 1) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
- if (UseVMV_V_V)
- MIB.addReg(DstReg, RegState::Undef);
- if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(2));
- else
- MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (UseVMV_V_V) {
- const MCInstrDesc &Desc = DefMBBI->getDesc();
- MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
- MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
- MIB.addImm(0); // tu, mu
- MIB.addReg(RISCV::VL, RegState::Implicit);
- MIB.addReg(RISCV::VTYPE, RegState::Implicit);
- }
- } else {
- int I = 0, End = NF, Incr = 1;
- unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
- unsigned DstEncoding = TRI->getEncodingValue(DstReg);
- unsigned LMulVal;
- bool Fractional;
- std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
- assert(!Fractional && "It is impossible be fractional lmul here.");
- if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
- I = NF - 1;
- End = -1;
- Incr = -1;
- }
+ if (RISCV::FPR32RegClass.contains(DstReg) &&
+ RISCV::GPRRegClass.contains(SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- for (; I != End; I += Incr) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
- TRI->getSubReg(DstReg, SubRegIdx + I));
- if (UseVMV_V_V)
- MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I),
- RegState::Undef);
- if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(2));
- else
- MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
- getKillRegState(KillSrc));
- if (UseVMV_V_V) {
- const MCInstrDesc &Desc = DefMBBI->getDesc();
- MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
- MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
- MIB.addImm(0); // tu, mu
- MIB.addReg(RISCV::VL, RegState::Implicit);
- MIB.addReg(RISCV::VTYPE, RegState::Implicit);
- }
- }
- }
- } else {
- BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
+ if (RISCV::GPRRegClass.contains(DstReg) &&
+ RISCV::FPR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (RISCV::FPR64RegClass.contains(DstReg) &&
+ RISCV::GPRRegClass.contains(SrcReg)) {
+ assert(STI.getXLen() == 64 && "Unexpected GPR size");
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (RISCV::GPRRegClass.contains(DstReg) &&
+ RISCV::FPR64RegClass.contains(SrcReg)) {
+ assert(STI.getXLen() == 64 && "Unexpected GPR size");
+ BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ // VR->VR copies.
+ if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V);
+ return;
+ }
+
+ if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V);
+ return;
+ }
+
+ if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V);
+ return;
+ }
+
+ if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V);
+ return;
+ }
+
+ if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/2);
+ return;
+ }
+
+ if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
+ /*NF=*/2);
+ return;
+ }
+
+ if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V,
+ /*NF=*/2);
+ return;
+ }
+
+ if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/3);
+ return;
+ }
+
+ if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
+ /*NF=*/3);
+ return;
+ }
+
+ if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/4);
+ return;
+ }
+
+ if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
+ /*NF=*/4);
+ return;
+ }
+
+ if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/5);
+ return;
+ }
+
+ if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/6);
+ return;
+ }
+
+ if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/7);
+ return;
+ }
+
+ if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
+ copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
+ /*NF=*/8);
+ return;
}
+
+ llvm_unreachable("Impossible reg-to-reg copy");
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -526,10 +598,6 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
- DebugLoc DL;
- if (I != MBB.end())
- DL = I->getDebugLoc();
-
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -590,7 +658,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
- BuildMI(MBB, I, DL, get(Opcode))
+ BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addMemOperand(MMO);
@@ -599,7 +667,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DL, get(Opcode))
+ BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
@@ -613,10 +681,6 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
- DebugLoc DL;
- if (I != MBB.end())
- DL = I->getDebugLoc();
-
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -677,7 +741,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
- BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addMemOperand(MMO);
} else {
@@ -685,7 +749,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
@@ -704,8 +768,7 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
if (MF.getDataLayout().isBigEndian())
return nullptr;
- // Fold load from stack followed by sext.w into lw.
- // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w?
+ // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
if (Ops.size() != 1 || Ops[0] != 1)
return nullptr;
@@ -753,38 +816,50 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
- MachineInstr::MIFlag Flag) const {
+ MachineInstr::MIFlag Flag, bool DstRenamable,
+ bool DstIsDead) const {
Register SrcReg = RISCV::X0;
if (!STI.is64Bit() && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
assert(!Seq.empty());
+ bool SrcRenamable = false;
+ unsigned Num = 0;
+
for (const RISCVMatInt::Inst &Inst : Seq) {
+ bool LastItem = ++Num == Seq.size();
+ unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
+ getRenamableRegState(DstRenamable);
+ unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
+ getRenamableRegState(SrcRenamable);
switch (Inst.getOpndKind()) {
case RISCVMatInt::Imm:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
.addImm(Inst.getImm())
.setMIFlag(Flag);
break;
case RISCVMatInt::RegX0:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
- .addReg(SrcReg, RegState::Kill)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
+ .addReg(SrcReg, SrcRegState)
.addReg(RISCV::X0)
.setMIFlag(Flag);
break;
case RISCVMatInt::RegReg:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addReg(SrcReg, RegState::Kill)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
+ .addReg(SrcReg, SrcRegState)
+ .addReg(SrcReg, SrcRegState)
.setMIFlag(Flag);
break;
case RISCVMatInt::RegImm:
- BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
- .addReg(SrcReg, RegState::Kill)
+ BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
+ .addReg(DstReg, RegState::Define | DstRegState)
+ .addReg(SrcReg, SrcRegState)
.addImm(Inst.getImm())
.setMIFlag(Flag);
break;
@@ -792,6 +867,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
// Only the first instruction has X0 as its source.
SrcReg = DstReg;
+ SrcRenamable = DstRenamable;
}
}
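A hypothetical call site for the extended movImm signature; MBB, InsertPt, DL, ScratchReg and TII are placeholders. It shows what the two new parameters buy a caller: the renamable/dead state is propagated onto the destination of the final instruction of the materialization sequence.

  // Materialize a constant into a register that later passes may rename.
  TII->movImm(MBB, InsertPt, DL, ScratchReg, /*Val=*/0x12345678ULL,
              MachineInstr::NoFlags, /*DstRenamable=*/true, /*DstIsDead=*/false);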
@@ -829,25 +905,29 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
Cond.push_back(LastInst.getOperand(1));
}
-const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
+unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case RISCVCC::COND_EQ:
- return get(RISCV::BEQ);
+ return RISCV::BEQ;
case RISCVCC::COND_NE:
- return get(RISCV::BNE);
+ return RISCV::BNE;
case RISCVCC::COND_LT:
- return get(RISCV::BLT);
+ return RISCV::BLT;
case RISCVCC::COND_GE:
- return get(RISCV::BGE);
+ return RISCV::BGE;
case RISCVCC::COND_LTU:
- return get(RISCV::BLTU);
+ return RISCV::BLTU;
case RISCVCC::COND_GEU:
- return get(RISCV::BGEU);
+ return RISCV::BGEU;
}
}
+const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
+ return get(RISCVCC::getBrCond(CC));
+}
+
RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
switch (CC) {
default:
@@ -907,6 +987,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (I->getDesc().isIndirectBranch())
return true;
+ // We can't handle Generic branch opcodes from Global ISel.
+ if (I->isPreISelOpcode())
+ return true;
+
// We can't handle blocks with more than 2 terminators.
if (NumTerminators > 2)
return true;
@@ -1079,6 +1163,125 @@ bool RISCVInstrInfo::reverseBranchCondition(
return false;
}
+bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 3> Cond;
+ if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
+ return false;
+ (void)FBB;
+
+ RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+ assert(CC != RISCVCC::COND_INVALID);
+
+ if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
+ return false;
+
+ // For two constants C0 and C1 from
+ // ```
+ // li Y, C0
+ // li Z, C1
+ // ```
+ // 1. if C1 = C0 + 1
+ // we can turn:
+ // (a) blt Y, X -> bge X, Z
+ // (b) bge Y, X -> blt X, Z
+ //
+ // 2. if C1 = C0 - 1
+ // we can turn:
+ // (a) blt X, Y -> bge Z, X
+ // (b) bge X, Y -> blt Z, X
+ //
+  // To make sure this optimization is really beneficial, we only optimize for
+  // cases where Y has a single use (i.e. it is only used by the branch).
+
+  // Right now we only care about LI (i.e. ADDI rd, x0, imm).
+ auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
+ if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).getReg() == RISCV::X0) {
+ Imm = MI->getOperand(2).getImm();
+ return true;
+ }
+ return false;
+ };
+  // Either a load-immediate instruction or X0.
+ auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
+ if (!Op.isReg())
+ return false;
+ Register Reg = Op.getReg();
+ if (Reg == RISCV::X0) {
+ Imm = 0;
+ return true;
+ }
+ if (!Reg.isVirtual())
+ return false;
+ return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm);
+ };
+
+ MachineOperand &LHS = MI.getOperand(0);
+ MachineOperand &RHS = MI.getOperand(1);
+  // Try to find the register holding constant Z; return an
+  // invalid register otherwise.
+ auto searchConst = [&](int64_t C1) -> Register {
+ MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
+ auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
+ int64_t Imm;
+ return isLoadImm(&I, Imm) && Imm == C1;
+ });
+ if (DefC1 != E)
+ return DefC1->getOperand(0).getReg();
+
+ return Register();
+ };
+
+ bool Modify = false;
+ int64_t C0;
+ if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
+ // Might be case 1.
+    // Guard against signed integer overflow of C0 + 1, which would be UB.
+    // (UINT64_MAX is larger, so unsigned overflow is not a concern here.)
+ if (C0 < INT64_MAX)
+ if (Register RegZ = searchConst(C0 + 1)) {
+ reverseBranchCondition(Cond);
+ Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
+ Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
+ // We might extend the live range of Z, clear its kill flag to
+ // account for this.
+ MRI.clearKillFlags(RegZ);
+ Modify = true;
+ }
+ } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
+ // Might be case 2.
+ // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
+ // when C0 is zero.
+ if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
+ if (Register RegZ = searchConst(C0 - 1)) {
+ reverseBranchCondition(Cond);
+ Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
+ Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
+ // We might extend the live range of Z, clear its kill flag to
+ // account for this.
+ MRI.clearKillFlags(RegZ);
+ Modify = true;
+ }
+ }
+
+ if (!Modify)
+ return false;
+
+ // Build the new branch and remove the old one.
+ BuildMI(*MBB, MI, MI.getDebugLoc(),
+ getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
+ .add(Cond[1])
+ .add(Cond[2])
+ .addMBB(TBB);
+ MI.eraseFromParent();
+
+ return true;
+}
+
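A worked instance of case 1 (C1 = C0 + 1), written as RISC-V assembly inside comments; register numbers are illustrative. Here Y holds C0 = 7 and is only used by the branch, and a register Z holding 8 is defined earlier in the block.

  // Before:
  //   li   a0, 7              // Y = C0, single use
  //   li   a1, 8              // Z = C0 + 1
  //   blt  a0, a2, .Ltarget   // blt Y, X
  // After (condition reversed, Z reused; the li of Y is now dead):
  //   li   a1, 8
  //   bge  a2, a1, .Ltarget   // bge X, Z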
MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
assert(MI.getDesc().isBranch() && "Unexpected opcode!");
@@ -1118,12 +1321,31 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
switch (Opcode) {
case RISCV::ADD: return RISCV::PseudoCCADD; break;
case RISCV::SUB: return RISCV::PseudoCCSUB; break;
+ case RISCV::SLL: return RISCV::PseudoCCSLL; break;
+ case RISCV::SRL: return RISCV::PseudoCCSRL; break;
+ case RISCV::SRA: return RISCV::PseudoCCSRA; break;
case RISCV::AND: return RISCV::PseudoCCAND; break;
case RISCV::OR: return RISCV::PseudoCCOR; break;
case RISCV::XOR: return RISCV::PseudoCCXOR; break;
+ case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
+ case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
+ case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
+ case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
+ case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
+ case RISCV::ORI: return RISCV::PseudoCCORI; break;
+ case RISCV::XORI: return RISCV::PseudoCCXORI; break;
+
case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
+ case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
+ case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
+ case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
+
+ case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
+ case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
+ case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
+ case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
}
return RISCV::INSTRUCTION_LIST_END;
@@ -1144,6 +1366,10 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
+  // Don't predicate the li idiom (ADDI rd, x0, imm).
+ if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).getReg() == RISCV::X0)
+ return nullptr;
// Check if MI has any other defs or physreg uses.
for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
// Reject frame index operands, PEI can't handle the predicated pseudos.
@@ -1290,7 +1516,20 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isCompressibleInst(MI, STI))
return 2;
}
- return get(Opcode).getSize();
+
+ switch (Opcode) {
+ case TargetOpcode::STACKMAP:
+ // The upper bound for a stackmap intrinsic is the full length of its shadow
+ return StackMapOpers(&MI).getNumPatchBytes();
+ case TargetOpcode::PATCHPOINT:
+ // The size of the patchpoint intrinsic is the number of bytes requested
+ return PatchPointOpers(&MI).getNumPatchBytes();
+ case TargetOpcode::STATEPOINT:
+ // The size of the statepoint intrinsic is the number of bytes requested
+ return StatepointOpers(&MI).getNumPatchBytes();
+ default:
+ return get(Opcode).getSize();
+ }
}
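The new cases return the patch-byte budget carried by the intrinsic itself, an upper bound used for size estimates such as branch relaxation rather than a count of real instructions; roughly:

  //   STACKMAP   <id>, 16, ...           -> getInstSizeInBytes() == 16
  //   PATCHPOINT <id>, 8, <target>, ...  -> getInstSizeInBytes() == 8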
unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
@@ -1372,15 +1611,6 @@ MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
return ForceMachineCombinerStrategy;
}
-void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
- MachineInstr &OldMI2,
- MachineInstr &NewMI1,
- MachineInstr &NewMI2) const {
- uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
- NewMI1.setFlags(IntersectedFlags);
- NewMI2.setFlags(IntersectedFlags);
-}
-
void RISCVInstrInfo::finalizeInsInstrs(
MachineInstr &Root, MachineCombinerPattern &P,
SmallVectorImpl<MachineInstr *> &InsInstrs) const {
@@ -1896,8 +2126,174 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
-// Return true if get the base operand, byte offset of an instruction and the
-// memory width. Width is the size of memory that is being loaded/stored.
+bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ switch (MemI.getOpcode()) {
+ default:
+ return false;
+ case RISCV::LB:
+ case RISCV::LBU:
+ case RISCV::LH:
+ case RISCV::LHU:
+ case RISCV::LW:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ break;
+ }
+
+ if (MemI.getOperand(0).getReg() == Reg)
+ return false;
+
+ if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
+ !AddrI.getOperand(2).isImm())
+ return false;
+
+ int64_t OldOffset = MemI.getOperand(2).getImm();
+ int64_t Disp = AddrI.getOperand(2).getImm();
+ int64_t NewOffset = OldOffset + Disp;
+ if (!STI.is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+
+ if (!isInt<12>(NewOffset))
+ return false;
+
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+}
+
+MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const {
+
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+}
+
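A worked example of the fold these two hooks enable; register names and offsets are illustrative. The pair is accepted because the combined displacement still fits in a signed 12-bit immediate:

  //   addi a1, a0, 16      // AddrI
  //   lw   a2, 4(a1)       // MemI, base = a1
  // =>
  //   lw   a2, 20(a0)      // emitLdStWithAddr, base = a0, disp = 16 + 4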
+bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
+ // Conservatively, only handle scalar loads/stores for now.
+ switch (LdSt.getOpcode()) {
+ case RISCV::LB:
+ case RISCV::LBU:
+ case RISCV::SB:
+ case RISCV::LH:
+ case RISCV::LHU:
+ case RISCV::FLH:
+ case RISCV::SH:
+ case RISCV::FSH:
+ case RISCV::LW:
+ case RISCV::LWU:
+ case RISCV::FLW:
+ case RISCV::SW:
+ case RISCV::FSW:
+ case RISCV::LD:
+ case RISCV::FLD:
+ case RISCV::SD:
+ case RISCV::FSD:
+ break;
+ default:
+ return false;
+ }
+ const MachineOperand *BaseOp;
+ OffsetIsScalable = false;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
+}
+
+// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
+// helper?
+static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
+ ArrayRef<const MachineOperand *> BaseOps1,
+ const MachineInstr &MI2,
+ ArrayRef<const MachineOperand *> BaseOps2) {
+ // Only examine the first "base" operand of each instruction, on the
+ // assumption that it represents the real base address of the memory access.
+ // Other operands are typically offsets or indices from this base address.
+ if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
+ return true;
+
+ if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
+ return false;
+
+ auto MO1 = *MI1.memoperands_begin();
+ auto MO2 = *MI2.memoperands_begin();
+ if (MO1->getAddrSpace() != MO2->getAddrSpace())
+ return false;
+
+ auto Base1 = MO1->getValue();
+ auto Base2 = MO2->getValue();
+ if (!Base1 || !Base2)
+ return false;
+ Base1 = getUnderlyingObject(Base1);
+ Base2 = getUnderlyingObject(Base2);
+
+ if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
+ return false;
+
+ return Base1 == Base2;
+}
+
+bool RISCVInstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
+ bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
+ unsigned NumBytes) const {
+ // If the mem ops (to be clustered) do not have the same base ptr, then they
+ // should not be clustered
+ if (!BaseOps1.empty() && !BaseOps2.empty()) {
+ const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
+ const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
+ return false;
+ } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
+ // If only one base op is empty, they do not have the same base ptr
+ return false;
+ }
+
+ // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
+ // indicate they likely share a cache line.
+ return ClusterSize <= 4;
+}
+
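For example, the following pair has identical base operands, so memOpsHaveSameBasePtr succeeds and, with ClusterSize within the limit of 4, the hook reports them as clusterable (offsets are illustrative):

  //   lw a1, 0(a0)
  //   lw a2, 4(a0)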
+// Set BaseReg (the base register operand), Offset (the byte offset being
+// accessed) and the access Width of the passed instruction that reads/writes
+// memory. Returns false if the instruction does not read/write memory or the
+// BaseReg/Offset/Width can't be determined. It is not guaranteed to recognise
+// base operands and offsets in all cases.
+// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
+// function) and set it as appropriate.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
@@ -1906,10 +2302,11 @@ bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
// Here we assume the standard RISC-V ISA, which uses a base+offset
// addressing mode. You'll need to relax these conditions to support custom
- // load/stores instructions.
+ // load/store instructions.
if (LdSt.getNumExplicitOperands() != 3)
return false;
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+ !LdSt.getOperand(2).isImm())
return false;
if (!LdSt.hasOneMemOperand())
@@ -2132,6 +2529,23 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
return It;
}
+std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ // TODO: Handle cases where Reg is a super- or sub-register of the
+ // destination register.
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
+ return std::nullopt;
+
+ // Don't consider ADDIW as a candidate because the caller may not be aware
+ // of its sign extension behaviour.
+ if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm())
+ return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
+
+ return std::nullopt;
+}
+
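A hedged usage sketch; DefReg, Offset and TrackedReg are placeholders. Callers such as debug-value tracking can use the returned pair to keep describing a value as a base register plus constant instead of dropping its location:

  if (std::optional<RegImmPair> RI = TII->isAddImmediate(MI, DefReg)) {
    // MI is "addi DefReg, RI->Reg, RI->Imm".
    Offset += RI->Imm;
    TrackedReg = RI->Reg;
  }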
// MIR printer helper function to annotate Operands with a comment.
std::string RISCVInstrInfo::createMIROperandComment(
const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
@@ -2202,9 +2616,9 @@ std::string RISCVInstrInfo::createMIROperandComment(
case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)
#define CASE_VFMA_SPLATS(OP) \
- CASE_VFMA_OPCODE_LMULS_MF4(OP, VF16): \
- case CASE_VFMA_OPCODE_LMULS_MF2(OP, VF32): \
- case CASE_VFMA_OPCODE_LMULS_M1(OP, VF64)
+ CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64)
// clang-format on
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
@@ -2365,9 +2779,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VF16) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VF32) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VF64)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
@@ -2591,6 +3005,7 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(MI.getOperand(3))
.add(MI.getOperand(4))
.add(MI.getOperand(5));
+ break;
}
}
MIB.copyImplicitOps(MI);
@@ -2836,3 +3251,123 @@ bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
return FrmOp1.getImm() == FrmOp2.getImm();
}
+
+std::optional<unsigned>
+RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
+ // TODO: Handle Zvbb instructions
+ switch (Opcode) {
+ default:
+ return std::nullopt;
+
+ // 11.6. Vector Single-Width Shift Instructions
+ case RISCV::VSLL_VX:
+ case RISCV::VSRL_VX:
+ case RISCV::VSRA_VX:
+ // 12.4. Vector Single-Width Scaling Shift Instructions
+ case RISCV::VSSRL_VX:
+ case RISCV::VSSRA_VX:
+ // Only the low lg2(SEW) bits of the shift-amount value are used.
+ return Log2SEW;
+
+ // 11.7 Vector Narrowing Integer Right Shift Instructions
+ case RISCV::VNSRL_WX:
+ case RISCV::VNSRA_WX:
+ // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+ case RISCV::VNCLIPU_WX:
+ case RISCV::VNCLIP_WX:
+ // Only the low lg2(2*SEW) bits of the shift-amount value are used.
+ return Log2SEW + 1;
+
+ // 11.1. Vector Single-Width Integer Add and Subtract
+ case RISCV::VADD_VX:
+ case RISCV::VSUB_VX:
+ case RISCV::VRSUB_VX:
+ // 11.2. Vector Widening Integer Add/Subtract
+ case RISCV::VWADDU_VX:
+ case RISCV::VWSUBU_VX:
+ case RISCV::VWADD_VX:
+ case RISCV::VWSUB_VX:
+ case RISCV::VWADDU_WX:
+ case RISCV::VWSUBU_WX:
+ case RISCV::VWADD_WX:
+ case RISCV::VWSUB_WX:
+ // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+ case RISCV::VADC_VXM:
+ case RISCV::VADC_VIM:
+ case RISCV::VMADC_VXM:
+ case RISCV::VMADC_VIM:
+ case RISCV::VMADC_VX:
+ case RISCV::VSBC_VXM:
+ case RISCV::VMSBC_VXM:
+ case RISCV::VMSBC_VX:
+ // 11.5 Vector Bitwise Logical Instructions
+ case RISCV::VAND_VX:
+ case RISCV::VOR_VX:
+ case RISCV::VXOR_VX:
+ // 11.8. Vector Integer Compare Instructions
+ case RISCV::VMSEQ_VX:
+ case RISCV::VMSNE_VX:
+ case RISCV::VMSLTU_VX:
+ case RISCV::VMSLT_VX:
+ case RISCV::VMSLEU_VX:
+ case RISCV::VMSLE_VX:
+ case RISCV::VMSGTU_VX:
+ case RISCV::VMSGT_VX:
+ // 11.9. Vector Integer Min/Max Instructions
+ case RISCV::VMINU_VX:
+ case RISCV::VMIN_VX:
+ case RISCV::VMAXU_VX:
+ case RISCV::VMAX_VX:
+ // 11.10. Vector Single-Width Integer Multiply Instructions
+ case RISCV::VMUL_VX:
+ case RISCV::VMULH_VX:
+ case RISCV::VMULHU_VX:
+ case RISCV::VMULHSU_VX:
+ // 11.11. Vector Integer Divide Instructions
+ case RISCV::VDIVU_VX:
+ case RISCV::VDIV_VX:
+ case RISCV::VREMU_VX:
+ case RISCV::VREM_VX:
+ // 11.12. Vector Widening Integer Multiply Instructions
+ case RISCV::VWMUL_VX:
+ case RISCV::VWMULU_VX:
+ case RISCV::VWMULSU_VX:
+ // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+ case RISCV::VMACC_VX:
+ case RISCV::VNMSAC_VX:
+ case RISCV::VMADD_VX:
+ case RISCV::VNMSUB_VX:
+ // 11.14. Vector Widening Integer Multiply-Add Instructions
+ case RISCV::VWMACCU_VX:
+ case RISCV::VWMACC_VX:
+ case RISCV::VWMACCSU_VX:
+ case RISCV::VWMACCUS_VX:
+ // 11.15. Vector Integer Merge Instructions
+ case RISCV::VMERGE_VXM:
+ // 11.16. Vector Integer Move Instructions
+ case RISCV::VMV_V_X:
+ // 12.1. Vector Single-Width Saturating Add and Subtract
+ case RISCV::VSADDU_VX:
+ case RISCV::VSADD_VX:
+ case RISCV::VSSUBU_VX:
+ case RISCV::VSSUB_VX:
+ // 12.2. Vector Single-Width Averaging Add and Subtract
+ case RISCV::VAADDU_VX:
+ case RISCV::VAADD_VX:
+ case RISCV::VASUBU_VX:
+ case RISCV::VASUB_VX:
+ // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+ case RISCV::VSMUL_VX:
+ // 16.1. Integer Scalar Move Instructions
+ case RISCV::VMV_S_X:
+ return 1U << Log2SEW;
+ }
+}
+
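A small usage sketch; the opcode and SEW are illustrative. For vsll.vx at SEW = 32, Log2SEW is 5 and only the low five bits of the scalar operand are demanded, so whatever produces that scalar can be simplified accordingly:

  if (std::optional<unsigned> Bits =
          RISCV::getVectorLowDemandedScalarBits(RISCV::VSLL_VX, /*Log2SEW=*/5))
    assert(*Bits == 5 && "shift amounts only demand log2(SEW) bits");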
+unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
+ const RISCVVPseudosTable::PseudoInfo *RVV =
+ RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
+ if (!RVV)
+ return 0;
+ return RVV->BaseInstr;
+}
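And a sketch for the second helper, mapping a selected RVV pseudo back to its MC-level base instruction; MI and IsVectorShift are placeholders:

  // 0 means MI is not an RVV pseudo.
  if (unsigned MCOpcode = RISCV::getRVVMCOpcode(MI.getOpcode()))
    IsVectorShift = MCOpcode == RISCV::VSLL_VX || MCOpcode == RISCV::VSRL_VX ||
                    MCOpcode == RISCV::VSRA_VX;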
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 99c907a98121..7e1d3f311806 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -43,6 +43,7 @@ enum CondCode {
};
CondCode getOppositeBranchCondition(CondCode);
+unsigned getBrCond(CondCode CC);
} // end of namespace RISCVCC
@@ -63,6 +64,10 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
unsigned &MemBytes) const override;
+ void copyPhysRegVector(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
+ unsigned Opc, unsigned NF = 1) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
bool KillSrc) const override;
@@ -91,7 +96,8 @@ public:
// Materializes the given integer Val into DstReg.
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
- MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags,
+ bool DstRenamable = false, bool DstIsDead = false) const;
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
@@ -116,6 +122,8 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ bool optimizeCondBranch(MachineInstr &MI) const override;
+
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
bool isBranchOffsetInRange(unsigned BranchOpc,
@@ -137,6 +145,25 @@ public:
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override;
+
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override;
+
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ int64_t Offset1, bool OffsetIsScalable1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ int64_t Offset2, bool OffsetIsScalable2,
+ unsigned ClusterSize,
+ unsigned NumBytes) const override;
+
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
@@ -182,6 +209,9 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
outliner::Candidate &C) const override;
+ std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const override;
+
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
@@ -206,9 +236,6 @@ public:
MachineTraceStrategy getMachineCombinerTraceStrategy() const override;
- void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
- MachineInstr &NewMI1,
- MachineInstr &NewMI2) const override;
bool
getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns,
@@ -265,6 +292,15 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
// one of the instructions does not have rounding mode, false will be returned.
bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2);
+// If \p Opcode is a .vx vector instruction, returns the number of low bits of
+// the scalar .x operand that are used for a given \p Log2SEW. Otherwise
+// returns std::nullopt.
+std::optional<unsigned> getVectorLowDemandedScalarBits(uint16_t Opcode,
+ unsigned Log2SEW);
+
+// Returns the MC opcode of an RVV pseudo instruction, or 0 if the given opcode
+// is not an RVV pseudo.
+unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode);
+
// Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
static constexpr int64_t VLMaxSentinel = -1LL;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index e58e3412aea3..edc08187d8f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -84,18 +84,12 @@ def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE",
def riscv_add_lo : SDNode<"RISCVISD::ADD_LO", SDTIntBinOp>;
def riscv_hi : SDNode<"RISCVISD::HI", SDTIntUnaryOp>;
def riscv_lla : SDNode<"RISCVISD::LLA", SDTIntUnaryOp>;
-def riscv_lga : SDNode<"RISCVISD::LGA", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL",
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>]>>;
-def riscv_la_tls_ie : SDNode<"RISCVISD::LA_TLS_IE", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def riscv_la_tls_gd : SDNode<"RISCVISD::LA_TLS_GD", SDTIntUnaryOp>;
-
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -149,18 +143,40 @@ class UImmAsmOperand<int width, string suffix = "">
: ImmAsmOperand<"U", width, suffix> {
}
+class RISCVOp<ValueType vt = XLenVT> : Operand<vt> {
+ let OperandNamespace = "RISCVOp";
+}
+
+class RISCVUImmOp<int bitsNum> : RISCVOp {
+ let ParserMatchClass = UImmAsmOperand<bitsNum>;
+ let DecoderMethod = "decodeUImmOperand<" # bitsNum # ">";
+ let OperandType = "OPERAND_UIMM" # bitsNum;
+}
+
+class RISCVUImmLeafOp<int bitsNum> :
+ RISCVUImmOp<bitsNum>, ImmLeaf<XLenVT, "return isUInt<" # bitsNum # ">(Imm);">;
+
+class RISCVSImmOp<int bitsNum> : RISCVOp {
+ let ParserMatchClass = SImmAsmOperand<bitsNum>;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<" # bitsNum # ">";
+ let OperandType = "OPERAND_SIMM" # bitsNum;
+}
+
+class RISCVSImmLeafOp<int bitsNum> :
+ RISCVSImmOp<bitsNum>, ImmLeaf<XLenVT, "return isInt<" # bitsNum # ">(Imm);">;
+
def FenceArg : AsmOperandClass {
let Name = "FenceArg";
let RenderMethod = "addFenceArgOperands";
let ParserMethod = "parseFenceArg";
}
-def fencearg : Operand<XLenVT> {
+def fencearg : RISCVOp {
let ParserMatchClass = FenceArg;
let PrintMethod = "printFenceArg";
let DecoderMethod = "decodeUImmOperand<4>";
let OperandType = "OPERAND_UIMM4";
- let OperandNamespace = "RISCVOp";
}
def UImmLog2XLenAsmOperand : AsmOperandClass {
@@ -169,7 +185,7 @@ def UImmLog2XLenAsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidUImmLog2XLen";
}
-def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+def uimmlog2xlen : RISCVOp, ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return isUInt<6>(Imm);
return isUInt<5>(Imm);
@@ -186,21 +202,17 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
return isUInt<5>(Imm);
}];
let OperandType = "OPERAND_UIMMLOG2XLEN";
- let OperandNamespace = "RISCVOp";
}
-def uimm1 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<1>;
- let DecoderMethod = "decodeUImmOperand<1>";
- let OperandType = "OPERAND_UIMM1";
- let OperandNamespace = "RISCVOp";
+def InsnDirectiveOpcode : AsmOperandClass {
+ let Name = "InsnDirectiveOpcode";
+ let ParserMethod = "parseInsnDirectiveOpcode";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImm";
}
-def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<2>;
- let DecoderMethod = "decodeUImmOperand<2>";
- let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
+def uimm1 : RISCVUImmLeafOp<1>;
+def uimm2 : RISCVUImmLeafOp<2> {
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -208,75 +220,22 @@ def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
return isUInt<2>(Imm);
}];
}
-
-def uimm3 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<3>;
- let DecoderMethod = "decodeUImmOperand<3>";
- let OperandType = "OPERAND_UIMM3";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm4 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<4>;
- let DecoderMethod = "decodeUImmOperand<4>";
- let OperandType = "OPERAND_UIMM4";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<5>;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_UIMM5";
- let OperandNamespace = "RISCVOp";
-}
-
-def InsnDirectiveOpcode : AsmOperandClass {
- let Name = "InsnDirectiveOpcode";
- let ParserMethod = "parseInsnDirectiveOpcode";
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isImm";
-}
-
-def uimm6 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<6>;
- let DecoderMethod = "decodeUImmOperand<6>";
- let OperandType = "OPERAND_UIMM6";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm7_opcode : Operand<XLenVT> {
+def uimm3 : RISCVUImmOp<3>;
+def uimm4 : RISCVUImmOp<4>;
+def uimm5 : RISCVUImmLeafOp<5>;
+def uimm6 : RISCVUImmLeafOp<6>;
+def uimm7_opcode : RISCVUImmOp<7> {
let ParserMatchClass = InsnDirectiveOpcode;
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_UIMM7";
- let OperandNamespace = "RISCVOp";
}
-
-def uimm7 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<7>;
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_UIMM7";
- let OperandNamespace = "RISCVOp";
-}
-
-def uimm8 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<8>;
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_UIMM8";
- let OperandNamespace = "RISCVOp";
-}
-
-def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<12>;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<12>";
+def uimm7 : RISCVUImmOp<7>;
+def uimm8 : RISCVUImmOp<8>;
+def simm12 : RISCVSImmLeafOp<12> {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
return isInt<12>(Imm);
return MCOp.isBareSymbolRef();
}];
- let OperandType = "OPERAND_SIMM12";
- let OperandNamespace = "RISCVOp";
}
// A 12-bit signed immediate which cannot fit in 6-bit signed immediate,
@@ -299,26 +258,38 @@ def simm13_lsb0 : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
}
-class UImm20Operand : Operand<XLenVT> {
+class UImm20Operand : RISCVOp {
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<20>";
+ let OperandType = "OPERAND_UIMM20";
+}
+
+class UImm20OperandMaybeSym : UImm20Operand {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
return isUInt<20>(Imm);
return MCOp.isBareSymbolRef();
}];
- let OperandType = "OPERAND_UIMM20";
- let OperandNamespace = "RISCVOp";
}
-def uimm20_lui : UImm20Operand {
+def uimm20_lui : UImm20OperandMaybeSym {
let ParserMatchClass = UImmAsmOperand<20, "LUI">;
}
-def uimm20_auipc : UImm20Operand {
+def uimm20_auipc : UImm20OperandMaybeSym {
let ParserMatchClass = UImmAsmOperand<20, "AUIPC">;
}
+def uimm20 : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20>;
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isUInt<20>(Imm);
+ }];
+}
+
def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> {
let ParserMethod = "parseJALOffset";
}
@@ -392,12 +363,11 @@ def CSRSystemRegister : AsmOperandClass {
let DiagnosticType = "InvalidCSRSystemRegister";
}
-def csr_sysreg : Operand<XLenVT> {
+def csr_sysreg : RISCVOp {
let ParserMatchClass = CSRSystemRegister;
let PrintMethod = "printCSRSystemRegister";
let DecoderMethod = "decodeUImmOperand<12>";
let OperandType = "OPERAND_UIMM12";
- let OperandNamespace = "RISCVOp";
}
// A parameterized register class alternative to i32imm/i64imm from Target.td.
@@ -1105,6 +1075,10 @@ def : MnemonicAlias<"sbreak", "ebreak">;
// that don't support this alias.
def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
+let Predicates = [HasStdExtZicfilp] in {
+def : InstAlias<"lpad $imm20", (AUIPC X0, uimm20:$imm20)>;
+}
+
//===----------------------------------------------------------------------===//
// .insn directive instructions
//===----------------------------------------------------------------------===//
@@ -1209,11 +1183,13 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
- : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT,
+ ValueType vt2 = XLenVT>
+ : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
-class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType>
- : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)),
+class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
+ ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (vt GPR:$rs1), ImmType:$imm)),
(Inst GPR:$rs1, ImmType:$imm)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: PatGprImm<OpNode, Inst, simm12>;
@@ -1232,7 +1208,9 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
}]>;
def zexti32 : ComplexPattern<i64, 1, "selectZExtBits<32>">;
def zexti16 : ComplexPattern<XLenVT, 1, "selectZExtBits<16>">;
+def zexti16i32 : ComplexPattern<i32, 1, "selectZExtBits<16>">;
def zexti8 : ComplexPattern<XLenVT, 1, "selectZExtBits<8>">;
+def zexti8i32 : ComplexPattern<i32, 1, "selectZExtBits<8>">;
def ext : PatFrags<(ops node:$A), [(sext node:$A), (zext node:$A)]>;
@@ -1264,6 +1242,10 @@ def anyext_oneuse : unop_oneuse<anyext>;
def ext_oneuse : unop_oneuse<ext>;
def fpext_oneuse : unop_oneuse<any_fpextend>;
+def 33signbits_node : PatLeaf<(i64 GPR:$src), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N, 0)) > 32;
+}]>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -1421,6 +1403,21 @@ def PseudoCCSUB : Pseudo<(outs GPR:$dst),
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSLL : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRL : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRA : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
def PseudoCCAND : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
@@ -1437,6 +1434,42 @@ def PseudoCCXOR : Pseudo<(outs GPR:$dst),
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCADDI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSLLI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRLI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRAI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCANDI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCORI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCXORI : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+
// RV64I instructions
def PseudoCCADDW : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
@@ -1448,6 +1481,42 @@ def PseudoCCSUBW : Pseudo<(outs GPR:$dst),
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSLLW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRLW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+def PseudoCCSRAW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU, ReadSFBALU]>;
+
+def PseudoCCADDIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSLLIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRLIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
+def PseudoCCSRAIW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
+ ReadSFBALU]>;
}
multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
@@ -1603,6 +1672,16 @@ def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), [],
"jump", "$target, $rd">,
Sched<[WriteIALU, WriteJalr, ReadJalr]>;
+// Pseudo for a rematerializable constant materialization sequence.
+// This is an experimental feature enabled by
+// -riscv-use-rematerializable-movimm in RISCVISelDAGToDAG.cpp
+// It will be expanded after register allocation.
+// FIXME: The scheduling information does not reflect the multiple instructions.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 1,
+ isPseudo = 1, isReMaterializable = 1, IsSignExtendingOpW = 1 in
+def PseudoMovImm : Pseudo<(outs GPR:$dst), (ins i32imm:$imm), []>,
+ Sched<[WriteIALU]>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
@@ -1623,8 +1702,6 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
def PseudoLGA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lga", "$dst, $src">;
-def : Pat<(iPTR (riscv_lga tglobaladdr:$in)), (PseudoLGA tglobaladdr:$in)>;
-
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
@@ -1641,16 +1718,11 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
-def : Pat<(iPTR (riscv_la_tls_ie tglobaltlsaddr:$in)),
- (PseudoLA_TLS_IE tglobaltlsaddr:$in)>;
-
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
-def : Pat<(riscv_la_tls_gd tglobaltlsaddr:$in),
- (PseudoLA_TLS_GD tglobaltlsaddr:$in)>;
/// Sign/Zero Extends
@@ -1680,7 +1752,7 @@ def : LdPat<sextloadi8, LB>;
def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<sextloadi16, LH>;
def : LdPat<extloadi16, LH>;
-def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
+def : LdPat<load, LW, i32>;
def : LdPat<zextloadi8, LBU>;
def : LdPat<zextloadi16, LHU>;
@@ -1694,7 +1766,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
def : StPat<truncstorei8, SB, GPR, XLenVT>;
def : StPat<truncstorei16, SH, GPR, XLenVT>;
-def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
+def : StPat<store, SW, GPR, i32>;
/// Fences
@@ -1796,6 +1868,12 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
}
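+// PatFrag to check that all users of the result only use its lower 16 bits,
+// mirroring binop_allwusers below.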
+class binop_allhusers<SDPatternOperator operator>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (XLenVT (operator node:$lhs, node:$rhs)), [{
+ return hasAllHUsers(Node);
+}]>;
+
// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
// if only the lower 32 bits of their result is used.
class binop_allwusers<SDPatternOperator operator>
@@ -1904,9 +1982,9 @@ def : Pat<(debugtrap), (EBREAK)>;
let Predicates = [IsRV64], Uses = [X5],
Defs = [X1, X6, X7, X28, X29, X30, X31] in
-def HWASAN_CHECK_MEMACCESS_SHORTGRANULES
+def HWASAN_CHECK_MEMACCESS_SHORTGRANULES
: Pseudo<(outs), (ins GPRJALR:$ptr, i32imm:$accessinfo),
- [(int_hwasan_check_memaccess_shortgranules X5, GPRJALR:$ptr,
+ [(int_hwasan_check_memaccess_shortgranules (i64 X5), GPRJALR:$ptr,
(i32 timm:$accessinfo))]>;
// This gets lowered into a 20-byte instruction sequence (at most)
@@ -1928,6 +2006,86 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
(AddiPairImmSmall AddiPair:$rs2))>;
}
+let Predicates = [HasShortForwardBranchOpt] in
+def : Pat<(XLenVT (abs GPR:$rs1)),
+ (PseudoCCSUB (XLenVT GPR:$rs1), (XLenVT X0), /* COND_LT */ 2,
+ (XLenVT GPR:$rs1), (XLenVT X0), (XLenVT GPR:$rs1))>;
+let Predicates = [HasShortForwardBranchOpt, IsRV64] in
+def : Pat<(sext_inreg (abs 33signbits_node:$rs1), i32),
+ (PseudoCCSUBW (i64 GPR:$rs1), (i64 X0), /* COND_LT */ 2,
+ (i64 GPR:$rs1), (i64 X0), (i64 GPR:$rs1))>;
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+def simm12i32 : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
+
+// Convert from i32 immediate to i64 target immediate to make SelectionDAG type
+// checking happy so we can use ADDIW which expects an XLen immediate.
+def as_i64imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ return Known.isNonNegative();
+}]>;
+
+let Predicates = [IsRV64] in {
+def : LdPat<sextloadi8, LB, i32>;
+def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
+def : LdPat<sextloadi16, LH, i32>;
+def : LdPat<extloadi16, LH, i32>;
+def : LdPat<zextloadi8, LBU, i32>;
+def : LdPat<zextloadi16, LHU, i32>;
+
+def : StPat<truncstorei8, SB, GPR, i32>;
+def : StPat<truncstorei16, SH, GPR, i32>;
+
+def : Pat<(anyext GPR:$src), (COPY GPR:$src)>;
+def : Pat<(sext GPR:$src), (ADDIW GPR:$src, 0)>;
+def : Pat<(trunc GPR:$src), (COPY GPR:$src)>;
+
+def : PatGprGpr<add, ADDW, i32, i32>;
+def : PatGprGpr<sub, SUBW, i32, i32>;
+def : PatGprGpr<and, AND, i32, i32>;
+def : PatGprGpr<or, OR, i32, i32>;
+def : PatGprGpr<xor, XOR, i32, i32>;
+def : PatGprGpr<shiftopw<shl>, SLLW, i32, i64>;
+def : PatGprGpr<shiftopw<srl>, SRLW, i32, i64>;
+def : PatGprGpr<shiftopw<sra>, SRAW, i32, i64>;
+
+def : Pat<(i32 (add GPR:$rs1, simm12i32:$imm)),
+ (ADDIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
+def : Pat<(i32 (and GPR:$rs1, simm12i32:$imm)),
+ (ANDI GPR:$rs1, (i64 (as_i64imm $imm)))>;
+def : Pat<(i32 (or GPR:$rs1, simm12i32:$imm)),
+ (ORI GPR:$rs1, (i64 (as_i64imm $imm)))>;
+def : Pat<(i32 (xor GPR:$rs1, simm12i32:$imm)),
+ (XORI GPR:$rs1, (i64 (as_i64imm $imm)))>;
+
+def : PatGprImm<shl, SLLIW, uimm5, i32>;
+def : PatGprImm<srl, SRLIW, uimm5, i32>;
+def : PatGprImm<sra, SRAIW, uimm5, i32>;
+
+def : Pat<(i32 (and GPR:$rs, TrailingOnesMask:$mask)),
+ (SRLI (SLLI $rs, (i64 (XLenSubTrailingOnes $mask))),
+ (i64 (XLenSubTrailingOnes $mask)))>;
+
+// Use sext if the sign bit of the input is 0.
+def : Pat<(zext_is_sext GPR:$src), (ADDIW GPR:$src, 0)>;
+}
+
+let Predicates = [IsRV64, NotHasStdExtZba] in {
+def : Pat<(zext GPR:$src), (SRLI (SLLI GPR:$src, 32), 32)>;
+
+// If we're shifting a 32-bit zero extended value left by 0-31 bits, use 2
+// shifts instead of 3. This can occur when unsigned is used to index an array.
+def : Pat<(shl (zext GPR:$rs), uimm5:$shamt),
+ (SRLI (SLLI GPR:$rs, 32), (ImmSubFrom32 uimm5:$shamt))>;
+}
+
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
@@ -1951,7 +2109,6 @@ include "RISCVInstrInfoZk.td"
// Vector
include "RISCVInstrInfoV.td"
-include "RISCVInstrInfoZvfbf.td"
include "RISCVInstrInfoZvk.td"
// Integer
@@ -1970,3 +2127,9 @@ include "RISCVInstrInfoXVentana.td"
include "RISCVInstrInfoXTHead.td"
include "RISCVInstrInfoXSf.td"
include "RISCVInstrInfoXCV.td"
+
+//===----------------------------------------------------------------------===//
+// Global ISel
+//===----------------------------------------------------------------------===//
+
+include "RISCVInstrGISel.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 8421109b8514..c8301fcc6b93 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -44,17 +44,11 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
-class AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
- ValueType vt = XLenVT>
- : Pat<(StoreOp (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12),
- (vt StTy:$rs2)),
- (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
-
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtA], IsSignExtendingOpW = 1 in {
defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>;
defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">,
Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>;
@@ -123,21 +117,21 @@ let Predicates = [HasAtomicLdSt] in {
def : LdPat<atomic_load_16, LH>;
def : LdPat<atomic_load_32, LW>;
- def : AtomicStPat<atomic_store_8, SB, GPR>;
- def : AtomicStPat<atomic_store_16, SH, GPR>;
- def : AtomicStPat<atomic_store_32, SW, GPR>;
+ def : StPat<atomic_store_8, SB, GPR, XLenVT>;
+ def : StPat<atomic_store_16, SH, GPR, XLenVT>;
+ def : StPat<atomic_store_32, SW, GPR, XLenVT>;
}
let Predicates = [HasAtomicLdSt, IsRV64] in {
def : LdPat<atomic_load_64, LD, i64>;
- def : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+ def : StPat<atomic_store_64, SD, GPR, i64>;
}
-let Predicates = [HasStdExtA] in {
-
/// AMOs
-multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
+multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+ list<Predicate> ExtraPreds = []> {
+let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
!cast<RVInst>(BaseInst), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
@@ -149,6 +143,19 @@ multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
!cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
}
+let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst), vt>;
+}
+}
defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
@@ -160,16 +167,7 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
-def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
- (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+let Predicates = [HasStdExtA] in {
/// Pseudo AMOs
@@ -318,30 +316,17 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
} // Predicates = [HasStdExtA]
-let Predicates = [HasStdExtA, IsRV64] in {
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64>;
-defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64>;
-defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64>;
-defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64>;
-defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64>;
-defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64>;
-defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>;
-defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>;
-defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>;
-
-/// 64-bit AMOs
-
-def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
- (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+let Predicates = [HasStdExtA, IsRV64] in {
/// 64-bit pseudo AMOs
@@ -387,3 +372,61 @@ def : Pat<(int_riscv_masked_cmpxchg_i64
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
} // Predicates = [HasStdExtA, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+class PatGprGprA<SDPatternOperator OpNode, RVInst Inst, ValueType vt>
+ : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+
+multiclass AMOPat2<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+ list<Predicate> ExtraPreds = []> {
+let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ"), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL"), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+}
+let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst), vt>;
+ def : PatGprGprA<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst), vt>;
+}
+}
+
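For orientation, here is a hand-derived sketch of what one of the defm lines below produces when the NotHasStdExtZtso predicates apply; the PatGprGprA/AMOPat2 classes above generate these records anonymously, so the spelled-out form is illustrative only.

// Sketch: the effective "acquire" pattern from
//   defm : AMOPat2<"atomic_swap_32", "AMOSWAP_W", i32>;
def : Pat<(i32 (atomic_swap_32_acquire (XLenVT GPR:$rs1), (i32 GPR:$rs2))),
          (AMOSWAP_W_AQ GPR:$rs1, GPR:$rs2)>;
// With HasStdExtZtso, every ordering instead selects the plain AMOSWAP_W.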
+defm : AMOPat2<"atomic_swap_32", "AMOSWAP_W", i32>;
+defm : AMOPat2<"atomic_load_add_32", "AMOADD_W", i32>;
+defm : AMOPat2<"atomic_load_and_32", "AMOAND_W", i32>;
+defm : AMOPat2<"atomic_load_or_32", "AMOOR_W", i32>;
+defm : AMOPat2<"atomic_load_xor_32", "AMOXOR_W", i32>;
+defm : AMOPat2<"atomic_load_max_32", "AMOMAX_W", i32>;
+defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>;
+defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>;
+defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>;
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>;
+
+let Predicates = [HasAtomicLdSt] in {
+ def : LdPat<atomic_load_8, LB, i32>;
+ def : LdPat<atomic_load_16, LH, i32>;
+ def : LdPat<atomic_load_32, LW, i32>;
+
+ def : StPat<atomic_store_8, SB, GPR, i32>;
+ def : StPat<atomic_store_16, SH, GPR, i32>;
+ def : StPat<atomic_store_32, SW, GPR, i32>;
+}
+
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 74439bb67c61..07137031d9fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -18,7 +18,7 @@ def UImmLog2XLenNonZeroAsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidUImmLog2XLenNonZero";
}
-def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+def uimmlog2xlennonzero : RISCVOp, ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return isUInt<6>(Imm) && (Imm != 0);
return isUInt<5>(Imm) && (Imm != 0);
@@ -27,7 +27,6 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
// TODO: should ensure invalid shamt is rejected when decoding.
let DecoderMethod = "decodeUImmNonZeroOperand<6>";
let OperandType = "OPERAND_UIMMLOG2XLEN_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -38,12 +37,7 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
}];
}
-def simm6 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<6>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<6>;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<6>";
- let OperandType = "OPERAND_SIMM6";
- let OperandNamespace = "RISCVOp";
+def simm6 : RISCVSImmLeafOp<6> {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -52,13 +46,12 @@ def simm6 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<6>(Imm);}]> {
}];
}
-def simm6nonzero : Operand<XLenVT>,
+def simm6nonzero : RISCVOp,
ImmLeaf<XLenVT, [{return (Imm != 0) && isInt<6>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<6, "NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmNonZeroOperand<6>";
let OperandType = "OPERAND_SIMM6_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -67,11 +60,10 @@ def simm6nonzero : Operand<XLenVT>,
}];
}
-def immzero : Operand<XLenVT>,
+def immzero : RISCVOp,
ImmLeaf<XLenVT, [{return (Imm == 0);}]> {
let ParserMatchClass = ImmZeroAsmOperand;
let OperandType = "OPERAND_ZERO";
- let OperandNamespace = "RISCVOp";
}
def CLUIImmAsmOperand : AsmOperandClass {
@@ -86,7 +78,7 @@ def CLUIImmAsmOperand : AsmOperandClass {
// loaded into bits 17-12 of the destination register and sign extended from
// bit 17. Therefore, this 6-bit immediate can represent values in the ranges
// [1, 31] and [0xfffe0, 0xfffff].
-def c_lui_imm : Operand<XLenVT>,
+def c_lui_imm : RISCVOp,
ImmLeaf<XLenVT, [{return (Imm != 0) &&
(isUInt<5>(Imm) ||
(Imm >= 0xfffe0 && Imm <= 0xfffff));}]> {
@@ -94,7 +86,6 @@ def c_lui_imm : Operand<XLenVT>,
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeCLUIImmOperand";
let OperandType = "OPERAND_CLUI_IMM";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -105,13 +96,12 @@ def c_lui_imm : Operand<XLenVT>,
}
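A worked example of the c_lui_imm ranges described above (a sketch following the standard c.lui semantics):

// "c.lui a0, 31"      ->  a0 = 31 << 12         = 0x1f000
// "c.lui a0, 0xfffff" ->  a0 = sext(0xfffff000) = -4096
// An immediate of 0 is not representable: c.lui requires a non-zero immediate.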
// A 7-bit unsigned immediate where the least significant two bits are zero.
-def uimm7_lsb00 : Operand<XLenVT>,
+def uimm7_lsb00 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<7, "Lsb00">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<7>";
let OperandType = "OPERAND_UIMM7_LSB00";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -121,13 +111,12 @@ def uimm7_lsb00 : Operand<XLenVT>,
}
// An 8-bit unsigned immediate where the least significant two bits are zero.
-def uimm8_lsb00 : Operand<XLenVT>,
+def uimm8_lsb00 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<8, "Lsb00">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<8>";
let OperandType = "OPERAND_UIMM8_LSB00";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -137,13 +126,12 @@ def uimm8_lsb00 : Operand<XLenVT>,
}
// An 8-bit unsigned immediate where the least significant three bits are zero.
-def uimm8_lsb000 : Operand<XLenVT>,
+def uimm8_lsb000 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<8, "Lsb000">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<8>";
let OperandType = "OPERAND_UIMM8_LSB000";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -170,13 +158,12 @@ def simm9_lsb0 : Operand<OtherVT>,
}
// A 9-bit unsigned immediate where the least significant three bits are zero.
-def uimm9_lsb000 : Operand<XLenVT>,
+def uimm9_lsb000 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<9>";
let OperandType = "OPERAND_UIMM9_LSB000";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -187,14 +174,13 @@ def uimm9_lsb000 : Operand<XLenVT>,
// A 10-bit unsigned immediate where the least significant two bits are zero
// and the immediate can't be zero.
-def uimm10_lsb00nonzero : Operand<XLenVT>,
+def uimm10_lsb00nonzero : RISCVOp,
ImmLeaf<XLenVT,
[{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmNonZeroOperand<10>";
let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -204,14 +190,13 @@ def uimm10_lsb00nonzero : Operand<XLenVT>,
}
// A 10-bit signed immediate where the least significant four bits are zero
// and the immediate can't be zero.
-def simm10_lsb0000nonzero : Operand<XLenVT>,
+def simm10_lsb0000nonzero : RISCVOp,
ImmLeaf<XLenVT,
[{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmNonZeroOperand<10>";
let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -243,11 +228,10 @@ def InsnCDirectiveOpcode : AsmOperandClass {
let PredicateMethod = "isImm";
}
-def uimm2_opcode : Operand<XLenVT> {
+def uimm2_opcode : RISCVOp {
let ParserMatchClass = InsnCDirectiveOpcode;
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
}
//===----------------------------------------------------------------------===//
@@ -972,8 +956,14 @@ def : CompressPat<(JAL X0, simm12_lsb0:$offset),
(C_J simm12_lsb0:$offset)>;
def : CompressPat<(BEQ GPRC:$rs1, X0, simm9_lsb0:$imm),
(C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>;
+let isCompressOnly = true in
+def : CompressPat<(BEQ X0, GPRC:$rs1, simm9_lsb0:$imm),
+ (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>;
def : CompressPat<(BNE GPRC:$rs1, X0, simm9_lsb0:$imm),
(C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>;
+let isCompressOnly = true in
+def : CompressPat<(BNE X0, GPRC:$rs1, simm9_lsb0:$imm),
+ (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>;
} // Predicates = [HasStdExtCOrZca]
// Quadrant 2
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 7a79e3ca6a2f..6af710049a9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -78,7 +78,7 @@ def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
} // Predicates = [HasStdExtD]
foreach Ext = DExts in {
- let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
+ let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64Addend] in {
defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", Ext>;
defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", Ext>;
defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", Ext>;
@@ -115,8 +115,8 @@ foreach Ext = DExts in {
Ext.PrimaryTy, "fcvt.s.d">,
Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
- defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, Ext, Ext.PrimaryTy,
- Ext.F32Ty, "fcvt.d.s">,
+ defm FCVT_D_S : FPUnaryOp_r_frmlegacy_m<0b0100001, 0b00000, Ext, Ext.PrimaryTy,
+ Ext.F32Ty, "fcvt.d.s">,
Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
@@ -140,12 +140,12 @@ foreach Ext = DExts in {
"fcvt.wu.d">,
Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
- defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, Ext, Ext.PrimaryTy, GPR,
- "fcvt.d.w">,
+ defm FCVT_D_W : FPUnaryOp_r_frmlegacy_m<0b1101001, 0b00000, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.w">,
Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
- defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, Ext, Ext.PrimaryTy, GPR,
- "fcvt.d.wu">,
+ defm FCVT_D_WU : FPUnaryOp_r_frmlegacy_m<0b1101001, 0b00001, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.wu">,
Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
} // foreach Ext = DExts
@@ -240,7 +240,7 @@ let Predicates = [HasStdExtD] in {
// f64 -> f32, f32 -> f64
def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
+def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtZdinx, IsRV64] in {
@@ -248,7 +248,7 @@ let Predicates = [HasStdExtZdinx, IsRV64] in {
// f64 -> f32, f32 -> f64
def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_S_D_INX FPR64INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1)>;
+def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZdinx, IsRV32] in {
@@ -256,7 +256,7 @@ let Predicates = [HasStdExtZdinx, IsRV32] in {
// f64 -> f32, f32 -> f64
def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1)>;
+def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtZdinx, IsRV32]
// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
@@ -277,11 +277,12 @@ def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>;
def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR64:$rs1), (FCLASS_D $rs1)>;
+def : Pat<(riscv_fclass FPR64:$rs1), (FCLASS_D $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D, FPR64, f64>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
-def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>;
+def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2,
+ FRM_RNE))>;
def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2,
FRM_DYN))>;
@@ -312,13 +313,13 @@ def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>;
def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>;
+def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>;
def : Pat<(fcopysign FPR64INX:$rs1, (fneg FPR64INX:$rs2)),
(FSGNJN_D_INX $rs1, $rs2)>;
def : Pat<(fcopysign FPR64INX:$rs1, FPR32INX:$rs2),
- (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2))>;
+ (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>;
def : Pat<(fcopysign FPR32INX:$rs1, FPR64INX:$rs2),
(FSGNJ_S_INX $rs1, (FCVT_S_D_INX $rs2, FRM_DYN))>;
@@ -349,13 +350,13 @@ def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>
def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>;
def : Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>;
+def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>;
def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)),
(FSGNJN_D_IN32X $rs1, $rs2)>;
def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2),
- (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2))>;
+ (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>;
def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2),
(FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>;
@@ -396,12 +397,12 @@ foreach Ext = DExts in {
// Match non-signaling FEQ_D
foreach Ext = DExts in {
- defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext, f64>;
- defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext, f64>;
- defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext, f64>;
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext>;
}
let Predicates = [HasStdExtD] in {
@@ -537,7 +538,7 @@ def SplitF64Pseudo_INX
[(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64IN32X:$src))]>;
} // Predicates = [HasStdExtZdinx, IsRV32]
-let Predicates = [HasStdExtD, IsRV32] in {
+let Predicates = [HasStdExtD] in {
// double->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, FRM_RTZ)>;
@@ -554,9 +555,9 @@ def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, FRM_DYN)>;
def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, FRM_RMM)>;
// [u]int->double.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>;
-} // Predicates = [HasStdExtD, IsRV32]
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1, FRM_RNE)>;
+} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtZdinx, IsRV32] in {
@@ -575,8 +576,8 @@ def : Pat<(i32 (any_lrint FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_DYN)>;
def : Pat<(i32 (any_lround FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_RMM)>;
// [u]int->double.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1, FRM_RNE)>;
} // Predicates = [HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtD, IsRV64] in {
@@ -592,8 +593,8 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR64:$rs1, timm:$frm), (FCVT_W_D $rs1, timm:$
def : Pat<(riscv_any_fcvt_wu_rv64 FPR64:$rs1, timm:$frm), (FCVT_WU_D $rs1, timm:$frm)>;
// [u]int32->fp
-def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
-def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1, FRM_RNE)>;
// Saturating double->[u]int64.
def : Pat<(i64 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_L_D $rs1, timm:$frm)>;
@@ -629,8 +630,8 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_W_D_INX $rs1,
def : Pat<(riscv_any_fcvt_wu_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_WU_D_INX $rs1, timm:$frm)>;
// [u]int32->fp
-def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1)>;
-def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1, FRM_RNE)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1, FRM_RNE)>;
// Saturating double->[u]int64.
def : Pat<(i64 (riscv_fcvt_x FPR64INX:$rs1, timm:$frm)), (FCVT_L_D_INX $rs1, timm:$frm)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 290c03defc5f..52eadbdec255 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -29,11 +29,11 @@ def SDT_RISCVFCVT_X
def SDT_RISCVFROUND
: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisVT<3, XLenVT>]>;
-def SDT_RISCVFPCLASS
+def SDT_RISCVFCLASS
: SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
-def riscv_fpclass
- : SDNode<"RISCVISD::FPCLASS", SDT_RISCVFPCLASS>;
+def riscv_fclass
+ : SDNode<"RISCVISD::FCLASS", SDT_RISCVFCLASS>;
def riscv_fround
: SDNode<"RISCVISD::FROUND", SDT_RISCVFROUND>;
@@ -132,6 +132,26 @@ def frmarg : Operand<XLenVT> {
let DecoderMethod = "decodeFRMArg";
}
+// Variants of the rounding mode operand that default to 'rne'. This is used
+// for historical/legacy reasons. fcvt instructions where the rounding mode
+// doesn't affect the output originally always set it to 0b000 ('rne'). As old
+// versions of LLVM and GCC will fail to decode versions of these instructions
+// with the rounding mode set to something other than 'rne', we retain this
+// default.
+def FRMArgLegacy : AsmOperandClass {
+ let Name = "FRMArgLegacy";
+ let RenderMethod = "addFRMArgOperands";
+ let ParserMethod = "parseFRMArg";
+ let IsOptional = 1;
+ let DefaultMethod = "defaultFRMArgLegacyOp";
+}
+
+def frmarglegacy : Operand<XLenVT> {
+ let ParserMatchClass = FRMArgLegacy;
+ let PrintMethod = "printFRMArgLegacy";
+ let DecoderMethod = "decodeFRMArg";
+}
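As a rough illustration of the comment above: an instruction written without a rounding mode, e.g. "fcvt.d.s fa0, fa1", still encodes rm as 0b000 through the defaultFRMArgLegacyOp default, and the selection patterns in the RISCVInstrInfoD.td hunk earlier in this diff now spell that default out explicitly.

// From the D-extension hunk above; FRM_RNE is the retained legacy default for
// conversions whose result does not depend on the rounding mode.
def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1, FRM_RNE)>;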
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -227,6 +247,24 @@ multiclass FPUnaryOp_r_frm_m<bits<7> funct7, bits<5> rs2val,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
+ UseNamedOperandTable = 1, hasPostISelHook = 1 in
+class FPUnaryOp_r_frmlegacy<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
+ DAGOperand rs1ty, string opcodestr>
+ : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd),
+ (ins rs1ty:$rs1, frmarglegacy:$frm), opcodestr,
+ "$rd, $rs1$frm"> {
+ let rs2 = rs2val;
+}
+multiclass FPUnaryOp_r_frmlegacy_m<bits<7> funct7, bits<5> rs2val,
+ ExtInfo Ext, DAGOperand rdty, DAGOperand rs1ty,
+ string opcodestr, list<Predicate> ExtraPreds = []> {
+ let Predicates = !listconcat(Ext.Predicates, ExtraPreds),
+ DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPUnaryOp_r_frmlegacy<funct7, rs2val, rdty, rs1ty,
+ opcodestr>;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
IsSignExtendingOpW = 1 in
class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
DAGOperand rty, bit Commutable = 0>
@@ -264,7 +302,7 @@ def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
} // Predicates = [HasStdExtF]
foreach Ext = FExts in {
- let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
+ let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", Ext>;
defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", Ext>;
@@ -443,10 +481,10 @@ class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond,
RVInst Inst, ValueType vt>
: Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>;
multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond,
- RVInst Inst, ExtInfo Ext, ValueType vt> {
+ RVInst Inst, ExtInfo Ext> {
let Predicates = Ext.Predicates in
def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond,
- !cast<RVInst>(Inst#Ext.Suffix), vt>;
+ !cast<RVInst>(Inst#Ext.Suffix), Ext.PrimaryVT>;
}
class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst,
@@ -489,7 +527,7 @@ def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>;
def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR32:$rs1), (FCLASS_S $rs1)>;
+def : Pat<(riscv_fclass FPR32:$rs1), (FCLASS_S $rs1)>;
} // Predicates = [HasStdExtF]
let Predicates = [HasStdExtZfinx] in {
@@ -498,7 +536,7 @@ def : Pat<(any_fsqrt FPR32INX:$rs1), (FSQRT_S_INX FPR32INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR32INX:$rs1), (FSGNJN_S_INX $rs1, $rs1)>;
def : Pat<(fabs FPR32INX:$rs1), (FSGNJX_S_INX $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>;
+def : Pat<(riscv_fclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>;
} // Predicates = [HasStdExtZfinx]
foreach Ext = FExts in
@@ -568,12 +606,12 @@ foreach Ext = FExts in {
// Match non-signaling FEQ_S
foreach Ext = FExts in {
- defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext, f32>;
- defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_S, Ext>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext>;
}
let Predicates = [HasStdExtF] in {
@@ -607,10 +645,10 @@ def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs1, SETOEQ)),
} // Predicates = [HasStdExtZfinx]
foreach Ext = FExts in {
- defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext, f32>;
- defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext>;
}
let Predicates = [HasStdExtF] in {
@@ -642,19 +680,19 @@ def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm
(SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
} // Predicates = [HasStdExtZfinx]
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
// Moves (no conversion)
def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>;
def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
// Moves (no conversion)
def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
-let Predicates = [HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtF] in {
// float->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RTZ)>;
def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, FRM_RTZ)>;
@@ -672,9 +710,9 @@ def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RMM)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtF, IsRV32]
+} // Predicates = [HasStdExtF]
-let Predicates = [HasStdExtZfinx, IsRV32] in {
+let Predicates = [HasStdExtZfinx] in {
// float->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RTZ)>;
def : Pat<(i32 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_WU_S_INX $rs1, FRM_RTZ)>;
@@ -692,7 +730,7 @@ def : Pat<(i32 (any_lround FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RMM)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W_INX $rs1, FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfinx, IsRV32]
+} // Predicates = [HasStdExtZfinx]
let Predicates = [HasStdExtF, IsRV64] in {
// Moves (no conversion)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 6c3c9a771d94..f9890ca4b0ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -114,3 +114,18 @@ let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in {
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
+def : PatGprGpr<mul, MULW, i32, i32>;
+}
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : PatGprGpr<sdiv, DIVW, i32, i32>;
+def : PatGprGpr<udiv, DIVUW, i32, i32>;
+def : PatGprGpr<srem, REMW, i32, i32>;
+def : PatGprGpr<urem, REMUW, i32, i32>;
+}
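For reference, a rough sketch of what the first record above selects, assuming the usual shape of the shared PatGprGpr helper (defined in RISCVInstrInfo.td, outside this hunk):

// Sketch: PatGprGpr<mul, MULW, i32, i32> amounts to roughly
def : Pat<(i32 (mul (i32 GPR:$rs1), (i32 GPR:$rs2))),
          (MULW GPR:$rs1, GPR:$rs2)>;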
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 6e5ee8043e92..9fc9a29c210d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -24,12 +24,11 @@ class VTypeIAsmOperand<int VTypeINum> : AsmOperandClass {
let RenderMethod = "addVTypeIOperands";
}
-class VTypeIOp<int VTypeINum> : Operand<XLenVT> {
+class VTypeIOp<int VTypeINum> : RISCVOp {
let ParserMatchClass = VTypeIAsmOperand<VTypeINum>;
let PrintMethod = "printVTypeI";
let DecoderMethod = "decodeUImmOperand<"#VTypeINum#">";
let OperandType = "OPERAND_VTYPEI" # VTypeINum;
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -58,12 +57,7 @@ def VMaskOp : RegisterOperand<VMV0> {
let DecoderMethod = "decodeVMaskReg";
}
-def simm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<5>;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<5>";
- let OperandType = "OPERAND_SIMM5";
- let OperandNamespace = "RISCVOp";
+def simm5 : RISCVSImmLeafOp<5> {
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -78,11 +72,10 @@ def SImm5Plus1AsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidSImm5Plus1";
}
-def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
+def simm5_plus1 : RISCVOp, ImmLeaf<XLenVT,
[{return (isInt<5>(Imm) && Imm != -16) || Imm == 16;}]> {
let ParserMatchClass = SImm5Plus1AsmOperand;
let OperandType = "OPERAND_SIMM5_PLUS1";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -98,88 +91,209 @@ def simm5_plus1_nonzero : ImmLeaf<XLenVT,
// Scheduling definitions.
//===----------------------------------------------------------------------===//
-class VMVRSched<int n> : Sched<[
- !cast<SchedReadWrite>("WriteVMov" #n #"V"),
- !cast<SchedReadWrite>("ReadVMov" #n #"V")
-]>;
-
-class VLESched<string lmul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLDE_" #lmul),
- ReadVLDX, ReadVMask
-]>;
-
-class VSESched<string lmul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSTE_" #lmul),
- !cast<SchedReadWrite>("ReadVSTEV_" #lmul),
- ReadVSTX, ReadVMask
-]>;
-
-class VLSSched<int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLDS" #eew #"_" #emul),
- ReadVLDX, ReadVLDSX, ReadVMask
-]>;
-
-class VSSSched<int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSTS" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul),
- ReadVSTX, ReadVSTSX, ReadVMask
-]>;
-
-class VLXSched<int dataEEW, string isOrdered,
- string dataEMUL = "WorstCase",
- string idxEMUL = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLD" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
- ReadVLDX,
- !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #idxEMUL), ReadVMask
-]>;
-
-class VSXSched<int dataEEW, string isOrdered,
- string dataEMUL = "WorstCase",
- string idxEMUL = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
- !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
- ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #idxEMUL), ReadVMask
-]>;
-
-class VLFSched<string lmul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLDFF_" #lmul),
- ReadVLDX, ReadVMask
-]>;
+// Common class of scheduling definitions.
+// `ReadVMergeOp` will be prepended to reads if the instruction is masked.
+// `ReadVMask` will be appended to reads if the instruction is masked.
+// Operands:
+// `writes` SchedWrites that are listed for each explicit def operand
+// in order.
+// `reads` SchedReads that are listed for each explicit use operand.
+// `forceMasked` Forced to be masked (e.g. Add-with-Carry Instructions).
+//   `forceMergeOpRead` Force a read of the merge operand.
+class SchedCommon<list<SchedWrite> writes, list<SchedRead> reads,
+ string mx = "WorstCase", int sew = 0, bit forceMasked = 0,
+ bit forceMergeOpRead = 0> : Sched<[]> {
+ defvar isMasked = !ne(!find(NAME, "_MASK"), -1);
+ defvar isMaskedOrForceMasked = !or(forceMasked, isMasked);
+ defvar mergeRead = !if(!or(!eq(mx, "WorstCase"), !eq(sew, 0)),
+ !cast<SchedRead>("ReadVMergeOp_" # mx),
+ !cast<SchedRead>("ReadVMergeOp_" # mx # "_E" #sew));
+ defvar needsMergeRead = !or(isMaskedOrForceMasked, forceMergeOpRead);
+ defvar readsWithMask =
+ !if(isMaskedOrForceMasked, !listconcat(reads, [ReadVMask]), reads);
+ defvar allReads =
+ !if(needsMergeRead, !listconcat([mergeRead], readsWithMask), reads);
+ let SchedRW = !listconcat(writes, allReads);
+}
+
+// Common class of scheduling definitions for n-ary instructions.
+// The scheduling resources are relevant to LMUL and may be relevant to SEW.
+class SchedNary<string write, list<string> reads, string mx, int sew = 0,
+ bit forceMasked = 0, bit forceMergeOpRead = 0>
+ : SchedCommon<[!cast<SchedWrite>(
+ !if(sew,
+ write # "_" # mx # "_E" # sew,
+ write # "_" # mx))],
+ !foreach(read, reads,
+ !cast<SchedRead>(!if(sew, read #"_" #mx #"_E" #sew,
+ read #"_" #mx))),
+ mx, sew, forceMasked, forceMergeOpRead>;
+
+// Classes with the "MC" postfix are only used in the MC layer.
+// For these classes, we assume worst-case costs and that `ReadVMask` is
+// always needed (with some exceptions).
+
+// For instructions with no operand.
+class SchedNullary<string write, string mx, int sew = 0, bit forceMasked = 0,
+ bit forceMergeOpRead = 0>:
+ SchedNary<write, [], mx, sew, forceMasked, forceMergeOpRead>;
+class SchedNullaryMC<string write, bit forceMasked = 1>:
+ SchedNullary<write, "WorstCase", forceMasked=forceMasked>;
+
+// For instructions with one operand.
+class SchedUnary<string write, string read0, string mx, int sew = 0,
+ bit forceMasked = 0, bit forceMergeOpRead = 0>:
+ SchedNary<write, [read0], mx, sew, forceMasked, forceMergeOpRead>;
+class SchedUnaryMC<string write, string read0, bit forceMasked = 1>:
+ SchedUnary<write, read0, "WorstCase", forceMasked=forceMasked>;
+
+// For instructions with two operands.
+class SchedBinary<string write, string read0, string read1, string mx,
+ int sew = 0, bit forceMasked = 0, bit forceMergeOpRead = 0>
+ : SchedNary<write, [read0, read1], mx, sew, forceMasked, forceMergeOpRead>;
+class SchedBinaryMC<string write, string read0, string read1,
+ bit forceMasked = 1>:
+ SchedBinary<write, read0, read1, "WorstCase", forceMasked=forceMasked>;
+
+// For instructions with three operands.
+class SchedTernary<string write, string read0, string read1, string read2,
+ string mx, int sew = 0, bit forceMasked = 0,
+ bit forceMergeOpRead = 0>
+ : SchedNary<write, [read0, read1, read2], mx, sew, forceMasked,
+ forceMergeOpRead>;
+class SchedTernaryMC<string write, string read0, string read1, string read2,
+ int sew = 0, bit forceMasked = 1>:
+ SchedNary<write, [read0, read1, read2], "WorstCase", sew, forceMasked>;
+
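To make the expansion concrete, a hand-worked example of what these MC-layer helpers resolve to (a sketch; the concrete SchedRead/SchedWrite records live in RISCVScheduleV.td):

// SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV"> is forced masked,
// so SchedCommon prepends the merge-operand read and appends ReadVMask:
//   SchedRW = [WriteVIALUV_WorstCase, ReadVMergeOp_WorstCase,
//              ReadVIALUV_WorstCase, ReadVIALUV_WorstCase, ReadVMask]
// i.e. the hand-written Sched<[...]> lists replaced later in this diff, plus
// the new ReadVMergeOp entry ahead of the operand reads.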
+// For reduction instructions.
+class SchedReduction<string write, string read, string mx, int sew,
+ bit forceMergeOpRead = 0>
+ : SchedCommon<[!cast<SchedWrite>(write #"_" #mx #"_E" #sew)],
+ !listsplat(!cast<SchedRead>(read), 3), mx, sew, forceMergeOpRead>;
+class SchedReductionMC<string write, string readV, string readV0>:
+ SchedCommon<[!cast<SchedWrite>(write # "_WorstCase")],
+ [!cast<SchedRead>(readV), !cast<SchedRead>(readV0)],
+ forceMasked=1>;
+
+// Whole Vector Register Move
+class VMVRSched<int n> : SchedCommon<
+ [!cast<SchedWrite>("WriteVMov" # n # "V")],
+ [!cast<SchedRead>("ReadVMov" # n # "V")]
+>;
+
+// Vector Unit-Stride Loads and Stores
+class VLESched<string lmul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLDE_" # lmul)],
+ [ReadVLDX], mx=lmul, forceMasked=forceMasked
+>;
+class VLESchedMC : VLESched<"WorstCase", forceMasked=1>;
+
+class VSESched<string lmul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSTE_" # lmul)],
+ [!cast<SchedRead>("ReadVSTEV_" # lmul), ReadVSTX], mx=lmul,
+ forceMasked=forceMasked
+>;
+class VSESchedMC : VSESched<"WorstCase", forceMasked=1>;
+
+// Vector Strided Loads and Stores
+class VLSSched<int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLDS" # eew # "_" # emul)],
+ [ReadVLDX, ReadVLDSX], emul, eew, forceMasked
+>;
+class VLSSchedMC<int eew> : VLSSched<eew, "WorstCase", forceMasked=1>;
+
+class VSSSched<int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSTS" # eew # "_" # emul)],
+ [!cast<SchedRead>("ReadVSTS" # eew # "V_" # emul), ReadVSTX, ReadVSTSX],
+ emul, eew, forceMasked
+>;
+class VSSSchedMC<int eew> : VSSSched<eew, "WorstCase", forceMasked=1>;
+
+// Vector Indexed Loads and Stores
+class VLXSched<int dataEEW, bit isOrdered, string dataEMUL, string idxEMUL,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLD" # !if(isOrdered, "O", "U") # "X" # dataEEW # "_" # dataEMUL)],
+ [ReadVLDX, !cast<SchedRead>("ReadVLD" # !if(isOrdered, "O", "U") # "XV_" # idxEMUL)],
+ dataEMUL, dataEEW, forceMasked
+>;
+class VLXSchedMC<int dataEEW, bit isOrdered>:
+ VLXSched<dataEEW, isOrdered, "WorstCase", "WorstCase", forceMasked=1>;
+
+class VSXSched<int dataEEW, bit isOrdered, string dataEMUL, string idxEMUL,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVST" # !if(isOrdered, "O", "U") # "X" # dataEEW # "_" # dataEMUL)],
+ [!cast<SchedRead>("ReadVST" # !if(isOrdered, "O", "U") #"X" # dataEEW # "_" # dataEMUL),
+ ReadVSTX, !cast<SchedRead>("ReadVST" # !if(isOrdered, "O", "U") # "XV_" # idxEMUL)],
+ dataEMUL, dataEEW, forceMasked
+>;
+class VSXSchedMC<int dataEEW, bit isOrdered>:
+ VSXSched<dataEEW, isOrdered, "WorstCase", "WorstCase", forceMasked=1>;
+
+// Unit-stride Fault-Only-First Loads
+class VLFSched<string lmul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLDFF_" # lmul)],
+ [ReadVLDX], mx=lmul, forceMasked=forceMasked
+>;
+class VLFSchedMC: VLFSched<"WorstCase", forceMasked=1>;
// Unit-Stride Segment Loads and Stores
-class VLSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul),
- ReadVLDX, ReadVMask
-]>;
-class VSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVSTEV_" #emul),
- ReadVSTX, ReadVMask
-]>;
-class VLSEGFFSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew #"_" #emul),
- ReadVLDX, ReadVMask
-]>;
+class VLSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul)],
+ [ReadVLDX], emul, eew, forceMasked
+>;
+class VLSEGSchedMC<int nf, int eew> : VLSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
+class VSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSSEG" # nf # "e" # eew # "_" # emul)],
+ [!cast<SchedRead>("ReadVSTEV_" #emul), ReadVSTX], emul, eew, forceMasked
+>;
+class VSSEGSchedMC<int nf, int eew> : VSSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
+class VLSEGFFSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLSEGFF" # nf # "e" # eew # "_" # emul)],
+ [ReadVLDX], emul, eew, forceMasked
+>;
+class VLSEGFFSchedMC<int nf, int eew> : VLSEGFFSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
// Strided Segment Loads and Stores
-class VLSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul),
- ReadVLDX, ReadVLDSX, ReadVMask
-]>;
-class VSSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul),
- ReadVSTX, ReadVSTSX, ReadVMask
-]>;
+class VLSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul)],
+ [ReadVLDX, ReadVLDSX], emul, eew, forceMasked
+>;
+class VLSSEGSchedMC<int nf, int eew> : VLSSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
+class VSSSEGSched<int nf, int eew, string emul, bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #emul)],
+ [!cast<SchedRead>("ReadVSTS" #eew #"V_" #emul),
+ ReadVSTX, ReadVSTSX], emul, eew, forceMasked
+>;
+class VSSSEGSchedMC<int nf, int eew> : VSSSEGSched<nf, eew, "WorstCase",
+ forceMasked=1>;
+
// Indexed Segment Loads and Stores
-class VLXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVL" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul),
- ReadVLDX, !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #emul), ReadVMask
-]>;
-class VSXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[
- !cast<SchedReadWrite>("WriteVS" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul),
- !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #eew #"_" #emul),
- ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #emul), ReadVMask
-]>;
+class VLXSEGSched<int nf, int eew, bit isOrdered, string emul,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVL" #!if(isOrdered, "O", "U") #"XSEG" #nf #"e" #eew #"_" #emul)],
+ [ReadVLDX, !cast<SchedRead>("ReadVLD" #!if(isOrdered, "O", "U") #"XV_" #emul)],
+ emul, eew, forceMasked
+>;
+class VLXSEGSchedMC<int nf, int eew, bit isOrdered>:
+ VLXSEGSched<nf, eew, isOrdered, "WorstCase", forceMasked=1>;
+
+// Passes sew=0 instead of eew=0 since this pseudo does not follow MX_E form.
+class VSXSEGSched<int nf, int eew, bit isOrdered, string emul,
+ bit forceMasked = 0> : SchedCommon<
+ [!cast<SchedWrite>("WriteVS" #!if(isOrdered, "O", "U") #"XSEG" #nf #"e" #eew #"_" #emul)],
+ [!cast<SchedRead>("ReadVST" #!if(isOrdered, "O", "U") #"X" #eew #"_" #emul),
+ ReadVSTX, !cast<SchedRead>("ReadVST" #!if(isOrdered, "O", "U") #"XV_" #emul)],
+ emul, sew=0, forceMasked=forceMasked
+>;
+class VSXSEGSchedMC<int nf, int eew, bit isOrdered>:
+ VSXSEGSched<nf, eew, isOrdered, "WorstCase", forceMasked=1>;
//===----------------------------------------------------------------------===//
// Instruction class templates
@@ -327,10 +441,14 @@ class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
}
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
-class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVV<funct6, opv, (outs VR:$vd),
- (ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $vs1, $vs2$vm">;
+class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
+ : RVInstVV<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $vs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, vs2, vs1
class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
@@ -355,10 +473,14 @@ class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
}
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
-class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VR:$vd),
- (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $rs1, $vs2$vm">;
+class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
+ : RVInstVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, vs1, vs2
class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
@@ -397,10 +519,14 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2, $rs1$vm">;
// op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2)
-class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : RVInstVX<funct6, opv, (outs VR:$vd),
- (ins FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $rs1, $vs2$vm">;
+class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
+ : RVInstVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, vs2, vm (use vs1 as instruction encoding)
class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
@@ -422,42 +548,37 @@ class VALUVs2NoVm<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodest
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
-multiclass VIndexLoadStore<list<int> EEWList> {
- foreach n = EEWList in {
- defvar w = !cast<RISCVWidth>("LSWidth" # n);
-
- def VLUXEI # n # _V :
- VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">,
- VLXSched<n, "U">;
- def VLOXEI # n # _V :
- VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">,
- VLXSched<n, "O">;
-
- def VSUXEI # n # _V :
- VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">,
- VSXSched<n, "U">;
- def VSOXEI # n # _V :
- VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">,
- VSXSched<n, "O">;
- }
+multiclass VIndexLoadStore<int eew> {
+ defvar w = !cast<RISCVWidth>("LSWidth" # eew);
+
+ def VLUXEI # eew # _V :
+ VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # eew # ".v">,
+ VLXSchedMC<eew, isOrdered=0>;
+ def VLOXEI # eew # _V :
+ VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # eew # ".v">,
+ VLXSchedMC<eew, isOrdered=1>;
+
+ def VSUXEI # eew # _V :
+ VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # eew # ".v">,
+ VSXSchedMC<eew, isOrdered=0>;
+ def VSOXEI # eew # _V :
+ VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # eew # ".v">,
+ VSXSchedMC<eew, isOrdered=1>;
}
multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVIALUV_WorstCase, ReadVIALUV_WorstCase,
- ReadVIALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV">;
}
multiclass VALU_IV_X<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVIALUX_WorstCase, ReadVIALUV_WorstCase,
- ReadVIALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX">;
}
multiclass VALU_IV_I<string opcodestr, bits<6> funct6> {
def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVIALUI_WorstCase, ReadVIALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">;
}
multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6>
@@ -475,364 +596,314 @@ multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6>
multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> {
def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIWALUV_WorstCase, ReadVIWALUV_WorstCase,
- ReadVIWALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV">;
def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIWALUX_WorstCase, ReadVIWALUV_WorstCase,
- ReadVIWALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX">;
}
multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIMulAddV_WorstCase, ReadVIMulAddV_WorstCase,
- ReadVIMulAddV_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
+ "ReadVIMulAddV">;
def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIMulAddX_WorstCase, ReadVIMulAddV_WorstCase,
- ReadVIMulAddX_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
+ "ReadVIMulAddV">;
}
multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> {
+ let RVVConstraint = WidenV in
def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIWMulAddX_WorstCase, ReadVIWMulAddV_WorstCase,
- ReadVIWMulAddX_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
+ "ReadVIWMulAddV">;
}
multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6>
: VWMAC_MV_X<opcodestr, funct6> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIWMulAddV_WorstCase, ReadVIWMulAddV_WorstCase,
- ReadVIWMulAddV_WorstCase, ReadVMask]>;
+ let RVVConstraint = WidenV in
+ def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>,
+ SchedTernaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
+ "ReadVIWMulAddV">;
}
multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVExtV_WorstCase, ReadVExtV_WorstCase, ReadVMask]>;
+ SchedUnaryMC<"WriteVExtV", "ReadVExtV">;
}
multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVIMergeV_WorstCase, ReadVIMergeV_WorstCase,
- ReadVIMergeV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV">;
def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVIMergeX_WorstCase, ReadVIMergeV_WorstCase,
- ReadVIMergeX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX">;
def IM : VALUmVI<funct6, opcodestr # ".vim">,
- Sched<[WriteVIMergeI_WorstCase, ReadVIMergeV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVIMergeI", "ReadVIMergeV">;
}
multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV">;
def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX">;
}
multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6>
: VALUm_IV_V_X<opcodestr, funct6> {
def IM : VALUmVI<funct6, opcodestr # ".vim">,
- Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVICALUI", "ReadVICALUV">;
}
multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUV_WorstCase]>;
+ SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV",
+ forceMasked=0>;
def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase,
- ReadVICALUX_WorstCase]>;
+ SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX",
+ forceMasked=0>;
}
multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6>
: VALUNoVm_IV_V_X<opcodestr, funct6> {
def I : VALUVINoVm<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase]>;
+ SchedUnaryMC<"WriteVICALUI", "ReadVICALUV", forceMasked=0>;
}
multiclass VALU_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFALUF_WorstCase, ReadVFALUV_WorstCase,
- ReadVFALUF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF">;
}
multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6>
: VALU_FV_F<opcodestr, funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFALUV_WorstCase, ReadVFALUV_WorstCase,
- ReadVFALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV">;
}
multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw> {
def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFWALUV_WorstCase, ReadVFWALUV_WorstCase,
- ReadVFWALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV">;
def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFWALUF_WorstCase, ReadVFWALUV_WorstCase,
- ReadVFWALUF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF">;
}
multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFMulV_WorstCase, ReadVFMulV_WorstCase,
- ReadVFMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFMulF_WorstCase, ReadVFMulV_WorstCase,
- ReadVFMulF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF">;
}
multiclass VDIV_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFDivF_WorstCase, ReadVFDivV_WorstCase,
- ReadVFDivF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF">;
}
multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6>
: VDIV_FV_F<opcodestr, funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFDivV_WorstCase, ReadVFDivV_WorstCase,
- ReadVFDivV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV">;
}
multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFWMulV_WorstCase, ReadVFWMulV_WorstCase,
- ReadVFWMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFWMulF_WorstCase, ReadVFWMulV_WorstCase,
- ReadVFWMulF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF">;
}
multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFMulAddV_WorstCase, ReadVFMulAddV_WorstCase,
- ReadVFMulAddV_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV">;
def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFMulAddF_WorstCase, ReadVFMulAddV_WorstCase,
- ReadVFMulAddF_WorstCase, ReadVMask]>;
+ SchedTernaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV">;
}
multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFWMulAddV_WorstCase, ReadVFWMulAddV_WorstCase,
- ReadVFWMulAddV_WorstCase, ReadVMask]>;
- def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFWMulAddF_WorstCase, ReadVFWMulAddV_WorstCase,
- ReadVFWMulAddF_WorstCase, ReadVMask]>;
+ let RVVConstraint = WidenV in {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>,
+ SchedTernaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV">;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>,
+ SchedTernaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF",
+ "ReadVFWMulAddV">;
+ }
}
multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFSqrtV_WorstCase, ReadVFSqrtV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFSqrtV", "ReadVFSqrtV">;
}
multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFRecpV_WorstCase, ReadVFRecpV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFRecpV", "ReadVFRecpV">;
}
multiclass VMINMAX_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFMinMaxV_WorstCase, ReadVFMinMaxV_WorstCase,
- ReadVFMinMaxV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFMinMaxF_WorstCase, ReadVFMinMaxV_WorstCase,
- ReadVFMinMaxF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF">;
}
multiclass VCMP_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFCmpF_WorstCase, ReadVFCmpV_WorstCase,
- ReadVFCmpF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF">;
}
multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6>
: VCMP_FV_F<opcodestr, funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFCmpV_WorstCase, ReadVFCmpV_WorstCase,
- ReadVFCmpV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFCmpV", "ReadVFCmpV", "ReadVFCmpV">;
}
multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
- Sched<[WriteVFSgnjV_WorstCase, ReadVFSgnjV_WorstCase,
- ReadVFSgnjV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV">;
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFSgnjF_WorstCase, ReadVFSgnjV_WorstCase,
- ReadVFSgnjF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF">;
}
multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFClassV_WorstCase, ReadVFClassV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFClassV", "ReadVFClassV">;
}
multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFCvtIToFV_WorstCase, ReadVFCvtIToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFCvtIToFV", "ReadVFCvtIToFV">;
}
multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFCvtFToIV_WorstCase, ReadVFCvtFToIV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFCvtFToIV", "ReadVFCvtFToIV">;
}
multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtIToFV_WorstCase, ReadVFWCvtIToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV">;
}
multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtFToIV_WorstCase, ReadVFWCvtFToIV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV">;
}
multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtFToFV_WorstCase, ReadVFWCvtFToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV">;
}
multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtIToFV_WorstCase, ReadVFNCvtIToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV">;
}
multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtFToIV_WorstCase, ReadVFNCvtFToIV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV">;
}
multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtFToFV_WorstCase, ReadVFNCvtFToFV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV">;
}
multiclass VRED_MV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
- Sched<[WriteVIRedV_From_WorstCase, ReadVIRedV, ReadVIRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVIRedV_From", "ReadVIRedV", "ReadVIRedV0">;
}
multiclass VREDMINMAX_MV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
- Sched<[WriteVIRedMinMaxV_From_WorstCase, ReadVIRedV, ReadVIRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVIRedMinMaxV_From", "ReadVIRedV", "ReadVIRedV0">;
}
multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">,
- Sched<[WriteVIWRedV_From_WorstCase, ReadVIWRedV, ReadVIWRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVIWRedV_From", "ReadVIWRedV", "ReadVIWRedV0">;
}
multiclass VRED_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedV_From_WorstCase, ReadVFRedV, ReadVFRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFRedV_From", "ReadVFRedV", "ReadVFRedV0">;
}
multiclass VREDMINMAX_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedMinMaxV_From_WorstCase, ReadVFRedV, ReadVFRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFRedMinMaxV_From", "ReadVFRedV", "ReadVFRedV0">;
}
multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedOV_From_WorstCase, ReadVFRedOV, ReadVFRedOV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFRedOV_From", "ReadVFRedOV", "ReadVFRedOV0">;
}
multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFWRedV_From_WorstCase, ReadVFWRedV, ReadVFWRedV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFWRedV_From", "ReadVFWRedV", "ReadVFWRedV0">;
}
multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFWRedOV_From_WorstCase, ReadVFWRedOV, ReadVFWRedOV0,
- ReadVMask]>;
+ SchedReductionMC<"WriteVFWRedOV_From", "ReadVFWRedOV", "ReadVFWRedOV0">;
}
multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
def M : VALUVVNoVm<funct6, OPMVV, opcodestr #"." #vm #"m">,
- Sched<[WriteVMALUV_WorstCase, ReadVMALUV_WorstCase,
- ReadVMALUV_WorstCase]>;
+ SchedBinaryMC<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV",
+ forceMasked=0>;
}
multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVMSFSV_WorstCase, ReadVMSFSV_WorstCase, ReadVMask]>;
+ SchedUnaryMC<"WriteVMSFSV", "ReadVMSFSV">;
}
multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVMIotV_WorstCase, ReadVMIotV_WorstCase, ReadVMask]>;
+ SchedUnaryMC<"WriteVMIotV", "ReadVMIotV">;
}
multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVShiftV_WorstCase, ReadVShiftV_WorstCase,
- ReadVShiftV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVShiftV", "ReadVShiftV", "ReadVShiftV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVShiftX_WorstCase, ReadVShiftV_WorstCase,
- ReadVShiftX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVShiftI_WorstCase, ReadVShiftV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVShiftI", "ReadVShiftV">;
}
multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">,
- Sched<[WriteVNShiftV_WorstCase, ReadVNShiftV_WorstCase,
- ReadVNShiftV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNShiftV", "ReadVNShiftV", "ReadVNShiftV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">,
- Sched<[WriteVNShiftX_WorstCase, ReadVNShiftV_WorstCase,
- ReadVNShiftX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNShiftX", "ReadVNShiftV", "ReadVNShiftX">;
def I : VALUVI<funct6, opcodestr # ".wi", uimm5>,
- Sched<[WriteVNShiftI_WorstCase, ReadVNShiftV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVNShiftI", "ReadVNShiftV">;
}
multiclass VMINMAX_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVIMinMaxV_WorstCase, ReadVIMinMaxV_WorstCase,
- ReadVIMinMaxV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVIMinMaxX_WorstCase, ReadVIMinMaxV_WorstCase,
- ReadVIMinMaxX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX">;
}
multiclass VCMP_IV_V<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVICmpV_WorstCase, ReadVICmpV_WorstCase,
- ReadVICmpV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV">;
}
multiclass VCMP_IV_X<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVICmpX_WorstCase, ReadVICmpV_WorstCase,
- ReadVICmpX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX">;
}
multiclass VCMP_IV_I<string opcodestr, bits<6> funct6> {
def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVICmpI_WorstCase, ReadVICmpV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVICmpI", "ReadVICmpV">;
}
multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6>
@@ -850,140 +921,109 @@ multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6>
multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIMulV_WorstCase, ReadVIMulV_WorstCase,
- ReadVIMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIMulX_WorstCase, ReadVIMulV_WorstCase,
- ReadVIMulX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX">;
}
multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIWMulV_WorstCase, ReadVIWMulV_WorstCase,
- ReadVIWMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIWMulX_WorstCase, ReadVIWMulV_WorstCase,
- ReadVIWMulX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIWMulX", "ReadVIWMulV", "ReadVIWMulX">;
}
multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVIDivV_WorstCase, ReadVIDivV_WorstCase,
- ReadVIDivV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIDivV", "ReadVIDivV", "ReadVIDivV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVIDivX_WorstCase, ReadVIDivV_WorstCase,
- ReadVIDivX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVIDivX", "ReadVIDivV", "ReadVIDivX">;
}
multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVSALUV_WorstCase, ReadVSALUV_WorstCase,
- ReadVSALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSALUV", "ReadVSALUV", "ReadVSALUV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVSALUX_WorstCase, ReadVSALUV_WorstCase,
- ReadVSALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX">;
}
multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6>
: VSALU_IV_V_X<opcodestr, funct6> {
def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
- Sched<[WriteVSALUI_WorstCase, ReadVSALUV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVSALUI", "ReadVSALUV">;
}
multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
- Sched<[WriteVAALUV_WorstCase, ReadVAALUV_WorstCase,
- ReadVAALUV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVAALUV", "ReadVAALUV", "ReadVAALUV">;
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVAALUX_WorstCase, ReadVAALUV_WorstCase,
- ReadVAALUX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVAALUX", "ReadVAALUV", "ReadVAALUX">;
}
multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVSMulV_WorstCase, ReadVSMulV_WorstCase,
- ReadVSMulV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSMulV", "ReadVSMulV", "ReadVSMulV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVSMulX_WorstCase, ReadVSMulV_WorstCase,
- ReadVSMulX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSMulX", "ReadVSMulV", "ReadVSMulX">;
}
multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVSShiftV_WorstCase, ReadVSShiftV_WorstCase,
- ReadVSShiftV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSShiftV", "ReadVSShiftV", "ReadVSShiftV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVSShiftX_WorstCase, ReadVSShiftV_WorstCase,
- ReadVSShiftX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVSShiftI_WorstCase, ReadVSShiftV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVSShiftI", "ReadVSShiftV">;
}
multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">,
- Sched<[WriteVNClipV_WorstCase, ReadVNClipV_WorstCase,
- ReadVNClipV_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNClipV", "ReadVNClipV", "ReadVNClipV">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">,
- Sched<[WriteVNClipX_WorstCase, ReadVNClipV_WorstCase,
- ReadVNClipX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVNClipX", "ReadVNClipV", "ReadVNClipX">;
def I : VALUVI<funct6, opcodestr # ".wi", uimm5>,
- Sched<[WriteVNClipI_WorstCase, ReadVNClipV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVNClipI", "ReadVNClipV">;
}
multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVISlideX_WorstCase, ReadVISlideV_WorstCase,
- ReadVISlideX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVISlideX", "ReadVISlideV", "ReadVISlideX">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVISlideI_WorstCase, ReadVISlideV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVISlideI", "ReadVISlideV">;
}
multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6> {
def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
- Sched<[WriteVISlide1X_WorstCase, ReadVISlideV_WorstCase,
- ReadVISlideX_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVISlide1X", "ReadVISlideV", "ReadVISlideX">;
}
multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6> {
def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
- Sched<[WriteVFSlide1F_WorstCase, ReadVFSlideV_WorstCase,
- ReadVFSlideF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFSlide1F", "ReadVFSlideV", "ReadVFSlideF">;
}
multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6> {
def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase,
- ReadVRGatherVV_index_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index">;
def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVRGatherVX_WorstCase, ReadVRGatherVX_data_WorstCase,
- ReadVRGatherVX_index_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVRGatherVX", "ReadVRGatherVX_data",
+ "ReadVRGatherVX_index">;
def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
- Sched<[WriteVRGatherVI_WorstCase, ReadVRGatherVI_data_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVRGatherVI", "ReadVRGatherVI_data">;
}
multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
- Sched<[WriteVCompressV_WorstCase, ReadVCompressV_WorstCase,
- ReadVCompressV_WorstCase]>;
+ SchedBinaryMC<"WriteVCompressV", "ReadVCompressV", "ReadVCompressV">;
}
-multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
- foreach l = [8, 16, 32] in {
- defvar w = !cast<RISCVWidth>("LSWidth" # l);
- defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
+multiclass VWholeLoadN<int l, bits<3> nf, string opcodestr, RegisterClass VRC> {
+ defvar w = !cast<RISCVWidth>("LSWidth" # l);
+ defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
- def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
- Sched<[s, ReadVLDX]>;
- }
-}
-multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> {
- def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>,
- Sched<[schedrw, ReadVLDX]>;
+ def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
+ Sched<[s, ReadVLDX]>;
}
//===----------------------------------------------------------------------===//
@@ -1003,23 +1043,34 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">,
Sched<[WriteVSETVL, ReadVSETVL, ReadVSETVL]>;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
-foreach eew = [8, 16, 32] in {
+} // Predicates = [HasVInstructions]
+
+foreach eew = [8, 16, 32, 64] in {
defvar w = !cast<RISCVWidth>("LSWidth" # eew);
- // Vector Unit-Stride Instructions
- def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched;
- def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched;
+ let Predicates = !if(!eq(eew, 64), [HasVInstructionsI64],
+ [HasVInstructions]) in {
+ // Vector Unit-Stride Instructions
+ def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESchedMC;
+ def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESchedMC;
- // Vector Unit-Stride Fault-only-First Loads
- def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched;
+ // Vector Unit-Stride Fault-only-First Loads
+ def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSchedMC;
- // Vector Strided Instructions
- def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
- def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
-}
+ // Vector Strided Instructions
+ def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSchedMC<eew>;
+ def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSchedMC<eew>;
-defm "" : VIndexLoadStore<[8, 16, 32]>;
-} // Predicates = [HasVInstructions]
+ defm VL1R : VWholeLoadN<eew, 0, "vl1r", VR>;
+ defm VL2R : VWholeLoadN<eew, 1, "vl2r", VRM2>;
+ defm VL4R : VWholeLoadN<eew, 3, "vl4r", VRM4>;
+ defm VL8R : VWholeLoadN<eew, 7, "vl8r", VRM8>;
+ }
+
+ let Predicates = !if(!eq(eew, 64), [IsRV64, HasVInstructionsI64],
+ [HasVInstructions]) in
+ defm "" : VIndexLoadStore<eew>;
+}
let Predicates = [HasVInstructions] in {
def VLM_V : VUnitStrideLoadMask<"vlm.v">,
@@ -1031,11 +1082,6 @@ def : InstAlias<"vle1.v $vd, (${rs1})",
def : InstAlias<"vse1.v $vs3, (${rs1})",
(VSM_V VR:$vs3, GPR:$rs1), 0>;
-defm VL1R : VWholeLoadN<0, "vl1r", VR>;
-defm VL2R : VWholeLoadN<1, "vl2r", VRM2>;
-defm VL4R : VWholeLoadN<3, "vl4r", VRM4>;
-defm VL8R : VWholeLoadN<7, "vl8r", VRM8>;
-
def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
@@ -1051,33 +1097,6 @@ def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsI64] in {
-// Vector Unit-Stride Instructions
-def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">,
- VLESched;
-
-def VLE64FF_V : VUnitStrideLoadFF<LSWidth64, "vle64ff.v">,
- VLFSched;
-
-def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">,
- VSESched;
-// Vector Strided Instructions
-def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">,
- VLSSched<32>;
-
-def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">,
- VSSSched<64>;
-
-defm VL1R: VWholeLoadEEW64<0, "vl1r", VR, WriteVLD1R>;
-defm VL2R: VWholeLoadEEW64<1, "vl2r", VRM2, WriteVLD2R>;
-defm VL4R: VWholeLoadEEW64<3, "vl4r", VRM4, WriteVLD4R>;
-defm VL8R: VWholeLoadEEW64<7, "vl8r", VRM8, WriteVLD8R>;
-} // Predicates = [HasVInstructionsI64]
-let Predicates = [IsRV64, HasVInstructionsI64] in {
- // Vector Indexed Instructions
- defm "" : VIndexLoadStore<[64]>;
-} // [IsRV64, HasVInstructionsI64]
-
let Predicates = [HasVInstructions] in {
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
@@ -1268,12 +1287,10 @@ defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
// Vector Widening Integer Multiply-Add Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Integer Merge Instructions
defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
@@ -1284,15 +1301,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
// op vd, vs1
def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
(ins VR:$vs1), "vmv.v.v", "$vd, $vs1">,
- Sched<[WriteVIMovV_WorstCase, ReadVIMovV_WorstCase]>;
+ SchedUnaryMC<"WriteVIMovV", "ReadVIMovV", forceMasked=0>;
// op vd, rs1
def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
(ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">,
- Sched<[WriteVIMovX_WorstCase, ReadVIMovX_WorstCase]>;
+ SchedUnaryMC<"WriteVIMovX", "ReadVIMovX", forceMasked=0>;
// op vd, imm
def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
(ins simm5:$imm), "vmv.v.i", "$vd, $imm">,
- Sched<[WriteVIMovI_WorstCase]>;
+ SchedNullaryMC<"WriteVIMovI", forceMasked=0>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Vector Fixed-Point Arithmetic Instructions
@@ -1373,8 +1390,7 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
}
// Vector Widening Floating-Point Fused Multiply-Add Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
- Uses = [FRM], mayRaiseFPException = true in {
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
@@ -1435,15 +1451,14 @@ let vm = 0 in
def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
"vfmerge.vfm", "$vd, $vs2, $rs1, v0">,
- Sched<[WriteVFMergeV_WorstCase, ReadVFMergeV_WorstCase,
- ReadVFMergeF_WorstCase, ReadVMask]>;
+ SchedBinaryMC<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF">;
// Vector Floating-Point Move Instruction
let RVVConstraint = NoConstraint in
let vm = 1, vs2 = 0 in
def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">,
- Sched<[WriteVFMovV_WorstCase, ReadVFMovF_WorstCase]>;
+ SchedUnaryMC<"WriteVFMovV", "ReadVFMovF", forceMasked=0>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1584,15 +1599,13 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
"vcpop.m", "$vd, $vs2$vm">,
- Sched<[WriteVMPopV_WorstCase, ReadVMPopV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVMPopV", "ReadVMPopV">;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
"vfirst.m", "$vd, $vs2$vm">,
- Sched<[WriteVMFFSV_WorstCase, ReadVMFFSV_WorstCase,
- ReadVMask]>;
+ SchedUnaryMC<"WriteVMFFSV", "ReadVMFFSV">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1618,7 +1631,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
let vs2 = 0 in
def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
(ins VMaskOp:$vm), "vid.v", "$vd$vm">,
- Sched<[WriteVMIdxV_WorstCase, ReadVMask]>;
+ SchedNullaryMC<"WriteVMIdxV">;
// Integer Scalar Move Instructions
let vm = 1, RVVConstraint = NoConstraint in {
@@ -1674,8 +1687,8 @@ let Predicates = [HasVInstructions] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>;
def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
- Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase,
- ReadVRGatherVV_index_WorstCase]>;
+ SchedBinaryMC<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index">;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
@@ -1705,38 +1718,38 @@ let Predicates = [HasVInstructions] in {
def VLSEG#nf#E#eew#_V :
VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">,
- VLSEGSched<nf, eew>;
+ VLSEGSchedMC<nf, eew>;
def VLSEG#nf#E#eew#FF_V :
VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">,
- VLSEGFFSched<nf, eew>;
+ VLSEGFFSchedMC<nf, eew>;
def VSSEG#nf#E#eew#_V :
VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">,
- VSSEGSched<nf, eew>;
+ VSSEGSchedMC<nf, eew>;
// Vector Strided Instructions
def VLSSEG#nf#E#eew#_V :
VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">,
- VLSSEGSched<nf, eew>;
+ VLSSEGSchedMC<nf, eew>;
def VSSSEG#nf#E#eew#_V :
VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">,
- VSSSEGSched<nf, eew>;
+ VSSSEGSchedMC<nf, eew>;
// Vector Indexed Instructions
def VLUXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w,
"vluxseg"#nf#"ei"#eew#".v">,
- VLXSEGSched<nf, eew, "U">;
+ VLXSEGSchedMC<nf, eew, isOrdered=0>;
def VLOXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w,
"vloxseg"#nf#"ei"#eew#".v">,
- VLXSEGSched<nf, eew, "O">;
+ VLXSEGSchedMC<nf, eew, isOrdered=1>;
def VSUXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w,
"vsuxseg"#nf#"ei"#eew#".v">,
- VSXSEGSched<nf, eew, "U">;
+ VSXSEGSchedMC<nf, eew, isOrdered=0>;
def VSOXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w,
"vsoxseg"#nf#"ei"#eew#".v">,
- VSXSEGSched<nf, eew, "O">;
+ VSXSEGSchedMC<nf, eew, isOrdered=1>;
}
}
} // Predicates = [HasVInstructions]
@@ -1746,21 +1759,21 @@ let Predicates = [HasVInstructionsI64] in {
// Vector Unit-strided Segment Instructions
def VLSEG#nf#E64_V :
VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">,
- VLSEGSched<nf, 64>;
+ VLSEGSchedMC<nf, 64>;
def VLSEG#nf#E64FF_V :
VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">,
- VLSEGFFSched<nf, 64>;
+ VLSEGFFSchedMC<nf, 64>;
def VSSEG#nf#E64_V :
VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">,
- VSSEGSched<nf, 64>;
+ VSSEGSchedMC<nf, 64>;
// Vector Strided Segment Instructions
def VLSSEG#nf#E64_V :
VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">,
- VLSSEGSched<nf, 64>;
+ VLSSEGSchedMC<nf, 64>;
def VSSSEG#nf#E64_V :
VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">,
- VSSSEGSched<nf, 64>;
+ VSSSEGSchedMC<nf, 64>;
}
} // Predicates = [HasVInstructionsI64]
let Predicates = [HasVInstructionsI64, IsRV64] in {
@@ -1769,20 +1782,21 @@ let Predicates = [HasVInstructionsI64, IsRV64] in {
def VLUXSEG #nf #EI64_V
: VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64,
"vluxseg" #nf #"ei64.v">,
- VLXSEGSched<nf, 64, "U">;
+ VLXSEGSchedMC<nf, 64, isOrdered=0>;
def VLOXSEG #nf #EI64_V
: VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64,
"vloxseg" #nf #"ei64.v">,
- VLXSEGSched<nf, 64, "O">;
+ VLXSEGSchedMC<nf, 64, isOrdered=1>;
def VSUXSEG #nf #EI64_V
: VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64,
"vsuxseg" #nf #"ei64.v">,
- VSXSEGSched<nf, 64, "U">;
+ VSXSEGSchedMC<nf, 64, isOrdered=0>;
def VSOXSEG #nf #EI64_V
: VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64,
"vsoxseg" #nf #"ei64.v">,
- VSXSEGSched<nf, 64, "O">;
+ VSXSEGSchedMC<nf, 64, isOrdered=1>;
}
} // Predicates = [HasVInstructionsI64, IsRV64]
+include "RISCVInstrInfoZvfbf.td"
include "RISCVInstrInfoVPseudos.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index f8b7e32fe34c..5e06422cf9ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -31,7 +31,7 @@
/// the exact bit pattern of inactive lanes, or produce the bit pattern -1 for
/// those lanes. Note that each lane can make this choice independently.
/// Instructions which produce masks (and only those instructions) also have the
-/// option of producing a result as-if VL had been VLMAX.
+/// option of producing a result as-if VL had been VLMAX.
/// * "Undefined" - The bit pattern of the inactive lanes is unspecified, and
/// can be changed without impacting the semantics of the program. Note that
/// this concept does not exist in the specification, and requires source
@@ -52,26 +52,26 @@
///
/// Currently, the policy is represented via the following intrinsic families:
/// * _MASK - Can represent all three policy states for both tail and mask. If
-/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
-/// policy operand and tablegen flags drive the interpretation. (If policy
-/// operand is not present - there are a couple, thought we're rapidly
-/// removing them - a non-undefined policy defaults to "tail agnostic", and
-/// "mask undisturbed". Since this is the only variant with a mask, all
-/// other variants are "mask undefined".
+/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined".
+/// Otherwise, policy operand and tablegen flags drive the interpretation.
+/// (If policy operand is not present - there are a couple, though we're
+/// rapidly removing them - a non-undefined policy defaults to "tail
+/// agnostic", and "mask undisturbed". Since this is the only variant with
+/// a mask, all other variants are "mask undefined".)
/// * Unsuffixed w/ both passthrough and policy operand. Can represent all
-/// three policy states. If passthrough is IMPLICIT_DEF, then represents
-/// "undefined". Otherwise, policy operand and tablegen flags drive the
-/// interpretation.
+/// three policy states. If passthrough is IMPLICIT_DEF (or NoReg), then
+/// represents "undefined". Otherwise, policy operand and tablegen flags
+/// drive the interpretation.
/// * Unsuffixed w/o passthrough or policy operand -- Does not have a
/// passthrough operand, and thus represents the "undefined" state. Note
/// that terminology in code frequently refers to these as "TA" which is
/// confusing. We're in the process of migrating away from this
/// representation.
/// * _TU w/o policy operand -- Has a passthrough operand, and always
-/// represents the tail undisturbed state.
+/// represents the tail undisturbed state.
/// * _TU w/policy operand - Can represent all three policy states. If
-/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
-/// policy operand and tablegen flags drive the interpretation.
+/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined".
+/// Otherwise, policy operand and tablegen flags drive the interpretation.
///
//===----------------------------------------------------------------------===//
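The policy description above reduces to two independent choices (tail and mask), each with an
"undefined" escape hatch whenever the passthrough is IMPLICIT_DEF (or NoReg). As a rough
illustration only, here is a minimal, self-contained C++ sketch of that interpretation, assuming a
two-bit policy immediate (bit 0 = tail agnostic, bit 1 = mask agnostic) in the spirit of the
vta/vma convention; the enum, struct, and function names are stand-ins, not the backend's types.

// Illustrative sketch of the tail/mask policy interpretation described above.
// Assumption: bit 0 = tail agnostic, bit 1 = mask agnostic; an IMPLICIT_DEF /
// NoReg passthrough is what selects the "undefined" state.
#include <cstdio>

enum : unsigned { TailAgnostic = 1u << 0, MaskAgnostic = 1u << 1 };

struct PseudoOperands {
  bool PassthruIsImplicitDef; // passthrough tied to IMPLICIT_DEF / NoReg
  unsigned Policy;            // immediate policy operand, when present
};

static const char *tailState(const PseudoOperands &Ops) {
  if (Ops.PassthruIsImplicitDef)
    return "undefined";
  return (Ops.Policy & TailAgnostic) ? "agnostic" : "undisturbed";
}

static const char *maskState(const PseudoOperands &Ops) {
  if (Ops.PassthruIsImplicitDef)
    return "undefined";
  return (Ops.Policy & MaskAgnostic) ? "agnostic" : "undisturbed";
}

int main() {
  PseudoOperands MaskedVariant{/*PassthruIsImplicitDef=*/false,
                               /*Policy=*/TailAgnostic};
  std::printf("tail=%s mask=%s\n", tailState(MaskedVariant),
              maskState(MaskedVariant));
  return 0;
}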
@@ -81,9 +81,9 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
-// Operand that is allowed to be a register or a 5 bit immediate.
-// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same
-// pseudo instructions.
+// Operand that is allowed to be a register other than X0, a 5 bit unsigned
+// immediate, or -1. -1 means VLMAX. This allows us to pick between VSETIVLI and
+// VSETVLI opcodes using the same pseudo instructions.
def AVL : RegisterOperand<GPRNoX0> {
let OperandNamespace = "RISCVOp";
let OperandType = "OPERAND_AVL";
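For illustration, a toy C++ helper mirroring the three AVL cases the updated comment names
(a GPR other than X0, a 5-bit unsigned immediate, or the -1 sentinel meaning VLMAX). The real
vsetvli/vsetivli selection happens elsewhere in the backend; pickVsetOpcode and the sample
values below are hypothetical.

// Sketch of the AVL operand convention: -1 requests VLMAX, small non-negative
// values fit VSETIVLI's immediate, anything else stays in a register.
#include <cstdint>
#include <cstdio>

static const char *pickVsetOpcode(int64_t AVL) {
  if (AVL == -1)
    return "vsetvli with rs1=x0 (VLMAX request)";
  if (AVL >= 0 && AVL < 32)
    return "vsetivli (AVL fits the 5-bit immediate)";
  return "vsetvli (AVL lives in a GPR other than x0)";
}

int main() {
  const int64_t Samples[] = {-1, 7, 100};
  for (int64_t AVL : Samples)
    std::printf("AVL=%lld -> %s\n", (long long)AVL, pickVsetOpcode(AVL));
  return 0;
}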
@@ -115,16 +115,9 @@ class PseudoToVInst<string PseudoInst> {
["_E32", ""],
["_E16", ""],
["_E8", ""],
- ["_F64", "_F"],
- ["_F32", "_F"],
- ["_F16", "_F"],
- ["_VF64", "_VF"],
- ["_VF32", "_VF"],
- ["_VF16", "_VF"],
- ["_WF64", "_WF"],
- ["_WF32", "_WF"],
- ["_WF16", "_WF"],
- ["_TU", ""],
+ ["FPR64", "F"],
+ ["FPR32", "F"],
+ ["FPR16", "F"],
["_TIED", ""],
["_MASK", ""],
["_B64", ""],
@@ -141,7 +134,8 @@ class PseudoToVInst<string PseudoInst> {
["_M2", ""],
["_M4", ""],
["_M8", ""],
- ["_SE", ""]
+ ["_SE", ""],
+ ["_RM", ""]
];
string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst,
!subst(AffixSubst[0], AffixSubst[1], Acc));
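The !foldl/!subst chain above is simply an ordered list of string substitutions applied to the
pseudo's name to recover the base instruction name. A rough C++ equivalent follows, using a
hypothetical pseudo name and only a hand-picked subset of the affix table; the "Pseudo" -> ""
entry is assumed here so the example terminates at a plausible base name.

// Approximation of the affix-stripping fold: apply each (from, to) pair in
// order to the pseudo name. Names and table contents are illustrative.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

static std::string substAll(std::string S, const std::string &From,
                            const std::string &To) {
  for (size_t Pos = 0; (Pos = S.find(From, Pos)) != std::string::npos;
       Pos += To.size())
    S.replace(Pos, From.size(), To);
  return S;
}

int main() {
  // Subset of the affix substitutions; the full list lives in the .td file.
  std::vector<std::pair<std::string, std::string>> Affixes = {
      {"Pseudo", ""}, {"FPR32", "F"}, {"_M1", ""}, {"_MASK", ""}};
  std::string Name = "PseudoVFADD_VFPR32_M1_MASK"; // hypothetical pseudo name
  for (const auto &[From, To] : Affixes)
    Name = substAll(Name, From, To);
  std::cout << Name << "\n"; // prints "VFADD_VF"
  return 0;
}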
@@ -189,7 +183,7 @@ defvar MxListFWRed = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
// Use for zext/sext.vf2
defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
-// Use for zext/sext.vf4
+// Use for zext/sext.vf4 and vector crypto instructions
defvar MxListVF4 = [V_MF2, V_M1, V_M2, V_M4, V_M8];
// Use for zext/sext.vf8
@@ -204,7 +198,7 @@ class MxSet<int eew> {
class FPR_Info<int sew> {
RegisterClass fprclass = !cast<RegisterClass>("FPR" # sew);
- string FX = "F" # sew;
+ string FX = "FPR" # sew;
int SEW = sew;
list<LMULInfo> MxList = MxSet<sew>.m;
list<LMULInfo> MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8]));
@@ -214,16 +208,20 @@ def SCALAR_F16 : FPR_Info<16>;
def SCALAR_F32 : FPR_Info<32>;
def SCALAR_F64 : FPR_Info<64>;
+// BF16 uses the same register class as F16.
+def SCALAR_BF16 : FPR_Info<16>;
+
defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
// Used for widening instructions. It excludes F64.
defvar FPListW = [SCALAR_F16, SCALAR_F32];
+// Used for widening bf16 instructions.
+defvar BFPListW = [SCALAR_BF16];
+
class NFSet<LMULInfo m> {
- list<int> L = !cond(!eq(m.value, V_M8.value): [],
- !eq(m.value, V_M4.value): [2],
- !eq(m.value, V_M2.value): [2, 3, 4],
- true: [2, 3, 4, 5, 6, 7, 8]);
+ defvar lmul = !shl(1, m.value);
+ list<int> L = NFList<lmul>.L;
}
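The old !cond spelled the per-LMUL segment counts out by hand; the replacement derives them from
the LMUL value. Assuming NFList encodes the usual register-group constraint (NF * LMUL <= 8, with
NF between 2 and 8), this small C++ sketch reproduces the same lists the removed !cond listed.

// NF lists per integral LMUL under the NF * LMUL <= 8 constraint:
// LMUL=1 -> 2..8, LMUL=2 -> 2..4, LMUL=4 -> 2, LMUL=8 -> none.
#include <iostream>
#include <vector>

static std::vector<int> nfListFor(int LMUL) {
  std::vector<int> L;
  for (int NF = 2; NF <= 8; ++NF)
    if (NF * LMUL <= 8)
      L.push_back(NF);
  return L;
}

int main() {
  for (int LMUL : {1, 2, 4, 8}) {
    std::cout << "LMUL=" << LMUL << ":";
    for (int NF : nfListFor(LMUL))
      std::cout << ' ' << NF;
    std::cout << '\n';
  }
  return 0;
}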
class octuple_to_str<int octuple> {
@@ -243,6 +241,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
// This must be kept in sync with RISCV::VLMaxSentinel.
def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
+def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
+
// List of EEW.
defvar EEWList = [8, 16, 32, 64];
@@ -272,9 +272,10 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
OutPatFrag AVL = VLMax;
string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X",
- !eq(Scal, f16) : "F16",
- !eq(Scal, f32) : "F32",
- !eq(Scal, f64) : "F64");
+ !eq(Scal, f16) : "FPR16",
+ !eq(Scal, bf16) : "FPR16",
+ !eq(Scal, f32) : "FPR32",
+ !eq(Scal, f64) : "FPR64");
}
class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew,
@@ -356,6 +357,25 @@ defset list<VTypeInfo> AllVectors = {
}
}
+defset list<VTypeInfo> AllBFloatVectors = {
+ defset list<VTypeInfo> NoGroupBFloatVectors = {
+ defset list<VTypeInfo> FractionalGroupBFloatVectors = {
+ def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>;
+ def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>;
+ }
+ def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>;
+ }
+
+ defset list<GroupVTypeInfo> GroupBFloatVectors = {
+ def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16,
+ VRM2, V_M2, bf16, FPR16>;
+ def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16,
+ VRM4, V_M4, bf16, FPR16>;
+ def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16,
+ VRM8, V_M8, bf16, FPR16>;
+ }
+}
+
// This functor is used to obtain the int vector type that has the same SEW and
// multiplier as the input parameter type
class GetIntVTypeInfo<VTypeInfo vti> {
@@ -491,6 +511,14 @@ defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = {
def : VTypeInfoToWide<VI32M4, VF64M8>;
}
+defset list<VTypeInfoToWide> AllWidenableBFloatToFloatVectors = {
+ def : VTypeInfoToWide<VBF16MF4, VF32MF2>;
+ def : VTypeInfoToWide<VBF16MF2, VF32M1>;
+ def : VTypeInfoToWide<VBF16M1, VF32M2>;
+ def : VTypeInfoToWide<VBF16M2, VF32M4>;
+ def : VTypeInfoToWide<VBF16M4, VF32M8>;
+}
+
// This class holds the record of the RISCVVPseudoTable below.
// This represents the information we need in codegen for each pseudo.
// The definition should be consistent with `struct PseudoInfo` in
@@ -500,11 +528,21 @@ class RISCVVPseudo {
Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
// SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown).
bits<8> SEW = 0;
+ bit NeedBeInPseudoTable = 1;
+ // TargetOverlapConstraintType indicates that these instructions can
+ // overlap between source operands and destination operands.
+  // 1 -> default value, keep the current constraint
+ // 2 -> narrow case
+ // 3 -> widen case
+ // TODO: Add TargetOverlapConstraintType into PseudosTable for further
+ // query.
+ bits<2> TargetOverlapConstraintType = 1;
}
// The actual table.
def RISCVVPseudosTable : GenericTable {
let FilterClass = "RISCVVPseudo";
+ let FilterClassField = "NeedBeInPseudoTable";
let CppTypeName = "PseudoInfo";
let Fields = [ "Pseudo", "BaseInstr" ];
let PrimaryKey = [ "Pseudo" ];
@@ -534,16 +572,17 @@ def RISCVVIntrinsicsTable : GenericTable {
// unmasked variant. For all but compares, both the masked and
// unmasked variant have a passthru and policy operand. For compares,
// neither has a policy op, and only the masked version has a passthru.
-class RISCVMaskedPseudo<bits<4> MaskIdx> {
+class RISCVMaskedPseudo<bits<4> MaskIdx, bit MaskAffectsRes=false> {
Pseudo MaskedPseudo = !cast<Pseudo>(NAME);
Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME));
bits<4> MaskOpIdx = MaskIdx;
+ bit MaskAffectsResult = MaskAffectsRes;
}
def RISCVMaskedPseudosTable : GenericTable {
let FilterClass = "RISCVMaskedPseudo";
let CppTypeName = "RISCVMaskedPseudoInfo";
- let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx"];
+ let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "MaskAffectsResult"];
let PrimaryKey = ["MaskedPseudo"];
let PrimaryKeyName = "getMaskedPseudoInfo";
}
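GenericTable records like the one above are typically emitted as a sorted constant array plus a
primary-key lookup helper. The self-contained C++ sketch below shows how a consumer might use such
a record to find the unmasked twin of a masked pseudo; the opcode values, table contents, and
function body are stand-ins rather than the generated code.

// Stand-in for a generated masked-pseudo table: sorted by MaskedPseudo (the
// primary key) and searched with a binary search.
#include <algorithm>
#include <cstdio>
#include <iterator>

struct RISCVMaskedPseudoInfo {
  unsigned MaskedPseudo;
  unsigned UnmaskedPseudo;
  unsigned MaskOpIdx;     // operand index of the $vm mask in the masked form
  bool MaskAffectsResult; // mask changes the computed value itself, so the
                          // masked form cannot simply become the unmasked one
};

// Hypothetical opcodes, kept sorted by MaskedPseudo.
static const RISCVMaskedPseudoInfo Table[] = {
    {/*Masked*/ 101, /*Unmasked*/ 100, /*MaskOpIdx*/ 3, false},
    {/*Masked*/ 201, /*Unmasked*/ 200, /*MaskOpIdx*/ 4, true},
};

static const RISCVMaskedPseudoInfo *lookupMaskedPseudo(unsigned Opcode) {
  auto It = std::lower_bound(std::begin(Table), std::end(Table), Opcode,
                             [](const RISCVMaskedPseudoInfo &I, unsigned Op) {
                               return I.MaskedPseudo < Op;
                             });
  if (It == std::end(Table) || It->MaskedPseudo != Opcode)
    return nullptr;
  return It;
}

int main() {
  if (const auto *Info = lookupMaskedPseudo(201))
    std::printf("unmasked=%u maskOp=%u affectsResult=%d\n",
                Info->UnmaskedPseudo, Info->MaskOpIdx, Info->MaskAffectsResult);
  return 0;
}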
@@ -723,16 +762,18 @@ class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins, int sew = 0> :
class GetVTypePredicates<VTypeInfo vti> {
list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16],
+ !eq(vti.Scalar, bf16) : [HasVInstructionsBF16],
!eq(vti.Scalar, f32) : [HasVInstructionsAnyF],
!eq(vti.Scalar, f64) : [HasVInstructionsF64],
!eq(vti.SEW, 64) : [HasVInstructionsI64],
true : [HasVInstructions]);
}
-class VPseudoUSLoadNoMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadNoMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy),[]>,
+ ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -744,11 +785,12 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW> :
let Constraints = "$rd = $dest";
}
-class VPseudoUSLoadMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -761,10 +803,11 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW> :
let UsesMaskPolicy = 1;
}
-class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadFFNoMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs RetClass:$rd, GPR:$vl),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -776,11 +819,12 @@ class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> :
let Constraints = "$rd = $dest";
}
-class VPseudoUSLoadFFMask<VReg RetClass, int EEW> :
+class VPseudoUSLoadFFMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1,
- VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1,
+ VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -793,10 +837,11 @@ class VPseudoUSLoadFFMask<VReg RetClass, int EEW> :
let UsesMaskPolicy = 1;
}
-class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
+class VPseudoSLoadNoMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -808,11 +853,12 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
let Constraints = "$rd = $dest";
}
-class VPseudoSLoadMask<VReg RetClass, int EEW>:
+class VPseudoSLoadMask<VReg RetClass,
+ int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1, GPR:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1, GPR:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -825,11 +871,16 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
let UsesMaskPolicy = 1;
}
-class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bit Ordered, bit EarlyClobber>:
+class VPseudoILoadNoMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bit Ordered,
+ bit EarlyClobber,
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
@@ -839,29 +890,37 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest");
+ let TargetOverlapConstraintType = TargetConstraintType;
}
-class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bit Ordered, bit EarlyClobber>:
+class VPseudoILoadMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bit Ordered,
+ bit EarlyClobber,
+ int TargetConstraintType = 1> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- GPRMem:$rs1, IdxClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPRMem:$rs1, IdxClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
-class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
+class VPseudoUSStoreNoMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -871,9 +930,11 @@ class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoUSStoreMask<VReg StClass, int EEW>:
+class VPseudoUSStoreMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -883,9 +944,11 @@ class VPseudoUSStoreMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoSStoreNoMask<VReg StClass, int EEW>:
+class VPseudoSStoreNoMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2,
+ AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -895,9 +958,11 @@ class VPseudoSStoreNoMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoSStoreMask<VReg StClass, int EEW>:
+class VPseudoSStoreMask<VReg StClass,
+ int EEW> :
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -907,10 +972,11 @@ class VPseudoSStoreMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-class VPseudoNullaryNoMask<VReg RegClass>:
+class VPseudoNullaryNoMask<VReg RegClass> :
Pseudo<(outs RegClass:$rd),
- (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>, RISCVVPseudo {
+ (ins RegClass:$merge,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -920,10 +986,11 @@ class VPseudoNullaryNoMask<VReg RegClass>:
let HasVecPolicyOp = 1;
}
-class VPseudoNullaryMask<VReg RegClass>:
+class VPseudoNullaryMask<VReg RegClass> :
Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
- (ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo {
+ (ins GetVRegNoV0<RegClass>.R:$merge,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -936,9 +1003,9 @@ class VPseudoNullaryMask<VReg RegClass>:
// Nullary for pseudo instructions. They are expanded in
// RISCVExpandPseudoInsts pass.
-class VPseudoNullaryPseudoM<string BaseInst>
- : Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+class VPseudoNullaryPseudoM<string BaseInst> :
+ Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -947,33 +1014,41 @@ class VPseudoNullaryPseudoM<string BaseInst>
// BaseInstr is not used in RISCVExpandPseudoInsts pass.
// Just fill a corresponding real v-inst to pass tablegen check.
let BaseInstr = !cast<Instruction>(BaseInst);
+ // We exclude them from RISCVVPseudoTable.
+ let NeedBeInPseudoTable = 0;
}
-class VPseudoUnaryNoMask<DAGOperand RetClass, DAGOperand OpClass,
- string Constraint = ""> :
+class VPseudoUnaryNoMask<DAGOperand RetClass,
+ DAGOperand OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ (ins RetClass:$merge, OpClass:$rs2,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
}
-class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass,
- string Constraint = ""> :
+class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass,
+ DAGOperand OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -981,27 +1056,33 @@ class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass,
let UsesVXRM = 0;
}
-class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+class VPseudoUnaryMask<VReg RetClass,
+ VReg OpClass,
+ string Constraint = "",
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
-class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+class VPseudoUnaryMaskRoundingMode<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1014,10 +1095,12 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constrain
let UsesVXRM = 0;
}
-class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+class VPseudoUnaryMask_NoExcept<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1029,10 +1112,13 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint =
let usesCustomInserter = 1;
}
-class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []> {
+class VPseudoUnaryNoMask_FRM<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1040,13 +1126,17 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
- let usesCustomInserter = 1;
+ let HasRoundModeOp = 1;
}
-class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+class VPseudoUnaryMask_FRM<VReg RetClass,
+ VReg OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, ixlenimm:$frm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1055,13 +1145,13 @@ class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = "">
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
- let usesCustomInserter = 1;
+ let HasRoundModeOp = 1;
}
class VPseudoUnaryNoMaskGPROut :
- Pseudo<(outs GPR:$rd),
- (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ Pseudo<(outs GPR:$rd),
+ (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1069,10 +1159,10 @@ class VPseudoUnaryNoMaskGPROut :
let HasSEWOp = 1;
}
-class VPseudoUnaryMaskGPROut:
- Pseudo<(outs GPR:$rd),
- (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+class VPseudoUnaryMaskGPROut :
+ Pseudo<(outs GPR:$rd),
+ (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1084,10 +1174,8 @@ class VPseudoUnaryMaskGPROut:
class VPseudoUnaryAnyMask<VReg RetClass,
VReg Op1Class> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge,
- Op1Class:$rs2,
- VR:$vm, AVL:$vl, ixlenimm:$sew),
- []>,
+ (ins RetClass:$merge, Op1Class:$rs2,
+ VR:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@@ -1100,14 +1188,16 @@ class VPseudoUnaryAnyMask<VReg RetClass,
class VPseudoBinaryNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
}
@@ -1115,15 +1205,17 @@ class VPseudoBinaryNoMask<VReg RetClass,
class VPseudoBinaryNoMaskTU<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1133,14 +1225,16 @@ class VPseudoBinaryNoMaskRoundingMode<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
string Constraint,
- int UsesVXRM_ = 1> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ int UsesVXRM_ = 1,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1152,16 +1246,18 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint,
- int UsesVXRM_> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, ixlenimm:$rm, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ int UsesVXRM_,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, ixlenimm:$rm, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1175,15 +1271,17 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
// This allows maskedoff and rs2 to be the same register.
class VPseudoTiedBinaryNoMask<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs2"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1193,17 +1291,19 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs2, Op2Class:$rs1,
- ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs2"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1216,7 +1316,8 @@ class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
+ ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1229,7 +1330,8 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
- (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1243,11 +1345,11 @@ class VPseudoBinaryMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1259,16 +1361,18 @@ class VPseudoBinaryMask<VReg RetClass,
class VPseudoBinaryMaskPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1279,11 +1383,11 @@ class VPseudoTernaryMaskPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1297,13 +1401,13 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm,
- ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1319,14 +1423,16 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
class VPseudoBinaryMOutNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
}
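Most of the masked classes around here build their Constraints string with !interleave, joining the per-pseudo constraint with the destination tie. A minimal Python sketch of that joining, purely illustrative and not part of the .td file:

def join_constraints(constraint: str, tie: str = "$rd = $merge") -> str:
    # Rough equivalent of !interleave([Constraint, "$rd = $merge"], ","):
    # the pieces are joined verbatim, even when Constraint is empty.
    return ",".join([constraint, tie])

print(join_constraints("@earlyclobber $rd"))  # @earlyclobber $rd,$rd = $merge
print(join_constraints(""))                   # ,$rd = $merge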
@@ -1335,16 +1441,18 @@ class VPseudoBinaryMOutNoMask<VReg RetClass,
class VPseudoBinaryMOutMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge,
- Op1Class:$rs2, Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let UsesMaskPolicy = 1;
@@ -1355,16 +1463,18 @@ class VPseudoBinaryMOutMask<VReg RetClass,
// This allows maskedoff and rs2 to be the same register.
class VPseudoTiedBinaryMask<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1374,18 +1484,20 @@ class VPseudoTiedBinaryMask<VReg RetClass,
class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
- (ins GetVRegNoV0<RetClass>.R:$merge,
- Op2Class:$rs1,
- VMaskOp:$vm,
- ixlenimm:$rm,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op2Class:$rs1,
+ VMaskOp:$vm,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1400,17 +1512,20 @@ class VPseudoBinaryCarryIn<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
bit CarryIn,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- !if(CarryIn,
- (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
- ixlenimm:$sew),
- (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ !if(CarryIn,
+ (ins Op1Class:$rs2, Op2Class:$rs1,
+ VMV0:$carry, AVL:$vl, ixlenimm:$sew),
+ (ins Op1Class:$rs2, Op2Class:$rs1,
+ AVL:$vl, ixlenimm:$sew)), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = Constraint;
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let VLMul = MInfo.value;
@@ -1422,12 +1537,13 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
LMULInfo MInfo,
bit CarryIn,
string Constraint> :
- Pseudo<(outs RetClass:$rd),
- !if(CarryIn,
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
- ixlenimm:$sew),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
- RISCVVPseudo {
+ Pseudo<(outs RetClass:$rd),
+ !if(CarryIn,
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
+ VMV0:$carry, AVL:$vl, ixlenimm:$sew),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
+ AVL:$vl, ixlenimm:$sew)), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1442,11 +1558,10 @@ class VPseudoTernaryNoMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- AVL:$vl, ixlenimm:$sew),
- []>,
- RISCVVPseudo {
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1458,16 +1573,17 @@ class VPseudoTernaryNoMask<VReg RetClass,
class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
- []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVecPolicyOp = 1;
let HasVLOp = 1;
let HasSEWOp = 1;
@@ -1476,16 +1592,17 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
- string Constraint> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
- ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
- []>,
- RISCVVPseudo {
+ string Constraint,
+ int TargetConstraintType = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVecPolicyOp = 1;
let HasVLOp = 1;
let HasSEWOp = 1;
@@ -1493,10 +1610,12 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
let UsesVXRM = 0;
}
-class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadNoMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1508,10 +1627,12 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
let Constraints = "$rd = $dest";
}
-class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1524,10 +1645,12 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let UsesMaskPolicy = 1;
}
-class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadFFNoMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs RetClass:$rd, GPR:$vl),
(ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1539,10 +1662,12 @@ class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>:
let Constraints = "$rd = $dest";
}
-class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadFFMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
- VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1555,10 +1680,12 @@ class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>:
let UsesMaskPolicy = 1;
}
-class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoSSegLoadNoMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1570,11 +1697,13 @@ class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
let Constraints = "$rd = $merge";
}
-class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoSSegLoadMask<VReg RetClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy),[]>,
+ ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
@@ -1587,11 +1716,15 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let UsesMaskPolicy = 1;
}
-class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegLoadNoMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge, GPRMem:$rs1, IdxClass:$offset, AVL:$vl,
- ixlenimm:$sew, ixlenimm:$policy),[]>,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
@@ -1605,12 +1738,16 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVecPolicyOp = 1;
}
-class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegLoadMask<VReg RetClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
- ixlenimm:$policy),[]>,
+ ixlenimm:$policy), []>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
@@ -1625,9 +1762,11 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let UsesMaskPolicy = 1;
}
-class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoUSSegStoreNoMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
- (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1637,10 +1776,12 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoUSSegStoreMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1650,9 +1791,12 @@ class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoSSegStoreNoMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
- (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins ValClass:$rd, GPRMem:$rs1, GPR:$offset,
+ AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1662,10 +1806,12 @@ class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
+class VPseudoSSegStoreMask<VReg ValClass,
+ int EEW,
+ bits<4> NF> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, GPR: $offset,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
@@ -1675,11 +1821,15 @@ class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
}
-class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegStoreNoMask<VReg ValClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
- AVL:$vl, ixlenimm:$sew),[]>,
+ AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1689,11 +1839,15 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL
let HasSEWOp = 1;
}
-class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
+class VPseudoISegStoreMask<VReg ValClass,
+ VReg IdxClass,
+ int EEW,
+ bits<3> LMUL,
+ bits<4> NF,
+ bit Ordered> :
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo,
RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
@@ -1782,15 +1936,16 @@ multiclass VPseudoILoad<bit Ordered> {
defvar Vreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
defvar HasConstraint = !ne(dataEEW, idxEEW);
- defvar Order = !if(Ordered, "O", "U");
+ defvar TypeConstraints =
+ !if(!eq(dataEEW, idxEEW), 1, !if(!gt(dataEEW, idxEEW), !if(!ge(idxEMULOctuple, 8), 3, 1), 2));
let VLMul = dataEMUL.value in {
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
- VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>,
- VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint, TypeConstraints>,
+ VLXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
- VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>,
+ VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint, TypeConstraints>,
RISCVMaskedPseudo<MaskIdx=3>,
- VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VLXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
}
}
}
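The nested !if computing TypeConstraints above is dense; as a rough Python sketch of the same selection (function and argument names are illustrative, not from the TableGen source):

def type_constraints(data_eew: int, idx_eew: int, idx_emul_octuple: int) -> int:
    # Same decision tree as the !if nest: equal EEWs -> 1; data wider than the
    # index -> 3 only when the index EMUL is at least 1 (octuple >= 8), else 1;
    # data narrower than the index -> 2.
    if data_eew == idx_eew:
        return 1
    if data_eew > idx_eew:
        return 3 if idx_emul_octuple >= 8 else 1
    return 2

assert type_constraints(32, 16, 8) == 3   # e.g. EEW=32 data, EEW=16 indices at EMUL=1
assert type_constraints(16, 32, 4) == 2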
@@ -1853,14 +2008,13 @@ multiclass VPseudoIStore<bit Ordered> {
defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
defvar Vreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
- defvar Order = !if(Ordered, "O", "U");
let VLMul = dataEMUL.value in {
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
VPseudoIStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
- VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoIStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
- VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
}
}
}
@@ -1871,13 +2025,11 @@ multiclass VPseudoIStore<bit Ordered> {
multiclass VPseudoVPOP_M {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
- defvar WriteVMPopV_MX = !cast<SchedWrite>("WriteVMPopV_" # mx);
- defvar ReadVMPopV_MX = !cast<SchedRead>("ReadVMPopV_" # mx);
let VLMul = mti.LMul.value in {
def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut,
- Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>;
+ SchedBinary<"WriteVMPopV", "ReadVMPopV", "ReadVMPopV", mx>;
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut,
- Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>;
+ SchedBinary<"WriteVMPopV", "ReadVMPopV", "ReadVMPopV", mx>;
}
}
}
@@ -1885,13 +2037,11 @@ multiclass VPseudoVPOP_M {
multiclass VPseudoV1ST_M {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
- defvar WriteVMFFSV_MX = !cast<SchedWrite>("WriteVMFFSV_" # mx);
- defvar ReadVMFFSV_MX = !cast<SchedRead>("ReadVMFFSV_" # mx);
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut,
- Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>;
+ def "_M_" #mti.BX : VPseudoUnaryNoMaskGPROut,
+ SchedBinary<"WriteVMFFSV", "ReadVMFFSV", "ReadVMFFSV", mx>;
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut,
- Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>;
+ SchedBinary<"WriteVMFFSV", "ReadVMFFSV", "ReadVMFFSV", mx>;
}
}
}
@@ -1900,13 +2050,13 @@ multiclass VPseudoVSFS_M {
defvar constraint = "@earlyclobber $rd";
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
- defvar WriteVMSFSV_MX = !cast<SchedWrite>("WriteVMSFSV_" # mx);
- defvar ReadVMSFSV_MX = !cast<SchedRead>("ReadVMSFSV_" # mx);
let VLMul = mti.LMul.value in {
def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>,
- Sched<[WriteVMSFSV_MX, ReadVMSFSV_MX, ReadVMask]>;
+ SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
+ forceMergeOpRead=true>;
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>,
- Sched<[WriteVMSFSV_MX, ReadVMSFSV_MX, ReadVMask]>;
+ SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -1914,28 +2064,22 @@ multiclass VPseudoVSFS_M {
multiclass VPseudoVID_V {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVMIdxV_MX = !cast<SchedWrite>("WriteVMIdxV_" # mx);
- defvar ReadVMIdxV_MX = !cast<SchedRead>("ReadVMIdxV_" # mx);
-
let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>,
- Sched<[WriteVMIdxV_MX, ReadVMask]>;
- def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>,
+ def "_V_" # mx : VPseudoNullaryNoMask<m.vrclass>,
+ SchedNullary<"WriteVMIdxV", mx, forceMergeOpRead=true>;
+ def "_V_" # mx # "_MASK" : VPseudoNullaryMask<m.vrclass>,
RISCVMaskedPseudo<MaskIdx=1>,
- Sched<[WriteVMIdxV_MX, ReadVMask]>;
+ SchedNullary<"WriteVMIdxV", mx,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoNullaryPseudoM <string BaseInst> {
foreach mti = AllMasks in {
- defvar mx = mti.LMul.MX;
- defvar WriteVMALUV_MX = !cast<SchedWrite>("WriteVMALUV_" # mx);
- defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx);
-
let VLMul = mti.LMul.value in {
def "_M_" # mti.BX : VPseudoNullaryPseudoM<BaseInst # "_MM">,
- Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>;
+ SchedBinary<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", mti.LMul.MX>;
}
}
}
@@ -1944,14 +2088,14 @@ multiclass VPseudoVIOT_M {
defvar constraint = "@earlyclobber $rd";
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVMIotV_MX = !cast<SchedWrite>("WriteVMIotV_" # mx);
- defvar ReadVMIotV_MX = !cast<SchedRead>("ReadVMIotV_" # mx);
let VLMul = m.value in {
- def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
- Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
+ SchedUnary<"WriteVMIotV", "ReadVMIotV", mx,
+ forceMergeOpRead=true>;
+ def "_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
+ RISCVMaskedPseudo<MaskIdx=2, MaskAffectsRes=true>,
+ SchedUnary<"WriteVMIotV", "ReadVMIotV", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -1963,12 +2107,11 @@ multiclass VPseudoVCPR_V {
let VLMul = m.value in
foreach e = sews in {
defvar suffix = "_" # m.MX # "_E" # e;
- defvar WriteVCompressV_MX_E = !cast<SchedWrite>("WriteVCompressV" # suffix);
- defvar ReadVCompressV_MX_E = !cast<SchedRead>("ReadVCompressV" # suffix);
-
let SEW = e in
- def _VM # suffix : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
- Sched<[WriteVCompressV_MX_E, ReadVCompressV_MX_E, ReadVCompressV_MX_E]>;
+ def _VM # suffix
+ : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
+ SchedBinary<"WriteVCompressV", "ReadVCompressV", "ReadVCompressV",
+ mx, e>;
}
}
}
@@ -1978,33 +2121,50 @@ multiclass VPseudoBinary<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- int sew = 0> {
+ int sew = 0,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value, SEW=sew in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
def suffix # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
- Constraint>,
+ Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
+multiclass VPseudoBinaryNoMask<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = "",
+ int sew = 0> {
+ let VLMul = MInfo.value, SEW=sew in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ }
+}
+
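Both the existing VPseudoBinary and the new VPseudoBinaryNoMask derive the record-name suffix the same way; a small sketch of the naming scheme, with the example values below chosen purely for illustration:

def pseudo_suffix(mx: str, sew: int = 0, masked: bool = False) -> str:
    # Mirrors !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); the masked
    # definition appends "_MASK" to the same suffix.
    suffix = f"_{mx}_E{sew}" if sew else f"_{mx}"
    return suffix + ("_MASK" if masked else "")

print(pseudo_suffix("M1"))               # _M1
print(pseudo_suffix("M1", sew=32))       # _M1_E32
print(pseudo_suffix("M2", masked=True))  # _M2_MASK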
multiclass VPseudoBinaryRoundingMode<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
int sew = 0,
- int UsesVXRM = 1> {
+ int UsesVXRM = 1,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value, SEW=sew in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskRoundingMode<RetClass, Op1Class, Op2Class,
- Constraint, UsesVXRM>;
+ Constraint, UsesVXRM,
+ TargetConstraintType>;
def suffix # "_MASK" : VPseudoBinaryMaskPolicyRoundingMode<RetClass,
Op1Class,
Op2Class,
Constraint,
- UsesVXRM>,
+ UsesVXRM,
+ TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -2014,13 +2174,14 @@ multiclass VPseudoBinaryM<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask<RetClass, Op1Class,
- Op2Class, Constraint>,
+ Op2Class, Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -2045,24 +2206,26 @@ multiclass VPseudoBinaryEmul<VReg RetClass,
multiclass VPseudoTiedBinary<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask<RetClass, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask<RetClass, Op2Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
}
}
multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX # "_TIED":
- VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint>;
+ VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK_TIED" :
- VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint>;
+ VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>;
}
}
@@ -2098,11 +2261,11 @@ multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> {
defvar emul = !cast<LMULInfo>("V_" # emulMX);
defvar sews = SchedSEWSet<mx>.val;
foreach e = sews in {
- defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e);
- defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e);
- defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e);
- defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint, e>,
- Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E, ReadVRGatherVV_index_MX_E]>;
+ defm _VV
+ : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul,
+ Constraint, e>,
+ SchedBinary<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index", mx, e, forceMergeOpRead=true>;
}
}
}
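The removed defvar lines spell out the scheduling-resource naming that the SchedBinary helper now hides; a sketch of that scheme (illustrative only, not an LLVM API):

def sched_resource(base: str, mx: str, sew: int = 0) -> str:
    # Names like "WriteVRGatherVV_M2_E32" or "ReadVRGatherVX_data_M1", as built
    # by the old !cast<SchedWrite>/!cast<SchedRead> defvars.
    return f"{base}_{mx}" + (f"_E{sew}" if sew else "")

print(sched_resource("WriteVRGatherVV", "M2", 32))  # WriteVRGatherVV_M2_E32
print(sched_resource("ReadVRGatherVX_data", "M1"))  # ReadVRGatherVX_data_M1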
@@ -2119,13 +2282,9 @@ multiclass VPseudoBinaryV_VX_RM<LMULInfo m, string Constraint = ""> {
multiclass VPseudoVSLD1_VX<string Constraint = ""> {
foreach m = MxList in {
- defvar mx = m.MX;
- defvar WriteVISlide1X_MX = !cast<SchedWrite>("WriteVISlide1X_" # mx);
- defvar ReadVISlideV_MX = !cast<SchedRead>("ReadVISlideV_" # mx);
- defvar ReadVISlideX_MX = !cast<SchedRead>("ReadVISlideX_" # mx);
-
defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>,
- Sched<[WriteVISlide1X_MX, ReadVISlideV_MX, ReadVISlideX_MX, ReadVMask]>;
+ SchedBinary<"WriteVISlide1X", "ReadVISlideV", "ReadVISlideX",
+ m.MX, forceMergeOpRead=true>;
}
}
@@ -2143,14 +2302,10 @@ multiclass VPseudoBinaryV_VF_RM<LMULInfo m, FPR_Info f, string Constraint = "",
multiclass VPseudoVSLD1_VF<string Constraint = ""> {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFSlide1F_MX = !cast<SchedWrite>("WriteVFSlide1F_" # mx);
- defvar ReadVFSlideV_MX = !cast<SchedRead>("ReadVFSlideV_" # mx);
- defvar ReadVFSlideF_MX = !cast<SchedRead>("ReadVFSlideF_" # mx);
-
- defm "_V" # f.FX :
- VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>,
- Sched<[WriteVFSlide1F_MX, ReadVFSlideV_MX, ReadVFSlideF_MX, ReadVMask]>;
+ defm "_V" #f.FX
+ : VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>,
+ SchedBinary<"WriteVFSlide1F", "ReadVFSlideV", "ReadVFSlideF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2163,15 +2318,12 @@ multiclass VPseudoBinaryV_VI_RM<Operand ImmType = simm5, LMULInfo m, string Cons
defm _VI : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
-multiclass VPseudoVALU_MM {
+multiclass VPseudoVALU_MM<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVMALUV_MX = !cast<SchedWrite>("WriteVMALUV_" # mx);
- defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx);
-
- let VLMul = m.value in {
+ let VLMul = m.value, isCommutable = Commutable in {
def "_MM_" # mx : VPseudoBinaryNoMask<VR, VR, VR, "">,
- Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>;
+ SchedBinary<"WriteVMALUV", "ReadVMALUV", "ReadVMALUV", mx>;
}
}
}
@@ -2185,17 +2337,23 @@ multiclass VPseudoVALU_MM {
// destination register group is legal. Otherwise, it is illegal.
multiclass VPseudoBinaryW_VV<LMULInfo m> {
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_VV_RM<LMULInfo m> {
defm _VV : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd", UsesVXRM=0>;
+ "@earlyclobber $rd", UsesVXRM=0,
+ TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_VX<LMULInfo m> {
defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
+}
+
+multiclass VPseudoBinaryW_VI<Operand ImmType, LMULInfo m> {
+ defm "_VI" : VPseudoBinary<m.wvrclass, m.vrclass, ImmType, m,
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_VF<LMULInfo m, FPR_Info f> {
@@ -2208,36 +2366,40 @@ multiclass VPseudoBinaryW_VF_RM<LMULInfo m, FPR_Info f> {
defm "_V" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass,
f.fprclass, m,
"@earlyclobber $rd",
- UsesVXRM=0>;
+ UsesVXRM=0,
+ TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_WV<LMULInfo m> {
defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
defm _WV : VPseudoTiedBinary<m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_WV_RM<LMULInfo m> {
defm _WV : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd", UsesVXRM=0>;
+ "@earlyclobber $rd", UsesVXRM=0, TargetConstraintType=3>;
defm _WV : VPseudoTiedBinaryRoundingMode<m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd">;
+ "@earlyclobber $rd", TargetConstraintType=3>;
}
multiclass VPseudoBinaryW_WX<LMULInfo m> {
- defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m>;
+ defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m, /*Constraint*/ "", TargetConstraintType=3>;
}
-multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f> {
+multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defm "_W" # f.FX : VPseudoBinary<m.wvrclass, m.wvrclass,
- f.fprclass, m>;
+ f.fprclass, m, /*Constraint*/ "", TargetConstraintType=TargetConstraintType>;
}
multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> {
defm "_W" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass,
f.fprclass, m,
- UsesVXRM=0>;
+ Constraint="",
+ sew=0,
+ UsesVXRM=0,
+ TargetConstraintType=3>;
}
// Narrowing instructions like vnsrl/vnsra/vnclip(u) don't need @earlyclobber
@@ -2245,9 +2407,9 @@ multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> {
// exception from the spec.
// "The destination EEW is smaller than the source EEW and the overlap is in the
// lowest-numbered part of the source register group."
-multiclass VPseudoBinaryV_WV<LMULInfo m> {
+multiclass VPseudoBinaryV_WV<LMULInfo m, int TargetConstraintType = 1> {
defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m,
- !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>;
}
multiclass VPseudoBinaryV_WV_RM<LMULInfo m> {
@@ -2256,9 +2418,9 @@ multiclass VPseudoBinaryV_WV_RM<LMULInfo m> {
"@earlyclobber $rd", "")>;
}
-multiclass VPseudoBinaryV_WX<LMULInfo m> {
+multiclass VPseudoBinaryV_WX<LMULInfo m, int TargetConstraintType = 1> {
defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m,
- !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>;
}
multiclass VPseudoBinaryV_WX_RM<LMULInfo m> {
@@ -2267,9 +2429,9 @@ multiclass VPseudoBinaryV_WX_RM<LMULInfo m> {
"@earlyclobber $rd", "")>;
}
-multiclass VPseudoBinaryV_WI<LMULInfo m> {
+multiclass VPseudoBinaryV_WI<LMULInfo m, int TargetConstraintType = 1> {
defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m,
- !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 8), "@earlyclobber $rd", ""), TargetConstraintType=TargetConstraintType>;
}
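The repeated !if(!ge(m.octuple, 8), "@earlyclobber $rd", "") above gates the constraint on LMUL, with octuple reading as LMUL in eighths; a minimal sketch assuming that encoding:

def narrow_dest_constraint(lmul_octuple: int) -> str:
    # Mirrors the gate above: for fractional-LMUL destinations (octuple < 8)
    # the wide source still fits in one register, so any overlap is in its
    # lowest-numbered part and no earlyclobber is required; from LMUL >= 1 up,
    # the constraint is kept.
    return "@earlyclobber $rd" if lmul_octuple >= 8 else ""

assert narrow_dest_constraint(4) == ""                    # MF2
assert narrow_dest_constraint(16) == "@earlyclobber $rd"  # M2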
multiclass VPseudoBinaryV_WI_RM<LMULInfo m> {
@@ -2282,12 +2444,15 @@ multiclass VPseudoBinaryV_WI_RM<LMULInfo m> {
// vector register is v0.
// For vadc and vsbc, CarryIn == 1 and CarryOut == 0
multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
+ string Constraint = "",
+ bit Commutable = 0,
+ int TargetConstraintType = 1> {
+ let isCommutable = Commutable in
def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, m.vrclass, m, CarryIn, Constraint>;
+ m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}
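The nested !if selecting the result class above is easier to follow written out; a hedged sketch (register-class names are just strings here, and the vadc/vsbc attribution comes from the comment above):

def carry_result_class(carry_in: bool, carry_out: bool, vrclass: str) -> str:
    # Carry-out ops write a mask result, hence class VR; carry-in-only ops
    # (vadc/vsbc per the comment above) must not write v0, hence GetVRegNoV0;
    # anything else keeps the plain vector register class.
    if carry_out:
        return "VR"
    if carry_in and not carry_out:
        return f"GetVRegNoV0<{vrclass}>.R"
    return vrclass

assert carry_result_class(True, False, "VRM2") == "GetVRegNoV0<VRM2>.R"
assert carry_result_class(True, True, "VRM2") == "VR"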
multiclass VPseudoTiedBinaryV_VM<LMULInfo m> {
@@ -2297,12 +2462,12 @@ multiclass VPseudoTiedBinaryV_VM<LMULInfo m> {
}
multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
+ string Constraint = "", int TargetConstraintType = 1> {
def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, GPR, m, CarryIn, Constraint>;
+ m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>;
}
multiclass VPseudoTiedBinaryV_XM<LMULInfo m> {
@@ -2315,25 +2480,23 @@ multiclass VPseudoVMRG_FM {
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFMergeV_MX = !cast<SchedWrite>("WriteVFMergeV_" # mx);
- defvar ReadVFMergeV_MX = !cast<SchedRead>("ReadVFMergeV_" # mx);
- defvar ReadVFMergeF_MX = !cast<SchedRead>("ReadVFMergeF_" # mx);
-
- def "_V" # f.FX # "M_" # mx:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, f.fprclass, m, CarryIn=1, Constraint="">,
- Sched<[WriteVFMergeV_MX, ReadVFMergeV_MX, ReadVFMergeF_MX, ReadVMask]>;
+ def "_V" # f.FX # "M_" # mx
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass,
+ f.fprclass, m, CarryIn=1,
+ Constraint = "">,
+ SchedBinary<"WriteVFMergeV", "ReadVFMergeV", "ReadVFMergeF", mx,
+ forceMasked=1, forceMergeOpRead=true>;
}
}
}
multiclass VPseudoBinaryV_IM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
+ string Constraint = "", int TargetConstraintType = 1> {
def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, simm5, m, CarryIn, Constraint>;
+ m.vrclass, simm5, m, CarryIn, Constraint, TargetConstraintType>;
}
multiclass VPseudoTiedBinaryV_IM<LMULInfo m> {
@@ -2346,19 +2509,16 @@ multiclass VPseudoUnaryVMV_V_X_I {
foreach m = MxList in {
let VLMul = m.value in {
defvar mx = m.MX;
- defvar WriteVIMovV_MX = !cast<SchedWrite>("WriteVIMovV_" # mx);
- defvar WriteVIMovX_MX = !cast<SchedWrite>("WriteVIMovX_" # mx);
- defvar WriteVIMovI_MX = !cast<SchedWrite>("WriteVIMovI_" # mx);
- defvar ReadVIMovV_MX = !cast<SchedRead>("ReadVIMovV_" # mx);
- defvar ReadVIMovX_MX = !cast<SchedRead>("ReadVIMovX_" # mx);
-
let VLMul = m.value in {
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>;
+ SchedUnary<"WriteVIMovV", "ReadVIMovV", mx,
+ forceMergeOpRead=true>;
def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
- Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>;
+ SchedUnary<"WriteVIMovX", "ReadVIMovX", mx,
+ forceMergeOpRead=true>;
def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>,
- Sched<[WriteVIMovI_MX]>;
+ SchedNullary<"WriteVIMovI", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -2368,13 +2528,10 @@ multiclass VPseudoVMV_F {
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFMovV_MX = !cast<SchedWrite>("WriteVFMovV_" # mx);
- defvar ReadVFMovF_MX = !cast<SchedRead>("ReadVFMovF_" # mx);
-
let VLMul = m.value in {
def "_" # f.FX # "_" # mx :
VPseudoUnaryNoMask<m.vrclass, f.fprclass>,
- Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>;
+ SchedUnary<"WriteVFMovV", "ReadVFMovF", mx, forceMergeOpRead=true>;
}
}
}
@@ -2383,15 +2540,14 @@ multiclass VPseudoVMV_F {
multiclass VPseudoVCLS_V {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFClassV_MX = !cast<SchedWrite>("WriteVFClassV_" # mx);
- defvar ReadVFClassV_MX = !cast<SchedRead>("ReadVFClassV_" # mx);
-
let VLMul = m.value in {
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFClassV", "ReadVFClassV", mx,
+ forceMergeOpRead=true>;
def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFClassV", "ReadVFClassV", mx,
+ forceMergeOpRead=true>;
}
}
}
@@ -2404,17 +2560,15 @@ multiclass VPseudoVSQR_V_RM {
let VLMul = m.value in
foreach e = sews in {
defvar suffix = "_" # mx # "_E" # e;
- defvar WriteVFSqrtV_MX_E = !cast<SchedWrite>("WriteVFSqrtV" # suffix);
- defvar ReadVFSqrtV_MX_E = !cast<SchedRead>("ReadVFSqrtV" # suffix);
-
let SEW = e in {
def "_V" # suffix : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
- Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E,
- ReadVMask]>;
- def "_V" # suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E,
- ReadVMask]>;
+ SchedUnary<"WriteVFSqrtV", "ReadVFSqrtV", mx, e,
+ forceMergeOpRead=true>;
+ def "_V" #suffix # "_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFSqrtV", "ReadVFSqrtV", mx, e,
+ forceMergeOpRead=true>;
}
}
}
@@ -2423,15 +2577,14 @@ multiclass VPseudoVSQR_V_RM {
multiclass VPseudoVRCP_V {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx);
- defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx);
-
let VLMul = m.value in {
- def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
+ def "_V_" # mx
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
+ def "_V_" # mx # "_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
}
}
}
@@ -2439,69 +2592,59 @@ multiclass VPseudoVRCP_V {
multiclass VPseudoVRCP_V_RM {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx);
- defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx);
-
let VLMul = m.value in {
- def "_V_" # mx : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
+ def "_V_" # mx
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
+ def "_V_" # mx # "_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF2 {
+multiclass PseudoVEXT_VF2<int TargetConstraintType = 1> {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF2 in {
defvar mx = m.MX;
- defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
- defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
-
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>,
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints>,
+ VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF4 {
+multiclass PseudoVEXT_VF4<int TargetConstraintType = 1> {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF4 in {
defvar mx = m.MX;
- defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
- defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
-
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>,
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints>,
+ VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF8 {
+multiclass PseudoVEXT_VF8<int TargetConstraintType = 1> {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF8 in {
defvar mx = m.MX;
- defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
- defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
-
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>,
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints>,
+ VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
+ SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
@@ -2517,51 +2660,43 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
-multiclass VPseudoBinaryM_VV<LMULInfo m> {
+multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
-multiclass VPseudoBinaryM_VX<LMULInfo m> {
+multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
defm "_VX" :
VPseudoBinaryM<VR, m.vrclass, GPR, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
-multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f> {
+multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defm "_V" # f.FX :
VPseudoBinaryM<VR, m.vrclass, f.fprclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
-multiclass VPseudoBinaryM_VI<LMULInfo m> {
+multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> {
defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
+ !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
}
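Per the comment above, with LMUL <= 1 the whole group is a single register, so only the LMUL >= 2 forms (octuple >= 16) need the constraint here; a quick worked check under the same octuple-is-8x-LMUL reading:

for name, octuple in [("MF2", 4), ("M1", 8), ("M2", 16), ("M4", 32)]:
    constraint = "@earlyclobber $rd" if octuple >= 16 else ""
    print(name, repr(constraint))  # MF2/M1 -> '', M2/M4 -> '@earlyclobber $rd'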
multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVRGatherVX_MX = !cast<SchedWrite>("WriteVRGatherVX_" # mx);
- defvar WriteVRGatherVI_MX = !cast<SchedWrite>("WriteVRGatherVI_" # mx);
- defvar ReadVRGatherVX_data_MX = !cast<SchedRead>("ReadVRGatherVX_data_" # mx);
- defvar ReadVRGatherVX_index_MX = !cast<SchedRead>("ReadVRGatherVX_index_" # mx);
- defvar ReadVRGatherVI_data_MX = !cast<SchedRead>("ReadVRGatherVI_data_" # mx);
-
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVRGatherVX_MX, ReadVRGatherVX_data_MX,
- ReadVRGatherVX_index_MX, ReadVMask]>;
+ SchedBinary<"WriteVRGatherVX", "ReadVRGatherVX_data",
+ "ReadVRGatherVX_index", mx, forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVRGatherVI_MX, ReadVRGatherVI_data_MX, ReadVMask]>;
+ SchedUnary<"WriteVRGatherVI", "ReadVRGatherVI_data", mx,
+ forceMergeOpRead=true>;
defvar sews = SchedSEWSet<mx>.val;
foreach e = sews in {
- defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e);
- defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e);
- defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e);
defm "" : VPseudoBinaryV_VV<m, Constraint, e>,
- Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E,
- ReadVRGatherVV_index_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVRGatherVV", "ReadVRGatherVV_data",
+ "ReadVRGatherVV_index", mx, e, forceMergeOpRead=true>;
}
}
}
@@ -2569,18 +2704,14 @@ multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = "">
multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSALUV_MX = !cast<SchedWrite>("WriteVSALUV_" # mx);
- defvar WriteVSALUX_MX = !cast<SchedWrite>("WriteVSALUX_" # mx);
- defvar WriteVSALUI_MX = !cast<SchedWrite>("WriteVSALUI_" # mx);
- defvar ReadVSALUV_MX = !cast<SchedRead>("ReadVSALUV_" # mx);
- defvar ReadVSALUX_MX = !cast<SchedRead>("ReadVSALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVSALUV_MX, ReadVSALUV_MX, ReadVSALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUV", "ReadVSALUV", "ReadVSALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVSALUX_MX, ReadVSALUV_MX, ReadVSALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVSALUI_MX, ReadVSALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVSALUI", "ReadVSALUV", mx, forceMergeOpRead=true>;
}
}
@@ -2588,129 +2719,98 @@ multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""
multiclass VPseudoVSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVShiftV_MX = !cast<SchedWrite>("WriteVShiftV_" # mx);
- defvar WriteVShiftX_MX = !cast<SchedWrite>("WriteVShiftX_" # mx);
- defvar WriteVShiftI_MX = !cast<SchedWrite>("WriteVShiftI_" # mx);
- defvar ReadVShiftV_MX = !cast<SchedRead>("ReadVShiftV_" # mx);
- defvar ReadVShiftX_MX = !cast<SchedRead>("ReadVShiftX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVShiftV_MX, ReadVShiftV_MX, ReadVShiftV_MX, ReadVMask]>;
+ SchedBinary<"WriteVShiftV", "ReadVShiftV", "ReadVShiftV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVShiftX_MX, ReadVShiftV_MX, ReadVShiftX_MX, ReadVMask]>;
+ SchedBinary<"WriteVShiftX", "ReadVShiftV", "ReadVShiftX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVShiftI_MX, ReadVShiftV_MX, ReadVMask]>;
+ SchedUnary<"WriteVShiftI", "ReadVShiftV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSShiftV_MX = !cast<SchedWrite>("WriteVSShiftV_" # mx);
- defvar WriteVSShiftX_MX = !cast<SchedWrite>("WriteVSShiftX_" # mx);
- defvar WriteVSShiftI_MX = !cast<SchedWrite>("WriteVSShiftI_" # mx);
- defvar ReadVSShiftV_MX = !cast<SchedRead>("ReadVSShiftV_" # mx);
- defvar ReadVSShiftX_MX = !cast<SchedRead>("ReadVSShiftX_" # mx);
-
defm "" : VPseudoBinaryV_VV_RM<m, Constraint>,
- Sched<[WriteVSShiftV_MX, ReadVSShiftV_MX, ReadVSShiftV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSShiftV", "ReadVSShiftV", "ReadVSShiftV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX_RM<m, Constraint>,
- Sched<[WriteVSShiftX_MX, ReadVSShiftV_MX, ReadVSShiftX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSShiftX", "ReadVSShiftV", "ReadVSShiftX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI_RM<ImmType, m, Constraint>,
- Sched<[WriteVSShiftI_MX, ReadVSShiftV_MX, ReadVMask]>;
+ SchedUnary<"WriteVSShiftI", "ReadVSShiftV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
- defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx);
- defvar WriteVIALUI_MX = !cast<SchedWrite>("WriteVIALUI_" # mx);
- defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
- defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVSALU_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSALUV_MX = !cast<SchedWrite>("WriteVSALUV_" # mx);
- defvar WriteVSALUX_MX = !cast<SchedWrite>("WriteVSALUX_" # mx);
- defvar ReadVSALUV_MX = !cast<SchedRead>("ReadVSALUV_" # mx);
- defvar ReadVSALUX_MX = !cast<SchedRead>("ReadVSALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVSALUV_MX, ReadVSALUV_MX, ReadVSALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUV", "ReadVSALUV", "ReadVSALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVSALUX_MX, ReadVSALUV_MX, ReadVSALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSALUX", "ReadVSALUV", "ReadVSALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVSMUL_VV_VX_RM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVSMulV_MX = !cast<SchedWrite>("WriteVSMulV_" # mx);
- defvar WriteVSMulX_MX = !cast<SchedWrite>("WriteVSMulX_" # mx);
- defvar ReadVSMulV_MX = !cast<SchedRead>("ReadVSMulV_" # mx);
- defvar ReadVSMulX_MX = !cast<SchedRead>("ReadVSMulX_" # mx);
-
defm "" : VPseudoBinaryV_VV_RM<m>,
- Sched<[WriteVSMulV_MX, ReadVSMulV_MX, ReadVSMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVSMulV", "ReadVSMulV", "ReadVSMulV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX_RM<m>,
- Sched<[WriteVSMulX_MX, ReadVSMulV_MX, ReadVSMulX_MX, ReadVMask]>;
+ SchedBinary<"WriteVSMulX", "ReadVSMulV", "ReadVSMulX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVAALU_VV_VX_RM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVAALUV_MX = !cast<SchedWrite>("WriteVAALUV_" # mx);
- defvar WriteVAALUX_MX = !cast<SchedWrite>("WriteVAALUX_" # mx);
- defvar ReadVAALUV_MX = !cast<SchedRead>("ReadVAALUV_" # mx);
- defvar ReadVAALUX_MX = !cast<SchedRead>("ReadVAALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV_RM<m>,
- Sched<[WriteVAALUV_MX, ReadVAALUV_MX, ReadVAALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVAALUV", "ReadVAALUV", "ReadVAALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX_RM<m>,
- Sched<[WriteVAALUX_MX, ReadVAALUV_MX, ReadVAALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVAALUX", "ReadVAALUV", "ReadVAALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMinMaxV_MX = !cast<SchedWrite>("WriteVIMinMaxV_" # mx);
- defvar WriteVIMinMaxX_MX = !cast<SchedWrite>("WriteVIMinMaxX_" # mx);
- defvar ReadVIMinMaxV_MX = !cast<SchedRead>("ReadVIMinMaxV_" # mx);
- defvar ReadVIMinMaxX_MX = !cast<SchedRead>("ReadVIMinMaxX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIMinMaxX_MX, ReadVIMinMaxV_MX, ReadVIMinMaxX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>;
}
}
multiclass VPseudoVMUL_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMulV_MX = !cast<SchedWrite>("WriteVIMulV_" # mx);
- defvar WriteVIMulX_MX = !cast<SchedWrite>("WriteVIMulX_" # mx);
- defvar ReadVIMulV_MX = !cast<SchedRead>("ReadVIMulV_" # mx);
- defvar ReadVIMulX_MX = !cast<SchedRead>("ReadVIMulX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIMulV_MX, ReadVIMulV_MX, ReadVIMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIMulX_MX, ReadVIMulV_MX, ReadVIMulX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>;
}
}
@@ -2719,38 +2819,26 @@ multiclass VPseudoVDIV_VV_VX {
defvar mx = m.MX;
defvar sews = SchedSEWSet<mx>.val;
foreach e = sews in {
- defvar WriteVIDivV_MX_E = !cast<SchedWrite>("WriteVIDivV_" # mx # "_E" # e);
- defvar WriteVIDivX_MX_E = !cast<SchedWrite>("WriteVIDivX_" # mx # "_E" # e);
- defvar ReadVIDivV_MX_E = !cast<SchedRead>("ReadVIDivV_" # mx # "_E" # e);
- defvar ReadVIDivX_MX_E = !cast<SchedRead>("ReadVIDivX_" # mx # "_E" # e);
-
defm "" : VPseudoBinaryV_VV<m, "", e>,
- Sched<[WriteVIDivV_MX_E, ReadVIDivV_MX_E, ReadVIDivV_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVIDivV", "ReadVIDivV", "ReadVIDivV", mx, e>;
defm "" : VPseudoBinaryV_VX<m, "", e>,
- Sched<[WriteVIDivX_MX_E, ReadVIDivV_MX_E, ReadVIDivX_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVIDivX", "ReadVIDivV", "ReadVIDivX", mx, e>;
}
}
}
multiclass VPseudoVFMUL_VV_VF_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMulV_MX = !cast<SchedWrite>("WriteVFMulV_" # mx);
- defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx);
-
defm "" : VPseudoBinaryFV_VV_RM<m>,
- Sched<[WriteVFMulV_MX, ReadVFMulV_MX, ReadVFMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMulF_MX = !cast<SchedWrite>("WriteVFMulF_" # mx);
- defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx);
- defvar ReadVFMulF_MX = !cast<SchedRead>("ReadVFMulF_" # mx);
-
defm "" : VPseudoBinaryV_VF_RM<m, f>,
- Sched<[WriteVFMulF_MX, ReadVFMulV_MX, ReadVFMulF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2760,23 +2848,17 @@ multiclass VPseudoVFDIV_VV_VF_RM {
defvar mx = m.MX;
defvar sews = SchedSEWSet<mx, isF=1>.val;
foreach e = sews in {
- defvar WriteVFDivV_MX_E = !cast<SchedWrite>("WriteVFDivV_" # mx # "_E" # e);
- defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # e);
-
defm "" : VPseudoBinaryFV_VV_RM<m, "", e>,
- Sched<[WriteVFDivV_MX_E, ReadVFDivV_MX_E, ReadVFDivV_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVFDivV", "ReadVFDivV", "ReadVFDivV", mx, e,
+ forceMergeOpRead=true>;
}
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW);
- defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW);
- defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW);
-
defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>,
- Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW,
+ forceMergeOpRead=true>;
}
}
}
@@ -2784,118 +2866,84 @@ multiclass VPseudoVFDIV_VV_VF_RM {
multiclass VPseudoVFRDIV_VF_RM {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW);
- defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW);
- defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW);
-
defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>,
- Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>;
+ SchedBinary<"WriteVFDivF", "ReadVFDivV", "ReadVFDivF", m.MX, f.SEW,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVALU_VV_VX {
foreach m = MxList in {
- defvar mx = m.MX;
- defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
- defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
- defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
- defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
-
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVSGNJ_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFSgnjV_MX = !cast<SchedWrite>("WriteVFSgnjV_" # mx);
- defvar ReadVFSgnjV_MX = !cast<SchedRead>("ReadVFSgnjV_" # mx);
-
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFSgnjV_MX, ReadVFSgnjV_MX, ReadVFSgnjV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFSgnjF_MX = !cast<SchedWrite>("WriteVFSgnjF_" # mx);
- defvar ReadVFSgnjV_MX = !cast<SchedRead>("ReadVFSgnjV_" # mx);
- defvar ReadVFSgnjF_MX = !cast<SchedRead>("ReadVFSgnjF_" # mx);
-
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFSgnjF_MX, ReadVFSgnjV_MX, ReadVFSgnjF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVMAX_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMinMaxV_MX = !cast<SchedWrite>("WriteVFMinMaxV_" # mx);
- defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx);
-
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMinMaxF_MX = !cast<SchedWrite>("WriteVFMinMaxF_" # mx);
- defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx);
- defvar ReadVFMinMaxF_MX = !cast<SchedRead>("ReadVFMinMaxF_" # mx);
-
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFMinMaxF_MX, ReadVFMinMaxV_MX, ReadVFMinMaxF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVALU_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
-
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVALU_VV_VF_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
-
defm "" : VPseudoBinaryFV_VV_RM<m>,
- Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
defm "" : VPseudoBinaryV_VF_RM<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2903,13 +2951,9 @@ multiclass VPseudoVALU_VV_VF_RM {
multiclass VPseudoVALU_VF {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
-
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2917,13 +2961,9 @@ multiclass VPseudoVALU_VF {
multiclass VPseudoVALU_VF_RM {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
- defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
- defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
-
defm "" : VPseudoBinaryV_VF_RM<m, f>,
- Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2931,67 +2971,56 @@ multiclass VPseudoVALU_VF_RM {
multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUX_" # mx);
- defvar WriteVIALUI_MX = !cast<SchedWrite>("WriteVIALUI_" # mx);
- defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
- defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
-
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VI<ImmType, m>,
- Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVIALUI", "ReadVIALUV", mx, forceMergeOpRead=true>;
}
}
multiclass VPseudoVWALU_VV_VX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWALUV_MX = !cast<SchedWrite>("WriteVIWALUV_" # mx);
- defvar WriteVIWALUX_MX = !cast<SchedWrite>("WriteVIWALUX_" # mx);
- defvar ReadVIWALUV_MX = !cast<SchedRead>("ReadVIWALUV_" # mx);
- defvar ReadVIWALUX_MX = !cast<SchedRead>("ReadVIWALUX_" # mx);
-
defm "" : VPseudoBinaryW_VV<m>,
- Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryW_VX<m>,
- Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryW_VX<m>,
+ SchedBinary<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX", mx,
+ forceMergeOpRead=true>;
+ }
+}
+
+multiclass VPseudoVWALU_VV_VX_VI<Operand ImmType> : VPseudoVWALU_VV_VX {
+ foreach m = MxListW in {
+ defm "" : VPseudoBinaryW_VI<ImmType, m>;
}
}
multiclass VPseudoVWMUL_VV_VX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWMulV_MX = !cast<SchedWrite>("WriteVIWMulV_" # mx);
- defvar WriteVIWMulX_MX = !cast<SchedWrite>("WriteVIWMulX_" # mx);
- defvar ReadVIWMulV_MX = !cast<SchedRead>("ReadVIWMulV_" # mx);
- defvar ReadVIWMulX_MX = !cast<SchedRead>("ReadVIWMulX_" # mx);
-
defm "" : VPseudoBinaryW_VV<m>,
- Sched<[WriteVIWMulV_MX, ReadVIWMulV_MX, ReadVIWMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
- Sched<[WriteVIWMulX_MX, ReadVIWMulV_MX, ReadVIWMulX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWMulX", "ReadVIWMulV", "ReadVIWMulX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWMUL_VV_VF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulV_MX = !cast<SchedWrite>("WriteVFWMulV_" # mx);
- defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
-
defm "" : VPseudoBinaryW_VV_RM<m>,
- Sched<[WriteVFWMulV_MX, ReadVFWMulV_MX, ReadVFWMulV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulF_MX = !cast<SchedWrite>("WriteVFWMulF_" # mx);
- defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
- defvar ReadVFWMulF_MX = !cast<SchedRead>("ReadVFWMulF_" # mx);
-
defm "" : VPseudoBinaryW_VF_RM<m, f>,
- Sched<[WriteVFWMulF_MX, ReadVFWMulV_MX, ReadVFWMulF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -2999,59 +3028,42 @@ multiclass VPseudoVWMUL_VV_VF_RM {
multiclass VPseudoVWALU_WV_WX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWALUV_MX = !cast<SchedWrite>("WriteVIWALUV_" # mx);
- defvar WriteVIWALUX_MX = !cast<SchedWrite>("WriteVIWALUX_" # mx);
- defvar ReadVIWALUV_MX = !cast<SchedRead>("ReadVIWALUV_" # mx);
- defvar ReadVIWALUX_MX = !cast<SchedRead>("ReadVIWALUX_" # mx);
-
defm "" : VPseudoBinaryW_WV<m>,
- Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_WX<m>,
- Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIWALUX", "ReadVIWALUV", "ReadVIWALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVFWALU_VV_VF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
-
defm "" : VPseudoBinaryW_VV_RM<m>,
- Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUF_MX = !cast<SchedWrite>("WriteVFWALUF_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
- defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx);
-
defm "" : VPseudoBinaryW_VF_RM<m, f>,
- Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVFWALU_WV_WF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
-
defm "" : VPseudoBinaryW_WV_RM<m>,
- Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWALUF_MX = !cast<SchedWrite>("WriteVFWALUF_" # mx);
- defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
- defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx);
-
defm "" : VPseudoBinaryW_WF_RM<m, f>,
- Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>;
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ forceMergeOpRead=true>;
}
}
}
@@ -3059,159 +3071,134 @@ multiclass VPseudoVFWALU_WV_WF_RM {
multiclass VPseudoVMRG_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMergeV_MX = !cast<SchedWrite>("WriteVIMergeV_" # mx);
- defvar WriteVIMergeX_MX = !cast<SchedWrite>("WriteVIMergeX_" # mx);
- defvar WriteVIMergeI_MX = !cast<SchedWrite>("WriteVIMergeI_" # mx);
- defvar ReadVIMergeV_MX = !cast<SchedRead>("ReadVIMergeV_" # mx);
- defvar ReadVIMergeX_MX = !cast<SchedRead>("ReadVIMergeX_" # mx);
-
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, m.vrclass, m, 1, "">,
- Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
+ forceMergeOpRead=true>;
def "_VXM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, GPR, m, 1, "">,
- Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>;
+ SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
+ forceMergeOpRead=true>;
def "_VIM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, simm5, m, 1, "">,
- Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>;
+ SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALU_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
defm "" : VPseudoTiedBinaryV_VM<m>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_IM<m>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>;
+ SchedUnary<"WriteVICALUI", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALU_VM_XM {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
defm "" : VPseudoTiedBinaryV_VM<m>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint,
+ Commutable=1, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, forceMasked=1,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_VM_XM<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_V_X_I<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar WriteVICALUI_MX = !cast<SchedWrite>("WriteVICALUI_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint,
+ Commutable=1, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX]>;
+ SchedUnary<"WriteVICALUI", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCALUM_V_X<string Constraint> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICALUV_MX = !cast<SchedWrite>("WriteVICALUV_" # mx);
- defvar WriteVICALUX_MX = !cast<SchedWrite>("WriteVICALUX_" # mx);
- defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
- defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
-
- defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>;
- defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>;
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint, TargetConstraintType=2>,
+ SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCLP_WV_WX_WI_RM {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVNClipV_MX = !cast<SchedWrite>("WriteVNClipV_" # mx);
- defvar WriteVNClipX_MX = !cast<SchedWrite>("WriteVNClipX_" # mx);
- defvar WriteVNClipI_MX = !cast<SchedWrite>("WriteVNClipI_" # mx);
- defvar ReadVNClipV_MX = !cast<SchedRead>("ReadVNClipV_" # mx);
- defvar ReadVNClipX_MX = !cast<SchedRead>("ReadVNClipX_" # mx);
-
defm "" : VPseudoBinaryV_WV_RM<m>,
- Sched<[WriteVNClipV_MX, ReadVNClipV_MX, ReadVNClipV_MX, ReadVMask]>;
+ SchedBinary<"WriteVNClipV", "ReadVNClipV", "ReadVNClipV", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_WX_RM<m>,
- Sched<[WriteVNClipX_MX, ReadVNClipV_MX, ReadVNClipX_MX, ReadVMask]>;
+ SchedBinary<"WriteVNClipX", "ReadVNClipV", "ReadVNClipX", mx,
+ forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_WI_RM<m>,
- Sched<[WriteVNClipI_MX, ReadVNClipV_MX, ReadVMask]>;
+ SchedUnary<"WriteVNClipI", "ReadVNClipV", mx,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNSHT_WV_WX_WI {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVNShiftV_MX = !cast<SchedWrite>("WriteVNShiftV_" # mx);
- defvar WriteVNShiftX_MX = !cast<SchedWrite>("WriteVNShiftX_" # mx);
- defvar WriteVNShiftI_MX = !cast<SchedWrite>("WriteVNShiftI_" # mx);
- defvar ReadVNShiftV_MX = !cast<SchedRead>("ReadVNShiftV_" # mx);
- defvar ReadVNShiftX_MX = !cast<SchedRead>("ReadVNShiftX_" # mx);
-
- defm "" : VPseudoBinaryV_WV<m>,
- Sched<[WriteVNShiftV_MX, ReadVNShiftV_MX, ReadVNShiftV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_WX<m>,
- Sched<[WriteVNShiftX_MX, ReadVNShiftV_MX, ReadVNShiftX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_WI<m>,
- Sched<[WriteVNShiftI_MX, ReadVNShiftV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_WV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVNShiftV", "ReadVNShiftV", "ReadVNShiftV", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_WX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVNShiftX", "ReadVNShiftV", "ReadVNShiftX", mx,
+ forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_WI<m, TargetConstraintType=2>,
+ SchedUnary<"WriteVNShiftI", "ReadVNShiftV", mx,
+ forceMergeOpRead=true>;
}
}
@@ -3222,11 +3209,12 @@ multiclass VPseudoTernaryWithTailPolicy<VReg RetClass,
int sew,
string Constraint = "",
bit Commutable = 0> {
- let VLMul = MInfo.value in {
+ let VLMul = MInfo.value, SEW=sew in {
defvar mx = MInfo.MX;
let isCommutable = Commutable in
def "_" # mx # "_E" # sew : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>;
}
}
@@ -3237,15 +3225,16 @@ multiclass VPseudoTernaryWithTailPolicyRoundingMode<VReg RetClass,
int sew,
string Constraint = "",
bit Commutable = 0> {
- let VLMul = MInfo.value in {
+ let VLMul = MInfo.value, SEW=sew in {
defvar mx = MInfo.MX;
let isCommutable = Commutable in
def "_" # mx # "_E" # sew
- : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
+ : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
Op2Class, Constraint>;
def "_" # mx # "_E" # sew # "_MASK"
: VPseudoTernaryMaskPolicyRoundingMode<RetClass, Op1Class,
- Op2Class, Constraint>;
+ Op2Class, Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>;
}
}
@@ -3254,11 +3243,12 @@ multiclass VPseudoTernaryWithPolicy<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- bit Commutable = 0> {
+ bit Commutable = 0,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
let isCommutable = Commutable in
- def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>,
+ def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -3268,16 +3258,19 @@ multiclass VPseudoTernaryWithPolicyRoundingMode<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- bit Commutable = 0> {
+ bit Commutable = 0,
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
let isCommutable = Commutable in
def "_" # MInfo.MX :
VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
- Op2Class, Constraint>;
+ Op2Class, Constraint,
+ TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" :
VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class,
Op2Class, Constraint,
- UsesVXRM_=0>,
+ UsesVXRM_=0,
+ TargetConstraintType=TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -3312,31 +3305,34 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, string Constraint
multiclass VPseudoTernaryW_VV<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint>;
+ constraint, /*Commutable*/ 0, TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VV_RM<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint>;
+ constraint, /* Commutable */ 0,
+ TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VX<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m,
- constraint>;
+ constraint, /*Commutable*/ 0, TargetConstraintType=3>;
}
-multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f> {
+multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defvar constraint = "@earlyclobber $rd";
defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.wvrclass, f.fprclass,
- m.vrclass, m, constraint>;
+ m.vrclass, m, constraint, /*Commutable*/ 0, TargetConstraintType>;
}
multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f> {
defvar constraint = "@earlyclobber $rd";
defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, f.fprclass,
- m.vrclass, m, constraint>;
+ m.vrclass, m, constraint,
+ /* Commutable */ 0,
+ TargetConstraintType=3>;
}
multiclass VPseudoVSLDVWithPolicy<VReg RetClass,
@@ -3362,62 +3358,43 @@ multiclass VPseudoVSLDV_VI<Operand ImmType = simm5, LMULInfo m, string Constrain
multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMulAddV_MX = !cast<SchedWrite>("WriteVIMulAddV_" # mx);
- defvar WriteVIMulAddX_MX = !cast<SchedWrite>("WriteVIMulAddX_" # mx);
- defvar ReadVIMulAddV_MX = !cast<SchedRead>("ReadVIMulAddV_" # mx);
- defvar ReadVIMulAddX_MX = !cast<SchedRead>("ReadVIMulAddX_" # mx);
-
defm "" : VPseudoTernaryV_VV_AAXA<m, Constraint>,
- Sched<[WriteVIMulAddV_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX,
- ReadVIMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
+ "ReadVIMulAddV", mx>;
defm "" : VPseudoTernaryV_VX_AAXA<m, Constraint>,
- Sched<[WriteVIMulAddX_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX,
- ReadVIMulAddX_MX, ReadVMask]>;
+ SchedTernary<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
+ "ReadVIMulAddV", mx>;
}
}
multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
-
defm "" : VPseudoTernaryV_VV_AAXA<m, Constraint>,
- Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
- defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx);
-
defm "" : VPseudoTernaryV_VF_AAXA<m, f, Constraint>,
- Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX>;
}
}
}
multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
-
defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint>,
- Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx);
- defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
- defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx);
-
defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>,
- Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>;
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX>;
}
}
}
@@ -3425,70 +3402,64 @@ multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVISlideX_MX = !cast<SchedWrite>("WriteVISlideX_" # mx);
- defvar WriteVISlideI_MX = !cast<SchedWrite>("WriteVISlideI_" # mx);
- defvar ReadVISlideV_MX = !cast<SchedRead>("ReadVISlideV_" # mx);
- defvar ReadVISlideX_MX = !cast<SchedRead>("ReadVISlideX_" # mx);
-
defm "" : VPseudoVSLDV_VX<m, Constraint>,
- Sched<[WriteVISlideX_MX, ReadVISlideV_MX, ReadVISlideV_MX,
- ReadVISlideX_MX, ReadVMask]>;
+ SchedTernary<"WriteVISlideX", "ReadVISlideV", "ReadVISlideV",
+ "ReadVISlideX", mx>;
defm "" : VPseudoVSLDV_VI<ImmType, m, Constraint>,
- Sched<[WriteVISlideI_MX, ReadVISlideV_MX, ReadVISlideV_MX, ReadVMask]>;
+ SchedBinary<"WriteVISlideI", "ReadVISlideV", "ReadVISlideV", mx>;
}
}
multiclass VPseudoVWMAC_VV_VX {
foreach m = MxListW in {
defvar mx = m.MX;
- defvar WriteVIWMulAddV_MX = !cast<SchedWrite>("WriteVIWMulAddV_" # mx);
- defvar WriteVIWMulAddX_MX = !cast<SchedWrite>("WriteVIWMulAddX_" # mx);
- defvar ReadVIWMulAddV_MX = !cast<SchedRead>("ReadVIWMulAddV_" # mx);
- defvar ReadVIWMulAddX_MX = !cast<SchedRead>("ReadVIWMulAddX_" # mx);
-
defm "" : VPseudoTernaryW_VV<m>,
- Sched<[WriteVIWMulAddV_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX,
- ReadVIWMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
+ "ReadVIWMulAddV", mx>;
defm "" : VPseudoTernaryW_VX<m>,
- Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX,
- ReadVIWMulAddX_MX, ReadVMask]>;
+ SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
+ "ReadVIWMulAddV", mx>;
}
}
multiclass VPseudoVWMAC_VX {
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVIWMulAddX_MX = !cast<SchedWrite>("WriteVIWMulAddX_" # mx);
- defvar ReadVIWMulAddV_MX= !cast<SchedRead>("ReadVIWMulAddV_" # mx);
- defvar ReadVIWMulAddX_MX = !cast<SchedRead>("ReadVIWMulAddX_" # mx);
-
defm "" : VPseudoTernaryW_VX<m>,
- Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX,
- ReadVIWMulAddX_MX, ReadVMask]>;
+ SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
+ "ReadVIWMulAddV", m.MX>;
}
}
multiclass VPseudoVWMAC_VV_VF_RM {
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulAddV_MX = !cast<SchedWrite>("WriteVFWMulAddV_" # mx);
- defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
-
defm "" : VPseudoTernaryW_VV_RM<m>,
- Sched<[WriteVFWMulAddV_MX, ReadVFWMulAddV_MX,
- ReadVFWMulAddV_MX, ReadVFWMulAddV_MX, ReadVMask]>;
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWMulAddF_MX = !cast<SchedWrite>("WriteVFWMulAddF_" # mx);
- defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
- defvar ReadVFWMulAddF_MX = !cast<SchedRead>("ReadVFWMulAddF_" # mx);
+ defm "" : VPseudoTernaryW_VF_RM<m, f>,
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX>;
+ }
+ }
+}
+multiclass VPseudoVWMAC_VV_VF_BF_RM {
+ foreach m = MxListFW in {
+ defvar mx = m.MX;
+ defm "" : VPseudoTernaryW_VV_RM<m>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", mx>;
+ }
+
+ foreach f = BFPListW in {
+ foreach m = f.MxListFW in {
+ defvar mx = m.MX;
defm "" : VPseudoTernaryW_VF_RM<m, f>,
- Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX,
- ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>;
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", mx>;
}
}
}
@@ -3496,55 +3467,35 @@ multiclass VPseudoVWMAC_VV_VF_RM {
multiclass VPseudoVCMPM_VV_VX_VI {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx);
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar WriteVICmpI_MX = !cast<SchedWrite>("WriteVICmpI_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
-
- defm "" : VPseudoBinaryM_VV<m>,
- Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VI<m>,
- Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
+ defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
+ defm "" : VPseudoBinaryM_VI<m, TargetConstraintType=2>,
+ SchedUnary<"WriteVICmpI", "ReadVICmpV", mx>;
}
}
multiclass VPseudoVCMPM_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx);
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
-
- defm "" : VPseudoBinaryM_VV<m>,
- Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
+ defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
}
}
multiclass VPseudoVCMPM_VV_VF {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCmpV_MX = !cast<SchedWrite>("WriteVFCmpV_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
-
- defm "" : VPseudoBinaryM_VV<m>,
- Sched<[WriteVFCmpV_MX, ReadVFCmpV_MX, ReadVFCmpV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVFCmpV", "ReadVFCmpV", "ReadVFCmpV", m.MX>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
- defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx);
-
- defm "" : VPseudoBinaryM_VF<m, f>,
- Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VF<m, f, TargetConstraintType=2>,
+ SchedBinary<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF", m.MX>;
}
}
}
@@ -3552,13 +3503,8 @@ multiclass VPseudoVCMPM_VV_VF {
multiclass VPseudoVCMPM_VF {
foreach f = FPList in {
foreach m = f.MxList in {
- defvar mx = m.MX;
- defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
- defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx);
-
- defm "" : VPseudoBinaryM_VF<m, f>,
- Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VF<m, f, TargetConstraintType=2>,
+ SchedBinary<"WriteVFCmpF", "ReadVFCmpV", "ReadVFCmpF", m.MX>;
}
}
}
@@ -3566,15 +3512,10 @@ multiclass VPseudoVCMPM_VF {
multiclass VPseudoVCMPM_VX_VI {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar WriteVICmpI_MX = !cast<SchedWrite>("WriteVICmpI_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
-
- defm "" : VPseudoBinaryM_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryM_VI<m>,
- Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
+ SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
+ defm "" : VPseudoBinaryM_VI<m, TargetConstraintType=2>,
+ SchedUnary<"WriteVICmpI", "ReadVICmpV", mx>;
}
}
@@ -3582,10 +3523,8 @@ multiclass VPseudoVRED_VS {
foreach m = MxList in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx>.val in {
- defvar WriteVIRedV_From_MX_E = !cast<SchedWrite>("WriteVIRedV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVIRedV_From_MX_E, ReadVIRedV, ReadVIRedV, ReadVIRedV,
- ReadVMask]>;
+ SchedReduction<"WriteVIRedV_From", "ReadVIRedV", mx, e>;
}
}
}
@@ -3594,10 +3533,8 @@ multiclass VPseudoVREDMINMAX_VS {
foreach m = MxList in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx>.val in {
- defvar WriteVIRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVIRedMinMaxV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVIRedMinMaxV_From_MX_E, ReadVIRedV, ReadVIRedV,
- ReadVIRedV, ReadVMask]>;
+ SchedReduction<"WriteVIRedMinMaxV_From", "ReadVIRedV", mx, e>;
}
}
}
@@ -3606,10 +3543,8 @@ multiclass VPseudoVWRED_VS {
foreach m = MxListWRed in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isWidening=1>.val in {
- defvar WriteVIWRedV_From_MX_E = !cast<SchedWrite>("WriteVIWRedV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVIWRedV_From_MX_E, ReadVIWRedV, ReadVIWRedV,
- ReadVIWRedV, ReadVMask]>;
+ SchedReduction<"WriteVIWRedV_From", "ReadVIWRedV", mx, e>;
}
}
}
@@ -3618,12 +3553,10 @@ multiclass VPseudoVFRED_VS_RM {
foreach m = MxListF in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1>.val in {
- defvar WriteVFRedV_From_MX_E = !cast<SchedWrite>("WriteVFRedV_From_" # mx # "_E" # e);
defm _VS
- : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
V_M1.vrclass, m, e>,
- Sched<[WriteVFRedV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV,
- ReadVMask]>;
+ SchedReduction<"WriteVFRedV_From", "ReadVFRedV", mx, e>;
}
}
}
@@ -3632,10 +3565,8 @@ multiclass VPseudoVFREDMINMAX_VS {
foreach m = MxListF in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1>.val in {
- defvar WriteVFRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVFRedMinMaxV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
- Sched<[WriteVFRedMinMaxV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV,
- ReadVMask]>;
+ SchedReduction<"WriteVFRedMinMaxV_From", "ReadVFRedV", mx, e>;
}
}
}
@@ -3644,11 +3575,9 @@ multiclass VPseudoVFREDO_VS_RM {
foreach m = MxListF in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1>.val in {
- defvar WriteVFRedOV_From_MX_E = !cast<SchedWrite>("WriteVFRedOV_From_" # mx # "_E" # e);
defm _VS : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
V_M1.vrclass, m, e>,
- Sched<[WriteVFRedOV_From_MX_E, ReadVFRedOV, ReadVFRedOV,
- ReadVFRedOV, ReadVMask]>;
+ SchedReduction<"WriteVFRedOV_From", "ReadVFRedOV", mx, e>;
}
}
}
@@ -3657,12 +3586,22 @@ multiclass VPseudoVFWRED_VS_RM {
foreach m = MxListFWRed in {
defvar mx = m.MX;
foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
- defvar WriteVFWRedV_From_MX_E = !cast<SchedWrite>("WriteVFWRedV_From_" # mx # "_E" # e);
defm _VS
: VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
V_M1.vrclass, m, e>,
- Sched<[WriteVFWRedV_From_MX_E, ReadVFWRedV, ReadVFWRedV,
- ReadVFWRedV, ReadVMask]>;
+ SchedReduction<"WriteVFWRedV_From", "ReadVFWRedV", mx, e>;
+ }
+ }
+}
+
+multiclass VPseudoVFWREDO_VS_RM {
+ foreach m = MxListFWRed in {
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defm _VS
+ : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ V_M1.vrclass, m, e>,
+ SchedReduction<"WriteVFWRedOV_From", "ReadVFWRedV", mx, e>;
}
}
}
@@ -3670,11 +3609,12 @@ multiclass VPseudoVFWRED_VS_RM {
multiclass VPseudoConversion<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
- Constraint>,
+ Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3682,9 +3622,10 @@ multiclass VPseudoConversion<VReg RetClass,
multiclass VPseudoConversionRoundingMode<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
Constraint>,
RISCVMaskedPseudo<MaskIdx=2>;
@@ -3716,211 +3657,157 @@ multiclass VPseudoConversionNoExcept<VReg RetClass,
multiclass VPseudoVCVTI_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTI_V_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTI_RM_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVFROUND_NOEXCEPT_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
- defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
-
defm _V : VPseudoConversionNoExcept<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTF_V_RM {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
- defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
-
defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTF_RM_V {
foreach m = MxListF in {
- defvar mx = m.MX;
- defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
- defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
-
defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
- Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTI_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
- defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
-
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTI_V_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
- defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
-
- defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTI_RM_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
- defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
-
defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx);
- defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx);
-
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>;
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTD_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFWCvtFToFV_MX = !cast<SchedWrite>("WriteVFWCvtFToFV_" # mx);
- defvar ReadVFWCvtFToFV_MX = !cast<SchedRead>("ReadVFWCvtFToFV_" # mx);
-
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtFToFV_MX, ReadVFWCvtFToFV_MX, ReadVMask]>;
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTI_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
- defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
-
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTI_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
- defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
-
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTI_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
- defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
-
defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTF_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx);
- defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx);
-
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>;
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTF_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx);
- defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx);
-
defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>;
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx);
- defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx);
-
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>;
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defvar mx = m.MX;
- defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx);
- defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx);
-
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
- Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>;
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
+ forceMergeOpRead=true>;
}
}
@@ -3988,18 +3875,17 @@ multiclass VPseudoISegLoad<bit Ordered> {
defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
defvar DataVreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
- defvar Order = !if(Ordered, "O", "U");
let VLMul = dataEMUL.value in {
foreach nf = NFSet<dataEMUL>.L in {
defvar Vreg = SegRegClass<dataEMUL, nf>.RC;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
VPseudoISegLoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VLXSEGSched<nf, dataEEW, Order, DataLInfo>;
+ VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoISegLoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VLXSEGSched<nf, dataEEW, Order, DataLInfo>;
+ VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>;
}
}
}
@@ -4055,18 +3941,17 @@ multiclass VPseudoISegStore<bit Ordered> {
defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
defvar DataVreg = dataEMUL.vrclass;
defvar IdxVreg = idxEMUL.vrclass;
- defvar Order = !if(Ordered, "O", "U");
let VLMul = dataEMUL.value in {
foreach nf = NFSet<dataEMUL>.L in {
defvar Vreg = SegRegClass<dataEMUL, nf>.RC;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
VPseudoISegStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VSXSEGSched<nf, idxEEW, Order, DataLInfo>;
+ VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>;
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoISegStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VSXSEGSched<nf, idxEEW, Order, DataLInfo>;
+ VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>;
}
}
}
@@ -4087,16 +3972,12 @@ class VPatUnaryNoMask<string intrinsic_name,
int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class,
- bit isSEWAware = 0> :
+ VReg op2_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
VLOpFrag)),
- (!cast<Instruction>(
- !if(isSEWAware,
- inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
- inst#"_"#kind#"_"#vlmul.MX))
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
GPR:$vl, log2sew, TU_MU)>;
@@ -4135,17 +4016,13 @@ class VPatUnaryMask<string intrinsic_name,
int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class,
- bit isSEWAware = 0> :
+ VReg op2_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(
- !if(isSEWAware,
- inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
- inst#"_"#kind#"_"#vlmul.MX#"_MASK"))
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0), GPR:$vl, log2sew, (XLenVT timm:$policy))>;
@@ -4187,7 +4064,7 @@ class VPatMaskUnaryNoMask<string intrinsic_name,
(!cast<Instruction>(inst#"_M_"#mti.BX)
(mti.Mask (IMPLICIT_DEF)),
(mti.Mask VR:$rs2),
- GPR:$vl, mti.Log2SEW, TU_MU)>;
+ GPR:$vl, mti.Log2SEW, TA_MA)>;
class VPatMaskUnaryMask<string intrinsic_name,
string inst,
@@ -4831,15 +4708,15 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
}
multiclass VPatUnaryV_V<string intrinsic, string instruction,
- list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : VPatUnaryNoMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ vti.LMul, vti.RegClass, vti.RegClass>;
def : VPatUnaryMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ vti.LMul, vti.RegClass, vti.RegClass>;
}
}
}
@@ -6080,6 +5957,21 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
+ // Define vfwcvt.f.f.v for f16 when Zvfhmin is enable.
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in
+ {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
@@ -6136,8 +6028,21 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
}
}
-multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction> {
- foreach fvtiToFWti = AllWidenableFloatVectors in {
+multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction,
+ list<VTypeInfoToWide> wlist = AllWidenableFloatVectors> {
+ foreach fvtiToFWti = wlist in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
@@ -6336,7 +6241,7 @@ foreach vti = AllIntegerVectors in {
GPR:$vl,
vti.Log2SEW,
(XLenVT timm:$policy))>;
-
+
// Match VSUB with a small immediate to vadd.vi by negating the immediate.
def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)),
(vti.Vector vti.RegClass:$rs1),
@@ -6346,7 +6251,7 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
(NegImm simm5_plus1:$rs2),
GPR:$vl,
- vti.Log2SEW, TU_MU)>;
+ vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
@@ -6593,6 +6498,8 @@ defm PseudoVFWMACC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF_RM;
defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF_RM;
+let Predicates = [HasStdExtZvfbfwma] in
+defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
}
//===----------------------------------------------------------------------===//
@@ -6697,6 +6604,7 @@ defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
+defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V;
} // mayRaiseFPException = true
//===----------------------------------------------------------------------===//
@@ -6722,6 +6630,7 @@ defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W;
let hasSideEffects = 0, hasPostISelHook = 1 in
defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM;
+defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
@@ -6774,7 +6683,7 @@ let IsRVVWideningReduction = 1,
hasSideEffects = 0,
mayRaiseFPException = true in {
defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM;
-defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM;
+defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM;
}
} // Predicates = [HasVInstructionsAnyF]
@@ -6787,14 +6696,14 @@ defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM;
// 15.1 Vector Mask-Register Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMAND: VPseudoVALU_MM;
-defm PseudoVMNAND: VPseudoVALU_MM;
+defm PseudoVMAND: VPseudoVALU_MM<Commutable=1>;
+defm PseudoVMNAND: VPseudoVALU_MM<Commutable=1>;
defm PseudoVMANDN: VPseudoVALU_MM;
-defm PseudoVMXOR: VPseudoVALU_MM;
-defm PseudoVMOR: VPseudoVALU_MM;
-defm PseudoVMNOR: VPseudoVALU_MM;
+defm PseudoVMXOR: VPseudoVALU_MM<Commutable=1>;
+defm PseudoVMOR: VPseudoVALU_MM<Commutable=1>;
+defm PseudoVMNOR: VPseudoVALU_MM<Commutable=1>;
defm PseudoVMORN: VPseudoVALU_MM;
-defm PseudoVMXNOR: VPseudoVALU_MM;
+defm PseudoVMXNOR: VPseudoVALU_MM<Commutable=1>;
// Pseudo instructions
defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
@@ -7005,7 +6914,7 @@ foreach vti = AllIntegerVectors in {
(XLenVT 1), VLOpFrag)),
(!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(XLenVT 1),
@@ -7139,7 +7048,7 @@ foreach vti = AllVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
$passthru, $rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
-
+
    // vmv.v.x/vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td
}
}
@@ -7222,7 +7131,7 @@ defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB",
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL",
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL",
AllFloatVectors>;
defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfdiv", "PseudoVFDIV",
AllFloatVectors, isSEWAware=1>;
@@ -7258,6 +7167,9 @@ defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC",
AllWidenableFloatVectors>;
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC",
AllWidenableFloatVectors>;
+let Predicates = [HasStdExtZvfbfwma] in
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16",
+ AllWidenableBFloatToFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.8. Vector Floating-Point Square-Root Instruction
@@ -7362,6 +7274,8 @@ defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">;
defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
+defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
+ "PseudoVFWCVTBF16_F_F">;
//===----------------------------------------------------------------------===//
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -7372,7 +7286,18 @@ defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
-defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
+defvar WidenableFloatVectorsExceptF16 = !filter(fvtiToFWti, AllWidenableFloatVectors,
+ !ne(fvtiToFWti.Vti.Scalar, f16));
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
+ WidenableFloatVectorsExceptF16>;
+// Define vfncvt.f.f.w for f16 when Zvfhmin is enabled.
+defvar F16WidenableFloatVectors = !filter(fvtiToFWti, AllWidenableFloatVectors,
+ !eq(fvtiToFWti.Vti.Scalar, f16));
+let Predicates = [HasVInstructionsF16Minimal] in
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
+ F16WidenableFloatVectors>;
+defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w",
+ "PseudoVFNCVTBF16_F_F">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
//===----------------------------------------------------------------------===//
@@ -7500,6 +7425,11 @@ foreach fvti = AllFloatVectors in {
(fvti.Scalar (fpimm0)), VLOpFrag)),
(!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
(fvti.Vector $rs1), (XLenVT X0), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
+ (fvti.Vector $rs1), GPR:$imm, GPR:$vl, fvti.Log2SEW)>;
}
}
@@ -7532,10 +7462,6 @@ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
// 16.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
-defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
// Include the non-intrinsic ISel patterns
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 4141c7698bb4..b7c845703794 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -35,7 +35,7 @@ multiclass VPatUSLoadStoreSDNode<ValueType type,
// Load
def : Pat<(type (load GPR:$rs1)),
(load_instr (type (IMPLICIT_DEF)), GPR:$rs1, avl,
- log2sew, TU_MU)>;
+ log2sew, TA_MA)>;
// Store
def : Pat<(store type:$rs2, GPR:$rs1),
(store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>;
@@ -399,7 +399,7 @@ multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix,
def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))),
(!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TU_MU)>;
+ fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -416,7 +416,7 @@ multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
}
@@ -429,7 +429,7 @@ multiclass VPatConvertFP2ISDNode_V<SDPatternOperator vop,
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
(ivti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TU_MU)>;
+ fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TA_MA)>;
}
}
@@ -444,7 +444,7 @@ multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop,
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
(fwti.Vector (IMPLICIT_DEF)),
ivti.RegClass:$rs1,
- ivti.AVL, ivti.Log2SEW, TU_MU)>;
+ ivti.AVL, ivti.Log2SEW, TA_MA)>;
}
}
@@ -458,7 +458,7 @@ multiclass VPatWConvertFP2ISDNode_V<SDPatternOperator vop,
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
(iwti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
}
@@ -476,7 +476,7 @@ multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
}
@@ -490,7 +490,7 @@ multiclass VPatNConvertFP2ISDNode_W<SDPatternOperator vop,
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -505,12 +505,12 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags exto
(wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))),
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
(wti.Vector (extop2 (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- GPR:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -531,7 +531,7 @@ multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop,
(wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, GPR:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -588,7 +588,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
(vti.Mask true_mask), (XLenVT srcvalue)))),
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
@@ -597,14 +597,14 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
(vti.Mask true_mask), (XLenVT srcvalue)))),
(!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
(wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
(!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -627,7 +627,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
@@ -640,7 +640,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
@@ -651,7 +651,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -683,7 +683,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector wti.RegClass:$rs2),
(wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
(!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
@@ -692,7 +692,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -883,16 +883,20 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
- FractionalGroupFloatVectors) in
- let Predicates = GetVTypePredicates<vti>.Predicates in
+ FractionalGroupFloatVectors,
+ FractionalGroupBFloatVectors) in
+ let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+ GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.AVL, vti.RegClass>;
-foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VF16M1, VF32M1, VF64M1] in
- let Predicates = GetVTypePredicates<vti>.Predicates in
+foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in
+ let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+ GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
-foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors) in
- let Predicates = GetVTypePredicates<vti>.Predicates in
+foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in
+ let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+ GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach mti = AllMasks in
@@ -916,12 +920,12 @@ foreach vti = AllIntegerVectors in {
(vti.Vector vti.RegClass:$rs1)),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1)),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- simm5:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ simm5:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -944,17 +948,17 @@ foreach vtiToWti = AllWidenableIntVectors in {
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs1))),
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(shl (wti.Vector (anyext_oneuse (vti.Vector vti.RegClass:$rs1))),
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -989,7 +993,7 @@ foreach vti = AllIntegerVectors in {
(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
}
@@ -1051,6 +1055,23 @@ defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV", isSEWAware=1>;
defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU", isSEWAware=1>;
defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM", isSEWAware=1>;
+foreach vtiTowti = AllWidenableIntVectors in {
+ defvar vti = vtiTowti.Vti;
+ defvar wti = vtiTowti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (srem (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs2)))),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (!cast<Instruction>("PseudoVREM_VV_"#vti.LMul.MX#"_E"#!shl(1, vti.Log2SEW))
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+
// 11.12. Vector Widening Integer Multiply Instructions
defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse,
"PseudoVWMUL">;
@@ -1145,7 +1166,7 @@ foreach mti = AllMasks in {
// Handle rvv_vnot the same as the vmnot.m pseudoinstruction.
def : Pat<(mti.Mask (rvv_vnot VR:$rs)),
(!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
- VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
+ VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
}
}
@@ -1279,40 +1300,40 @@ foreach vti = AllFloatVectors in {
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.AVL, vti.Log2SEW, TA_MA)>;
// 13.12. Vector Floating-Point Sign-Injection Instructions
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
-
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg vti.RegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
(!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
@@ -1337,7 +1358,8 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
foreach fvti = AllFloatVectors in {
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
@@ -1346,6 +1368,15 @@ foreach fvti = AllFloatVectors in {
fvti.AVL, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
+
+ }
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
@@ -1353,14 +1384,6 @@ foreach fvti = AllFloatVectors in {
fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
- }
}
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
@@ -1383,8 +1406,9 @@ defm : VPatNConvertI2FPSDNode_W_RM<any_uint_to_fp, "PseudoVFNCVT_F_XU_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
@@ -1392,7 +1416,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
//===----------------------------------------------------------------------===//
@@ -1400,18 +1424,18 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
//===----------------------------------------------------------------------===//
foreach fvti = AllFloatVectors in {
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
- def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
(fvti.Scalar fvti.ScalarRegClass:$rs1),
- fvti.AVL, fvti.Log2SEW, TU_MU)>;
-
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in
def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))),
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
(fvti.Vector (IMPLICIT_DEF)),
- 0, fvti.AVL, fvti.Log2SEW, TU_MU)>;
- }
+ 0, fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 900f9dd1be05..dc6b57fad321 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -98,6 +98,8 @@ def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>;
def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>;
def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>;
def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_rotl_vl : SDNode<"RISCVISD::ROTL_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_rotr_vl : SDNode<"RISCVISD::ROTR_VL", SDT_RISCVIntBinOp_VL>;
def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
@@ -122,8 +124,8 @@ def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fabs_vl : SDNode<"RISCVISD::FABS_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fsqrt_vl : SDNode<"RISCVISD::FSQRT_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVCopySign_VL>;
-def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
-def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
+def riscv_vfmin_vl : SDNode<"RISCVISD::VFMIN_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
+def riscv_vfmax_vl : SDNode<"RISCVISD::VFMAX_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_strict_fadd_vl : SDNode<"RISCVISD::STRICT_FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative, SDNPHasChain]>;
def riscv_strict_fsub_vl : SDNode<"RISCVISD::STRICT_FSUB_VL", SDT_RISCVFPBinOp_VL, [SDNPHasChain]>;
@@ -407,6 +409,7 @@ def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWIntBinOp_VL, [S
def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>;
def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWIntBinOp_VL, []>;
def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWIntBinOp_VL, []>;
+def riscv_vwsll_vl : SDNode<"RISCVISD::VWSLL_VL", SDT_RISCVVWIntBinOp_VL, []>;
def SDT_RISCVVWIntTernOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisInt<1>,
@@ -577,14 +580,15 @@ def SplatPat_simm5_plus1
def SplatPat_simm5_plus1_nonzero
: ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>;
-def ext_oneuse_SplatPat
- : ComplexPattern<vAny, 1, "selectExtOneUseVSplat", [], [], 2>;
+// Selects extends or truncates of splats where we only care about the lowest 8
+// bits of each element.
+def Low8BitsSplatPat
+ : ComplexPattern<vAny, 1, "selectLow8BitsVSplat", [], [], 2>;
-def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
-
-// Ignore the vl operand.
-def SplatFPOp : PatFrag<(ops node:$op),
- (riscv_vfmv_v_f_vl undef, node:$op, srcvalue)>;
+// Ignore the vl operand on vfmv_v_f and vfmv_s_f.
+def SplatFPOp : PatFrags<(ops node:$op),
+ [(riscv_vfmv_v_f_vl undef, node:$op, srcvalue),
+ (riscv_vfmv_s_f_vl undef, node:$op, srcvalue)]>;
def sew8simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<8>", []>;
def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
@@ -1377,16 +1381,6 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
(vti.Vector vti.RegClass:$rs1), VR:$rs2,
- (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti_m1.Vector VR:$rs2),
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1), VR:$rs2,
(vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1404,19 +1398,6 @@ multiclass VPatReductionVL_RM<SDNode vop, string instruction_name, bit is_float>
let Predicates = GetVTypePredicates<vti>.Predicates in {
def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
(vti.Vector vti.RegClass:$rs1), VR:$rs2,
- (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti_m1.Vector VR:$rs2),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1), VR:$rs2,
(vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1447,18 +1428,18 @@ multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> {
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<
(vti.Vector
(riscv_trunc_vector_vl
(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (ext_oneuse_SplatPat (XLenVT GPR:$rs1)))),
+ (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1)))),
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<
(vti.Vector
@@ -1468,7 +1449,7 @@ multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> {
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -1482,14 +1463,6 @@ multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_n
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1509,18 +1482,6 @@ multiclass VPatWidenReductionVL_RM<SDNode vop, PatFrags extop, string instructio
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1544,14 +1505,6 @@ multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instru
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1571,18 +1524,6 @@ multiclass VPatWidenReductionVL_Ext_VL_RM<SDNode vop, PatFrags extop, string ins
GetVTypePredicates<wti>.Predicates) in {
def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
(wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW,
- (XLenVT timm:$policy))>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
VR:$rs2, (vti.Mask V0), VLOpFrag,
(XLenVT timm:$policy))),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1693,7 +1634,7 @@ multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruct
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
@@ -1713,7 +1654,7 @@ multiclass VPatNarrowShiftExtVL_WV<SDNode op, PatFrags extop, string instruction
(vti.Mask V0), VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
@@ -1832,13 +1773,13 @@ multiclass VPatNarrowShiftSplat_WX_WI<SDNode op, string instruction_name> {
srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (riscv_trunc_vector_vl
(wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
}
@@ -2039,6 +1980,56 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {
}
}
+multiclass VPatSlideVL_VX_VI<SDNode vop, string instruction_name> {
+ foreach vti = AllVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ uimm5:$rs2, (vti.Mask V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, uimm5:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
+ }
+}
+
+multiclass VPatSlide1VL_VX<SDNode vop, string instruction_name> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+ }
+}
+
+multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> {
+ foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$rs3, vti.RegClass:$rs1, vti.Scalar:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -2132,7 +2123,7 @@ foreach vti = AllIntegerVectors in {
srcvalue, (vti.Mask true_mask), VLOpFrag),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
// 11.7. Vector Narrowing Integer Right Shift Instructions
@@ -2216,7 +2207,7 @@ defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", IntegerVectorsExceptI6
// vsmul.vv and vsmul.vx are not included in EEW=64 in Zve64*.
let Predicates = [HasVInstructionsFullMultiply] in {
defm : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH", I64IntegerVectors>;
- defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>;
+ defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>;
}
// 11.11. Vector Integer Divide Instructions
@@ -2373,8 +2364,8 @@ defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">;
defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">;
-defm : VPatBinaryFPVL_VV_VF<riscv_fmaxnum_vl, "PseudoVFMAX">;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN">;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX">;
// 13.13. Vector Floating-Point Compare Instructions
defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ,
@@ -2441,7 +2432,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag),
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
- vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
(SplatFPOp vti.ScalarRegClass:$rs2),
@@ -2459,12 +2450,13 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
-
+
  // 13.14. Vector Floating-Point Classify Instruction
- def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag),
- (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
@@ -2472,7 +2464,8 @@ foreach fvti = AllFloatVectors in {
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
- let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
fvti.RegClass:$rs1,
fvti.RegClass:$rs2,
@@ -2483,16 +2476,6 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
(SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
fvti.RegClass:$rs2,
VLOpFrag)),
@@ -2519,21 +2502,33 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
+ (SplatFPOp (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2,
VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+ }
+
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
(fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2,
VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
// 13.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.x vd, x0.
@@ -2585,8 +2580,9 @@ defm : VPatWConvertI2FPVL_V<any_riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in
def : Pat<(fwti.Vector (any_riscv_fpextend_vl
(fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask V0),
@@ -2615,8 +2611,10 @@ defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
- GetVTypePredicates<fwti>.Predicates) in {
+  // Define vfncvt.f.f.w for f16 when Zvfhmin is enabled.
+ let Predicates = !if(!eq(fvti.Scalar, f16), [HasVInstructionsF16Minimal],
+ !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates)) in {
def : Pat<(fvti.Vector (any_riscv_fpround_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask V0), VLOpFrag)),
@@ -2628,6 +2626,8 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
FRM_DYN,
GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask V0), VLOpFrag)),
@@ -2766,7 +2766,7 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
vti.RegClass:$merge,
(vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
-
+
def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
vti.RegClass:$rs1,
vti.RegClass:$merge,
@@ -2922,70 +2922,12 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW,
TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
}
}
-foreach vti = AllFloatVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- vti.Scalar:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- vti.Scalar:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
- }
-}
-
-foreach vti = AllVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- uimm5:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- uimm5:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
- }
-}
+defm : VPatSlideVL_VX_VI<riscv_slideup_vl, "PseudoVSLIDEUP">;
+defm : VPatSlideVL_VX_VI<riscv_slidedown_vl, "PseudoVSLIDEDOWN">;
+defm : VPatSlide1VL_VX<riscv_slide1up_vl, "PseudoVSLIDE1UP">;
+defm : VPatSlide1VL_VF<riscv_fslide1up_vl, "PseudoVFSLIDE1UP">;
+defm : VPatSlide1VL_VX<riscv_slide1down_vl, "PseudoVSLIDE1DOWN">;
+defm : VPatSlide1VL_VF<riscv_fslide1down_vl, "PseudoVFSLIDE1DOWN">;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 4ba052b25e42..924e91e15c34 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -13,10 +13,12 @@
let DecoderNamespace = "XCVbitmanip" in {
class CVInstBitManipRII<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
string opcodestr, string argstr>
- : RVInstI<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> {
+ : RVInstIBase<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> {
bits<5> is3;
bits<5> is2;
- let imm12 = {funct2, is3, is2};
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = is3;
+ let Inst{24-20} = is2;
}
class CVBitManipRII<bits<2> funct2, bits<3> funct3, string opcodestr,
@@ -31,7 +33,7 @@ let DecoderNamespace = "XCVbitmanip" in {
class CVBitManipR<bits<7> funct7, string opcodestr>
: RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1"> {
+ (ins GPR:$rs1), opcodestr, "$rd, $rs1"> {
let rs2 = 0b00000;
}
}
@@ -67,125 +69,80 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32],
def CV_CNT : CVBitManipR<0b0100100, "cv.cnt">;
}
-class CVInstMac<bits<7> funct7, bits<3> funct3, dag outs, dag ins,
- string opcodestr, string argstr, list<dag> pattern>
- : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> {
- bits<5> rs2;
- bits<5> rs1;
- bits<5> rd;
-
- let Inst{31-25} = funct7;
- let Inst{24-20} = rs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_CUSTOM_1.Value;
+class CVInstMac<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1,
+ (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2"> {
let DecoderNamespace = "XCVmac";
}
-class CVInstMac16I<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
- string opcodestr, string argstr, list<dag> pattern>
- : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> {
+class CVInstMacMulN<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
+ string opcodestr>
+ : RVInstRBase<funct3, OPC_CUSTOM_2, outs, ins, opcodestr,
+ "$rd, $rs1, $rs2, $imm5"> {
bits<5> imm5;
- bits<5> rs2;
- bits<5> rs1;
- bits<5> rd;
let Inst{31-30} = funct2;
let Inst{29-25} = imm5;
- let Inst{24-20} = rs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_CUSTOM_2.Value;
let DecoderNamespace = "XCVmac";
}
+class CVInstMacN<bits<2> funct2, bits<3> funct3, string opcodestr>
+ : CVInstMacMulN<funct2, funct3, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr>;
+
+class CVInstMulN<bits<2> funct2, bits<3> funct3, string opcodestr>
+ : CVInstMacMulN<funct2, funct3, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr>;
+
let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0,
mayStore = 0, Constraints = "$rd = $rd_wb" in {
// 32x32 bit macs
- def CV_MAC : CVInstMac<0b1001000, 0b011, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
- "cv.mac", "$rd, $rs1, $rs2", []>,
+ def CV_MAC : CVInstMac<0b1001000, 0b011, "cv.mac">,
Sched<[]>;
- def CV_MSU : CVInstMac<0b1001001, 0b011, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
- "cv.msu", "$rd, $rs1, $rs2", []>,
+ def CV_MSU : CVInstMac<0b1001001, 0b011, "cv.msu">,
Sched<[]>;
// Signed 16x16 bit macs with imm
- def CV_MACSN : CVInstMac16I<0b00, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACSN : CVInstMacN<0b00, 0b110, "cv.macsn">,
Sched<[]>;
- def CV_MACHHSN : CVInstMac16I<0b01, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHSN : CVInstMacN<0b01, 0b110, "cv.machhsn">,
Sched<[]>;
- def CV_MACSRN : CVInstMac16I<0b10, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACSRN : CVInstMacN<0b10, 0b110, "cv.macsrn">,
Sched<[]>;
- def CV_MACHHSRN : CVInstMac16I<0b11, 0b110, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHSRN : CVInstMacN<0b11, 0b110, "cv.machhsrn">,
Sched<[]>;
// Unsigned 16x16 bit macs with imm
- def CV_MACUN : CVInstMac16I<0b00, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACUN : CVInstMacN<0b00, 0b111, "cv.macun">,
Sched<[]>;
- def CV_MACHHUN : CVInstMac16I<0b01, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHUN : CVInstMacN<0b01, 0b111, "cv.machhun">,
Sched<[]>;
- def CV_MACURN : CVInstMac16I<0b10, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.macurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACURN : CVInstMacN<0b10, 0b111, "cv.macurn">,
Sched<[]>;
- def CV_MACHHURN : CVInstMac16I<0b11, 0b111, (outs GPR:$rd_wb),
- (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.machhurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MACHHURN : CVInstMacN<0b11, 0b111, "cv.machhurn">,
Sched<[]>;
} // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0...
let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// Signed 16x16 bit muls with imm
- def CV_MULSN : CVInstMac16I<0b00, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULSN : CVInstMulN<0b00, 0b100, "cv.mulsn">,
Sched<[]>;
- def CV_MULHHSN : CVInstMac16I<0b01, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhsn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHSN : CVInstMulN<0b01, 0b100, "cv.mulhhsn">,
Sched<[]>;
- def CV_MULSRN : CVInstMac16I<0b10, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULSRN : CVInstMulN<0b10, 0b100, "cv.mulsrn">,
Sched<[]>;
- def CV_MULHHSRN : CVInstMac16I<0b11, 0b100, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHSRN : CVInstMulN<0b11, 0b100, "cv.mulhhsrn">,
Sched<[]>;
-
// Unsigned 16x16 bit muls with imm
- def CV_MULUN : CVInstMac16I<0b00, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULUN : CVInstMulN<0b00, 0b101, "cv.mulun">,
Sched<[]>;
- def CV_MULHHUN : CVInstMac16I<0b01, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhun", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHUN : CVInstMulN<0b01, 0b101, "cv.mulhhun">,
Sched<[]>;
- def CV_MULURN : CVInstMac16I<0b10, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULURN : CVInstMulN<0b10, 0b101, "cv.mulurn">,
Sched<[]>;
- def CV_MULHHURN : CVInstMac16I<0b11, 0b101, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
- "cv.mulhhurn", "$rd, $rs1, $rs2, $imm5", []>,
+ def CV_MULHHURN : CVInstMulN<0b11, 0b101, "cv.mulhhurn">,
Sched<[]>;
} // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0...
@@ -203,3 +160,547 @@ let Predicates = [HasVendorXCVmac, IsRV32] in {
def : InstAlias<"cv.mulhhu $rd1, $rs1, $rs2",
(CV_MULHHUN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>;
} // Predicates = [HasVendorXCVmac, IsRV32]
+
+let DecoderNamespace = "XCValu" in {
+ class CVInstAluRRI<bits<2> funct2, bits<3> funct3, string opcodestr>
+ : RVInstRBase<funct3, OPC_CUSTOM_2, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5), opcodestr,
+ "$rd, $rs1, $rs2, $imm5"> {
+ bits<5> imm5;
+
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = imm5;
+ }
+
+ class CVInstAluRR<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+ class CVInstAluRRNR<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+ class CVInstAluRI<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstIBase<funct3, OPC_CUSTOM_1, (outs GPR:$rd),
+ (ins GPR:$rs1, uimm5:$imm5), opcodestr,
+ "$rd, $rs1, $imm5"> {
+ bits<5> imm5;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = imm5;
+ }
+
+ class CVInstAluR<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1"> {
+ let rs2 = 0b00000;
+ }
+
+} // DecoderNamespace = "XCValu"
+
+let Predicates = [HasVendorXCValu],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ // General ALU Operations
+ def CV_ABS : CVInstAluR<0b0101000, 0b011, "cv.abs">,
+ Sched<[]>;
+ def CV_SLET : CVInstAluRR<0b0101001, 0b011, "cv.slet">,
+ Sched<[]>;
+ def CV_SLETU : CVInstAluRR<0b0101010, 0b011, "cv.sletu">,
+ Sched<[]>;
+ def CV_MIN : CVInstAluRR<0b0101011, 0b011, "cv.min">,
+ Sched<[]>;
+ def CV_MINU : CVInstAluRR<0b0101100, 0b011, "cv.minu">,
+ Sched<[]>;
+ def CV_MAX : CVInstAluRR<0b0101101, 0b011, "cv.max">,
+ Sched<[]>;
+ def CV_MAXU : CVInstAluRR<0b0101110, 0b011, "cv.maxu">,
+ Sched<[]>;
+ def CV_EXTHS : CVInstAluR<0b0110000, 0b011, "cv.exths">,
+ Sched<[]>;
+ def CV_EXTHZ : CVInstAluR<0b0110001, 0b011, "cv.exthz">,
+ Sched<[]>;
+ def CV_EXTBS : CVInstAluR<0b0110010, 0b011, "cv.extbs">,
+ Sched<[]>;
+ def CV_EXTBZ : CVInstAluR<0b0110011, 0b011, "cv.extbz">,
+ Sched<[]>;
+
+ def CV_CLIP : CVInstAluRI<0b0111000, 0b011, "cv.clip">,
+ Sched<[]>;
+ def CV_CLIPU : CVInstAluRI<0b0111001, 0b011, "cv.clipu">,
+ Sched<[]>;
+ def CV_CLIPR : CVInstAluRR<0b0111010, 0b011, "cv.clipr">,
+ Sched<[]>;
+ def CV_CLIPUR : CVInstAluRR<0b0111011, 0b011, "cv.clipur">,
+ Sched<[]>;
+
+ def CV_ADDN : CVInstAluRRI<0b00, 0b010, "cv.addn">,
+ Sched<[]>;
+ def CV_ADDUN : CVInstAluRRI<0b01, 0b010, "cv.addun">,
+ Sched<[]>;
+ def CV_ADDRN : CVInstAluRRI<0b10, 0b010, "cv.addrn">,
+ Sched<[]>;
+ def CV_ADDURN : CVInstAluRRI<0b11, 0b010, "cv.addurn">,
+ Sched<[]>;
+ def CV_SUBN : CVInstAluRRI<0b00, 0b011, "cv.subn">,
+ Sched<[]>;
+ def CV_SUBUN : CVInstAluRRI<0b01, 0b011, "cv.subun">,
+ Sched<[]>;
+ def CV_SUBRN : CVInstAluRRI<0b10, 0b011, "cv.subrn">,
+ Sched<[]>;
+ def CV_SUBURN : CVInstAluRRI<0b11, 0b011, "cv.suburn">,
+ Sched<[]>;
+} // Predicates = [HasVendorXCValu],
+ // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+let Predicates = [HasVendorXCValu],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ Constraints = "$rd = $rd_wb" in {
+ def CV_ADDNR : CVInstAluRRNR<0b1000000, 0b011, "cv.addnr">,
+ Sched<[]>;
+ def CV_ADDUNR : CVInstAluRRNR<0b1000001, 0b011, "cv.addunr">,
+ Sched<[]>;
+ def CV_ADDRNR : CVInstAluRRNR<0b1000010, 0b011, "cv.addrnr">,
+ Sched<[]>;
+ def CV_ADDURNR : CVInstAluRRNR<0b1000011, 0b011, "cv.addurnr">,
+ Sched<[]>;
+ def CV_SUBNR : CVInstAluRRNR<0b1000100, 0b011, "cv.subnr">,
+ Sched<[]>;
+ def CV_SUBUNR : CVInstAluRRNR<0b1000101, 0b011, "cv.subunr">,
+ Sched<[]>;
+ def CV_SUBRNR : CVInstAluRRNR<0b1000110, 0b011, "cv.subrnr">,
+ Sched<[]>;
+ def CV_SUBURNR : CVInstAluRRNR<0b1000111, 0b011, "cv.suburnr">,
+ Sched<[]>;
+
+} // Predicates = [HasVendorXCValu],
+ // hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ // Constraints = "$rd = $rd_wb"
+
+
+class CVInstSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ RISCVOpcode opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31-27} = funct5;
+ let Inst{26} = F;
+ let Inst{25} = funct1;
+ let DecoderNamespace = "XCVsimd";
+}
+
+class CVInstSIMDRI<bits<5> funct5, bit F, bits<3> funct3, RISCVOpcode opcode,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ bits<6> imm6;
+
+ let Inst{31-27} = funct5;
+ let Inst{26} = F;
+ let Inst{25} = imm6{0}; // funct1 unused
+ let Inst{24-20} = imm6{5-1};
+ let DecoderNamespace = "XCVsimd";
+}
+
+class CVSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ string opcodestr>
+ : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+class CVSIMDRRWb<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ string opcodestr>
+ : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class CVSIMDRI<bits<5> funct5, bit F, bits<3> funct3, string opcodestr>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
+ (ins GPR:$rs1, simm6:$imm6), opcodestr, "$rd, $rs1, $imm6">;
+
+class CVSIMDRIWb<bits<5> funct5, bit F, bits<3> funct3, string opcodestr>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3,
+ (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, simm6:$imm6),
+ opcodestr, "$rd, $rs1, $imm6"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class CVSIMDRU<bits<5> funct5, bit F, bits<3> funct3, string opcodestr,
+ Operand immtype = uimm6>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3,
+ (outs GPR:$rd), (ins GPR:$rs1, immtype:$imm6),
+ opcodestr, "$rd, $rs1, $imm6">;
+
+class CVSIMDRUWb<bits<5> funct5, bit F, bits<3> funct3, string opcodestr>
+ : CVInstSIMDRI<funct5, F, funct3, OPC_CUSTOM_3,
+ (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, uimm6:$imm6),
+ opcodestr, "$rd, $rs1, $imm6"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class CVSIMDR<bits<5> funct5, bit F, bit funct1, bits<3> funct3,
+ string opcodestr>
+ : CVInstSIMDRR<funct5, F, funct1, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
+ (ins GPR:$rs1), opcodestr, "$rd, $rs1"> {
+ let rs2 = 0b00000;
+}
+
+multiclass CVSIMDBinarySigned<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRI<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRI<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+multiclass CVSIMDBinaryUnsigned<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRU<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRU<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+multiclass CVSIMDShift<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRR<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRR<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRR<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRR<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRU<funct5, F, 0b110, "cv." # mnemonic # ".sci.h", uimm4>;
+ def CV_ # NAME # _SCI_B : CVSIMDRU<funct5, F, 0b111, "cv." # mnemonic # ".sci.b", uimm3>;
+}
+
+multiclass CVSIMDBinarySignedWb<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRRWb<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRRWb<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRRWb<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRRWb<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRIWb<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRIWb<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+multiclass CVSIMDBinaryUnsignedWb<bits<5> funct5, bit F, bit funct1, string mnemonic> {
+ def CV_ # NAME # _H : CVSIMDRRWb<funct5, F, funct1, 0b000, "cv." # mnemonic # ".h">;
+ def CV_ # NAME # _B : CVSIMDRRWb<funct5, F, funct1, 0b001, "cv." # mnemonic # ".b">;
+ def CV_ # NAME # _SC_H : CVSIMDRRWb<funct5, F, funct1, 0b100, "cv." # mnemonic # ".sc.h">;
+ def CV_ # NAME # _SC_B : CVSIMDRRWb<funct5, F, funct1, 0b101, "cv." # mnemonic # ".sc.b">;
+ def CV_ # NAME # _SCI_H : CVSIMDRUWb<funct5, F, 0b110, "cv." # mnemonic # ".sci.h">;
+ def CV_ # NAME # _SCI_B : CVSIMDRUWb<funct5, F, 0b111, "cv." # mnemonic # ".sci.b">;
+}
+
+
+let Predicates = [HasVendorXCVsimd, IsRV32],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ defm ADD : CVSIMDBinarySigned<0b00000, 0, 0, "add">;
+ defm SUB : CVSIMDBinarySigned<0b00001, 0, 0, "sub">;
+ defm AVG : CVSIMDBinarySigned<0b00010, 0, 0, "avg">;
+ defm AVGU : CVSIMDBinaryUnsigned<0b00011, 0, 0, "avgu">;
+ defm MIN : CVSIMDBinarySigned<0b00100, 0, 0, "min">;
+ defm MINU : CVSIMDBinaryUnsigned<0b00101, 0, 0, "minu">;
+ defm MAX : CVSIMDBinarySigned<0b00110, 0, 0, "max">;
+ defm MAXU : CVSIMDBinaryUnsigned<0b00111, 0, 0, "maxu">;
+ defm SRL : CVSIMDShift<0b01000, 0, 0, "srl">;
+ defm SRA : CVSIMDShift<0b01001, 0, 0, "sra">;
+ defm SLL : CVSIMDShift<0b01010, 0, 0, "sll">;
+ defm OR : CVSIMDBinarySigned<0b01011, 0, 0, "or">;
+ defm XOR : CVSIMDBinarySigned<0b01100, 0, 0, "xor">;
+ defm AND : CVSIMDBinarySigned<0b01101, 0, 0, "and">;
+
+ def CV_ABS_H : CVSIMDR<0b01110, 0, 0, 0b000, "cv.abs.h">;
+ def CV_ABS_B : CVSIMDR<0b01110, 0, 0, 0b001, "cv.abs.b">;
+
+ // 0b01111xx: UNDEF
+
+ defm DOTUP : CVSIMDBinaryUnsigned<0b10000, 0, 0, "dotup">;
+ defm DOTUSP : CVSIMDBinarySigned<0b10001, 0, 0, "dotusp">;
+ defm DOTSP : CVSIMDBinarySigned<0b10010, 0, 0, "dotsp">;
+ defm SDOTUP : CVSIMDBinaryUnsignedWb<0b10011, 0, 0, "sdotup">;
+ defm SDOTUSP : CVSIMDBinarySignedWb<0b10100, 0, 0, "sdotusp">;
+ defm SDOTSP : CVSIMDBinarySignedWb<0b10101, 0, 0, "sdotsp">;
+
+ // 0b10110xx: UNDEF
+
+ def CV_EXTRACT_H : CVSIMDRU<0b10111, 0, 0b000, "cv.extract.h">;
+ def CV_EXTRACT_B : CVSIMDRU<0b10111, 0, 0b001, "cv.extract.b">;
+ def CV_EXTRACTU_H : CVSIMDRU<0b10111, 0, 0b010, "cv.extractu.h">;
+ def CV_EXTRACTU_B : CVSIMDRU<0b10111, 0, 0b011, "cv.extractu.b">;
+ def CV_INSERT_H : CVSIMDRUWb<0b10111, 0, 0b100, "cv.insert.h">;
+ def CV_INSERT_B : CVSIMDRUWb<0b10111, 0, 0b101, "cv.insert.b">;
+
+ def CV_SHUFFLE_H : CVSIMDRR<0b11000, 0, 0, 0b000, "cv.shuffle.h">;
+ def CV_SHUFFLE_B : CVSIMDRR<0b11000, 0, 0, 0b001, "cv.shuffle.b">;
+ def CV_SHUFFLE_SCI_H : CVSIMDRU<0b11000, 0, 0b110, "cv.shuffle.sci.h">;
+ def CV_SHUFFLEI0_SCI_B : CVSIMDRU<0b11000, 0, 0b111, "cv.shufflei0.sci.b">;
+
+ def CV_SHUFFLEI1_SCI_B : CVSIMDRU<0b11001, 0, 0b111, "cv.shufflei1.sci.b">;
+
+ def CV_SHUFFLEI2_SCI_B : CVSIMDRU<0b11010, 0, 0b111, "cv.shufflei2.sci.b">;
+
+ def CV_SHUFFLEI3_SCI_B : CVSIMDRU<0b11011, 0, 0b111, "cv.shufflei3.sci.b">;
+
+ def CV_SHUFFLE2_H : CVSIMDRRWb<0b11100, 0, 0, 0b000, "cv.shuffle2.h">;
+ def CV_SHUFFLE2_B : CVSIMDRRWb<0b11100, 0, 0, 0b001, "cv.shuffle2.b">;
+
+ // 0b11101xx: UNDEF
+
+ def CV_PACK : CVSIMDRR<0b11110, 0, 0, 0b000, "cv.pack">;
+ def CV_PACK_H : CVSIMDRR<0b11110, 0, 1, 0b000, "cv.pack.h">;
+
+ def CV_PACKHI_B : CVSIMDRRWb<0b11111, 0, 1, 0b001, "cv.packhi.b">;
+ def CV_PACKLO_B : CVSIMDRRWb<0b11111, 0, 0, 0b001, "cv.packlo.b">;
+
+ defm CMPEQ : CVSIMDBinarySigned<0b00000, 1, 0, "cmpeq">;
+ defm CMPNE : CVSIMDBinarySigned<0b00001, 1, 0, "cmpne">;
+ defm CMPGT : CVSIMDBinarySigned<0b00010, 1, 0, "cmpgt">;
+ defm CMPGE : CVSIMDBinarySigned<0b00011, 1, 0, "cmpge">;
+ defm CMPLT : CVSIMDBinarySigned<0b00100, 1, 0, "cmplt">;
+ defm CMPLE : CVSIMDBinarySigned<0b00101, 1, 0, "cmple">;
+ defm CMPGTU : CVSIMDBinaryUnsigned<0b00110, 1, 0, "cmpgtu">;
+ defm CMPGEU : CVSIMDBinaryUnsigned<0b00111, 1, 0, "cmpgeu">;
+ defm CMPLTU : CVSIMDBinaryUnsigned<0b01000, 1, 0, "cmpltu">;
+ defm CMPLEU : CVSIMDBinaryUnsigned<0b01001, 1, 0, "cmpleu">;
+
+ def CV_CPLXMUL_R : CVSIMDRRWb<0b01010, 1, 0, 0b000, "cv.cplxmul.r">;
+ def CV_CPLXMUL_I : CVSIMDRRWb<0b01010, 1, 1, 0b000, "cv.cplxmul.i">;
+ def CV_CPLXMUL_R_DIV2 : CVSIMDRRWb<0b01010, 1, 0, 0b010, "cv.cplxmul.r.div2">;
+ def CV_CPLXMUL_I_DIV2 : CVSIMDRRWb<0b01010, 1, 1, 0b010, "cv.cplxmul.i.div2">;
+ def CV_CPLXMUL_R_DIV4 : CVSIMDRRWb<0b01010, 1, 0, 0b100, "cv.cplxmul.r.div4">;
+ def CV_CPLXMUL_I_DIV4 : CVSIMDRRWb<0b01010, 1, 1, 0b100, "cv.cplxmul.i.div4">;
+ def CV_CPLXMUL_R_DIV8 : CVSIMDRRWb<0b01010, 1, 0, 0b110, "cv.cplxmul.r.div8">;
+ def CV_CPLXMUL_I_DIV8 : CVSIMDRRWb<0b01010, 1, 1, 0b110, "cv.cplxmul.i.div8">;
+
+ def CV_CPLXCONJ : CVSIMDR<0b01011, 1, 0, 0b000, "cv.cplxconj">;
+
+ // 0b01011xx: UNDEF
+
+ def CV_SUBROTMJ : CVSIMDRR<0b01100, 1, 0, 0b000, "cv.subrotmj">;
+ def CV_SUBROTMJ_DIV2 : CVSIMDRR<0b01100, 1, 0, 0b010, "cv.subrotmj.div2">;
+ def CV_SUBROTMJ_DIV4 : CVSIMDRR<0b01100, 1, 0, 0b100, "cv.subrotmj.div4">;
+ def CV_SUBROTMJ_DIV8 : CVSIMDRR<0b01100, 1, 0, 0b110, "cv.subrotmj.div8">;
+
+ def CV_ADD_DIV2 : CVSIMDRR<0b01101, 1, 0, 0b010, "cv.add.div2">;
+ def CV_ADD_DIV4 : CVSIMDRR<0b01101, 1, 0, 0b100, "cv.add.div4">;
+ def CV_ADD_DIV8 : CVSIMDRR<0b01101, 1, 0, 0b110, "cv.add.div8">;
+
+ def CV_SUB_DIV2 : CVSIMDRR<0b01110, 1, 0, 0b010, "cv.sub.div2">;
+ def CV_SUB_DIV4 : CVSIMDRR<0b01110, 1, 0, 0b100, "cv.sub.div4">;
+ def CV_SUB_DIV8 : CVSIMDRR<0b01110, 1, 0, 0b110, "cv.sub.div8">;
+}
+
+class CVInstImmBranch<bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInstB<funct3, OPC_CUSTOM_0, outs, ins, opcodestr, argstr> {
+ bits<5> imm5;
+ let rs2 = imm5;
+ let DecoderNamespace = "XCVbi";
+}
+
+let Predicates = [HasVendorXCVbi, IsRV32], hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0, isBranch = 1, isTerminator = 1 in {
+ // Immediate branching operations
+ def CV_BEQIMM : CVInstImmBranch<0b110, (outs),
+ (ins GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12),
+ "cv.beqimm", "$rs1, $imm5, $imm12">, Sched<[]>;
+ def CV_BNEIMM : CVInstImmBranch<0b111, (outs),
+ (ins GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12),
+ "cv.bneimm", "$rs1, $imm5, $imm12">, Sched<[]>;
+}
+
+def CVrrAsmOperand : AsmOperandClass {
+ let Name = "RegReg";
+ let ParserMethod = "parseRegReg";
+ let DiagnosticType = "InvalidRegReg";
+}
+
+def CVrr : Operand<OtherVT> {
+ let ParserMatchClass = CVrrAsmOperand;
+ let EncoderMethod = "getRegReg";
+ let DecoderMethod = "decodeRegReg";
+ let PrintMethod = "printRegReg";
+}
+
+class CVLoad_ri_inc<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb), (ins GPRMem:$rs1, simm12:$imm12),
+ opcodestr, "$rd, (${rs1}), ${imm12}"> {
+ let Constraints = "$rs1_wb = $rs1";
+ let DecoderNamespace = "XCVmem";
+}
+
+class CVLoad_rr_inc<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd, GPR:$rs1_wb), (ins GPRMem:$rs1, GPR:$rs2),
+ opcodestr, "$rd, (${rs1}), ${rs2}"> {
+ let Constraints = "$rs1_wb = $rs1";
+ let DecoderNamespace = "XCVmem";
+}
+
+class CVLoad_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_CUSTOM_1, (outs GPR:$rd), (ins CVrr:$cvrr),
+ opcodestr, "$rd, $cvrr"> {
+ bits<5> rd;
+ bits<10> cvrr;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = cvrr{4-0};
+ let Inst{19-15} = cvrr{9-5};
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let DecoderNamespace = "XCVmem";
+}
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 1, mayStore = 0, Constraints = "$rs1_wb = $rs1" in {
+ // Register-Immediate load with post-increment
+ def CV_LB_ri_inc : CVLoad_ri_inc<0b000, "cv.lb">;
+ def CV_LBU_ri_inc : CVLoad_ri_inc<0b100, "cv.lbu">;
+ def CV_LH_ri_inc : CVLoad_ri_inc<0b001, "cv.lh">;
+ def CV_LHU_ri_inc : CVLoad_ri_inc<0b101, "cv.lhu">;
+ def CV_LW_ri_inc : CVLoad_ri_inc<0b010, "cv.lw">;
+
+ // Register-Register load with post-increment
+ def CV_LB_rr_inc : CVLoad_rr_inc<0b0000000, 0b011, "cv.lb">;
+ def CV_LBU_rr_inc : CVLoad_rr_inc<0b0001000, 0b011, "cv.lbu">;
+ def CV_LH_rr_inc : CVLoad_rr_inc<0b0000001, 0b011, "cv.lh">;
+ def CV_LHU_rr_inc : CVLoad_rr_inc<0b0001001, 0b011, "cv.lhu">;
+ def CV_LW_rr_inc : CVLoad_rr_inc<0b0000010, 0b011, "cv.lw">;
+}
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 1, mayStore = 0 in {
+ // Register-Register load
+ def CV_LB_rr : CVLoad_rr<0b0000100, 0b011, "cv.lb">;
+ def CV_LBU_rr : CVLoad_rr<0b0001100, 0b011, "cv.lbu">;
+ def CV_LH_rr : CVLoad_rr<0b0000101, 0b011, "cv.lh">;
+ def CV_LHU_rr : CVLoad_rr<0b0001101, 0b011, "cv.lhu">;
+ def CV_LW_rr : CVLoad_rr<0b0000110, 0b011, "cv.lw">;
+}
+
+class CVStore_ri_inc<bits<3> funct3, string opcodestr>
+ : RVInstS<funct3, OPC_CUSTOM_1, (outs GPR:$rs1_wb),
+ (ins GPR:$rs2, GPR:$rs1, simm12:$imm12),
+ opcodestr, "$rs2, (${rs1}), ${imm12}"> {
+ let Constraints = "$rs1_wb = $rs1";
+ let DecoderNamespace = "XCVmem";
+}
+
+class CVStore_rr_inc<bits<3> funct3, bits<7> funct7, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> {
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rs3;
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+ let DecoderNamespace = "XCVmem";
+}
+
+
+class CVStore_rr<bits<3> funct3, bits<7> funct7, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> {
+ bits<5> rs2;
+ bits<10> cvrr;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = cvrr{9-5};
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = cvrr{4-0};
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+ let DecoderNamespace = "XCVmem";
+}
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 0, mayStore = 1, Constraints = "$rs1_wb = $rs1" in {
+ // Register-Immediate store with post-increment
+ def CV_SB_ri_inc : CVStore_ri_inc<0b000, "cv.sb">;
+ def CV_SH_ri_inc : CVStore_ri_inc<0b001, "cv.sh">;
+ def CV_SW_ri_inc : CVStore_ri_inc<0b010, "cv.sw">;
+
+ // Register-Register store with post-increment
+ def CV_SB_rr_inc : CVStore_rr_inc<0b011, 0b0010000,
+ (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ "cv.sb", "$rs2, (${rs1}), ${rs3}">;
+ def CV_SH_rr_inc : CVStore_rr_inc<0b011, 0b0010001,
+ (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ "cv.sh", "$rs2, (${rs1}), ${rs3}">;
+ def CV_SW_rr_inc : CVStore_rr_inc<0b011, 0b0010010,
+ (outs GPR:$rs1_wb), (ins GPR:$rs2, GPR:$rs1, GPR:$rs3),
+ "cv.sw", "$rs2, (${rs1}), ${rs3}">;
+}
+
+
+let Predicates = [HasVendorXCVmem, IsRV32], hasSideEffects = 0,
+ mayLoad = 0, mayStore = 1 in {
+ // Register-Register store
+ def CV_SB_rr : CVStore_rr<0b011, 0b0010100,
+ (outs), (ins GPR:$rs2, CVrr:$cvrr),
+ "cv.sb", "$rs2, $cvrr">;
+ def CV_SH_rr : CVStore_rr<0b011, 0b0010101,
+ (outs), (ins GPR:$rs2, CVrr:$cvrr),
+ "cv.sh", "$rs2, $cvrr">;
+ def CV_SW_rr : CVStore_rr<0b011, 0b0010110,
+ (outs), (ins GPR:$rs2, CVrr:$cvrr),
+ "cv.sw", "$rs2, $cvrr">;
+}
+
+let DecoderNamespace = "XCVelw" in
+class CVLoad_ri<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPRMem:$rs1, simm12:$imm12), opcodestr, "$rd, ${imm12}(${rs1})">;
+
+let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0,
+ mayLoad = 1, mayStore = 0 in {
+ // Event load
+ def CV_ELW : CVLoad_ri<0b011, "cv.elw">;
+}
+
+def cv_tuimm2 : TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]>;
+def cv_tuimm5 : TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]>;
+def cv_uimm10 : ImmLeaf<XLenVT, [{return isUInt<10>(Imm);}]>;
+
+def CV_LO5: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1f, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+def CV_HI5: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 5, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+multiclass PatCoreVBitManip<Intrinsic intr> {
+ def : PatGprGpr<intr, !cast<RVInst>("CV_" # NAME # "R")>;
+ def : Pat<(intr GPR:$rs1, cv_uimm10:$imm),
+ (!cast<RVInst>("CV_" # NAME)
+ GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>;
+}
+
+let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
+ defm EXTRACT : PatCoreVBitManip<int_riscv_cv_bitmanip_extract>;
+ defm EXTRACTU : PatCoreVBitManip<int_riscv_cv_bitmanip_extractu>;
+ defm BCLR : PatCoreVBitManip<int_riscv_cv_bitmanip_bclr>;
+ defm BSET : PatCoreVBitManip<int_riscv_cv_bitmanip_bset>;
+
+ def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, GPR:$rs2, GPR:$rd),
+ (CV_INSERTR GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(int_riscv_cv_bitmanip_insert GPR:$rs1, cv_uimm10:$imm, GPR:$rd),
+ (CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm),
+ (CV_LO5 cv_uimm10:$imm))>;
+
+ def : PatGpr<cttz, CV_FF1>;
+ def : PatGpr<ctlz, CV_FL1>;
+ def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>;
+ def : PatGpr<ctpop, CV_CNT>;
+
+ def : PatGprGpr<rotr, CV_ROR>;
+
+ def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts,
+ cv_tuimm2:$radix),
+ (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
+ def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
+}
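
[Note on the PatCoreVBitManip patterns above: CV_HI5 and CV_LO5 simply split a 10-bit immediate into the two 5-bit fields that the CV_EXTRACT/CV_BCLR/CV_BSET encodings take. A minimal standalone C++ sketch of that split, with the helper name invented here for illustration (it is not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Mirrors the two SDNodeXForms above: CV_LO5 keeps the low five bits and
    // CV_HI5 the upper five bits of a cv_uimm10 immediate.
    static void splitCvUimm10(uint32_t Imm, uint32_t &Hi5, uint32_t &Lo5) {
      assert(Imm < (1u << 10) && "cv_uimm10 expects a 10-bit unsigned value");
      Lo5 = Imm & 0x1f; // CV_LO5: Imm & 0x1f
      Hi5 = Imm >> 5;   // CV_HI5: Imm >> 5
    }
]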
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 03ed501ba6a3..fa618b437ce7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -27,30 +27,19 @@ def VCIX_XV : VCIXType<0b0010>;
def VCIX_XVV : VCIXType<0b1010>;
def VCIX_XVW : VCIXType<0b1111>;
-// The payload and timm5 operands are all marked as ImmArg in the IR
+// The payload and tsimm5 operands are all marked as ImmArg in the IR
// intrinsic and will be target constant, so use TImmLeaf rather than ImmLeaf.
-def payload1 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<1>;
- let DecoderMethod = "decodeUImmOperand<1>";
- let OperandType = "OPERAND_UIMM1";
- let OperandNamespace = "RISCVOp";
+class PayloadOp<int bitsNum> : RISCVOp, TImmLeaf<XLenVT, "return isUInt<" # bitsNum # ">(Imm);"> {
+ let ParserMatchClass = UImmAsmOperand<bitsNum>;
+ let DecoderMethod = "decodeUImmOperand<"# bitsNum # ">";
+ let OperandType = "OPERAND_UIMM" # bitsNum;
}
-def payload2 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<2>;
- let DecoderMethod = "decodeUImmOperand<2>";
- let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
-}
-
-def payload5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<5>;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_UIMM5";
- let OperandNamespace = "RISCVOp";
-}
+def payload1 : PayloadOp<1>;
+def payload2 : PayloadOp<2>;
+def payload5 : PayloadOp<5>;
-def timm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
+def tsimm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<5>;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<5>";
@@ -111,15 +100,6 @@ class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
let RVVConstraint = NoConstraint;
}
-class GetFTypeInfo<int sew> {
- ValueType Scalar = !cond(!eq(sew, 16): f16,
- !eq(sew, 32): f32,
- !eq(sew, 64): f64);
- RegisterClass ScalarRegClass = !cond(!eq(sew, 16): FPR16,
- !eq(sew, 32): FPR32,
- !eq(sew, 64): FPR64);
-}
-
class VCIXInfo<string suffix, VCIXType type, DAGOperand TyRd,
DAGOperand TyRs2, DAGOperand TyRs1, bit HaveOutputDst> {
string OpcodeStr = !if(HaveOutputDst, "sf.vc.v." # suffix,
@@ -187,6 +167,20 @@ multiclass CustomSiFiveVCIX<string suffix, VCIXType type,
InTyRs1, 1>;
}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class CustomSiFiveVMACC<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVCCustom2<funct6{5-2}, opv.Value, (outs VR:$rd), (ins VR:$rs1, VR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2"> {
+ let vm = 1;
+ let funct6_lo2 = funct6{1-0};
+}
+}
+
+class CustomSiFiveVFNRCLIP<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : VALUVF<funct6, opv, opcodestr> {
+ let Inst{6-0} = OPC_CUSTOM_2.Value;
+}
+
let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
hasSideEffects = 1, hasNoSchedulingInfo = 1, DecoderNamespace = "XSfvcp" in {
defm X : CustomSiFiveVCIX<"x", VCIX_X, uimm5, uimm5, GPR>, Sched<[]>;
@@ -205,6 +199,28 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>;
}
+let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvqmaccdod" in {
+ def VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">;
+ def VQMACC_2x8x2 : CustomSiFiveVMACC<0b101101, OPMVV, "sf.vqmacc.2x8x2">;
+ def VQMACCUS_2x8x2 : CustomSiFiveVMACC<0b101110, OPMVV, "sf.vqmaccus.2x8x2">;
+ def VQMACCSU_2x8x2 : CustomSiFiveVMACC<0b101111, OPMVV, "sf.vqmaccsu.2x8x2">;
+}
+
+let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in {
+ def VQMACCU_4x8x4 : CustomSiFiveVMACC<0b111100, OPMVV, "sf.vqmaccu.4x8x4">;
+ def VQMACC_4x8x4 : CustomSiFiveVMACC<0b111101, OPMVV, "sf.vqmacc.4x8x4">;
+ def VQMACCUS_4x8x4 : CustomSiFiveVMACC<0b111110, OPMVV, "sf.vqmaccus.4x8x4">;
+ def VQMACCSU_4x8x4 : CustomSiFiveVMACC<0b111111, OPMVV, "sf.vqmaccsu.4x8x4">;
+}
+
+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+ def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">;
+}
+
+let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf" in {
+ def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">;
+ def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">;
+}
class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
bit HasSideEffect = 1> :
Pseudo<(outs),
@@ -327,15 +343,41 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
}
}
+multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type,
+ string Constraint = ""> {
+ def "Pseudo" # NAME # "_" # mx
+ : VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>;
+}
+
+multiclass VPseudoSiFiveVQMACC<string Constraint = ""> {
+ foreach m = MxListVF8 in
+ defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>;
+}
+
+multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
+ foreach m = MxListFW in
+ defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
+}
+
+multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> {
+ foreach m = MxListVF4 in
+ let hasSideEffects = 0 in
+ defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<!if(!eq(m.vrclass, VRM8),
+ VRM2, VR),
+ m.vrclass, FPR32, m,
+ Constraint, /*sew*/0,
+ UsesVXRM=0>;
+}
+
let Predicates = [HasVendorXSfvcp] in {
foreach m = MxList in {
defm X : VPseudoVC_X<m, GPR>;
- defm I : VPseudoVC_X<m, timm5>;
+ defm I : VPseudoVC_X<m, tsimm5>;
defm XV : VPseudoVC_XV<m, GPR>;
- defm IV : VPseudoVC_XV<m, timm5>;
+ defm IV : VPseudoVC_XV<m, tsimm5>;
defm VV : VPseudoVC_XV<m, m.vrclass>;
defm XVV : VPseudoVC_XVV<m, GPR>;
- defm IVV : VPseudoVC_XVV<m, timm5>;
+ defm IVV : VPseudoVC_XVV<m, tsimm5>;
defm VVV : VPseudoVC_XVV<m, m.vrclass>;
}
foreach f = FPList in {
@@ -346,7 +388,7 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach m = MxListW in {
defm XVW : VPseudoVC_XVW<m, GPR>;
- defm IVW : VPseudoVC_XVW<m, timm5>;
+ defm IVW : VPseudoVC_XVW<m, tsimm5>;
defm VVW : VPseudoVC_XVW<m, m.vrclass>;
}
foreach f = FPListW in {
@@ -355,6 +397,29 @@ let Predicates = [HasVendorXSfvcp] in {
}
}
+let Predicates = [HasVendorXSfvqmaccdod] in {
+ defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACC_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACC;
+}
+
+let Predicates = [HasVendorXSfvqmaccqoq] in {
+ defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACC_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC;
+}
+
+let Predicates = [HasVendorXSfvfwmaccqqq] in {
+ defm VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC;
+}
+
+let Predicates = [HasVendorXSfvfnrclipxfqf] in {
+ defm VFNRCLIP_XU_F_QF : VPseudoSiFiveVFNRCLIP;
+ defm VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
+}
+
class VPatVC_OP4<string intrinsic_name,
string inst,
ValueType op2_type,
@@ -472,39 +537,126 @@ multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix,
wti.RegClass, vti.RegClass, kind, op1_kind>;
}
+class GetFTypeInfo<int Sew> {
+ ValueType Scalar = !cond(!eq(Sew, 16) : f16,
+ !eq(Sew, 32) : f32,
+ !eq(Sew, 64) : f64);
+ RegisterClass ScalarRegClass = !cond(!eq(Sew, 16) : FPR16,
+ !eq(Sew, 32) : FPR32,
+ !eq(Sew, 64) : FPR64);
+
+ string ScalarSuffix = !cond(!eq(Scalar, f16) : "FPR16",
+ !eq(Scalar, f32) : "FPR32",
+ !eq(Scalar, f64) : "FPR64");
+}
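+
+// [Aside: GetFTypeInfo<Sew>.ScalarSuffix above only maps the element width to
+// the FPR register-class name used to build the sf.vc.* instruction suffixes
+// (e.g. "FPR32" # "V"). A rough C++ sketch of that mapping, for illustration
+// only; the function name is invented here and is not part of the patch:
+//
+//   static const char *scalarSuffixForSEW(unsigned Sew) {
+//     switch (Sew) {
+//     case 16: return "FPR16";
+//     case 32: return "FPR32";
+//     case 64: return "FPR64";
+//     default: return nullptr; // GetFTypeInfo only covers SEW 16, 32 and 64
+//     }
+//   }
+// ]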
+
+multiclass VPatVMACC<string intrinsic, string instruction, string kind,
+ list<VTypeInfoToWide> info_pairs, ValueType vec_m1> {
+ foreach pair = info_pairs in {
+ defvar VdInfo = pair.Wti;
+ defvar Vs2Info = pair.Vti;
+ let Predicates = [HasVInstructions] in
+ def : VPatTernaryNoMaskWithPolicy<"int_riscv_sf_" # intrinsic,
+ "Pseudo" # instruction, kind, VdInfo.Vector,
+ vec_m1, Vs2Info.Vector,
+ Vs2Info.Log2SEW, Vs2Info.LMul,
+ VdInfo.RegClass, VR, Vs2Info.RegClass>;
+ }
+}
+
+defset list<VTypeInfoToWide> VQMACCInfoPairs = {
+ def : VTypeInfoToWide<VI8M1, VI32M1>;
+ def : VTypeInfoToWide<VI8M2, VI32M2>;
+ def : VTypeInfoToWide<VI8M4, VI32M4>;
+ def : VTypeInfoToWide<VI8M8, VI32M8>;
+}
+
+multiclass VPatVQMACC<string intrinsic, string instruction, string kind>
+ : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>;
+
+
+multiclass VPatVFWMACC<string intrinsic, string instruction, string kind>
+ : VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors,
+ vbfloat16m1_t>;
+
+defset list<VTypeInfoToWide> VFNRCLIPInfoPairs = {
+ def : VTypeInfoToWide<VI8MF8, VF32MF2>;
+ def : VTypeInfoToWide<VI8MF4, VF32M1>;
+ def : VTypeInfoToWide<VI8MF2, VF32M2>;
+ def : VTypeInfoToWide<VI8M1, VF32M4>;
+ def : VTypeInfoToWide<VI8M2, VF32M8>;
+}
+
+multiclass VPatVFNRCLIP<string intrinsic, string instruction> {
+ foreach pair = VFNRCLIPInfoPairs in {
+ defvar Vti = pair.Vti;
+ defvar Wti = pair.Wti;
+ defm : VPatBinaryRoundingMode<"int_riscv_sf_" # intrinsic,
+ "Pseudo" # instruction # "_" # Wti.LMul.MX,
+ Vti.Vector, Wti.Vector, Wti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Wti.ScalarRegClass>;
+ }
+}
+
let Predicates = [HasVendorXSfvcp] in {
foreach vti = AllIntegerVectors in {
- defm : VPatVC_X<"x", "X", vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_X<"i", "I", vti, XLenVT, timm5>;
- defm : VPatVC_XV<"xv", "XV", vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_XV<"iv", "IV", vti, XLenVT, timm5>;
+ defm : VPatVC_X<"x", "X", vti, XLenVT, GPR>;
+ defm : VPatVC_X<"i", "I", vti, XLenVT, tsimm5>;
+ defm : VPatVC_XV<"xv", "XV", vti, XLenVT, GPR>;
+ defm : VPatVC_XV<"iv", "IV", vti, XLenVT, tsimm5>;
defm : VPatVC_XV<"vv", "VV", vti, vti.Vector, vti.RegClass>;
- defm : VPatVC_XVV<"xvv", "XVV", vti, vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, timm5>;
+ defm : VPatVC_XVV<"xvv", "XVV", vti, vti, XLenVT, GPR>;
+ defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, tsimm5>;
defm : VPatVC_XVV<"vvv", "VVV", vti, vti, vti.Vector, vti.RegClass>;
- if !ge(vti.SEW, 16) then {
- defm : VPatVC_XV<"fv", "F" # vti.SEW # "V", vti,
- GetFTypeInfo<vti.SEW>.Scalar,
- GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
- defm : VPatVC_XVV<"fvv", "F" # vti.SEW # "VV", vti, vti,
- GetFTypeInfo<vti.SEW>.Scalar,
- GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+
+ if !ne(vti.SEW, 8) then {
+ defvar finfo = GetFTypeInfo<vti.SEW>;
+ defm : VPatVC_XV<"fv", finfo.ScalarSuffix # "V", vti, finfo.Scalar,
+ finfo.ScalarRegClass, payload1>;
+ defm : VPatVC_XVV<"fvv", finfo.ScalarSuffix # "VV", vti, vti, finfo.Scalar,
+ finfo.ScalarRegClass, payload1>;
}
}
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatVC_XVV<"xvw", "XVW", wti, vti, vti.Scalar, vti.ScalarRegClass>;
- defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, timm5>;
+ defvar iinfo = GetIntVTypeInfo<vti>.Vti;
+ defm : VPatVC_XVV<"xvw", "XVW", wti, vti, iinfo.Scalar, iinfo.ScalarRegClass>;
+ defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, tsimm5>;
defm : VPatVC_XVV<"vvw", "VVW", wti, vti, vti.Vector, vti.RegClass>;
- if !ge(vti.SEW, 16) then {
- defm : VPatVC_XVV<"fvw", "F" # vti.SEW # "VW", wti, vti,
- GetFTypeInfo<vti.SEW>.Scalar,
- GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+
+ if !ne(vti.SEW, 8) then {
+ defvar finfo = GetFTypeInfo<vti.SEW>;
+ defm : VPatVC_XVV<"fvw", finfo.ScalarSuffix # "VW", wti, vti, finfo.Scalar,
+ finfo.ScalarRegClass, payload1>;
}
}
}
+let Predicates = [HasVendorXSfvqmaccdod] in {
+ defm : VPatVQMACC<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">;
+ defm : VPatVQMACC<"vqmacc_2x8x2", "VQMACC", "2x8x2">;
+ defm : VPatVQMACC<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">;
+ defm : VPatVQMACC<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">;
+}
+
+let Predicates = [HasVendorXSfvqmaccqoq] in {
+ defm : VPatVQMACC<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">;
+ defm : VPatVQMACC<"vqmacc_4x8x4", "VQMACC", "4x8x4">;
+ defm : VPatVQMACC<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">;
+ defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">;
+}
+
+let Predicates = [HasVendorXSfvfwmaccqqq] in {
+ defm : VPatVFWMACC<"vfwmacc_4x4x4", "VFWMACC", "4x4x4">;
+}
+
+let Predicates = [HasVendorXSfvfnrclipxfqf] in {
+ defm : VPatVFNRCLIP<"vfnrclip_xu_f_qf", "VFNRCLIP_XU_F_QF">;
+ defm : VPatVFNRCLIP<"vfnrclip_x_f_qf", "VFNRCLIP_X_F_QF">;
+}
+
let Predicates = [HasVendorXSfcie] in {
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in {
def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">,
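
[Note on the payload/tsimm5 operand rework earlier in this file's diff: the TImmLeaf predicates only constrain the immediate range, isUInt<N> for payload1/payload2/payload5 and isInt<5> for tsimm5. A rough C++ equivalent of those range checks, written out purely for illustration (these sketches stand in for the llvm::isUInt/llvm::isInt templates and are not part of the patch):

    #include <cstdint>

    // payloadN accepts any value representable in N unsigned bits (isUInt<N>).
    static bool fitsPayload(unsigned NumBits, uint64_t Imm) {
      return NumBits >= 64 || Imm < (uint64_t{1} << NumBits);
    }

    // tsimm5 accepts a signed 5-bit immediate, i.e. -16 through 15 (isInt<5>).
    static bool fitsTsimm5(int64_t Imm) {
      return Imm >= -16 && Imm <= 15;
    }
]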
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index e840dfddd8d9..1d44b1ad2636 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -14,31 +14,36 @@
// T-HEAD specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDT_LoadPair : SDTypeProfile<2, 2,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
-def SDT_StorePair : SDTypeProfile<0, 4,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+def SDT_LoadPair : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 3>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, XLenVT>]>;
+def SDT_StorePair : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 3>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, XLenVT>]>;
def th_lwud : SDNode<"RISCVISD::TH_LWUD", SDT_LoadPair,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def th_lwd : SDNode<"RISCVISD::TH_LWD", SDT_LoadPair,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def th_ldd : SDNode<"RISCVISD::TH_LDD", SDT_LoadPair,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def th_swd : SDNode<"RISCVISD::TH_SWD", SDT_StorePair,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def th_sdd : SDNode<"RISCVISD::TH_SDD", SDT_StorePair,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
+
class THInstVdotVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
string opcodestr, string argstr>
: RVInstVV<funct6, opv, outs, ins, opcodestr, argstr> {
let Inst{26} = 0;
let Inst{6-0} = OPC_CUSTOM_0.Value;
- let DecoderNamespace = "THeadV";
+ let DecoderNamespace = "XTHeadVdot";
}
class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
@@ -46,45 +51,53 @@ class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
: RVInstVX<funct6, opv, outs, ins, opcodestr, argstr> {
let Inst{26} = 1;
let Inst{6-0} = OPC_CUSTOM_0.Value;
- let DecoderNamespace = "THeadV";
+ let DecoderNamespace = "XTHeadVdot";
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
-class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : THInstVdotVV<funct6, opv, (outs VR:$vd),
- (ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $vs1, $vs2$vm">;
+class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber>
+ : THInstVdotVV<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $vs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
-class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
- : THInstVdotVX<funct6, opv, (outs VR:$vd),
- (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vd, $rs1, $vs2$vm">;
+class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber>
+ : THInstVdotVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm"> {
+ let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
+ "$vd = $vd_wb");
+}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "THeadBa",
- hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "XTHeadBa",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class THShiftALU_rri<bits<3> funct3, string opcodestr>
- : RVInstR<0, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
- opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ : RVInstRBase<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
bits<2> uimm2;
let Inst{31-27} = 0;
let Inst{26-25} = uimm2;
}
-let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
- hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "XTHeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
: RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimmlog2xlen:$shamt),
opcodestr, "$rd, $rs1, $shamt">;
class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
- (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
- opcodestr, "$rd, $rs1, $msb, $lsb"> {
+ : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
+ opcodestr, "$rd, $rs1, $msb, $lsb"> {
bits<6> msb;
bits<6> lsb;
let Inst{31-26} = msb;
@@ -92,21 +105,18 @@ class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
}
class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
- : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, $rs1"> {
- let rs3 = funct5;
- let rs2 = 0;
-}
+ : RVInstIUnary<{funct5, funct2, 0b00000}, 0b001, OPC_CUSTOM_0,
+ (outs GPR:$rd), (ins GPR:$rs1), opcodestr, "$rd, $rs1">;
}
-let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
- hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "XTHeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimm5:$shamt),
opcodestr, "$rd, $rs1, $shamt">;
-let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "THeadCondMov",
+let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "XTHeadCondMov",
hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
class THCondMov_rr<bits<7> funct7, string opcodestr>
: RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
@@ -115,7 +125,7 @@ class THCondMov_rr<bits<7> funct7, string opcodestr>
let Constraints = "$rd_wb = $rd";
}
-let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "THeadMac",
+let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "XTHeadMac",
hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
: RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
@@ -124,27 +134,28 @@ class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
let Constraints = "$rd_wb = $rd";
}
-let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
- hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "XTHeadMemPair",
+ hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class THLoadPair<bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0,
- (outs GPR:$rd, GPR:$rs2),
- (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
- opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ : RVInstRBase<0b100, OPC_CUSTOM_0,
+ (outs GPR:$rd, GPR:$rs2),
+ (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
let DecoderMethod = "decodeXTHeadMemPair";
let Constraints = "@earlyclobber $rd,@earlyclobber $rs2";
}
-let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
- hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "XTHeadMemPair",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class THStorePair<bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0,
- (outs),
- (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
- opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ : RVInstRBase<0b101, OPC_CUSTOM_0, (outs),
+ (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
let DecoderMethod = "decodeXTHeadMemPair";
}
@@ -174,44 +185,46 @@ class THCacheInst_void<bits<5> funct5, string opcodestr>
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
class THLoadIndexed<RegisterClass Ty, bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), !if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0,
- (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
- opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ : RVInstRBase<!if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0,
+ (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
}
class THLoadUpdate<bits<5> funct5, string opcodestr>
- : RVInstI<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb),
- (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
- opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
+ : RVInstIBase<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb),
+ (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
+ opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
bits<5> simm5;
bits<2> uimm2;
- let imm12{11-7} = funct5;
- let imm12{6-5} = uimm2;
- let imm12{4-0} = simm5;
+ let Inst{31-27} = funct5;
+ let Inst{26-25} = uimm2;
+ let Inst{24-20} = simm5;
let Constraints = "@earlyclobber $rd, $rs1_wb = $rs1";
}
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
class THStoreIndexed<RegisterClass StTy, bits<5> funct5, string opcodestr>
- : RVInstR<!shl(funct5, 2), !if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0,
- (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
- opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ : RVInstRBase<!if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0,
+ (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
bits<2> uimm2;
+ let Inst{31-27} = funct5;
let Inst{26-25} = uimm2;
}
class THStoreUpdate<bits<5> funct5, string opcodestr>
- : RVInstI<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up),
- (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
- opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
+ : RVInstIBase<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up),
+ (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
+ opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
bits<5> simm5;
bits<2> uimm2;
- let imm12{11-7} = funct5;
- let imm12{6-5} = uimm2;
- let imm12{4-0} = simm5;
+ let Inst{31-27} = funct5;
+ let Inst{26-25} = uimm2;
+ let Inst{24-20} = simm5;
let Constraints = "$rs1_up = $rs1";
}
}
@@ -220,22 +233,25 @@ class THStoreUpdate<bits<5> funct5, string opcodestr>
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
+
multiclass THVdotVMAQA_VX<string opcodestr, bits<6> funct6> {
- def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx">;
+ let RVVConstraint = WidenV in
+ def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx", EarlyClobber=1>;
}
-multiclass THVdotVMAQA<string opcodestr, bits<6> funct6> {
- def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv">;
- defm "" : THVdotVMAQA_VX<opcodestr, funct6>;
+multiclass THVdotVMAQA<string opcodestr, bits<6> funct6>
+ : THVdotVMAQA_VX<opcodestr, funct6> {
+ let RVVConstraint = WidenV in
+ def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv", EarlyClobber=1>;
}
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVendorXTHeadBa] in {
+
+let Predicates = [HasVendorXTHeadBa] in
def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
-} // Predicates = [HasVendorXTHeadBa]
let Predicates = [HasVendorXTHeadBb] in {
def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
@@ -252,20 +268,19 @@ def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
} // Predicates = [HasVendorXTHeadBb, IsRV64]
-let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
-let IsSignExtendingOpW = 1 in
+let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "XTHeadBs",
+ IsSignExtendingOpW = 1 in
def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
-} // Predicates = [HasVendorXTHeadBs]
let Predicates = [HasVendorXTHeadCondMov] in {
-def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">;
-def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">;
+def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">;
+def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">;
} // Predicates = [HasVendorXTHeadCondMov]
let Predicates = [HasVendorXTHeadMac] in {
-def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">;
-def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">;
+def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">;
+def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">;
} // Predicates = [HasVendorXTHeadMac]
let Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1 in {
@@ -289,130 +304,130 @@ def TH_LWD : THLoadPair<0b11100, "th.lwd">,
}
let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
-def TH_LDD : THLoadPair<0b11111, "th.ldd">,
- Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
-def TH_SDD : THStorePair<0b11111, "th.sdd">,
- Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
+def TH_LDD : THLoadPair<0b11111, "th.ldd">,
+ Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
+def TH_SDD : THStorePair<0b11111, "th.sdd">,
+ Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
}
-let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "THeadMemIdx" in {
+let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "XTHeadMemIdx" in {
// T-Head Load/Store + Update instructions.
def TH_LBIA : THLoadUpdate<0b00011, "th.lbia">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBIB : THLoadUpdate<0b00001, "th.lbib">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBUIA : THLoadUpdate<0b10011, "th.lbuia">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LBUIB : THLoadUpdate<0b10001, "th.lbuib">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LHIA : THLoadUpdate<0b00111, "th.lhia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHIB : THLoadUpdate<0b00101, "th.lhib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHUIA : THLoadUpdate<0b10111, "th.lhuia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LHUIB : THLoadUpdate<0b10101, "th.lhuib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LWIA : THLoadUpdate<0b01011, "th.lwia">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LWIB : THLoadUpdate<0b01001, "th.lwib">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_SBIA : THStoreUpdate<0b00011, "th.sbia">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SBIB : THStoreUpdate<0b00001, "th.sbib">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SHIA : THStoreUpdate<0b00111, "th.shia">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SHIB : THStoreUpdate<0b00101, "th.shib">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SWIA : THStoreUpdate<0b01011, "th.swia">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SWIB : THStoreUpdate<0b01001, "th.swib">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
// T-Head Load/Store Indexed instructions.
def TH_LRB : THLoadIndexed<GPR, 0b00000, "th.lrb">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRBU : THLoadIndexed<GPR, 0b10000, "th.lrbu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LURB : THLoadIndexed<GPR, 0b00010, "th.lurb">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LURBU : THLoadIndexed<GPR, 0b10010, "th.lurbu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRH : THLoadIndexed<GPR, 0b00100, "th.lrh">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LRHU : THLoadIndexed<GPR, 0b10100, "th.lrhu">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LURH : THLoadIndexed<GPR, 0b00110, "th.lurh">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LURHU : THLoadIndexed<GPR, 0b10110, "th.lurhu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRW : THLoadIndexed<GPR, 0b01000, "th.lrw">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LURW : THLoadIndexed<GPR, 0b01010, "th.lurw">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRB : THStoreIndexed<GPR, 0b00000, "th.srb">,
- Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def TH_SURB : THStoreIndexed<GPR, 0b00010, "th.surb">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRH : THStoreIndexed<GPR, 0b00100, "th.srh">,
- Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
def TH_SURH : THStoreIndexed<GPR, 0b00110, "th.surh">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRW : THStoreIndexed<GPR, 0b01000, "th.srw">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SURW : THStoreIndexed<GPR, 0b01010, "th.surw">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
}
-let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "THeadMemIdx" in {
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "XTHeadMemIdx" in {
// T-Head Load/Store + Update instructions.
def TH_LWUIA : THLoadUpdate<0b11011, "th.lwuia">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LWUIB : THLoadUpdate<0b11001, "th.lwuib">,
- Sched<[WriteLDH, ReadMemBase]>;
+ Sched<[WriteLDH, ReadMemBase]>;
def TH_LDIA : THLoadUpdate<0b01111, "th.ldia">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LDIB : THLoadUpdate<0b01101, "th.ldib">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_SDIA : THStoreUpdate<0b01111, "th.sdia">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SDIB : THStoreUpdate<0b01101, "th.sdib">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
// T-Head Load/Store Indexed instructions.
def TH_LRWU : THLoadIndexed<GPR, 0b11000, "th.lrwu">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LURWU : THLoadIndexed<GPR, 0b11010, "th.lurwu">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_LRD : THLoadIndexed<GPR, 0b01100, "th.lrd">,
- Sched<[WriteLDW, ReadMemBase]>;
+ Sched<[WriteLDW, ReadMemBase]>;
def TH_LURD : THLoadIndexed<GPR, 0b01110, "th.lurd">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
def TH_SRD : THStoreIndexed<GPR, 0b01100, "th.srd">,
- Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
def TH_SURD : THStoreIndexed<GPR, 0b01110, "th.surd">,
- Sched<[WriteLDB, ReadMemBase]>;
+ Sched<[WriteLDB, ReadMemBase]>;
}
// T-Head Load/Store Indexed instructions for floating point registers.
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLRW : THLoadIndexed<FPR32, 0b01000, "th.flrw">,
Sched<[WriteFLD32, ReadFMemBase]>;
def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">,
@@ -420,7 +435,7 @@ def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">,
}
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLRD : THLoadIndexed<FPR64, 0b01100, "th.flrd">,
Sched<[WriteFLD64, ReadFMemBase]>;
def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">,
@@ -428,7 +443,7 @@ def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">,
}
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF, IsRV64],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLURW : THLoadIndexed<FPR32, 0b01010, "th.flurw">,
Sched<[WriteFLD32, ReadFMemBase]>;
def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">,
@@ -436,16 +451,14 @@ def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">,
}
let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD, IsRV64],
- DecoderNamespace = "THeadFMemIdx" in {
+ DecoderNamespace = "XTHeadFMemIdx" in {
def TH_FLURD : THLoadIndexed<FPR64, 0b01110, "th.flurd">,
Sched<[WriteFLD64, ReadFMemBase]>;
def TH_FSURD : THStoreIndexed<FPR64, 0b01110, "th.fsurd">,
Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]>;
}
-let Predicates = [HasVendorXTHeadVdot],
- Constraints = "@earlyclobber $vd",
- RVVConstraint = WidenV in {
+let Predicates = [HasVendorXTHeadVdot] in {
defm THVdotVMAQA : THVdotVMAQA<"th.vmaqa", 0b100000>;
defm THVdotVMAQAU : THVdotVMAQA<"th.vmaqau", 0b100010>;
defm THVdotVMAQASU : THVdotVMAQA<"th.vmaqasu", 0b100100>;
@@ -472,8 +485,10 @@ defset list<VTypeInfoToWide> AllQuadWidenableInt8NoVLMulVectors = {
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
+
multiclass VPseudoVMAQA_VV_VX {
foreach m = MxListTHVdot in {
+ // TODO: Add Sched
defm "" : VPseudoTernaryW_VV<m>;
defm "" : VPseudoTernaryW_VX<m>;
}
@@ -481,6 +496,7 @@ multiclass VPseudoVMAQA_VV_VX {
multiclass VPseudoVMAQA_VX {
foreach m = MxListTHVdot in {
+ // TODO: Add Sched
defm "" : VPseudoTernaryW_VX<m>;
}
}
@@ -518,16 +534,17 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
+
let Predicates = [HasVendorXTHeadBa] in {
def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)),
(TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
// Reuse complex patterns from StdExtZba
-def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
+def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>;
-def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
+def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
-def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
+def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
@@ -687,10 +704,14 @@ defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX;
defm PseudoTHVdotVMAQAUS : VPseudoVMAQA_VX;
let Predicates = [HasVendorXTHeadVdot] in {
-defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa", "PseudoTHVdotVMAQA", AllQuadWidenableInt8NoVLMulVectors>;
-defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU", AllQuadWidenableInt8NoVLMulVectors>;
-defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>;
-defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqa", "PseudoTHVdotVMAQA",
+ AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU",
+ AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",
+ AllQuadWidenableInt8NoVLMulVectors>;
+defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",
+ AllQuadWidenableInt8NoVLMulVectors>;
}
def uimm2_3_XFORM : SDNodeXForm<imm, [{
@@ -725,7 +746,7 @@ let Predicates = [HasVendorXTHeadMemPair] in {
(TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
}
-let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "THeadCmo" in {
+let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "XTHeadCmo" in {
def TH_DCACHE_CSW : THCacheInst_r<0b00001, "th.dcache.csw">;
def TH_DCACHE_ISW : THCacheInst_r<0b00010, "th.dcache.isw">;
def TH_DCACHE_CISW : THCacheInst_r<0b00011, "th.dcache.cisw">;
@@ -750,7 +771,7 @@ def TH_L2CACHE_IALL : THCacheInst_void<0b10110, "th.l2cache.iall">;
def TH_L2CACHE_CIALL : THCacheInst_void<0b10111, "th.l2cache.ciall">;
}
-let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "THeadSync" in {
+let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "XTHeadSync" in {
def TH_SFENCE_VMAS : THCacheInst_rr<0b0000010, "th.sfence.vmas">;
def TH_SYNC : THCacheInst_void<0b11000, "th.sync">;
def TH_SYNC_S : THCacheInst_void<0b11001, "th.sync.s">;
@@ -865,9 +886,7 @@ defm : StoreUpdatePat<post_truncsti8, TH_SBIA>;
defm : StoreUpdatePat<pre_truncsti8, TH_SBIB>;
defm : StoreUpdatePat<post_truncsti16, TH_SHIA>;
defm : StoreUpdatePat<pre_truncsti16, TH_SHIB>;
-}
-let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in {
defm : StoreUpdatePat<post_store, TH_SWIA, i32>;
defm : StoreUpdatePat<pre_store, TH_SWIB, i32>;
}
@@ -878,3 +897,15 @@ defm : StoreUpdatePat<pre_truncsti32, TH_SWIB, i64>;
defm : StoreUpdatePat<post_store, TH_SDIA, i64>;
defm : StoreUpdatePat<pre_store, TH_SDIB, i64>;
}
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in {
+defm : StoreUpdatePat<post_truncsti8, TH_SBIA, i32>;
+defm : StoreUpdatePat<pre_truncsti8, TH_SBIB, i32>;
+defm : StoreUpdatePat<post_truncsti16, TH_SHIA, i32>;
+defm : StoreUpdatePat<pre_truncsti16, TH_SHIB, i32>;
+}
+
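The StoreUpdatePat lines above connect the generic pre-/post-indexed store nodes to the T-Head update forms; by their naming, the *ia variants store through the old base and then advance it, while the *ib variants advance the base first. A scalar model of that difference (names and the pointer-sized increment are mine, standing in for the instruction's scaled immediate):

#include <cstddef>
#include <cstdint>

// th.swia-style: post-indexed, store at the old base, then update it.
inline void store_post(int32_t *&base, std::ptrdiff_t inc, int32_t val) {
  *base = val;
  base += inc;
}

// th.swib-style: pre-indexed, update the base first, then store there.
inline void store_pre(int32_t *&base, std::ptrdiff_t inc, int32_t val) {
  base += inc;
  *base = val;
}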
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
index f6b0feaf7628..d0a798ef475c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
let Predicates = [IsRV64, HasVendorXVentanaCondOps], hasSideEffects = 0,
- mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, DecoderNamespace = "Ventana" in
+ mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, DecoderNamespace = "XVentana" in
class VTMaskedMove<bits<3> funct3, string opcodestr>
: RVInstR<0b0000000, funct3, OPC_CUSTOM_3, (outs GPR:$rd),
(ins GPR:$rs1, GPR:$rs2), opcodestr,
@@ -29,8 +29,17 @@ def VT_MASKCN : VTMaskedMove<0b111, "vt.maskcn">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let Predicates = [IsRV64, HasVendorXVentanaCondOps] in {
-def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
+def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
(VT_MASKC GPR:$rs1, GPR:$rc)>;
-def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)),
+def : Pat<(i64 (riscv_czero_nez GPR:$rs1, GPR:$rc)),
(VT_MASKCN GPR:$rs1, GPR:$rc)>;
+
+def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, (riscv_setne (i64 GPR:$rc)))),
+ (VT_MASKC GPR:$rs1, GPR:$rc)>;
+def : Pat<(i64 (riscv_czero_eqz GPR:$rs1, (riscv_seteq (i64 GPR:$rc)))),
+ (VT_MASKCN GPR:$rs1, GPR:$rc)>;
+def : Pat<(i64 (riscv_czero_nez GPR:$rs1, (riscv_setne (i64 GPR:$rc)))),
+ (VT_MASKCN GPR:$rs1, GPR:$rc)>;
+def : Pat<(i64 (riscv_czero_nez GPR:$rs1, (riscv_seteq (i64 GPR:$rc)))),
+ (VT_MASKC GPR:$rs1, GPR:$rc)>;
} // Predicates = [IsRV64, HasVendorXVentanaCondOps]
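The four added patterns fold a setne/seteq of the condition into the conditional-zero operation itself. That is sound because vt.maskc zeroes the result exactly when the condition register is zero, so pre-testing the register for non-zero changes nothing, and seteq simply swaps maskc for maskcn. A scalar sketch of the equivalence (function names are mine, modelling the eqz/setne case on a 64-bit register):

#include <cstdint>

constexpr uint64_t czero_eqz(uint64_t rs1, uint64_t rc) {  // vt.maskc
  return rc != 0 ? rs1 : 0;
}
constexpr uint64_t setne(uint64_t rc) { return rc != 0 ? 1 : 0; }

// czero_eqz(rs1, setne(rc)) == czero_eqz(rs1, rc) for any rc.
static_assert(czero_eqz(0xDEAD, setne(0)) == czero_eqz(0xDEAD, 0), "");
static_assert(czero_eqz(0xDEAD, setne(7)) == czero_eqz(0xDEAD, 7), "");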
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index caeedfa652e4..8055473a37c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -13,9 +13,6 @@
// Zbc - 1.0
// Zbs - 1.0
//
-// The experimental extensions appeared in an earlier draft of the Bitmanip
-// extensions. They are not ratified and subject to change.
-//
// This file also describes RISC-V instructions from the Zbk* extensions in
// Cryptography Extensions Volume I: Scalar & Entropy Source Instructions,
// versions:
@@ -48,7 +45,7 @@ def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
let DiagnosticType = "InvalidUImmLog2XLenHalf";
}
-def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+def shfl_uimm : RISCVOp, ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return isUInt<5>(Imm);
return isUInt<4>(Imm);
@@ -56,7 +53,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
let ParserMatchClass = UImmLog2XLenHalfAsmOperand;
let DecoderMethod = "decodeUImmOperand<5>";
let OperandType = "OPERAND_UIMM_SHFL";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -183,7 +179,7 @@ def C5LeftShift : PatLeaf<(imm), [{
def C9LeftShift : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- return C > 5 && (C >> llvm::countr_zero(C)) == 9;
+ return C > 9 && (C >> llvm::countr_zero(C)) == 9;
}]>;
// Constant of the form (3 << C) where C is less than 32.
@@ -234,18 +230,38 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
-// Pattern to exclude simm12 immediates from matching.
-def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
- auto *C = dyn_cast<ConstantSDNode>(N);
+// Pattern to exclude simm12 immediates from matching, i.e. `non_imm12`.
+// GISel currently doesn't support PatFrags on leaf nodes, so `non_imm12`
+// cannot be implemented that way. To share patterns between the two ISels,
+// we instead define PatFrags on the operators that take a `non_imm12` operand.
+class binop_with_non_imm12<SDPatternOperator binop>
+ : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{
+ auto *C = dyn_cast<ConstantSDNode>(Operands[1]);
return !C || !isInt<12>(C->getSExtValue());
-}]>;
+}]> {
+ let PredicateCodeUsesOperands = 1;
+ let GISelPredicateCode = [{
+ const MachineOperand &ImmOp = *Operands[1];
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (ImmOp.isReg() && ImmOp.getReg())
+ if (auto Val = getIConstantVRegValWithLookThrough(ImmOp.getReg(), MRI)) {
+ // We do NOT want immediates that fit in 12 bits.
+ return !isInt<12>(Val->Value.getSExtValue());
+ }
+
+ return true;
+ }];
+}
+def add_non_imm12 : binop_with_non_imm12<add>;
+def or_is_add_non_imm12 : binop_with_non_imm12<or_is_add>;
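For reference, the 12-bit test both predicate bodies above implement is the ordinary signed-immediate range check; the apparent intent is to leave addends that fit simm12 to the plain ADDI-style patterns. A minimal stand-alone restatement (plain C++; the helper name is mine, not the in-tree isInt<12>):

#include <cstdint>

// Sketch of the check performed by binop_with_non_imm12, both in the
// SelectionDAG predicate (isInt<12>) and in the GISel predicate above.
constexpr bool fitsInSImm12(int64_t v) {
  return v >= -2048 && v <= 2047;  // signed 12-bit range
}

// add_non_imm12 declines to match second operands like these...
static_assert(fitsInSImm12(0) && fitsInSImm12(2047) && fitsInSImm12(-2048), "");
// ...and keeps matching when the constant is out of range.
static_assert(!fitsInSImm12(2048) && !fitsInSImm12(-2049), "");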
-def Shifted32OnesMask : PatLeaf<(imm), [{
- uint64_t Imm = N->getZExtValue();
- if (!isShiftedMask_64(Imm))
+def Shifted32OnesMask : IntImmLeaf<XLenVT, [{
+ if (!Imm.isShiftedMask())
return false;
- unsigned TrailingZeros = llvm::countr_zero(Imm);
+ unsigned TrailingZeros = Imm.countr_zero();
return TrailingZeros > 0 && TrailingZeros < 32 &&
Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros;
}], TrailingZeros>;
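The rewritten IntImmLeaf tests the same property the old PatLeaf did: 32 contiguous ones shifted left by 1 to 31 bits. A stand-alone restatement for reference (a sketch with my own helper, not an LLVM API):

#include <cstdint>

constexpr bool isShifted32OnesMask(uint64_t imm) {
  if (imm == 0)
    return false;
  unsigned tz = 0;                       // trailing zero count
  for (uint64_t v = imm; (v & 1) == 0; v >>= 1)
    ++tz;
  return tz > 0 && tz < 32 && imm == (0xFFFFFFFFULL << tz);
}

static_assert(isShifted32OnesMask(0xFFFFFFFFULL << 4), "");
static_assert(!isShifted32OnesMask(0xFFFFFFFFULL), "shift amount must be > 0");
static_assert(!isShifted32OnesMask(0x7FFFFFFFULL << 4), "needs all 32 ones");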
@@ -262,16 +278,11 @@ def sh3add_uw_op : ComplexPattern<XLenVT, 1, "selectSHXADD_UWOp<3>", [], [], 6>;
// Instruction class templates
//===----------------------------------------------------------------------===//
-// Some of these templates should be moved to RISCVInstrFormats.td once the B
-// extension has been ratified.
-
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBUnary<bits<7> funct7, bits<5> funct5, bits<3> funct3,
+class RVBUnary<bits<12> imm12, bits<3> funct3,
RISCVOpcode opcode, string opcodestr>
- : RVInstR<funct7, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, $rs1"> {
- let rs2 = funct5;
-}
+ : RVInstIUnary<imm12, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVBShift_ri<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
@@ -375,27 +386,27 @@ def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">,
} // Predicates = [HasStdExtZbkx]
let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
-def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">,
+def CLZ : RVBUnary<0b011000000000, 0b001, OPC_OP_IMM, "clz">,
Sched<[WriteCLZ, ReadCLZ]>;
-def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM, "ctz">,
+def CTZ : RVBUnary<0b011000000001, 0b001, OPC_OP_IMM, "ctz">,
Sched<[WriteCTZ, ReadCTZ]>;
-def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">,
+def CPOP : RVBUnary<0b011000000010, 0b001, OPC_OP_IMM, "cpop">,
Sched<[WriteCPOP, ReadCPOP]>;
} // Predicates = [HasStdExtZbb]
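The RVBUnary change is purely an encoding refactor: the old funct7 and funct5 (rs2) fields are concatenated into the single 12-bit immediate that RVInstIUnary places in Inst{31-20}. A quick compile-time check of that concatenation against the values visible in this hunk (plain C++, independent of the TableGen classes):

#include <cstdint>

constexpr uint16_t concatImm12(uint8_t funct7, uint8_t funct5) {
  return static_cast<uint16_t>((funct7 << 5) | (funct5 & 0x1F));
}

static_assert(concatImm12(0b0110000, 0b00000) == 0b011000000000, "clz");
static_assert(concatImm12(0b0110000, 0b00010) == 0b011000000010, "cpop");
static_assert(concatImm12(0b0110101, 0b11000) == 0b011010111000, "rev8, RV64");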
let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in {
-def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
+def CLZW : RVBUnary<0b011000000000, 0b001, OPC_OP_IMM_32, "clzw">,
Sched<[WriteCLZ32, ReadCLZ32]>;
-def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
+def CTZW : RVBUnary<0b011000000001, 0b001, OPC_OP_IMM_32, "ctzw">,
Sched<[WriteCTZ32, ReadCTZ32]>;
-def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
+def CPOPW : RVBUnary<0b011000000010, 0b001, OPC_OP_IMM_32, "cpopw">,
Sched<[WriteCPOP32, ReadCPOP32]>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
-def SEXT_B : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
+def SEXT_B : RVBUnary<0b011000000100, 0b001, OPC_OP_IMM, "sext.b">,
Sched<[WriteIALU, ReadIALU]>;
-def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
+def SEXT_H : RVBUnary<0b011000000101, 0b001, OPC_OP_IMM, "sext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
@@ -435,38 +446,38 @@ def PACKW : ALUW_rr<0b0000100, 0b100, "packw">,
Sched<[WritePACK32, ReadPACK32, ReadPACK32]>;
let Predicates = [HasStdExtZbb, IsRV32] in {
-def ZEXT_H_RV32 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP, "zext.h">,
+def ZEXT_H_RV32 : RVBUnary<0b000010000000, 0b100, OPC_OP, "zext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb, IsRV32]
let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in {
-def ZEXT_H_RV64 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP_32, "zext.h">,
+def ZEXT_H_RV64 : RVBUnary<0b000010000000, 0b100, OPC_OP_32, "zext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in {
-def REV8_RV32 : RVBUnary<0b0110100, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+def REV8_RV32 : RVBUnary<0b011010011000, 0b101, OPC_OP_IMM, "rev8">,
Sched<[WriteREV8, ReadREV8]>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV32]
let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
-def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+def REV8_RV64 : RVBUnary<0b011010111000, 0b101, OPC_OP_IMM, "rev8">,
Sched<[WriteREV8, ReadREV8]>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
let Predicates = [HasStdExtZbb] in {
-def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
+def ORC_B : RVBUnary<0b001010000111, 0b101, OPC_OP_IMM, "orc.b">,
Sched<[WriteORCB, ReadORCB]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbkb] in
-def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">,
+def BREV8 : RVBUnary<0b011010000111, 0b101, OPC_OP_IMM, "brev8">,
Sched<[WriteBREV8, ReadBREV8]>;
let Predicates = [HasStdExtZbkb, IsRV32] in {
-def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">,
+def ZIP_RV32 : RVBUnary<0b000010001111, 0b001, OPC_OP_IMM, "zip">,
Sched<[WriteZIP, ReadZIP]>;
-def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">,
+def UNZIP_RV32 : RVBUnary<0b000010001111, 0b101, OPC_OP_IMM, "unzip">,
Sched<[WriteZIP, ReadZIP]>;
} // Predicates = [HasStdExtZbkb, IsRV32]
@@ -543,6 +554,8 @@ def : Pat<(XLenVT (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)),
def : Pat<(XLenVT (shiftop<shl> 1, (XLenVT GPR:$rs2))),
(BSET (XLenVT X0), GPR:$rs2)>;
+def : Pat<(XLenVT (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))),
+ (ADDI (BSET (XLenVT X0), GPR:$rs2), -1)>;
def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)),
(BCLRI GPR:$rs1, BCLRMask:$mask)>;
@@ -632,6 +645,10 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
(zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
(PACKH GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
+ (zexti8 (XLenVT GPR:$rs1))),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbkb]
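The added binop_allhusers<or> pattern selects the same packh as the masked forms above it. For reference, a small sketch of the packh result, assuming the usual Zbkb semantics (low byte of rs1 in bits 7..0, low byte of rs2 in bits 15..8, upper bits zero):

#include <cstdint>

constexpr uint64_t packh(uint64_t rs1, uint64_t rs2) {
  return ((rs2 & 0xFF) << 8) | (rs1 & 0xFF);  // zero-extended to XLEN
}

static_assert(packh(0xAB, 0xCD) == 0xCDAB, "");
static_assert(packh(0xFFFFFF12, 0xFFFFFF34) == 0x3412, "only the low bytes matter");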
let Predicates = [HasStdExtZbkb, IsRV32] in
@@ -656,20 +673,17 @@ let Predicates = [HasStdExtZbb, IsRV64] in
def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>;
let Predicates = [HasStdExtZba] in {
-def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), non_imm12:$rs2),
- (SH1ADD GPR:$rs1, GPR:$rs2)>;
-def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2),
- (SH2ADD GPR:$rs1, GPR:$rs2)>;
-def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2),
- (SH3ADD GPR:$rs1, GPR:$rs2)>;
-// More complex cases use a ComplexPattern.
-def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
- (SH1ADD sh1add_op:$rs1, GPR:$rs2)>;
-def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
- (SH2ADD sh2add_op:$rs1, GPR:$rs2)>;
-def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
- (SH3ADD sh3add_op:$rs1, GPR:$rs2)>;
+foreach i = {1,2,3} in {
+ defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
+ def : Pat<(XLenVT (add_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)),
+ (shxadd GPR:$rs1, GPR:$rs2)>;
+
+ defvar pat = !cast<ComplexPattern>("sh"#i#"add_op");
+ // More complex cases use a ComplexPattern.
+ def : Pat<(XLenVT (add_non_imm12 pat:$rs1, GPR:$rs2)),
+ (shxadd pat:$rs1, GPR:$rs2)>;
+}
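For reference, the first pattern in the foreach matches exactly (rs1 << i) + rs2, which is the shNadd result; add_non_imm12 additionally refuses to fire when rs2 is a simm12 constant (see the PatFrag earlier in this file). A minimal sketch of the computation:

#include <cstdint>

template <unsigned N>
constexpr uint64_t shxadd(uint64_t rs1, uint64_t rs2) {
  static_assert(N >= 1 && N <= 3, "sh1add/sh2add/sh3add only");
  return (rs1 << N) + rs2;
}

// e.g. indexing an array of 8-byte elements: base + (i << 3)
static_assert(shxadd<3>(5, 0x1000) == 0x1028, "");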
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
(SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
@@ -739,46 +753,46 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)),
(SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask),
Shifted32OnesMask:$mask)>;
-
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)),
(ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>;
-def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
- (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
- (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
- (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or_is_add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)),
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
+foreach i = {1,2,3} in {
+ defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW");
+ def : Pat<(i64 (add_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))),
+ (shxadd_uw GPR:$rs1, GPR:$rs2)>;
+}
+
+def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))),
(SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))),
(SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))),
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
// More complex cases use a ComplexPattern.
-def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)),
- (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)),
- (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)),
- (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>;
-
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)),
+foreach i = {1,2,3} in {
+ defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op");
+ def : Pat<(i64 (add_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))),
+ (!cast<Instruction>("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>;
+}
+
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))),
(SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))),
(SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))),
(SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>;
// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
-def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))),
(SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))),
(SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
-def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
+def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))),
(SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
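The SRLI plus SHxADD_UW rewrites rely on a small identity: masking rs1 with 0x1FFFFFFFE (33 bits with the LSB clear) before the add gives the same result as shifting right by one, zero-extending 32 bits, and letting sh1add.uw shift back left by one. A compile-time check of that identity under the assumed shNadd.uw semantics (helper names are mine):

#include <cstdint>

constexpr uint64_t shxadd_uw(unsigned n, uint64_t rs1, uint64_t rs2) {
  return ((rs1 & 0xFFFFFFFFULL) << n) + rs2;  // sh{n}add.uw
}

constexpr bool identityHolds(uint64_t rs1, uint64_t rs2) {
  // (rs1 & 0x1FFFFFFFE) + rs2  ==  sh1add.uw(srli(rs1, 1), rs2)
  return ((rs1 & 0x1FFFFFFFEULL) + rs2) == shxadd_uw(1, rs1 >> 1, rs2);
}

static_assert(identityHolds(0x0123456789ABCDEFULL, 0x1000), "");
static_assert(identityHolds(~0ULL, 42), "");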
def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)),
@@ -804,3 +818,99 @@ let Predicates = [HasStdExtZbkx] in {
def : PatGprGpr<int_riscv_xperm4, XPERM4>;
def : PatGprGpr<int_riscv_xperm8, XPERM8>;
} // Predicates = [HasStdExtZbkx]
+
+//===----------------------------------------------------------------------===//
+// Experimental RV64 i32 legalization patterns.
+//===----------------------------------------------------------------------===//
+
+def BCLRMaski32 : ImmLeaf<i32, [{
+ return !isInt<12>(Imm) && isPowerOf2_32(~Imm);
+}]>;
+def SingleBitSetMaski32 : ImmLeaf<i32, [{
+ return !isInt<12>(Imm) && isPowerOf2_32(Imm);
+}]>;
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<ctlz, CLZW, i32>;
+def : PatGpr<cttz, CTZW, i32>;
+def : PatGpr<ctpop, CPOPW, i32>;
+
+def : Pat<(i32 (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
+def : Pat<(i32 (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
+def : Pat<(i32 (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>;
+
+def : PatGprGpr<shiftopw<rotl>, ROLW, i32, i64>;
+def : PatGprGpr<shiftopw<rotr>, RORW, i32, i64>;
+def : PatGprImm<rotr, RORIW, uimm5, i32>;
+
+def : Pat<(i32 (rotl GPR:$rs1, uimm5:$rs2)),
+ (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
+
+let Predicates = [HasStdExtZbkb, IsRV64] in {
+def : Pat<(or (and (shl GPR:$rs2, (i64 8)), 0xFFFF),
+ (zexti8i32 (i32 GPR:$rs1))),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl (zexti8i32 (i32 GPR:$rs2)), (i64 8)),
+ (zexti8i32 (i32 GPR:$rs1))),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (anyext (or (shl GPR:$rs2, (XLenVT 8)),
+ (zexti8i32 (i32 GPR:$rs1)))), 0xFFFF),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i32 (or (shl GPR:$rs2, (i64 16)), (zexti16i32 (i32 GPR:$rs1)))),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbkb, IsRV64]
+
+let Predicates = [HasStdExtZba, IsRV64] in {
+def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt),
+ (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
+
+def : Pat<(i64 (add_non_imm12 (zext GPR:$rs1), GPR:$rs2)),
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>;
+
+def : Pat<(i64 (or_is_add_non_imm12 (zext GPR:$rs1), GPR:$rs2)),
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
+
+foreach i = {1,2,3} in {
+ defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
+ def : Pat<(i32 (add_non_imm12 (shl GPR:$rs1, (i64 i)), GPR:$rs2)),
+ (shxadd GPR:$rs1, GPR:$rs2)>;
+}
+}
+
+let Predicates = [HasStdExtZbs, IsRV64] in {
+def : Pat<(i32 (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)),
+ (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BSET GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (xor (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BINV GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)),
+ (BEXT GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (and (anyext (i32 (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)))), 1)),
+ (BEXT GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i32 (shiftop<shl> 1, (XLenVT GPR:$rs2))),
+ (BSET (XLenVT X0), GPR:$rs2)>;
+def : Pat<(i32 (not (shiftop<shl> -1, (XLenVT GPR:$rs2)))),
+ (ADDI (BSET (XLenVT X0), GPR:$rs2), -1)>;
+
+def : Pat<(i32 (and (srl GPR:$rs1, uimm5:$shamt), (i32 1))),
+ (BEXTI GPR:$rs1, uimm5:$shamt)>;
+
+def : Pat<(i32 (and GPR:$rs1, BCLRMaski32:$mask)),
+ (BCLRI GPR:$rs1, (i64 (BCLRXForm $mask)))>;
+def : Pat<(i32 (or GPR:$rs1, SingleBitSetMaski32:$mask)),
+ (BSETI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>;
+def : Pat<(i32 (xor GPR:$rs1, SingleBitSetMaski32:$mask)),
+ (BINVI GPR:$rs1, (i64 (SingleBitSetMaskToIndex $mask)))>;
+} // Predicates = [HasStdExtZbs, IsRV64]
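BCLRMaski32 and SingleBitSetMaski32 accept 32-bit masks that are too wide for a 12-bit immediate but describe a single bit position; the BCLRXForm / SingleBitSetMaskToIndex transforms (not shown in this hunk) presumably recover that position as the trailing-zero count of ~mask or of the mask. A worked example under that assumption:

#include <cstdint>

constexpr unsigned ctz32(uint32_t x) {   // x must be non-zero
  unsigned n = 0;
  for (; (x & 1u) == 0; x >>= 1)
    ++n;
  return n;
}

// and x, 0xFFFFBFFF  ->  bclri x, 14   (~mask == 1 << 14, so bit 14 is cleared)
static_assert(ctz32(~0xFFFFBFFFu) == 14, "");
// or  x, 0x00010000  ->  bseti x, 16
static_assert(ctz32(0x00010000u) == 16, "");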
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index 6687343086da..a78f36244468 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -8,8 +8,6 @@
///
/// This file describes the RISC-V instructions from the 'Zc*' compressed
/// instruction extensions, version 1.0.3.
-/// This version is still experimental as the 'Zc*' extensions haven't been
-/// ratified yet.
///
//===----------------------------------------------------------------------===//
@@ -17,13 +15,12 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def uimm2_lsb0 : Operand<XLenVT>,
+def uimm2_lsb0 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedUInt<1, 1>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2, "Lsb0">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2_LSB0";
- let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -32,11 +29,10 @@ def uimm2_lsb0 : Operand<XLenVT>,
}];
}
-def uimm8ge32 : Operand<XLenVT> {
+def uimm8ge32 : RISCVOp {
let ParserMatchClass = UImmAsmOperand<8, "GE32">;
let DecoderMethod = "decodeUImmOperand<8>";
let OperandType = "OPERAND_UIMM8_GE32";
- let OperandNamespace = "RISCVOp";
}
def RlistAsmOperand : AsmOperandClass {
@@ -131,7 +127,7 @@ class RVZcArith_r<bits<5> funct5, string OpcodeStr> :
class RVInstZcCPPP<bits<5> funct5, string opcodestr>
: RVInst16<(outs), (ins rlist:$rlist, spimm:$spimm),
- opcodestr, "{$rlist}, $spimm", [], InstFormatOther> {
+ opcodestr, "$rlist, $spimm", [], InstFormatOther> {
bits<4> rlist;
bits<16> spimm;
@@ -285,9 +281,9 @@ def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZcb] in {
-def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0)>;
-def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0), 0>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index f36882f9a968..6f88ff7f7ac1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file describes the RISC-V instructions from the standard 'Zfa'
-// additional floating-point extension, version 0.1.
-// This version is still experimental as the 'Zfa' extension hasn't been
-// ratified yet.
+// additional floating-point extension, version 1.0.
//
//===----------------------------------------------------------------------===//
@@ -54,18 +52,14 @@ class FPBinaryOp_rr<bits<7> funct7, bits<3> funct3, DAGOperand rdty,
(ins rsty:$rs1, rsty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPUnaryOp_imm<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
- dag outs, dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+class FPFLI_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
+ DAGOperand rdty, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd),
+ (ins loadfpimm:$imm), opcodestr, "$rd, $imm"> {
bits<5> imm;
- bits<5> rd;
-
- let Inst{31-25} = funct7;
- let Inst{24-20} = rs2val;
- let Inst{19-15} = imm;
- let Inst{14-12} = funct3;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_OP_FP.Value;
+
+ let rs2 = rs2val;
+ let rs1 = imm;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
@@ -84,8 +78,7 @@ class FPUnaryOp_r_rtz<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
let Predicates = [HasStdExtZfa] in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def FLI_S : FPUnaryOp_imm<0b1111000, 0b00001, 0b000, (outs FPR32:$rd),
- (ins loadfpimm:$imm), "fli.s", "$rd, $imm">,
+def FLI_S : FPFLI_r<0b1111000, 0b00001, 0b000, FPR32, "fli.s">,
Sched<[WriteFLI32]>;
let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
@@ -106,8 +99,7 @@ def FLEQ_S : FPCmp_rr<0b1010000, 0b100, "fleq.s", FPR32>;
let Predicates = [HasStdExtZfa, HasStdExtD] in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def FLI_D : FPUnaryOp_imm<0b1111001, 0b00001, 0b000, (outs FPR64:$rd),
- (ins loadfpimm:$imm), "fli.d", "$rd, $imm">,
+def FLI_D : FPFLI_r<0b1111001, 0b00001, 0b000, FPR64, "fli.d">,
Sched<[WriteFLI64]>;
let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
@@ -120,6 +112,7 @@ def FROUND_D : FPUnaryOp_r_frm<0b0100001, 0b00100, FPR64, FPR64, "fround.d">,
def FROUNDNX_D : FPUnaryOp_r_frm<0b0100001, 0b00101, FPR64, FPR64, "froundnx.d">,
Sched<[WriteFRoundF64, ReadFRoundF64]>;
+let IsSignExtendingOpW = 1 in
def FCVTMOD_W_D
: FPUnaryOp_r_rtz<0b1100001, 0b01000, GPR, FPR64, "fcvtmod.w.d">,
Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
@@ -146,8 +139,7 @@ def FMV_X_W_FPR64 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR64,
let Predicates = [HasStdExtZfa, HasStdExtZfhOrZvfh] in
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def FLI_H : FPUnaryOp_imm<0b1111010, 0b00001, 0b000, (outs FPR16:$rd),
- (ins loadfpimm:$imm), "fli.h", "$rd, $imm">,
+def FLI_H : FPFLI_r<0b1111010, 0b00001, 0b000, FPR16, "fli.h">,
Sched<[WriteFLI16]>;
let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
index 35f9f03f61a1..d819033eea68 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
@@ -16,12 +16,12 @@
//===----------------------------------------------------------------------===//
// RISC-V specific DAG Nodes.
//===----------------------------------------------------------------------===//
-
+
def SDT_RISCVFP_ROUND_BF16
: SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, f32>]>;
def SDT_RISCVFP_EXTEND_BF16
: SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, bf16>]>;
-
+
def riscv_fpround_bf16
: SDNode<"RISCVISD::FP_ROUND_BF16", SDT_RISCVFP_ROUND_BF16>;
def riscv_fpextend_bf16
@@ -41,7 +41,7 @@ def FCVT_S_BF16 : FPUnaryOp_r_frm<0b0100000, 0b00110, FPR32, FPR16, "fcvt.s.bf16
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-
+
let Predicates = [HasStdExtZfbfmin] in {
/// Loads
def : LdPat<load, FLH, bf16>;
@@ -51,9 +51,9 @@ def : StPat<store, FSH, FPR16, bf16>;
/// Float conversion operations
// f32 -> bf16, bf16 -> f32
-def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)),
+def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)),
(FCVT_BF16_S FPR32:$rs1, FRM_DYN)>;
-def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)),
+def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)),
(FCVT_S_BF16 FPR16:$rs1, FRM_DYN)>;
// Moves (no conversion)
@@ -61,3 +61,25 @@ def : Pat<(bf16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
def : Pat<(riscv_fmv_x_signexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
} // Predicates = [HasStdExtZfbfmin]
+
+let Predicates = [HasStdExtZfbfmin] in {
+// bf16->[u]int. Round-to-zero must be used for the f32->int step, the
+// rounding mode has no effect for bf16->f32.
+def : Pat<(i32 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint (bf16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+
+// [u]int->bf16. Match GCC and default to using dynamic rounding mode.
+def : Pat<(bf16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(bf16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
+}
+
+let Predicates = [HasStdExtZfbfmin, IsRV64] in {
+// bf16->[u]int64. Round-to-zero must be used for the f32->int step; the
+// rounding mode has no effect on the bf16->f32 widening.
+def : Pat<(i64 (any_fp_to_sint (bf16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint (bf16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_BF16 $rs1, FRM_RNE), FRM_RTZ)>;
+
+// [u]int->bf16. Match GCC and default to using dynamic rounding mode.
+def : Pat<(bf16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(bf16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_BF16_S (FCVT_S_LU $rs1, FRM_DYN), FRM_DYN)>;
+}
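The comments above note that the rounding mode is irrelevant for the bf16->f32 step; that holds because bfloat16 is the top 16 bits of an IEEE binary32, so the widening is exact and only the later f32->int step needs round-to-zero. A sketch of that two-step conversion (the bit-pattern helper is mine, not an LLVM or libc API):

#include <cstdint>
#include <cstring>

// Widen bf16 (given as its raw 16-bit pattern) to float: exact, no rounding.
inline float bf16_to_f32(uint16_t bits) {
  uint32_t u = static_cast<uint32_t>(bits) << 16;
  float f;
  std::memcpy(&f, &u, sizeof f);
  return f;
}

// bf16 -> i32 the way the FCVT_S_BF16 + FCVT_W_S(RTZ) pair does it:
// widen exactly, then truncate toward zero.
inline int32_t bf16_to_i32_rtz(uint16_t bits) {
  return static_cast<int32_t>(bf16_to_f32(bits));  // C++ casts truncate
}

int main() {
  return bf16_to_i32_rtz(0x40A0) == 5 ? 0 : 1;  // 0x40A0 is bf16 5.0
}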
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 810775a78241..055f13032788 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -85,7 +85,7 @@ def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
} // Predicates = [HasHalfFPLoadStoreMove]
foreach Ext = ZfhExts in {
- let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
+ let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in {
defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", Ext>;
defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", Ext>;
defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", Ext>;
@@ -142,8 +142,8 @@ foreach Ext = ZfhminExts in {
Ext.F32Ty, "fcvt.h.s">,
Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
- defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, Ext, Ext.F32Ty,
- Ext.PrimaryTy, "fcvt.s.h">,
+  defm FCVT_S_H : FPUnaryOp_r_frmlegacy_m<0b0100000, 0b00010, Ext, Ext.F32Ty,
+ Ext.PrimaryTy, "fcvt.s.h">,
Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
} // foreach Ext = ZfhminExts
@@ -191,8 +191,8 @@ foreach Ext = ZfhminDExts in {
Ext.F64Ty, "fcvt.h.d">,
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
- defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, Ext, Ext.F64Ty,
- Ext.F16Ty, "fcvt.d.h">,
+ defm FCVT_D_H : FPUnaryOp_r_frmlegacy_m<0b0100001, 0b00010, Ext, Ext.F64Ty,
+ Ext.F16Ty, "fcvt.d.h">,
Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
} // foreach Ext = ZfhminDExts
@@ -248,7 +248,6 @@ def PseudoQuietFLT_H_INX : PseudoQuietFCMP<FPR16INX>;
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfh] in {
/// Float conversion operations
@@ -257,17 +256,20 @@ let Predicates = [HasStdExtZfh] in {
/// Float arithmetic operations
-def : PatFprFprDynFrm<any_fadd, FADD_H, FPR16, f16>;
-def : PatFprFprDynFrm<any_fsub, FSUB_H, FPR16, f16>;
-def : PatFprFprDynFrm<any_fmul, FMUL_H, FPR16, f16>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_H, FPR16, f16>;
+foreach Ext = ZfhExts in {
+ defm : PatFprFprDynFrm_m<any_fadd, FADD_H, Ext>;
+ defm : PatFprFprDynFrm_m<any_fsub, FSUB_H, Ext>;
+ defm : PatFprFprDynFrm_m<any_fmul, FMUL_H, Ext>;
+ defm : PatFprFprDynFrm_m<any_fdiv, FDIV_H, Ext>;
+}
+let Predicates = [HasStdExtZfh] in {
def : Pat<(f16 (any_fsqrt FPR16:$rs1)), (FSQRT_H FPR16:$rs1, FRM_DYN)>;
def : Pat<(f16 (fneg FPR16:$rs1)), (FSGNJN_H $rs1, $rs1)>;
def : Pat<(f16 (fabs FPR16:$rs1)), (FSGNJX_H $rs1, $rs1)>;
-def : Pat<(riscv_fpclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>;
+def : Pat<(riscv_fclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_H, FPR16, f16>;
def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), (FSGNJN_H $rs1, $rs2)>;
@@ -304,17 +306,12 @@ let Predicates = [HasStdExtZhinx] in {
/// Float arithmetic operations
-def : PatFprFprDynFrm<any_fadd, FADD_H_INX, FPR16INX, f16>;
-def : PatFprFprDynFrm<any_fsub, FSUB_H_INX, FPR16INX, f16>;
-def : PatFprFprDynFrm<any_fmul, FMUL_H_INX, FPR16INX, f16>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_H_INX, FPR16INX, f16>;
-
def : Pat<(any_fsqrt FPR16INX:$rs1), (FSQRT_H_INX FPR16INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR16INX:$rs1), (FSGNJN_H_INX $rs1, $rs1)>;
def : Pat<(fabs FPR16INX:$rs1), (FSGNJX_H_INX $rs1, $rs1)>;
-def : Pat<(riscv_fpclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>;
+def : Pat<(riscv_fclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_H_INX, FPR16INX, f16>;
def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), (FSGNJN_H_INX $rs1, $rs2)>;
@@ -358,12 +355,12 @@ foreach Ext = ZfhExts in {
// Match non-signaling FEQ_D
foreach Ext = ZfhExts in {
- defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext, f16>;
- defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext>;
}
let Predicates = [HasStdExtZfh] in {
@@ -397,10 +394,10 @@ def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs1, SETOEQ)),
} // Predicates = [HasStdExtZhinx]
foreach Ext = ZfhExts in {
- defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext, f16>;
- defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext>;
+ defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext>;
}
let Predicates = [HasStdExtZfh] in {
@@ -425,11 +422,13 @@ def : StPat<store, FSH, FPR16, f16>;
let Predicates = [HasStdExtZhinxOrZhinxmin] in {
/// Loads
-def : Pat<(f16 (load GPR:$rs1)), (COPY_TO_REGCLASS (LH GPR:$rs1, 0), GPRF16)>;
+def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
+ (COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>;
/// Stores
-def : Pat<(store (f16 FPR16INX:$rs2), GPR:$rs1),
- (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, 0)>;
+def : Pat<(store (f16 FPR16INX:$rs2),
+ (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
+ (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
} // Predicates = [HasStdExtZhinxOrZhinxmin]
let Predicates = [HasStdExtZfhOrZfhmin] in {
@@ -437,14 +436,14 @@ let Predicates = [HasStdExtZfhOrZfhmin] in {
// f32 -> f16, f16 -> f32
def : Pat<(f16 (any_fpround FPR32:$rs1)), (FCVT_H_S FPR32:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1)>;
+def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1, FRM_RNE)>;
// Moves (no conversion)
def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
def : Pat<(riscv_fmv_x_signexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
-def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
+def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZhinxOrZhinxmin] in {
@@ -452,17 +451,17 @@ let Predicates = [HasStdExtZhinxOrZhinxmin] in {
// f32 -> f16, f16 -> f32
def : Pat<(any_fpround FPR32INX:$rs1), (FCVT_H_S_INX FPR32INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>;
// Moves (no conversion)
def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>;
def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
-def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2))>;
+def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZhinxOrZhinxmin]
-let Predicates = [HasStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfh] in {
// half->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_H $rs1, 0b001)>;
@@ -480,9 +479,9 @@ def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_W $rs1, FRM_DYN)>;
def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_WU $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinx] in {
// half->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_H_INX $rs1, 0b001)>;
@@ -500,7 +499,7 @@ def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W_INX $rs1, FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU_INX $rs1, FRM_DYN)>;
-} // Predicates = [HasStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinx]
let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
@@ -566,82 +565,82 @@ let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(f16 (any_fpround FPR64:$rs1)), (FCVT_H_D FPR64:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1)>;
+def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1, FRM_RNE)>;
/// Float arithmetic operations
def : Pat<(f16 (fcopysign FPR16:$rs1, FPR64:$rs2)),
(FSGNJ_H $rs1, (FCVT_H_D $rs2, FRM_DYN))>;
-def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
+def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2),
(FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2))>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_H_D_INX FPR64INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1, FRM_RNE)>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16INX:$rs1, FPR64INX:$rs2),
(FSGNJ_H_INX $rs1, (FCVT_H_D_INX $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2))>;
+def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2, FRM_RNE))>;
} // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64]
-let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in {
+let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RTZ)>;
-def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_DYN)>;
+def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RMM)>;
+def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32]
+} // Predicates = [HasStdExtZfhmin, NoStdExtZfh]
-let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] in {
+let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
-def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>;
-} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32]
+} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx]
let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in {
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RTZ)>;
-def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>;
-def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>;
+def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
+def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>;
-def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>;
+def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
+def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>;
@@ -650,16 +649,16 @@ def : Pat<(f16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_LU $rs1, FRM_
let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV64] in {
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
-def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
-def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
-def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_L_INX $rs1, FRM_DYN), FRM_DYN)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
index 509d1cfcd874..56b68e324de2 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -16,7 +16,7 @@
//===----------------------------------------------------------------------===//
// A 12-bit signed immediate where the least significant five bits are zero.
-def simm12_lsb00000 : Operand<XLenVT>,
+def simm12_lsb00000 : RISCVOp,
ImmLeaf<XLenVT, [{return isShiftedInt<7, 5>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12, "Lsb00000">;
let EncoderMethod = "getImmOpValue";
@@ -28,7 +28,6 @@ def simm12_lsb00000 : Operand<XLenVT>,
return MCOp.isBareSymbolRef();
}];
let OperandType = "OPERAND_SIMM12_LSB00000";
- let OperandNamespace = "RISCVOp";
}
//===----------------------------------------------------------------------===//
@@ -74,12 +73,16 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
// Patterns
//===----------------------------------------------------------------------===//
+def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">;
+
let Predicates = [HasStdExtZicbop] in {
- // FIXME: Match address with offset
- def : Pat<(prefetch GPR:$rs1, imm, imm, (XLenVT 0)),
- (PREFETCH_I GPR:$rs1, 0)>;
- def : Pat<(prefetch GPR:$rs1, (XLenVT 0), imm, (XLenVT 1)),
- (PREFETCH_R GPR:$rs1, 0)>;
- def : Pat<(prefetch GPR:$rs1, (XLenVT 1), imm, (XLenVT 1)),
- (PREFETCH_W GPR:$rs1, 0)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ timm, timm, (i32 0)),
+ (PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ (i32 0), timm, (i32 1)),
+ (PREFETCH_R GPR:$rs1, simm12_lsb00000:$imm12)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ (i32 1), timm, (i32 1)),
+ (PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>;
}
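With the new AddrRegImmLsb00000 patterns the prefetch address may carry a signed 12-bit offset whose low five bits are zero, rather than being forced to offset 0. From C or C++ the usual route to ISD::PREFETCH is __builtin_prefetch; its rw argument appears to correspond to the (i32 0)/(i32 1) operands above, selecting prefetch.r versus prefetch.w. A hedged usage sketch:

#include <cstddef>

// Prefetch 256 bytes ahead of the current element. With Zicbop enabled the
// compiler can fold the 256-byte offset (a multiple of 32, so it satisfies
// simm12_lsb00000) directly into prefetch.r / prefetch.w.
void scale(const double *in, double *out, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    __builtin_prefetch(in + i + 32, /*rw=*/0, /*locality=*/3);   // read
    __builtin_prefetch(out + i + 32, /*rw=*/1, /*locality=*/3);  // write
    out[i] = in[i] * 2.0;
  }
}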
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
index ab0b93d62af5..0790a941823b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
@@ -40,4 +40,13 @@ def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
(CZERO_EQZ GPR:$rs1, GPR:$rc)>;
def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)),
(CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+
+def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, (riscv_setne (XLenVT GPR:$rc)))),
+ (CZERO_EQZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, (riscv_seteq (XLenVT GPR:$rc)))),
+ (CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, (riscv_setne (XLenVT GPR:$rc)))),
+ (CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, (riscv_seteq (XLenVT GPR:$rc)))),
+ (CZERO_EQZ GPR:$rs1, GPR:$rc)>;
} // Predicates = [HasStdExtZicond]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
index b8c0606034c5..3ec63b1b6adb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -36,30 +36,26 @@ def RnumArg : AsmOperandClass {
let DiagnosticType = "InvalidRnumArg";
}
-def rnum : Operand<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
+def rnum : RISCVOp<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
let ParserMatchClass = RnumArg;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<4>";
let OperandType = "OPERAND_RVKRNUM";
- let OperandNamespace = "RISCVOp";
}
-def byteselect : Operand<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> {
+def byteselect : RISCVOp<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2>;
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2";
- let OperandNamespace = "RISCVOp";
}
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVKUnary<bits<12> imm12_in, bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
- opcodestr, "$rd, $rs1">{
- let imm12 = imm12_in;
-}
+class RVKUnary<bits<12> imm12, bits<3> funct3, string opcodestr>
+ : RVInstIUnary<imm12, funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVKByteSelect<bits<5> funct5, string opcodestr>
@@ -72,12 +68,12 @@ class RVKByteSelect<bits<5> funct5, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVKUnary_rnum<bits<7> funct7, bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, rnum:$rnum),
- opcodestr, "$rd, $rs1, $rnum">{
- bits<4> rnum;
- let Inst{31-25} = funct7;
- let Inst{24} = 1;
- let Inst{23-20} = rnum;
+ : RVInstIBase<funct3, OPC_OP_IMM, (outs GPR:$rd),
+ (ins GPR:$rs1, rnum:$rnum), opcodestr, "$rd, $rs1, $rnum"> {
+ bits<4> rnum;
+ let Inst{31-25} = funct7;
+ let Inst{24} = 0b1;
+ let Inst{23-20} = rnum;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 046074d848f5..1b1f3b9b16e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -25,7 +25,8 @@ let Uses = [FRM] in
defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>;
}
-let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd",
+let Predicates = [HasStdExtZvfbfwma],
+ Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb",
RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in {
defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 13c98ce92d14..1ffa78a28d09 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -15,46 +15,16 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def RnumArg_0_7 : AsmOperandClass {
- let Name = "RnumArg_0_7";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidRnumArg_0_7";
-}
-
-def RnumArg_1_10 : AsmOperandClass {
- let Name = "RnumArg_1_10";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidRnumArg_1_10";
-}
-
-def RnumArg_2_14 : AsmOperandClass {
- let Name = "RnumArg_2_14";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "InvalidRnumArg_2_14";
-}
-
-def rnum_0_7 : Operand<XLenVT>, ImmLeaf<XLenVT,
- [{return (0 <= Imm && Imm <= 7);}]> {
- let ParserMatchClass = RnumArg_0_7;
+def tuimm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<5>;
+ let EncoderMethod = "getUImmOpValue";
let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_RVKRNUM_0_7";
- let OperandNamespace = "RISCVOp";
-}
-
-def rnum_1_10 : Operand<XLenVT>, ImmLeaf<XLenVT,
- [{return (1 <= Imm && Imm <= 10);}]> {
- let ParserMatchClass = RnumArg_1_10;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_RVKRNUM_1_10";
- let OperandNamespace = "RISCVOp";
-}
-
-def rnum_2_14 : Operand<XLenVT>, ImmLeaf<XLenVT,
- [{return (2 <= Imm && Imm <= 14);}]> {
- let ParserMatchClass = RnumArg_2_14;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_RVKRNUM_2_14";
- let OperandNamespace = "RISCVOp";
+ let MCOperandPredicate = [{
+ int64_t UImm;
+ if (MCOp.evaluateAsConstantImm(UImm))
+ return isUInt<5>(UImm);
+ return MCOp.isBareSymbolRef();
+ }];
}
//===----------------------------------------------------------------------===//
@@ -140,15 +110,10 @@ class VAESKF_MV_I<bits<6> funct6, string opcodestr, Operand optype>
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZvbb] in {
- defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>;
- def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">;
def VBREV_V : VALUVs2<0b010010, 0b01010, OPMVV, "vbrev.v">;
def VCLZ_V : VALUVs2<0b010010, 0b01100, OPMVV, "vclz.v">;
def VCPOP_V : VALUVs2<0b010010, 0b01110, OPMVV, "vcpop.v">;
def VCTZ_V : VALUVs2<0b010010, 0b01101, OPMVV, "vctz.v">;
- def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">;
- defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>;
- defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>;
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in
defm VWSLL_V : VSHT_IV_V_X_I<"vwsll", 0b110101>;
} // Predicates = [HasStdExtZvbb]
@@ -158,16 +123,24 @@ let Predicates = [HasStdExtZvbc] in {
defm VCLMULH_V : VCLMUL_MV_V_X<"vclmulh", 0b001101>;
} // Predicates = [HasStdExtZvbc]
+let Predicates = [HasStdExtZvkb] in {
+ defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>;
+ def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">;
+ def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">;
+ defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>;
+ defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>;
+} // Predicates = [HasStdExtZvkb]
+
let Predicates = [HasStdExtZvkg], RVVConstraint = NoConstraint in {
def VGHSH_VV : PALUVVNoVm<0b101100, OPMVV, "vghsh.vv">;
def VGMUL_VV : PALUVs2NoVm<0b101000, 0b10001, OPMVV, "vgmul.vv">;
} // Predicates = [HasStdExtZvkg]
-let Predicates = [HasStdExtZvknha], RVVConstraint = NoConstraint in {
+let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = NoConstraint in {
def VSHA2CH_VV : PALUVVNoVm<0b101110, OPMVV, "vsha2ch.vv">;
def VSHA2CL_VV : PALUVVNoVm<0b101111, OPMVV, "vsha2cl.vv">;
def VSHA2MS_VV : PALUVVNoVm<0b101101, OPMVV, "vsha2ms.vv">;
-} // Predicates = [HasStdExtZvknha]
+} // Predicates = [HasStdExtZvknhaOrZvknhb]
let Predicates = [HasStdExtZvkned], RVVConstraint = NoConstraint in {
defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">;
@@ -193,34 +166,254 @@ let Predicates = [HasStdExtZvksh], RVVConstraint = NoConstraint in {
// Pseudo instructions
//===----------------------------------------------------------------------===//
-defm PseudoVANDN : VPseudoVALU_VV_VX;
+defvar I32IntegerVectors = !filter(vti, AllIntegerVectors, !eq(vti.SEW, 32));
+defvar I32I64IntegerVectors = !filter(vti, AllIntegerVectors,
+ !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)));
+
+class ZvkI32IntegerVectors<string vd_lmul> {
+ list<VTypeInfo> vs2_types = !cond(!eq(vd_lmul, "M8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)),
+ !eq(vd_lmul, "M4") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)),
+ !eq(vd_lmul, "M2") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 16)),
+ !eq(vd_lmul, "M1") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 8)),
+ !eq(vd_lmul, "MF2") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 4)),
+ !eq(vd_lmul, "MF4") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 2)),
+ !eq(vd_lmul, "MF8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 1)));
+}
+
+class ZvkMxSet<string vd_lmul> {
+ list<LMULInfo> vs2_lmuls = !cond(!eq(vd_lmul, "M8") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4],
+ !eq(vd_lmul, "M4") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4],
+ !eq(vd_lmul, "M2") : [V_MF8, V_MF4, V_MF2, V_M1, V_M2],
+ !eq(vd_lmul, "M1") : [V_MF8, V_MF4, V_MF2, V_M1],
+ !eq(vd_lmul, "MF2") : [V_MF8, V_MF4, V_MF2],
+ !eq(vd_lmul, "MF4") : [V_MF8, V_MF4],
+ !eq(vd_lmul, "MF8") : [V_MF8]);
+}
+
+class VPseudoUnaryNoMask_Zvk<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoBinaryNoMask_Zvk<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+multiclass VPseudoBinaryNoMask_Zvk<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in
+ def "_" # MInfo.MX : VPseudoBinaryNoMask_Zvk<RetClass, Op1Class, Op2Class,
+ Constraint>;
+}
+
+multiclass VPseudoUnaryV_V_NoMask_Zvk<LMULInfo m, string Constraint = ""> {
+ let VLMul = m.value in {
+ def "_VV_" # m.MX : VPseudoUnaryNoMask_Zvk<m.vrclass, m.vrclass, Constraint>;
+ }
+}
+
+multiclass VPseudoUnaryV_S_NoMask_Zvk<LMULInfo m, string Constraint = ""> {
+ let VLMul = m.value in
+ foreach vs2_lmul = ZvkMxSet<m.MX>.vs2_lmuls in
+ def "_VS_" # m.MX # "_" # vs2_lmul.MX : VPseudoUnaryNoMask_Zvk<m.vrclass, vs2_lmul.vrclass, Constraint>;
+}
+
+multiclass VPseudoVALU_V_NoMask_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm "" : VPseudoUnaryV_V_NoMask_Zvk<m, Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_S_NoMask_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm "" : VPseudoUnaryV_S_NoMask_Zvk<m, Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_V_S_NoMask_Zvk<string Constraint = ""> {
+ defm "" : VPseudoVALU_V_NoMask_Zvk<Constraint>;
+ defm "" : VPseudoVALU_S_NoMask_Zvk<Constraint>;
+}
+
+multiclass VPseudoVALU_VV_NoMask_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VV : VPseudoBinaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
-multiclass VPseudoUnaryV_V {
+multiclass VPseudoVALU_VI_NoMask_Zvk<Operand ImmType = simm5, string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VI : VPseudoBinaryNoMask_Zvk<m.vrclass, m.vrclass, ImmType, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_VI_NoMaskTU_Zvk<Operand ImmType = uimm5, string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VI : VPseudoBinaryNoMask<m.vrclass, m.vrclass, ImmType, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVALU_VV_NoMaskTU_Zvk<string Constraint = ""> {
+ foreach m = MxListVF4 in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm _VV : VPseudoBinaryNoMask<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVCLMUL_VV_VX {
foreach m = MxList in {
- let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
- def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx=2>;
- }
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar WriteVIALUX_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+ defvar ReadVIALUX_MX = !cast<SchedRead>("ReadVIALUX_" # mx);
+
+ defm "" : VPseudoBinaryV_VV<m>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<m>,
+ Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoUnaryV_V<LMULInfo m> {
+ let VLMul = m.value in {
+ defvar suffix = "_V_" # m.MX;
+ def suffix : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def suffix # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>;
+ }
+}
+
+multiclass VPseudoVALU_V {
+ foreach m = MxList in {
+ defvar mx = m.MX;
+ defvar WriteVIALUV_MX = !cast<SchedWrite>("WriteVIALUV_" # mx);
+ defvar ReadVIALUV_MX = !cast<SchedRead>("ReadVIALUV_" # mx);
+
+ defm "" : VPseudoUnaryV_V<m>,
+ Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>;
}
}
-defm PseudoVBREV : VPseudoUnaryV_V;
-defm PseudoVREV8 : VPseudoUnaryV_V;
-defm PseudoVCLZ : VPseudoUnaryV_V;
-defm PseudoVCTZ : VPseudoUnaryV_V;
-defm PseudoVCPOP : VPseudoUnaryV_V;
+let Predicates = [HasStdExtZvbb] in {
+ defm PseudoVBREV : VPseudoVALU_V;
+ defm PseudoVCLZ : VPseudoVALU_V;
+ defm PseudoVCTZ : VPseudoVALU_V;
+ defm PseudoVCPOP : VPseudoVALU_V;
+ defm PseudoVWSLL : VPseudoVWALU_VV_VX_VI<uimm5>;
+} // Predicates = [HasStdExtZvbb]
+
+let Predicates = [HasStdExtZvbc] in {
+ defm PseudoVCLMUL : VPseudoVCLMUL_VV_VX;
+ defm PseudoVCLMULH : VPseudoVCLMUL_VV_VX;
+} // Predicates = [HasStdExtZvbc]
+
+let Predicates = [HasStdExtZvkb] in {
+ defm PseudoVANDN : VPseudoVALU_VV_VX;
+ defm PseudoVBREV8 : VPseudoVALU_V;
+ defm PseudoVREV8 : VPseudoVALU_V;
+ defm PseudoVROL : VPseudoVALU_VV_VX;
+ defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>;
+} // Predicates = [HasStdExtZvkb]
+
+let Predicates = [HasStdExtZvkg] in {
+ defm PseudoVGHSH : VPseudoVALU_VV_NoMask_Zvk;
+ defm PseudoVGMUL : VPseudoVALU_V_NoMask_Zvk;
+} // Predicates = [HasStdExtZvkg]
+
+let Predicates = [HasStdExtZvkned] in {
+ defm PseudoVAESDF : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESDM : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESEF : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESEM : VPseudoVALU_V_S_NoMask_Zvk;
+ defm PseudoVAESKF1 : VPseudoVALU_VI_NoMaskTU_Zvk;
+ defm PseudoVAESKF2 : VPseudoVALU_VI_NoMask_Zvk<uimm5>;
+ defm PseudoVAESZ : VPseudoVALU_S_NoMask_Zvk;
+} // Predicates = [HasStdExtZvkned]
+
+let Predicates = [HasStdExtZvknhaOrZvknhb] in {
+ defm PseudoVSHA2CH : VPseudoVALU_VV_NoMask_Zvk;
+ defm PseudoVSHA2CL : VPseudoVALU_VV_NoMask_Zvk;
+ defm PseudoVSHA2MS : VPseudoVALU_VV_NoMask_Zvk;
+} // Predicates = [HasStdExtZvknhaOrZvknhb]
+
+let Predicates = [HasStdExtZvksed] in {
+ defm PseudoVSM4K : VPseudoVALU_VI_NoMaskTU_Zvk;
+ defm PseudoVSM4R : VPseudoVALU_V_S_NoMask_Zvk;
+} // Predicates = [HasStdExtZvksed]
-defm PseudoVROL : VPseudoVALU_VV_VX;
-defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>;
+let Predicates = [HasStdExtZvksh] in {
+ defm PseudoVSM3C : VPseudoVALU_VI_NoMask_Zvk<uimm5>;
+ defm PseudoVSM3ME : VPseudoVALU_VV_NoMaskTU_Zvk;
+} // Predicates = [HasStdExtZvksh]
//===----------------------------------------------------------------------===//
// SDNode patterns
//===----------------------------------------------------------------------===//
-multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name> {
+multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name,
+ Predicate predicate = HasStdExtZvbb> {
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([predicate],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX)
@@ -239,7 +432,7 @@ def riscv_vnot : PatFrag<(ops node:$rs1), (xor node:$rs1,
(riscv_splat_vector -1))>;
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([HasStdExtZvkb],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (and (riscv_vnot vti.RegClass:$rs1),
vti.RegClass:$rs2)),
@@ -260,14 +453,27 @@ foreach vti = AllIntegerVectors in {
}
defm : VPatUnarySDNode_V<bitreverse, "PseudoVBREV">;
-defm : VPatUnarySDNode_V<bswap, "PseudoVREV8">;
+defm : VPatUnarySDNode_V<bswap, "PseudoVREV8", HasStdExtZvkb>;
defm : VPatUnarySDNode_V<ctlz, "PseudoVCLZ">;
defm : VPatUnarySDNode_V<cttz, "PseudoVCTZ">;
defm : VPatUnarySDNode_V<ctpop, "PseudoVCPOP">;
defm : VPatBinarySDNode_VV_VX<rotl, "PseudoVROL">;
-def NegImm64 : SDNodeXForm<imm, [{
+// Invert the immediate and mask it to SEW for readability.
+def InvRot8Imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0x7 & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def InvRot16Imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0xf & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def InvRot32Imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0x1f & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+def InvRot64Imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(0x3f & (64 - N->getZExtValue()), SDLoc(N),
N->getValueType(0));
}]>;
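
The InvRot*Imm transforms above (and the vror.vi patterns in the next hunk) rest on the identity rotl(x, r) == rotr(x, (SEW - r) mod SEW); masking (64 - r) with SEW-1 computes exactly that residue, because every supported SEW (8, 16, 32, 64) divides 64. A small illustrative check in plain C++ (not code from the patch):

#include <cassert>
#include <cstdint>

// Rotate right on a W-bit lane; W is 8*sizeof(T).
template <typename T> static T rotr(T x, unsigned r) {
  constexpr unsigned W = 8 * sizeof(T);
  r &= W - 1;
  return r == 0 ? x : static_cast<T>((x >> r) | (x << (W - r)));
}

// Rotate left expressed as rotate right by the negated amount masked to the
// lane width -- the same thing InvRot8/16/32/64Imm do with (64 - imm).
template <typename T> static T rotl_via_rotr(T x, unsigned r) {
  constexpr unsigned W = 8 * sizeof(T);
  return rotr<T>(x, (64 - r) & (W - 1));
}

int main() {
  for (unsigned r = 0; r < 16; ++r) {
    uint16_t x = 0xBEEF;
    uint16_t ref = r == 0 ? x : static_cast<uint16_t>((x << r) | (x >> (16 - r)));
    assert(rotl_via_rotr<uint16_t>(x, r) == ref);
  }
}
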
@@ -275,26 +481,56 @@ def NegImm64 : SDNodeXForm<imm, [{
// Although there is no vrol.vi, an immediate rotate left can be achieved by
// negating the immediate in vror.vi
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([HasStdExtZvkb],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (rotl vti.RegClass:$rs2,
(vti.Vector (SplatPat_uimm6 uimm6:$rs1)))),
(!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs2,
- (NegImm64 uimm6:$rs1),
+ (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1),
vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
defm : VPatBinarySDNode_VV_VX_VI<rotr, "PseudoVROR", uimm6>;
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1)))),
+ (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, GPR:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (SplatPat_uimm5 uimm5:$rs1))),
+ (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, uimm5:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// VL patterns
//===----------------------------------------------------------------------===//
-multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
+multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name,
+ Predicate predicate = HasStdExtZvbb> {
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([predicate],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$merge),
@@ -312,7 +548,7 @@ multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
}
foreach vti = AllIntegerVectors in {
- let Predicates = !listconcat([HasStdExtZvbb],
+ let Predicates = !listconcat([HasStdExtZvkb],
GetVTypePredicates<vti>.Predicates) in {
def : Pat<(vti.Vector (riscv_and_vl (riscv_xor_vl
(vti.Vector vti.RegClass:$rs1),
@@ -351,7 +587,339 @@ foreach vti = AllIntegerVectors in {
}
defm : VPatUnaryVL_V<riscv_bitreverse_vl, "PseudoVBREV">;
-defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8">;
+defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8", HasStdExtZvkb>;
defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">;
defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">;
defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">;
+
+defm : VPatBinaryVL_VV_VX<riscv_rotl_vl, "PseudoVROL">;
+// Although there is no vrol.vi, an immediate rotate left can be achieved by
+// negating the immediate in vror.vi
+foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvkb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(riscv_rotl_vl vti.RegClass:$rs2,
+ (vti.Vector (SplatPat_uimm6 uimm6:$rs1)),
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs2,
+ (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1),
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+defm : VPatBinaryVL_VV_VX_VI<riscv_rotr_vl, "PseudoVROR", uimm6>;
+
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(riscv_shl_vl
+ (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_shl_vl
+ (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_shl_vl
+ (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (SplatPat_uimm5 uimm5:$rs1)),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_vwsll_vl
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_vwsll_vl
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_vwsll_vl
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector (SplatPat_uimm5 uimm5:$rs1)),
+ (wti.Vector wti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+class VPatUnaryNoMask_Zvk<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
+class VPatUnaryNoMask_VS_Zvk<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ LMULInfo vs2_lmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_"#vs2_lmul.MX)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
+multiclass VPatUnaryV_V_NoMask_Zvk<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ def : VPatUnaryNoMask_Zvk<intrinsic # "_vv", instruction, "VV",
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatUnaryV_S_NoMaskVectorCrypto<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ foreach vti_vs2 = ZvkI32IntegerVectors<vti.LMul.MX>.vs2_types in
+ def : VPatUnaryNoMask_VS_Zvk<intrinsic # "_vs", instruction, "VS",
+ vti.Vector, vti_vs2.Vector, vti.Log2SEW,
+ vti.LMul, vti_vs2.LMul, vti.RegClass, vti_vs2.RegClass>;
+}
+
+multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ defm : VPatUnaryV_V_NoMask_Zvk<intrinsic, instruction, vtilist>;
+ defm : VPatUnaryV_S_NoMaskVectorCrypto<intrinsic, instruction, vtilist>;
+}
+
+multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV",
+ vti.Vector, vti.Vector, vti.Vector,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand imm_type = tuimm5> {
+ foreach vti = vtilist in
+ def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VI",
+ vti.Vector, vti.Vector, XLenVT,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, imm_type>;
+}
+
+multiclass VPatBinaryV_VI_NoMaskTU<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand imm_type = tuimm5> {
+ foreach vti = vtilist in
+ def : VPatBinaryNoMaskTU<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Log2SEW,
+ vti.RegClass, vti.RegClass, imm_type>;
+}
+
+multiclass VPatBinaryV_VV_NoMaskTU<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ def : VPatBinaryNoMaskTU<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.RegClass, vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_VX_VROTATE<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ defvar kind = "V"#vti.ScalarSuffix;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic,
+ !if(isSEWAware,
+ instruction#"_"#kind#"_"#vti.LMul.MX#"_E"#vti.SEW,
+ instruction#"_"#kind#"_"#vti.LMul.MX),
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_VI_VROL<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(
+ !if(isSEWAware, instruction#"_VI_"#vti.LMul.MX#"_E"#vti.SEW,
+ instruction#"_VI_"#vti.LMul.MX));
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Vector (Intr (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (XLenVT uimm6:$rs1),
+ VLOpFrag)),
+ (Pseudo (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (InvRot64Imm uimm6:$rs1),
+ GPR:$vl, vti.Log2SEW, TU_MU)>;
+
+ defvar IntrMask = !cast<Intrinsic>(intrinsic#"_mask");
+ defvar PseudoMask = !cast<Instruction>(
+ !if(isSEWAware, instruction#"_VI_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK",
+ instruction#"_VI_"#vti.LMul.MX#"_MASK"));
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Vector (IntrMask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (XLenVT uimm6:$rs1),
+ (vti.Mask V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (PseudoMask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (InvRot64Imm uimm6:$rs1),
+ (vti.Mask V0),
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ }
+}
+
+multiclass VPatBinaryV_VV_VX_VROL<string intrinsic, string instruction,
+ string instruction2, list<VTypeInfo> vtilist>
+ : VPatBinaryV_VV<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VX_VROTATE<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VI_VROL<intrinsic, instruction2, vtilist>;
+
+multiclass VPatBinaryV_VV_VX_VI_VROR<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand ImmType = uimm6>
+ : VPatBinaryV_VV<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VX_VROTATE<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>;
+
+multiclass VPatBinaryW_VI_VWSLL<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defm : VPatBinary<intrinsic, instruction # "_VI_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, uimm5>;
+ }
+}
+
+multiclass VPatBinaryW_VX_VWSLL<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "V"#Vti.ScalarSuffix;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryW_VV_VX_VI_VWSLL<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatBinaryW_VV<intrinsic, instruction, vtilist>,
+ VPatBinaryW_VX_VWSLL<intrinsic, instruction, vtilist>,
+ VPatBinaryW_VI_VWSLL<intrinsic, instruction, vtilist>;
+
+let Predicates = [HasStdExtZvbb] in {
+ defm : VPatUnaryV_V<"int_riscv_vbrev", "PseudoVBREV", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vclz", "PseudoVCLZ", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vctz", "PseudoVCTZ", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vcpopv", "PseudoVCPOP", AllIntegerVectors>;
+ defm : VPatBinaryW_VV_VX_VI_VWSLL<"int_riscv_vwsll", "PseudoVWSLL", AllWidenableIntVectors>;
+} // Predicates = [HasStdExtZvbb]
+
+let Predicates = [HasStdExtZvbc] in {
+ defm : VPatBinaryV_VV_VX<"int_riscv_vclmul", "PseudoVCLMUL", I64IntegerVectors>;
+ defm : VPatBinaryV_VV_VX<"int_riscv_vclmulh", "PseudoVCLMULH", I64IntegerVectors>;
+} // Predicates = [HasStdExtZvbc]
+
+let Predicates = [HasStdExtZvkb] in {
+ defm : VPatBinaryV_VV_VX<"int_riscv_vandn", "PseudoVANDN", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vbrev8", "PseudoVBREV8", AllIntegerVectors>;
+ defm : VPatUnaryV_V<"int_riscv_vrev8", "PseudoVREV8", AllIntegerVectors>;
+ defm : VPatBinaryV_VV_VX_VROL<"int_riscv_vrol", "PseudoVROL", "PseudoVROR", AllIntegerVectors>;
+ defm : VPatBinaryV_VV_VX_VI_VROR<"int_riscv_vror", "PseudoVROR", AllIntegerVectors>;
+} // Predicates = [HasStdExtZvkb]
+
+let Predicates = [HasStdExtZvkg] in {
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vghsh", "PseudoVGHSH", I32IntegerVectors>;
+ defm : VPatUnaryV_V_NoMask_Zvk<"int_riscv_vgmul", "PseudoVGMUL", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvkg]
+
+let Predicates = [HasStdExtZvkned] in {
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesdf", "PseudoVAESDF", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesdm", "PseudoVAESDM", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesef", "PseudoVAESEF", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vaesem", "PseudoVAESEM", I32IntegerVectors>;
+ defm : VPatBinaryV_VI_NoMaskTU<"int_riscv_vaeskf1", "PseudoVAESKF1", I32IntegerVectors>;
+ defm : VPatBinaryV_VI_NoMask<"int_riscv_vaeskf2", "PseudoVAESKF2", I32IntegerVectors>;
+ defm : VPatUnaryV_S_NoMaskVectorCrypto<"int_riscv_vaesz", "PseudoVAESZ", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvkned]
+
+let Predicates = [HasStdExtZvknha] in {
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvknha]
+
+let Predicates = [HasStdExtZvknhb] in {
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>;
+} // Predicates = [HasStdExtZvknhb]
+
+let Predicates = [HasStdExtZvksed] in {
+ defm : VPatBinaryV_VI_NoMaskTU<"int_riscv_vsm4k", "PseudoVSM4K", I32IntegerVectors>;
+ defm : VPatUnaryV_V_S_NoMask_Zvk<"int_riscv_vsm4r", "PseudoVSM4R", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvksed]
+
+let Predicates = [HasStdExtZvksh] in {
+ defm : VPatBinaryV_VI_NoMask<"int_riscv_vsm3c", "PseudoVSM3C", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMaskTU<"int_riscv_vsm3me", "PseudoVSM3ME", I32IntegerVectors>;
+} // Predicates = [HasStdExtZvksh]
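
A note on the PseudoVWSLL patterns earlier in this file: vwsll is a widening shift, i.e. each SEW-bit source element is zero-extended to 2*SEW bits before the left shift, which is why the SDNode and VL patterns match shl of a zext_oneuse at the wide type. A per-element scalar analogue for SEW=32 (illustrative sketch only; the handling of the shift amount is simplified here, see the Zvbb specification for the precise semantics):

#include <cassert>
#include <cstdint>

// SEW=32 element analogue of the shl(zext(vs2), amt) pattern that selects
// vwsll: the zero-extension to 64 bits happens before the shift, so bits
// shifted past position 31 are kept in the widened result.
static uint64_t vwsll_elem32(uint32_t vs2, unsigned amt /* assumed < 64 */) {
  return static_cast<uint64_t>(vs2) << amt;
}

int main() {
  assert(vwsll_elem32(0x80000000u, 1) == 0x100000000ull);
  assert(vwsll_elem32(0xFFFFFFFFu, 32) == 0xFFFFFFFF00000000ull);
  // Contrast with a non-widening 32-bit shift, which loses the top bit:
  assert(static_cast<uint32_t>(0x80000000u << 1) == 0u);
}
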
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 5dfd47a687e9..fcc20c17c6b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -104,13 +104,18 @@ public:
BranchRelaxationScratchFrameIndex = Index;
}
+ unsigned getReservedSpillsSize() const {
+ return LibCallStackSize + RVPushStackSize;
+ }
+
unsigned getLibCallStackSize() const { return LibCallStackSize; }
void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; }
bool useSaveRestoreLibCalls(const MachineFunction &MF) const {
// We cannot use fixed locations for the callee saved spill slots if the
// function uses a varargs save area, or is an interrupt handler.
- return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
+ return !isPushable(MF) &&
+ MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() &&
!MF.getFunction().hasFnAttribute("interrupt");
}
@@ -127,10 +132,13 @@ public:
unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
- uint64_t isPushable(const MachineFunction &MF) const {
- return (!useSaveRestoreLibCalls(MF) &&
- MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() &&
- !MF.getTarget().Options.DisableFramePointerElim(MF));
+ bool isPushable(const MachineFunction &MF) const {
+ // We cannot use fixed locations for the callee saved spill slots if the
+ // function uses a varargs save area.
+    // TODO: Use a separate placement for vararg registers to enable Zcmp.
+ return MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() &&
+ !MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ VarArgsSaveSize == 0;
}
int getRVPushRlist() const { return RVPushRlist; }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index da104657680a..02ea5270823d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -18,6 +18,101 @@
using namespace llvm;
+static bool checkRegisters(Register FirstDest, const MachineInstr &SecondMI) {
+ if (!SecondMI.getOperand(1).isReg())
+ return false;
+
+ if (SecondMI.getOperand(1).getReg() != FirstDest)
+ return false;
+
+ // If the input is virtual make sure this is the only user.
+ if (FirstDest.isVirtual()) {
+ auto &MRI = SecondMI.getMF()->getRegInfo();
+ return MRI.hasOneNonDBGUse(FirstDest);
+ }
+
+ return SecondMI.getOperand(0).getReg() == FirstDest;
+}
+
+// Fuse load with add:
+// add rd, rs1, rs2
+// ld rd, 0(rd)
+static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::LD)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ if (SecondMI.getOperand(2).getImm() != 0)
+ return false;
+
+ // Given SecondMI, when FirstMI is unspecified, we must return
+ // if SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::ADD)
+ return true;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse these patterns:
+//
+// slli rd, rs1, 32
+// srli rd, rd, x
+// where 0 <= x <= 32
+//
+// and
+//
+// slli rd, rs1, 48
+// srli rd, rd, x
+static bool isShiftedZExt(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::SRLI)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ unsigned SRLIImm = SecondMI.getOperand(2).getImm();
+ bool IsShiftBy48 = SRLIImm == 48;
+ if (SRLIImm > 32 && !IsShiftBy48)
+ return false;
+
+ // Given SecondMI, when FirstMI is unspecified, we must return
+ // if SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::SLLI)
+ return false;
+
+ unsigned SLLIImm = FirstMI->getOperand(2).getImm();
+ if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse AUIPC followed by ADDI
+// auipc rd, imm20
+// addi rd, rd, imm12
+static bool isAUIPCADDI(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::ADDI)
+ return false;
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::AUIPC)
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
// Fuse LUI followed by ADDI or ADDIW.
// rd = imm[31:0] which decomposes to
// lui rd, imm[31:12]
@@ -27,7 +122,6 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
if (SecondMI.getOpcode() != RISCV::ADDI &&
SecondMI.getOpcode() != RISCV::ADDIW)
return false;
-
// Assume the 1st instr to be a wildcard if it is unspecified.
if (!FirstMI)
return true;
@@ -35,25 +129,7 @@ static bool isLUIADDI(const MachineInstr *FirstMI,
if (FirstMI->getOpcode() != RISCV::LUI)
return false;
- // The first operand of ADDI might be a frame index.
- if (!SecondMI.getOperand(1).isReg())
- return false;
-
- Register FirstDest = FirstMI->getOperand(0).getReg();
-
- // Destination of LUI should be the ADDI(W) source register.
- if (SecondMI.getOperand(1).getReg() != FirstDest)
- return false;
-
- // If the input is virtual make sure this is the only user.
- if (FirstDest.isVirtual()) {
- auto &MRI = SecondMI.getMF()->getRegInfo();
- return MRI.hasOneNonDBGUse(FirstDest);
- }
-
- // If the FirstMI destination is non-virtual, it should match the SecondMI
- // destination.
- return SecondMI.getOperand(0).getReg() == FirstDest;
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
}
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -65,6 +141,15 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
return true;
+ if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
+ return true;
+
return false;
}
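
The shifted-zext fusion above targets the standard RV64 zero-extension idiom: slli rd, rs1, 32 places the low 32 bits of rs1 at the top of the register, and the following srli rd, rd, x drops them back down, so x == 32 is a plain 32-bit zero-extension and x < 32 is a zero-extension followed by a left shift (the slli-by-48 form plays the same role for 16-bit values). A small plain-C++ illustration of that arithmetic (not code from the patch):

#include <cassert>
#include <cstdint>

// What the fused slli/srli pair computes on RV64:
// slli rd, rs1, 32 ; srli rd, rd, x  ==>  zext32(rs1) << (32 - x), for x <= 32.
static uint64_t slli_srli(uint64_t rs1, unsigned x) {
  uint64_t t = rs1 << 32;
  return t >> x;
}

int main() {
  uint64_t v = 0xDEADBEEFCAFEF00Dull;
  assert(slli_srli(v, 32) == (v & 0xFFFFFFFFull));        // plain zext.w
  assert(slli_srli(v, 29) == ((v & 0xFFFFFFFFull) << 3)); // zext, then shift
}
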
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 841439bb732e..ff21fe1d4064 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -84,9 +84,7 @@ struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &Fn) override;
- RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {
- initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry());
- }
+ RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
@@ -271,7 +269,7 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
RegScavenger RS;
RS.enterBasicBlockEnd(MBB);
- RS.backward(MIs.back()->getIterator());
+ RS.backward(std::next(MIs.back()->getIterator()));
return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
/*RestoreAfter=*/false, /*SPAdj=*/0,
/*AllowSpill=*/false);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 855322b981fb..ae46d5554d35 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -19,7 +19,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "riscv-merge-base-offset"
@@ -94,7 +93,8 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
if (HiOp1.getTargetFlags() != ExpectedFlags)
return false;
- if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0)
+ if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) ||
+ HiOp1.getOffset() != 0)
return false;
Register HiDestReg = Hi.getOperand(0).getReg();
@@ -108,7 +108,8 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
const MachineOperand &LoOp2 = Lo->getOperand(2);
if (Hi.getOpcode() == RISCV::LUI) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
- !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0)
+ !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
+ LoOp2.getOffset() != 0)
return false;
} else {
assert(Hi.getOpcode() == RISCV::AUIPC);
@@ -120,8 +121,10 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
if (HiOp1.isGlobal()) {
LLVM_DEBUG(dbgs() << " Found lowered global address: "
<< *HiOp1.getGlobal() << "\n");
- } else {
- assert(HiOp1.isCPI());
+ } else if (HiOp1.isBlockAddress()) {
+ LLVM_DEBUG(dbgs() << " Found lowered basic address: "
+ << *HiOp1.getBlockAddress() << "\n");
+ } else if (HiOp1.isCPI()) {
LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
<< "\n");
}
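
For context on the hunk above: the merge-base-offset pass works on the standard two-instruction address materialization, where lui/auipc carries a rounded upper 20 bits and the paired addi (or load/store offset) carries a sign-extended low 12 bits; extending detectFoldable to block addresses simply widens the set of symbols this applies to. The rounding itself is the usual %hi/%lo convention, sketched here in plain C++ (illustrative only):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Split an absolute 32-bit address the way a lui/addi pair consumes it:
// addr == (hi20 << 12) + signext12(lo12). The +0x800 rounds hi20 up when the
// low 12 bits become negative after sign extension.
static void splitHiLo(uint32_t addr, uint32_t &hi20, int32_t &lo12) {
  hi20 = (addr + 0x800) >> 12;
  lo12 = static_cast<int32_t>(addr << 20) >> 20; // sign-extend low 12 bits
}

int main() {
  for (uint32_t addr : {0x12345678u, 0x00000FFFu, 0xFFFFF800u}) {
    uint32_t hi;
    int32_t lo;
    splitHiLo(addr, hi, lo);
    assert(static_cast<uint32_t>((hi << 12) + lo) == addr);
  }
}

auipc applies the same split relative to the PC, which is why the pass can treat LUI- and AUIPC-based sequences uniformly when folding offsets.
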
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
index 6c1b0cf5ca7f..3c5462057b28 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMoveMerger.cpp - RISCV move merge pass -----------------------===//
+//===-- RISCVMoveMerger.cpp - RISC-V move merge pass ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -22,9 +22,7 @@ namespace {
struct RISCVMoveMerge : public MachineFunctionPass {
static char ID;
- RISCVMoveMerge() : MachineFunctionPass(ID) {
- initializeRISCVMoveMergePass(*PassRegistry::getPassRegistry());
- }
+ RISCVMoveMerge() : MachineFunctionPass(ID) {}
const RISCVInstrInfo *TII;
const TargetRegisterInfo *TRI;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 7014755b6706..2c2b34bb5b77 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,16 +12,21 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
-// Then it removes the -w suffix from each addiw and slliw instructions
-// whenever all users are dependent only on the lower word of the result of the
-// instruction. We do this only for addiw, slliw, and mulw because the -w forms
-// are less compressible.
+// Then it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction.
+// The cases handled are:
+// * addw because c.add has a larger register encoding than c.addw.
+// * addiw because it helps reduce test differences between RV32 and RV64
+// w/o being a pessimization.
+// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
+// * slliw because c.slliw doesn't exist and c.slli does
//
//===---------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -48,9 +53,7 @@ class RISCVOptWInstrs : public MachineFunctionPass {
public:
static char ID;
- RISCVOptWInstrs() : MachineFunctionPass(ID) {
- initializeRISCVOptWInstrsPass(*PassRegistry::getPassRegistry());
- }
+ RISCVOptWInstrs() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
@@ -76,6 +79,29 @@ FunctionPass *llvm::createRISCVOptWInstrsPass() {
return new RISCVOptWInstrs();
}
+static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp,
+ unsigned Bits) {
+ const MachineInstr &MI = *UserOp.getParent();
+ unsigned MCOpcode = RISCV::getRVVMCOpcode(MI.getOpcode());
+
+ if (!MCOpcode)
+ return false;
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ const uint64_t TSFlags = MCID.TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+ const unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
+
+ if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
+ return false;
+
+ auto NumDemandedBits =
+ RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
+ return NumDemandedBits && Bits >= *NumDemandedBits;
+}
+
// Checks if all users only demand the lower \p OrigBits of the original
// instruction's result.
// TODO: handle multiple interdependent transformations
@@ -100,12 +126,14 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
if (MI->getNumExplicitDefs() != 1)
return false;
- for (auto &UserOp : MRI.use_operands(MI->getOperand(0).getReg())) {
+ for (auto &UserOp : MRI.use_nodbg_operands(MI->getOperand(0).getReg())) {
const MachineInstr *UserMI = UserOp.getParent();
unsigned OpIdx = UserOp.getOperandNo();
switch (UserMI->getOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(UserOp, Bits))
+ break;
return false;
case RISCV::ADDIW:
@@ -283,6 +311,8 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
+ case RISCV::CZERO_EQZ:
+ case RISCV::CZERO_NEZ:
case RISCV::VT_MASKC:
case RISCV::VT_MASKCN:
if (OpIdx != 1)
@@ -327,9 +357,27 @@ static bool isSignExtendingOpW(const MachineInstr &MI,
// An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11.
case RISCV::ORI:
return !isUInt<11>(MI.getOperand(2).getImm());
+ // A bseti with X0 is sign extended if the immediate is less than 31.
+ case RISCV::BSETI:
+ return MI.getOperand(2).getImm() < 31 &&
+ MI.getOperand(1).getReg() == RISCV::X0;
// Copying from X0 produces zero.
case RISCV::COPY:
return MI.getOperand(1).getReg() == RISCV::X0;
+ case RISCV::PseudoAtomicLoadNand32:
+ return true;
+ case RISCV::PseudoVMV_X_S_MF8:
+ case RISCV::PseudoVMV_X_S_MF4:
+ case RISCV::PseudoVMV_X_S_MF2:
+ case RISCV::PseudoVMV_X_S_M1:
+ case RISCV::PseudoVMV_X_S_M2:
+ case RISCV::PseudoVMV_X_S_M4:
+ case RISCV::PseudoVMV_X_S_M8: {
+ // vmv.x.s has at least 33 sign bits if log2(sew) <= 5.
+ int64_t Log2SEW = MI.getOperand(2).getImm();
+ assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW");
+ return Log2SEW <= 5;
+ }
}
return false;
@@ -348,6 +396,11 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
if (!SrcMI)
return false;
+ // Code assumes the register is operand 0.
+ // TODO: Maybe the worklist should store register?
+ if (!SrcMI->getOperand(0).isReg() ||
+ SrcMI->getOperand(0).getReg() != SrcReg)
+ return false;
// Add SrcMI to the worklist.
Worklist.push_back(SrcMI);
return true;
@@ -446,9 +499,16 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
break;
case RISCV::PseudoCCADDW:
+ case RISCV::PseudoCCADDIW:
case RISCV::PseudoCCSUBW:
- // Returns operand 4 or an ADDW/SUBW of operands 5 and 6. We only need to
- // check if operand 4 is sign extended.
+ case RISCV::PseudoCCSLLW:
+ case RISCV::PseudoCCSRLW:
+ case RISCV::PseudoCCSRAW:
+ case RISCV::PseudoCCSLLIW:
+ case RISCV::PseudoCCSRLIW:
+ case RISCV::PseudoCCSRAIW:
+ // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only
+ // need to check if operand 4 is sign extended.
if (!AddRegDefToWorkList(MI->getOperand(4).getReg()))
return false;
break;
@@ -504,6 +564,8 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
break;
}
+ case RISCV::CZERO_EQZ:
+ case RISCV::CZERO_NEZ:
case RISCV::VT_MASKC:
case RISCV::VT_MASKCN:
// Instructions return zero or operand 1. Result is sign extended if
@@ -567,25 +629,23 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) {
- MachineInstr *MI = &*I++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// We're looking for the sext.w pattern ADDIW rd, rs1, 0.
- if (!RISCV::isSEXT_W(*MI))
+ if (!RISCV::isSEXT_W(MI))
continue;
- Register SrcReg = MI->getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
SmallPtrSet<MachineInstr *, 4> FixableDefs;
// If all users only use the lower bits, this sext.w is redundant.
// Or if all definitions reaching MI sign-extend their output,
// then sext.w is redundant.
- if (!hasAllWUsers(*MI, ST, MRI) &&
+ if (!hasAllWUsers(MI, ST, MRI) &&
!isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
continue;
- Register DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
continue;
@@ -603,7 +663,7 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
MRI.replaceRegWith(DstReg, SrcReg);
MRI.clearKillFlags(SrcReg);
- MI->eraseFromParent();
+ MI.eraseFromParent();
++NumRemovedSExtW;
MadeChange = true;
}
@@ -621,14 +681,13 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (auto I = MBB.begin(), IE = MBB.end(); I != IE; ++I) {
- MachineInstr &MI = *I;
-
+ for (MachineInstr &MI : MBB) {
unsigned Opc;
switch (MI.getOpcode()) {
default:
continue;
case RISCV::ADDW: Opc = RISCV::ADD; break;
+ case RISCV::ADDIW: Opc = RISCV::ADDI; break;
case RISCV::MULW: Opc = RISCV::MUL; break;
case RISCV::SLLIW: Opc = RISCV::SLLI; break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..57b473645ae7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -0,0 +1,116 @@
+//===-- RISCVPostRAExpandPseudoInsts.cpp - Expand pseudo instrs ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands the pseudo instruction pseudolisimm32
+// into target instructions. This pass should be run during the post-regalloc
+// passes, before post RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMatInt.h"
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_POST_RA_EXPAND_PSEUDO_NAME \
+ "RISC-V post-regalloc pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVPostRAExpandPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVPostRAExpandPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVPostRAExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return RISCV_POST_RA_EXPAND_PSEUDO_NAME;
+ }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+};
+
+char RISCVPostRAExpandPseudo::ID = 0;
+
+bool RISCVPostRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVPostRAExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoMovImm:
+ return expandMovImm(MBB, MBBI);
+ default:
+ return false;
+ }
+}
+
+bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ int64_t Val = MBBI->getOperand(1).getImm();
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Val, MBB.getParent()->getSubtarget());
+ assert(!Seq.empty());
+
+ Register DstReg = MBBI->getOperand(0).getReg();
+ bool DstIsDead = MBBI->getOperand(0).isDead();
+ bool Renamable = MBBI->getOperand(0).isRenamable();
+
+ TII->movImm(MBB, MBBI, DL, DstReg, Val, MachineInstr::NoFlags, Renamable,
+ DstIsDead);
+
+ MBBI->eraseFromParent();
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
+ RISCV_POST_RA_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVPostRAExpandPseudoPass() {
+ return new RISCVPostRAExpandPseudo();
+}
+
+} // end of namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
index 01291001cd7c..58989fd716fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -10,6 +10,35 @@
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
+class RISCVTuneInfo {
+ bits<8> PrefFunctionAlignment = 1;
+ bits<8> PrefLoopAlignment = 1;
+
+ // Information needed by LoopDataPrefetch.
+ bits<16> CacheLineSize = 0;
+ bits<16> PrefetchDistance = 0;
+ bits<16> MinPrefetchStride = 1;
+ bits<32> MaxPrefetchIterationsAhead = -1;
+
+ bits<32> MinimumJumpTableEntries = 5;
+}
+
+def RISCVTuneInfoTable : GenericTable {
+ let FilterClass = "RISCVTuneInfo";
+ let CppTypeName = "RISCVTuneInfo";
+ let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
+ "CacheLineSize", "PrefetchDistance",
+ "MinPrefetchStride", "MaxPrefetchIterationsAhead",
+ "MinimumJumpTableEntries"];
+}
+
+def getRISCVTuneInfo : SearchIndex {
+ let Table = RISCVTuneInfoTable;
+ let Key = ["Name"];
+}
+
+class GenericTuneInfo: RISCVTuneInfo;
+
class RISCVProcessorModel<string n,
SchedMachineModel m,
list<SubtargetFeature> f,
@@ -27,13 +56,15 @@ class RISCVTuneProcessorModel<string n,
def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
- [Feature32Bit]>;
+ [Feature32Bit]>,
+ GenericTuneInfo;
def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
NoSchedModel,
- [Feature64Bit]>;
+ [Feature64Bit]>,
+ GenericTuneInfo;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate rv32/rv64 version.
-def : ProcessorModel<"generic", NoSchedModel, []>;
+def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
@@ -201,3 +232,47 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
FeatureStdExtM,
FeatureStdExtC],
[TuneNoDefaultUnroll]>;
+
+def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
+ NoSchedModel,
+ [Feature64Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtZicsr,
+ FeatureStdExtZicntr,
+ FeatureStdExtZihpm,
+ FeatureStdExtZihintpause,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZba,
+ FeatureStdExtZbb,
+ FeatureStdExtZbc,
+ FeatureStdExtZbs,
+ FeatureStdExtZicbom,
+ FeatureStdExtZicbop,
+ FeatureStdExtZicboz,
+ FeatureVendorXVentanaCondOps],
+ [TuneVeyronFusions]>;
+
+def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
+ NoSchedModel,
+ [Feature64Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtZba,
+ FeatureStdExtZbb,
+ FeatureStdExtZbc,
+ FeatureStdExtZbs,
+ FeatureStdExtZkn,
+ FeatureStdExtZksed,
+ FeatureStdExtZksh,
+ FeatureStdExtSvinval,
+ FeatureStdExtZicbom,
+ FeatureStdExtZicboz]>;
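A note on the RISCVTuneInfoTable and getRISCVTuneInfo definitions added at the top of this file: the GenericTable/SearchIndex pair asks TableGen to emit a constant array of RISCVTuneInfo records plus a by-name lookup over it, which the subtarget can query for the listed tuning parameters. Below is a hedged, hand-written analogue; the field widths and default values are copied from the RISCVTuneInfo class above, while the struct, function, and table contents are illustrative only, not the generated code.

// Hedged sketch: a hand-rolled stand-in for the TableGen-generated tune-info
// table and its by-name lookup. Values mirror the RISCVTuneInfo defaults.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>

struct TuneInfo {
  const char *Name;
  uint8_t PrefFunctionAlignment;
  uint8_t PrefLoopAlignment;
  uint16_t CacheLineSize;
  uint16_t PrefetchDistance;
  uint16_t MinPrefetchStride;
  uint32_t MaxPrefetchIterationsAhead;
  uint32_t MinimumJumpTableEntries;
};

// Kept sorted by Name, as a SearchIndex-backed table would be.
static const TuneInfo Table[] = {
    {"generic", 1, 1, 0, 0, 1, UINT32_MAX, 5},
    {"generic-rv32", 1, 1, 0, 0, 1, UINT32_MAX, 5},
    {"generic-rv64", 1, 1, 0, 0, 1, UINT32_MAX, 5},
};

static const TuneInfo *getTuneInfo(const char *Name) {
  const TuneInfo *End = Table + sizeof(Table) / sizeof(Table[0]);
  const TuneInfo *It =
      std::lower_bound(Table, End, Name, [](const TuneInfo &TI, const char *N) {
        return std::strcmp(TI.Name, N) < 0;
      });
  return (It != End && std::strcmp(It->Name, Name) == 0) ? It : nullptr;
}

int main() {
  if (const TuneInfo *TI = getTuneInfo("generic-rv64"))
    std::cout << "MinimumJumpTableEntries = " << TI->MinimumJumpTableEntries
              << "\n";
  return 0;
}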
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
index f885adca669f..009dcf57f46d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -1,4 +1,4 @@
-//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//===------- RISCVPushPopOptimizer.cpp - RISC-V Push/Pop opt. pass --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains a pass that modifies PUSH/POP instructions from Zca
-// standard to use their non prolog/epilog related functionalities
-// and generates POPRET instruction.
+// This file contains a pass that replaces Zcmp POP instructions with
+// POPRET[Z] where possible.
//
//===----------------------------------------------------------------------===//
@@ -23,9 +22,7 @@ namespace {
struct RISCVPushPopOpt : public MachineFunctionPass {
static char ID;
- RISCVPushPopOpt() : MachineFunctionPass(ID) {
- initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
- }
+ RISCVPushPopOpt() : MachineFunctionPass(ID) {}
const RISCVInstrInfo *TII;
const TargetRegisterInfo *TRI;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
index fed3fa2987e5..735fc1350c00 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
@@ -9,7 +9,8 @@
// This file implements a function pass that initializes undef vector value to
// temporary pseudo instruction and remove it in expandpseudo pass to prevent
// register allocation resulting in a constraint violated result for vector
-// instruction.
+// instruction. It also rewrites the NoReg tied operand back to an
+// IMPLICIT_DEF.
//
// RISC-V vector instruction has register overlapping constraint for certain
// instructions, and will cause illegal instruction trap if violated, we use
@@ -30,10 +31,18 @@
//
// See also: https://github.com/llvm/llvm-project/issues/50157
//
+// Additionally, this pass rewrites tied operands of vector instructions
+// from NoReg to IMPLICIT_DEF. (Note that this is a non-overlapping set of
+// operands to the above.) We use NoReg to sidestep a MachineCSE
+// optimization quality problem but need to convert back before
+// TwoAddressInstruction. See pr64282 for context.
+//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DetectDeadLanes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
@@ -49,12 +58,14 @@ class RISCVInitUndef : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetRegisterInfo *TRI;
+ // Newly added vregs, assumed to be fully rewritten
+ SmallSet<Register, 8> NewRegs;
+ SmallVector<MachineInstr *, 8> DeadInsts;
+
public:
static char ID;
- RISCVInitUndef() : MachineFunctionPass(ID) {
- initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry());
- }
+ RISCVInitUndef() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -67,13 +78,13 @@ public:
private:
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
const DeadLaneDetector &DLD);
- bool handleImplicitDef(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &Inst);
bool isVectorRegClass(const Register R);
const TargetRegisterClass *
getVRLargestSuperClass(const TargetRegisterClass *RC) const;
bool handleSubReg(MachineFunction &MF, MachineInstr &MI,
const DeadLaneDetector &DLD);
+ bool fixupIllOperand(MachineInstr *MI, MachineOperand &MO);
+ bool handleReg(MachineInstr *MI);
};
} // end anonymous namespace
@@ -118,65 +129,38 @@ static unsigned getUndefInitOpcode(unsigned RegClassID) {
}
}
-bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &Inst) {
- const TargetRegisterInfo &TRI =
- *MBB.getParent()->getSubtarget().getRegisterInfo();
-
- assert(Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF);
-
- Register Reg = Inst->getOperand(0).getReg();
- if (!Reg.isVirtual())
- return false;
-
- bool NeedPseudoInit = false;
- SmallVector<MachineOperand *, 1> UseMOs;
- for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
- MachineInstr *UserMI = MO.getParent();
-
- bool HasEarlyClobber = false;
- bool TiedToDef = false;
- for (MachineOperand &UserMO : UserMI->operands()) {
- if (!UserMO.isReg())
- continue;
- if (UserMO.isEarlyClobber())
- HasEarlyClobber = true;
- if (UserMO.isUse() && UserMO.isTied() &&
- TRI.regsOverlap(UserMO.getReg(), Reg))
- TiedToDef = true;
- }
- if (HasEarlyClobber && !TiedToDef) {
- NeedPseudoInit = true;
- UseMOs.push_back(&MO);
- }
- }
-
- if (!NeedPseudoInit)
- return false;
-
- LLVM_DEBUG(
- dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register "
- << Reg << '\n');
-
- unsigned RegClassID = getVRLargestSuperClass(MRI->getRegClass(Reg))->getID();
- unsigned Opcode = getUndefInitOpcode(RegClassID);
-
- BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), Reg);
-
- Inst = MBB.erase(Inst);
-
- for (auto MO : UseMOs)
- MO->setIsUndef(false);
-
- return true;
-}
-
static bool isEarlyClobberMI(MachineInstr &MI) {
return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
return DefMO.isReg() && DefMO.isEarlyClobber();
});
}
+static bool findImplictDefMIFromReg(Register Reg, MachineRegisterInfo *MRI) {
+ for (auto &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ return true;
+ }
+ return false;
+}
+
+bool RISCVInitUndef::handleReg(MachineInstr *MI) {
+ bool Changed = false;
+ for (auto &UseMO : MI->uses()) {
+ if (!UseMO.isReg())
+ continue;
+ if (UseMO.isTied())
+ continue;
+ if (!UseMO.getReg().isVirtual())
+ continue;
+ if (!isVectorRegClass(UseMO.getReg()))
+ continue;
+
+ if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
+ Changed |= fixupIllOperand(MI, UseMO);
+ }
+ return Changed;
+}
+
bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
const DeadLaneDetector &DLD) {
bool Changed = false;
@@ -186,8 +170,12 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
continue;
if (!UseMO.getReg().isVirtual())
continue;
+ if (UseMO.isTied())
+ continue;
Register Reg = UseMO.getReg();
+ if (NewRegs.count(Reg))
+ continue;
DeadLaneDetector::VRegInfo Info =
DLD.getVRegInfo(Register::virtReg2Index(Reg));
@@ -235,18 +223,53 @@ bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
return Changed;
}
+bool RISCVInitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) {
+
+ LLVM_DEBUG(
+ dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register "
+ << MO.getReg() << '\n');
+
+ const TargetRegisterClass *TargetRegClass =
+ getVRLargestSuperClass(MRI->getRegClass(MO.getReg()));
+ unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID());
+ Register NewReg = MRI->createVirtualRegister(TargetRegClass);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(Opcode), NewReg);
+ MO.setReg(NewReg);
+ if (MO.isUndef())
+ MO.setIsUndef(false);
+ return true;
+}
+
bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
MachineBasicBlock &MBB,
const DeadLaneDetector &DLD) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
MachineInstr &MI = *I;
- if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI))
- Changed |= handleSubReg(MF, MI, DLD);
- if (MI.isImplicitDef()) {
- auto DstReg = MI.getOperand(0).getReg();
- if (isVectorRegClass(DstReg))
- Changed |= handleImplicitDef(MBB, I);
+
+ // If we used NoReg to represent the passthru, switch this back to being
+ // an IMPLICIT_DEF before TwoAddressInstructions.
+ unsigned UseOpIdx;
+ if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (UseMO.getReg() == RISCV::NoRegister) {
+ const TargetRegisterClass *RC =
+ TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
+ Register NewDest = MRI->createVirtualRegister(RC);
+ // We don't have a way to update dead lanes, so keep track of the
+ // new register so that we avoid querying it later.
+ NewRegs.insert(NewDest);
+ BuildMI(MBB, I, I->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), NewDest);
+ UseMO.setReg(NewDest);
+ Changed = true;
+ }
+ }
+
+ if (isEarlyClobberMI(MI)) {
+ if (ST->enableSubRegLiveness())
+ Changed |= handleSubReg(MF, MI, DLD);
+ Changed |= handleReg(&MI);
}
}
return Changed;
@@ -268,6 +291,10 @@ bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &BB : MF)
Changed |= processBasicBlock(MF, BB, DLD);
+ for (auto *DeadMI : DeadInsts)
+ DeadMI->eraseFromParent();
+ DeadInsts.clear();
+
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index c3ba4c1e7fdb..a3c19115bd31 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -14,6 +14,7 @@
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -84,10 +85,11 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const RISCVFrameLowering *TFI = getFrameLowering(MF);
BitVector Reserved(getNumRegs());
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
// Mark any registers requested to be reserved as such
for (size_t Reg = 0; Reg < getNumRegs(); Reg++) {
- if (MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(Reg))
+ if (Subtarget.isRegisterReservedByUser(Reg))
markSuperRegs(Reserved, Reg);
}
@@ -118,6 +120,13 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
+ if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
+ if (Subtarget.isRVE())
+ report_fatal_error("Graal reserved registers do not exist in RVE");
+ markSuperRegs(Reserved, RISCV::X23);
+ markSuperRegs(Reserved, RISCV::X27);
+ }
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -132,7 +141,7 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
}
// Frame indexes representing locations of CSRs which are given a fixed location
-// by save/restore libcalls.
+// by save/restore libcalls or Zcmp Push/Pop.
static const std::pair<unsigned, int> FixedCSRFIMap[] = {
{/*ra*/ RISCV::X1, -1},
{/*s0*/ RISCV::X8, -2},
@@ -290,12 +299,20 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
"Unexpected subreg numbering");
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
+ // Optimize for constant VLEN.
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (STI.getRealMinVLen() == STI.getRealMaxVLen()) {
+ const int64_t VLENB = STI.getRealMinVLen() / 8;
+ int64_t Offset = VLENB * LMUL;
+ STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
+ } else {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
+ uint32_t ShiftAmount = Log2_32(LMUL);
+ if (ShiftAmount != 0)
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ .addReg(VL)
+ .addImm(ShiftAmount);
+ }
Register SrcReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
@@ -359,12 +376,20 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
"Unexpected subreg numbering");
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
+ // Optimize for constant VLEN.
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (STI.getRealMinVLen() == STI.getRealMaxVLen()) {
+ const int64_t VLENB = STI.getRealMinVLen() / 8;
+ int64_t Offset = VLENB * LMUL;
+ STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
+ } else {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
+ uint32_t ShiftAmount = Log2_32(LMUL);
+ if (ShiftAmount != 0)
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ .addReg(VL)
+ .addImm(ShiftAmount);
+ }
Register DestReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
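Both lowerVSPILL and lowerVRELOAD above gain the same shortcut: when RealMinVLen == RealMaxVLen the vlenb CSR value is a compile-time constant, so the per-register stride VLENB * LMUL is materialized with movImm instead of csrr vlenb followed by slli. A hedged sketch of the arithmetic, assuming a core with exactly VLEN = 128 bits (the value is an assumption for illustration):

// Hedged sketch: with a fixed VLEN, the stride between consecutive
// whole-register spill slots is a plain constant, so no runtime vlenb read or
// shift is needed.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t RealVLen = 128;       // bits; fixed because min == max
  const uint64_t VLENB = RealVLen / 8; // bytes per single vector register
  for (uint64_t LMUL : {1u, 2u, 4u, 8u}) {
    unsigned Shift = 0;
    for (uint64_t L = LMUL; L > 1; L >>= 1)
      ++Shift; // shift amount the generic path would feed to SLLI
    std::printf("LMUL=%llu: constant stride %3llu bytes "
                "(generic path: csrr vlenb; slli by %u)\n",
                (unsigned long long)LMUL,
                (unsigned long long)(VLENB * LMUL), Shift);
  }
  return 0;
}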
@@ -435,9 +460,16 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// offset can by construction, at worst, a LUI and a ADD.
int64_t Val = Offset.getFixed();
int64_t Lo12 = SignExtend64<12>(Val);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
- Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
- Offset.getScalable());
+ if ((MI.getOpcode() == RISCV::PREFETCH_I ||
+ MI.getOpcode() == RISCV::PREFETCH_R ||
+ MI.getOpcode() == RISCV::PREFETCH_W) &&
+ (Lo12 & 0b11111) != 0)
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+ else {
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
+ Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
+ Offset.getScalable());
+ }
}
}
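The new special case above reflects the Zicbop encoding: prefetch.i/r/w carry their offset in imm[11:5] only, with the low five immediate bits selecting the prefetch kind, so only 32-byte-aligned offsets can be folded into the instruction; anything else keeps an immediate of 0 and folds the whole offset into the base register. A small hedged sketch of that decision:

// Hedged sketch of the folding check: offsets with any of the low five bits
// set cannot be encoded in a prefetch instruction.
#include <cstdint>
#include <cstdio>

static bool canFoldIntoPrefetch(int64_t Lo12) { return (Lo12 & 0b11111) == 0; }

int main() {
  for (int64_t Lo12 : {0, 32, -64, 48, 20, -100})
    std::printf("lo12 %5lld -> %s\n", (long long)Lo12,
                canFoldIntoPrefetch(Lo12) ? "fold as prefetch offset"
                                          : "materialize via base register");
  return 0;
}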
@@ -655,6 +687,14 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
if (RC == &RISCV::VMV0RegClass)
return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRNoV0RegClass)
+ return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRM2NoV0RegClass)
+ return &RISCV::VRM2RegClass;
+ if (RC == &RISCV::VRM4NoV0RegClass)
+ return &RISCV::VRM4RegClass;
+ if (RC == &RISCV::VRM8NoV0RegClass)
+ return &RISCV::VRM8RegClass;
return RC;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 0b17f54431ef..c59c9b294d79 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -165,15 +165,6 @@ def SP : GPRRegisterClass<(add X2)>;
def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9),
(sequence "X%u", 18, 23))>;
-// Registers saveable by PUSH/POP instruction in Zcmp extension
-def PGPR : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 8, 9),
- (sequence "X%u", 18, 27),
- X1
- )> {
- let RegInfos = XLenRI;
-}
-
// Floating point registers
let RegAltNameIndices = [ABIRegAltName] in {
def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
@@ -313,6 +304,13 @@ defvar vfloat16m2_t = nxv8f16;
defvar vfloat16m4_t = nxv16f16;
defvar vfloat16m8_t = nxv32f16;
+defvar vbfloat16mf4_t = nxv1bf16;
+defvar vbfloat16mf2_t = nxv2bf16;
+defvar vbfloat16m1_t = nxv4bf16;
+defvar vbfloat16m2_t = nxv8bf16;
+defvar vbfloat16m4_t = nxv16bf16;
+defvar vbfloat16m8_t = nxv32bf16;
+
defvar vfloat32mf2_t = nxv1f32;
defvar vfloat32m1_t = nxv2f32;
defvar vfloat32m2_t = nxv4f32;
@@ -339,20 +337,21 @@ defvar LMULList = [1, 2, 4, 8];
// Utility classes for segment load/store.
//===----------------------------------------------------------------------===//
// The set of legal NF for LMUL = lmul.
-// LMUL == 1, NF = 2, 3, 4, 5, 6, 7, 8
+// LMUL <= 1, NF = 2, 3, 4, 5, 6, 7, 8
// LMUL == 2, NF = 2, 3, 4
// LMUL == 4, NF = 2
+// LMUL == 8, no legal NF
class NFList<int lmul> {
- list<int> L = !cond(!eq(lmul, 1): [2, 3, 4, 5, 6, 7, 8],
- !eq(lmul, 2): [2, 3, 4],
+ list<int> L = !cond(!eq(lmul, 8): [],
!eq(lmul, 4): [2],
- !eq(lmul, 8): []);
+ !eq(lmul, 2): [2, 3, 4],
+ true: [2, 3, 4, 5, 6, 7, 8]);
}
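The reordered NFList above encodes the same V-extension constraint as before, NFIELDS * LMUL <= 8 for segment loads and stores, which is why LMUL == 8 is left with no legal NF. A small sketch reproducing the table from that rule:

// Hedged sketch: enumerate the legal NF values per LMUL under NF * LMUL <= 8.
#include <cstdio>

int main() {
  for (int LMUL : {1, 2, 4, 8}) {
    std::printf("LMUL=%d:", LMUL);
    for (int NF = 2; NF <= 8; ++NF)
      if (NF * LMUL <= 8)
        std::printf(" %d", NF);
    std::printf("\n");
  }
  return 0;
}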
// Generate [start, end) SubRegIndex list.
class SubRegSet<int nf, int lmul> {
list<SubRegIndex> L = !foldl([]<SubRegIndex>,
- [0, 1, 2, 3, 4, 5, 6, 7],
+ !range(0, 8),
AccList, i,
!listconcat(AccList,
!if(!lt(i, nf),
@@ -380,15 +379,9 @@ class IndexSet<int tuple_index, int nf, int lmul, bit isV0 = false> {
!foldl([]<int>,
!if(isV0, [0],
!cond(
- !eq(lmul, 1):
- [8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 24, 25, 26, 27, 28, 29, 30, 31,
- 1, 2, 3, 4, 5, 6, 7],
- !eq(lmul, 2):
- [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3],
- !eq(lmul, 4):
- [2, 3, 4, 5, 6, 7, 1])),
+ !eq(lmul, 1): !listconcat(!range(8, 32), !range(1, 8)),
+ !eq(lmul, 2): !listconcat(!range(4, 16), !range(1, 4)),
+ !eq(lmul, 4): !listconcat(!range(2, 8), !range(1, 2)))),
L, i,
!listconcat(L,
!if(!le(!mul(!add(i, tuple_index), lmul),
@@ -418,12 +411,11 @@ class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> {
}
// Vector registers
-foreach Index = 0-31 in {
+foreach Index = !range(0, 32, 1) in {
def V#Index : RISCVReg<Index, "v"#Index>, DwarfRegNum<[!add(Index, 96)]>;
}
-foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
- 24, 26, 28, 30] in {
+foreach Index = !range(0, 32, 2) in {
def V#Index#M2 : RISCVRegWithSubRegs<Index, "v"#Index,
[!cast<Register>("V"#Index),
!cast<Register>("V"#!add(Index, 1))]>,
@@ -432,7 +424,7 @@ foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
}
}
-foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
+foreach Index = !range(0, 32, 4) in {
def V#Index#M4 : RISCVRegWithSubRegs<Index, "v"#Index,
[!cast<Register>("V"#Index#"M2"),
!cast<Register>("V"#!add(Index, 2)#"M2")]>,
@@ -441,7 +433,7 @@ foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
}
}
-foreach Index = [0, 8, 16, 24] in {
+foreach Index = !range(0, 32, 8) in {
def V#Index#M8 : RISCVRegWithSubRegs<Index, "v"#Index,
[!cast<Register>("V"#Index#"M4"),
!cast<Register>("V"#!add(Index, 4)#"M4")]>,
@@ -461,6 +453,7 @@ def VLENB : RISCVReg<0, "vlenb">,
def VCSR : RegisterClass<"RISCV", [XLenVT], 32,
(add VTYPE, VL, VLENB)> {
let RegInfos = XLenRI;
+ let isAllocatable = 0;
}
@@ -488,19 +481,23 @@ defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
vbool32_t, vbool64_t];
defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
- vfloat16m1_t, vfloat32m1_t, vfloat64m1_t,
- vint8mf2_t, vint8mf4_t, vint8mf8_t,
- vint16mf2_t, vint16mf4_t, vint32mf2_t,
- vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t];
+ vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t,
+ vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t,
+ vint16mf2_t, vint16mf4_t, vint32mf2_t,
+ vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t,
+ vbfloat16mf2_t, vfloat32mf2_t];
defvar VM2VTs = [vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
- vfloat16m2_t, vfloat32m2_t, vfloat64m2_t];
+ vfloat16m2_t, vbfloat16m2_t,
+ vfloat32m2_t, vfloat64m2_t];
defvar VM4VTs = [vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
- vfloat16m4_t, vfloat32m4_t, vfloat64m4_t];
+ vfloat16m4_t, vbfloat16m4_t,
+ vfloat32m4_t, vfloat64m4_t];
defvar VM8VTs = [vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
- vfloat16m8_t, vfloat32m8_t, vfloat64m8_t];
+ vfloat16m8_t, vbfloat16m8_t,
+ vfloat32m8_t, vfloat64m8_t];
def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
(add (sequence "V%u", 8, 31),
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index b14cdd40f154..bb9dfe5d0124 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -70,11 +70,11 @@ def : WriteRes<WriteIMul32, [RocketUnitIMul]>;
// Worst case latency is used.
def : WriteRes<WriteIDiv32, [RocketUnitIDiv]> {
let Latency = 34;
- let ResourceCycles = [34];
+ let ReleaseAtCycles = [34];
}
def : WriteRes<WriteIDiv, [RocketUnitIDiv]> {
let Latency = 33;
- let ResourceCycles = [33];
+ let ReleaseAtCycles = [33];
}
// Memory
@@ -157,16 +157,16 @@ def : WriteRes<WriteFMA64, [RocketUnitFPALU]>;
// FP division
// FP division unit on Rocket is not pipelined, so set resource cycles to latency.
-let Latency = 20, ResourceCycles = [20] in {
+let Latency = 20, ReleaseAtCycles = [20] in {
def : WriteRes<WriteFDiv32, [RocketUnitFPDivSqrt]>;
def : WriteRes<WriteFDiv64, [RocketUnitFPDivSqrt]>;
}
// FP square root unit on Rocket is not pipelined, so set resource cycles to latency.
def : WriteRes<WriteFSqrt32, [RocketUnitFPDivSqrt]> { let Latency = 20;
- let ResourceCycles = [20]; }
+ let ReleaseAtCycles = [20]; }
def : WriteRes<WriteFSqrt64, [RocketUnitFPDivSqrt]> { let Latency = 25;
- let ResourceCycles = [25]; }
+ let ReleaseAtCycles = [25]; }
// Others
def : WriteRes<WriteCSR, []>;
@@ -206,7 +206,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index e22c05b30b7f..45783d482f3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -154,12 +154,12 @@ class SiFive7GetReductionCycles<string mx, int sew> {
!eq(mx, "MF8") : 1
);
int c = !add(
- !div(TwoTimesLMUL, DLEN),
+ TwoTimesLMUL,
!mul(5, !add(4, !logtwo(!div(DLEN, sew))))
);
}
-/// Cycles for ordered reductions take approximatley 5*VL cycles
+/// Ordered reductions take approximately 6*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
defvar VLEN = 512;
// (VLEN * LMUL) / SEW
@@ -172,7 +172,7 @@ class SiFive7GetOrderedReductionCycles<string mx, int sew> {
!eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
!eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
);
- int c = !mul(5, VLUpperBound);
+ int c = !mul(6, VLUpperBound);
}
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
@@ -182,6 +182,8 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
WriteSHXADD, WriteSHXADD32,
WriteRotateImm, WriteRotateImm32,
WriteRotateReg, WriteRotateReg32,
+ WriteSingleBit, WriteSingleBitImm,
+ WriteBEXT, WriteBEXTI,
WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
WriteCPOP, WriteCPOP32,
WriteREV8, WriteORCB, WriteSFB,
@@ -206,20 +208,26 @@ def SiFive7Model : SchedMachineModel {
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
-// Pipe V can handle the V extension.
+// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA : ProcResource<1>;
def SiFive7PipeB : ProcResource<1>;
-def SiFive7PipeV : ProcResource<1>;
-}
-
-let BufferSize = 1 in {
-def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division
-def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt
-def SiFive7VA : ProcResource<1> { let Super = SiFive7PipeV; } // Arithmetic sequencer
-def SiFive7VL : ProcResource<1> { let Super = SiFive7PipeV; } // Load sequencer
-def SiFive7VS : ProcResource<1> { let Super = SiFive7PipeV; } // Store sequencer
+def SiFive7IDiv : ProcResource<1>; // Int Division
+def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
+def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
+def SiFive7VL : ProcResource<1>; // Load sequencer
+def SiFive7VS : ProcResource<1>; // Store sequencer
+// The VCQ accepts instructions from the A Pipe and holds them until the
+// vector unit is ready to dequeue them. The unit dequeues up to one instruction
+// per cycle, in order, as soon as the sequencer for that type of instruction is
+// available. This resource is meant to be used for 1 cycle by all vector
+// instructions, to model that only one vector instruction may be dequeued at a
+// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
+// VS sequencer resources below. Each of them will only accept a single
+// instruction at a time and remain busy for the number of cycles associated
+// with that instruction.
+def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
}
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
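The AcquireAtCycles = [0, 1] / ReleaseAtCycles = [1, !add(1, Cycles)] pairs used throughout the rest of this file put the comment above into practice: every vector instruction occupies the VCQ for one cycle at dispatch and then its sequencer (VA, VL or VS) for the per-LMUL cycle count. A hedged toy simulation of that occupancy (plain C++, not the LLVM MachineScheduler; the per-op cycle counts below are illustrative):

// Hedged sketch: one-deep VCQ, one in-flight op per sequencer. Each op holds
// the VCQ for 1 cycle and its sequencer for Cycles cycles after dequeue.
#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct VecOp {
  std::string Name;
  std::string Sequencer; // "VA" (arith), "VL" (load) or "VS" (store)
  unsigned Cycles;       // occupancy derived from LMUL, e.g. 2 for M1, 16 for M8
};

int main() {
  std::vector<VecOp> Ops = {{"vle64.v (M8)", "VL", 16},
                            {"vadd.vv (M8)", "VA", 16},
                            {"vse64.v (M8)", "VS", 16},
                            {"vadd.vv (M1)", "VA", 2}};
  unsigned VCQFree = 0;                    // cycle at which the VCQ frees up
  std::map<std::string, unsigned> SeqFree; // per-sequencer free cycle
  for (const VecOp &Op : Ops) {
    unsigned Start = std::max(VCQFree, SeqFree[Op.Sequencer]);
    VCQFree = Start + 1;                           // VCQ held for one cycle
    SeqFree[Op.Sequencer] = Start + 1 + Op.Cycles; // sequencer busy after dequeue
    std::printf("%-13s dispatches at cycle %2u on %s\n", Op.Name.c_str(), Start,
                Op.Sequencer.c_str());
  }
  return 0;
}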
@@ -256,11 +264,11 @@ def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
// Integer division
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
let Latency = 66;
- let ResourceCycles = [1, 65];
+ let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
let Latency = 34;
- let ResourceCycles = [1, 33];
+ let ReleaseAtCycles = [1, 33];
}
// Bitmanip
@@ -292,6 +300,16 @@ def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}
+// Single-bit instructions
+// BEXT[I] is available on both ALU pipes, while the other single-bit
+// instructions are only available on SiFive7PipeB.
+let Latency = 3 in {
+def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
+def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
+def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
+def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
+}
+
// Memory
def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
@@ -336,7 +354,7 @@ def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
}
-let Latency = 14, ResourceCycles = [1, 13] in {
+let Latency = 14, ReleaseAtCycles = [1, 13] in {
def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
}
@@ -353,9 +371,9 @@ def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
}
def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
- let ResourceCycles = [1, 26]; }
+ let ReleaseAtCycles = [1, 26]; }
def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
- let ResourceCycles = [1, 26]; }
+ let ReleaseAtCycles = [1, 26]; }
// Double precision
let Latency = 7 in {
@@ -369,9 +387,9 @@ def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
}
def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
- let ResourceCycles = [1, 55]; }
+ let ReleaseAtCycles = [1, 55]; }
def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
- let ResourceCycles = [1, 55]; }
+ let ReleaseAtCycles = [1, 55]; }
// Conversions
let Latency = 3 in {
@@ -421,21 +439,21 @@ def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VL], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, IsWorstCase>;
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
// Strided loads and stores operate at one element per cycle and should be
@@ -445,81 +463,101 @@ foreach mx = SchedMxList in {
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
+
+// This predicate is true when the rs2 operand of vlse or vsse is x0, false
+// otherwise.
+def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
+
foreach mx = SchedMxList in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
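The VLDSX0Pred split above separates a true strided access, which as noted operates at one element per cycle, from the rs2 == x0 case where the stride is constant zero and only the cheaper default per-LMUL occupancy applies. A hedged sketch of the two cost formulas, assuming VLEN = 512 as defined in this file and DLEN = 256 (an assumption here, taken as half of VLEN):

// Hedged sketch: per-element cycles for a real strided access versus
// DLEN-wide beats when the stride register is x0.
#include <cstdio>

int main() {
  const unsigned VLEN = 512, DLEN = 256, SEW = 8;
  for (unsigned LMUL : {1u, 2u, 4u, 8u}) {
    unsigned PerElement = VLEN * LMUL / SEW;    // one cycle per element
    unsigned DefaultBeats = VLEN * LMUL / DLEN; // whole-DLEN beats
    std::printf("e8, LMUL=%u: stride != x0 -> %3u cycles, "
                "stride == x0 -> %2u cycles\n",
                LMUL, PerElement, DefaultBeats);
  }
  return 0;
}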
-foreach mx = SchedMxList in {
+// TODO: The MxLists need to be filtered by EEW. We only need to support
+// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
+// since LMUL >= 16/64.
+foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
-foreach mx = SchedMxList in {
+foreach mx = ["M1", "M2", "M4", "M8"] in {
+ defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>;
+ defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
+ 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
+ [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
// VLD*R is LMUL aware
-let Latency = 4, ResourceCycles = [2] in
- def : WriteRes<WriteVLD1R, [SiFive7VL]>;
-let Latency = 4, ResourceCycles = [4] in
- def : WriteRes<WriteVLD2R, [SiFive7VL]>;
-let Latency = 4, ResourceCycles = [8] in
- def : WriteRes<WriteVLD4R, [SiFive7VL]>;
-let Latency = 4, ResourceCycles = [16] in
- def : WriteRes<WriteVLD8R, [SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
+ def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
+ def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
+ def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
+ def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
// VST*R is LMUL aware
-let Latency = 1, ResourceCycles = [2] in
- def : WriteRes<WriteVST1R, [SiFive7VS]>;
-let Latency = 1, ResourceCycles = [4] in
- def : WriteRes<WriteVST2R, [SiFive7VS]>;
-let Latency = 1, ResourceCycles = [8] in
- def : WriteRes<WriteVST4R, [SiFive7VS]>;
-let Latency = 1, ResourceCycles = [16] in
- def : WriteRes<WriteVST8R, [SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
+ def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
+ def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
+ def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
+let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
+ def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
@@ -532,22 +570,22 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
foreach nf=3-8 in {
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in
- defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
}
@@ -557,15 +595,15 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
// Does not chain so set latency high
- let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
}
- let Latency = 1, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
}
}
@@ -575,41 +613,41 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Mask results can't chain.
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
@@ -617,9 +655,9 @@ foreach mx = SchedMxList in {
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
!div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VA], mx, sew, IsWorstCase>;
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -628,24 +666,24 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -653,27 +691,27 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -681,30 +719,30 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Mask results can't chain.
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxListF in {
@@ -712,10 +750,10 @@ foreach mx = SchedMxListF in {
defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
!div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VA], mx, sew, IsWorstCase>;
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -724,38 +762,38 @@ foreach mx = SchedMxListF in {
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
foreach mx = SchedMxListW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -764,11 +802,12 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
- mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VA],
- mx, sew, IsWorstCase>;
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
+ mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
}
}
@@ -776,8 +815,8 @@ foreach mx = SchedMxListWRed in {
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
- let Latency = Cycles, ResourceCycles = [Cycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
+ let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
}
@@ -786,15 +825,15 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- let Latency = RedCycles, ResourceCycles = [RedCycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
+ let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
- let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
+ let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
}
@@ -803,12 +842,12 @@ foreach mx = SchedMxListFWRed in {
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
- let Latency = RedCycles, ResourceCycles = [RedCycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
+ let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
- let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
+ let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
mx, sew, IsWorstCase>;
}
}
@@ -817,35 +856,35 @@ foreach mx = SchedMxListFWRed in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// 16. Vector Permutation Instructions
-let Latency = 4, ResourceCycles = [1] in {
- def : WriteRes<WriteVIMovVX, [SiFive7VA]>;
- def : WriteRes<WriteVIMovXV, [SiFive7VA]>;
- def : WriteRes<WriteVFMovVF, [SiFive7VA]>;
- def : WriteRes<WriteVFMovFV, [SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
+ def : WriteRes<WriteVIMovVX, [SiFive7VCQ, SiFive7VA]>;
+ def : WriteRes<WriteVIMovXV, [SiFive7VCQ, SiFive7VA]>;
+ def : WriteRes<WriteVFMovVF, [SiFive7VCQ, SiFive7VA]>;
+ def : WriteRes<WriteVFMovFV, [SiFive7VCQ, SiFive7VA]>;
}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 8, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
@@ -853,9 +892,9 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VA], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VA], mx, sew, IsWorstCase>;
+ let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -863,23 +902,23 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 4, ResourceCycles = [Cycles] in {
- defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VA], mx, IsWorstCase>;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// VMov*V is LMUL Aware
-let Latency = 4, ResourceCycles = [2] in
- def : WriteRes<WriteVMov1V, [SiFive7VA]>;
-let Latency = 4, ResourceCycles = [4] in
- def : WriteRes<WriteVMov2V, [SiFive7VA]>;
-let Latency = 4, ResourceCycles = [8] in
- def : WriteRes<WriteVMov4V, [SiFive7VA]>;
-let Latency = 4, ResourceCycles = [16] in
- def : WriteRes<WriteVMov8V, [SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
+ def : WriteRes<WriteVMov1V, [SiFive7VCQ, SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
+ def : WriteRes<WriteVMov2V, [SiFive7VCQ, SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
+ def : WriteRes<WriteVMov4V, [SiFive7VCQ, SiFive7VA]>;
+let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
+ def : WriteRes<WriteVMov8V, [SiFive7VCQ, SiFive7VA]>;
// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
@@ -922,10 +961,13 @@ def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
+def : ReadAdvance<ReadFMA16Addend, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
@@ -987,6 +1029,9 @@ def : SiFive7AnyToGPRBypass<ReadORCB>;
def : SiFive7AnyToGPRBypass<ReadREV8>;
def : SiFive7AnyToGPRBypass<ReadSHXADD>;
def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
+// Single-bit instructions
+def : SiFive7AnyToGPRBypass<ReadSingleBit>;
+def : SiFive7AnyToGPRBypass<ReadSingleBitImm>;
// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 2>;
@@ -1154,11 +1199,16 @@ def : ReadAdvance<ReadVMov8V, 0>;
// Others
def : ReadAdvance<ReadVMask, 0>;
+def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
+foreach mx = SchedMxList in {
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
+ foreach sew = SchedSEWSet<mx>.val in
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>;
+}
//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZbc;
-defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 41eefa0c67d9..06ad2075b073 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -55,14 +55,14 @@ def : WriteRes<WriteIMul, [SCR1_MUL]>;
def : WriteRes<WriteIMul32, [SCR1_MUL]>;
// Integer division: latency 33, inverse throughput 33
-let Latency = 33, ResourceCycles = [33] in {
+let Latency = 33, ReleaseAtCycles = [33] in {
def : WriteRes<WriteIDiv32, [SCR1_DIV]>;
def : WriteRes<WriteIDiv, [SCR1_DIV]>;
}
// Load/store instructions on SCR1 have latency 2 and inverse throughput 2
// (SCR1_CFG_RV32IMC_MAX includes TCM)
-let Latency = 2, ResourceCycles=[2] in {
+let Latency = 2, ReleaseAtCycles=[2] in {
// Memory
def : WriteRes<WriteSTB, [SCR1_LSU]>;
def : WriteRes<WriteSTH, [SCR1_LSU]>;
@@ -164,7 +164,9 @@ def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
+def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
index af318ea5bf68..f6c1b096ad90 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -150,8 +150,11 @@ def ReadFMul16 : SchedRead; // 16-bit floating point multiply
def ReadFMul32 : SchedRead; // 32-bit floating point multiply
def ReadFMul64 : SchedRead; // 64-bit floating point multiply
def ReadFMA16 : SchedRead; // 16-bit floating point fused multiply-add
+def ReadFMA16Addend : SchedRead; // 16-bit floating point fused multiply-add (addend)
def ReadFMA32 : SchedRead; // 32-bit floating point fused multiply-add
+def ReadFMA32Addend : SchedRead; // 32-bit floating point fused multiply-add (addend)
def ReadFMA64 : SchedRead; // 64-bit floating point fused multiply-add
+def ReadFMA64Addend : SchedRead; // 64-bit floating point fused multiply-add (addend)
def ReadFDiv16 : SchedRead; // 16-bit floating point divide
def ReadFDiv32 : SchedRead; // 32-bit floating point divide
def ReadFDiv64 : SchedRead; // 64-bit floating point divide
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 676383c5a636..29f2ceec25fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -62,6 +62,52 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources,
def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
}
+// Define a SchedAlias for the SchedWrite associated with (name, mx) whose
+// behavior is aliased to a Variant. The Variant has Latency predLat,
+// AcquireAtCycles predAcquireCycles, and ReleaseAtCycles predReleaseCycles
+// if the SchedPredicate Pred is true; otherwise it has Latency noPredLat,
+// AcquireAtCycles noPredAcquireCycles, and ReleaseAtCycles noPredReleaseCycles.
+// The WorstCase SchedWrite is created similarly if IsWorstCase is true.
+multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
+ list<ProcResourceKind> resources,
+ int predLat, list<int> predAcquireCycles,
+ list<int> predReleaseCycles, int noPredLat,
+ list<int> noPredAcquireCycles,
+ list<int> noPredReleaseCycles,
+ string mx, bit IsWorstCase> {
+ defvar nameMX = name # "_" # mx;
+
+ // Define the different behaviors
+ def nameMX # "_Pred" : SchedWriteRes<resources>{
+ let Latency = predLat;
+ let AcquireAtCycles = predAcquireCycles;
+ let ReleaseAtCycles = predReleaseCycles;
+ }
+ def nameMX # "_NoPred" : SchedWriteRes<resources> {
+ let Latency = noPredLat;
+ let AcquireAtCycles = noPredAcquireCycles;
+ let ReleaseAtCycles = noPredReleaseCycles;
+ }
+
+ // Tie behavior to predicate
+ def NAME # nameMX # "_Variant" : SchedWriteVariant<[
+ SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+ SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+ ]>;
+ def : SchedAlias<
+ !cast<SchedReadWrite>(nameMX),
+ !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>;
+
+ if IsWorstCase then {
+ def NAME # name # "_WorstCase_Variant" : SchedWriteVariant<[
+ SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>,
+ SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX # "_NoPred")]>
+ ]>;
+ def : SchedAlias<
+ !cast<SchedReadWrite>(name # "_WorstCase"),
+ !cast<SchedReadWrite>(NAME # name # "_WorstCase_Variant")>;
+ }
+}
+
// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
// SchedMxList variants above. Each multiclass is responsible for defining
@@ -687,6 +733,12 @@ def ReadVMov8V : SchedRead;
// Others
def ReadVMask : SchedRead;
+def ReadVMergeOp_WorstCase : SchedRead;
+foreach mx = SchedMxList in {
+ def ReadVMergeOp_ # mx : SchedRead;
+ foreach sew = SchedSEWSet<mx>.val in
+ def ReadVMergeOp_ # mx # "_E" # sew : SchedRead;
+}
//===----------------------------------------------------------------------===//
/// Define default scheduler resources for V.
@@ -1050,6 +1102,12 @@ def : ReadAdvance<ReadVMov8V, 0>;
// Others
def : ReadAdvance<ReadVMask, 0>;
+def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
+foreach mx = SchedMxList in {
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
+ foreach sew = SchedSEWSet<mx>.val in
+ def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>;
+}
} // Unsupported
} // UnsupportedSchedV
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index eec2e7359eda..7b64d3cee9c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -29,6 +29,12 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"
+namespace llvm::RISCVTuneInfoTable {
+
+#define GET_RISCVTuneInfoTable_IMPL
+#include "RISCVGenSearchableTables.inc"
+} // namespace llvm::RISCVTuneInfoTable
+
static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
cl::init(true), cl::Hidden);
@@ -48,6 +54,13 @@ static cl::opt<unsigned> RISCVMaxBuildIntsCost(
cl::desc("The maximum cost used for building integers."), cl::init(0),
cl::Hidden);
+static cl::opt<bool> UseAA("riscv-use-aa", cl::init(true),
+ cl::desc("Enable the use of AA during codegen."));
+
+static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
+ "riscv-min-jump-table-entries", cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table on RISCV"));
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -62,12 +75,13 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
if (TuneCPU.empty())
TuneCPU = CPU;
- ParseSubtargetFeatures(CPU, TuneCPU, FS);
- if (Is64Bit) {
- XLenVT = MVT::i64;
- XLen = 64;
- }
+ TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU);
+  // If there is no TuneInfo for this CPU, we fall back to generic.
+ if (!TuneInfo)
+ TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo("generic");
+ assert(TuneInfo && "TuneInfo shouldn't be nullptr!");
+ ParseSubtargetFeatures(CPU, TuneCPU, FS);
TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
RISCVFeatures::validate(TT, getFeatureBits());
return *this;
@@ -175,3 +189,13 @@ void RISCVSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(createRISCVMacroFusionDAGMutation());
}
+
+ /// Enable use of alias analysis during code generation (during MI
+ /// scheduling, DAGCombine, etc.).
+bool RISCVSubtarget::useAA() const { return UseAA; }
+
+unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
+ return RISCVMinimumJumpTableEntries.getNumOccurrences() > 0
+ ? RISCVMinimumJumpTableEntries
+ : TuneInfo->MinimumJumpTableEntries;
+}
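
The rule above is a simple override: an explicitly supplied -riscv-min-jump-table-entries value wins, otherwise the per-CPU tune table provides the default. A stand-alone C++ sketch of the same fallback, using std::optional in place of the cl::opt occurrence check (the names here are illustrative, not LLVM API):

    #include <optional>

    struct TuneInfo {
      unsigned MinimumJumpTableEntries;
    };

    // Prefer a value the user set explicitly on the command line; otherwise
    // fall back to the per-CPU tuning default (the getNumOccurrences() check).
    unsigned minimumJumpTableEntries(std::optional<unsigned> CommandLineValue,
                                     const TuneInfo &Tune) {
      return CommandLineValue ? *CommandLineValue : Tune.MinimumJumpTableEntries;
    }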
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index a831beb7edd9..23d56cfa6e4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include <bitset>
#define GET_SUBTARGETINFO_HEADER
#include "RISCVGenSubtargetInfo.inc"
@@ -32,13 +33,35 @@
namespace llvm {
class StringRef;
+namespace RISCVTuneInfoTable {
+
+struct RISCVTuneInfo {
+ const char *Name;
+ uint8_t PrefFunctionAlignment;
+ uint8_t PrefLoopAlignment;
+
+ // Information needed by LoopDataPrefetch.
+ uint16_t CacheLineSize;
+ uint16_t PrefetchDistance;
+ uint16_t MinPrefetchStride;
+ unsigned MaxPrefetchIterationsAhead;
+
+ unsigned MinimumJumpTableEntries;
+};
+
+#define GET_RISCVTuneInfoTable_DECL
+#include "RISCVGenSearchableTables.inc"
+} // namespace RISCVTuneInfoTable
+
class RISCVSubtarget : public RISCVGenSubtargetInfo {
public:
+ // clang-format off
enum RISCVProcFamilyEnum : uint8_t {
Others,
SiFive7,
+ VentanaVeyron,
};
-
+ // clang-format on
private:
virtual void anchor();
@@ -48,16 +71,13 @@ private:
bool ATTRIBUTE = DEFAULT;
#include "RISCVGenSubtargetInfo.inc"
- unsigned XLen = 32;
unsigned ZvlLen = 0;
- MVT XLenVT = MVT::i32;
unsigned RVVVectorBitsMin;
unsigned RVVVectorBitsMax;
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister;
- Align PrefFunctionAlignment;
- Align PrefLoopAlignment;
+ const RISCVTuneInfoTable::RISCVTuneInfo *TuneInfo;
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
@@ -98,8 +118,16 @@ public:
}
bool enableMachineScheduler() const override { return true; }
- Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
- Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
+ bool enablePostRAScheduler() const override {
+ return getSchedModel().PostRAScheduler || UsePostRAScheduler;
+ }
+
+ Align getPrefFunctionAlignment() const {
+ return Align(TuneInfo->PrefFunctionAlignment);
+ }
+ Align getPrefLoopAlignment() const {
+ return Align(TuneInfo->PrefLoopAlignment);
+ }
/// Returns RISC-V processor family.
/// Avoid this function! CPU specifics should be kept local to this class
@@ -124,12 +152,15 @@ public:
return hasStdExtZfhOrZfhmin() || hasStdExtZhinxOrZhinxmin();
}
bool hasHalfFPLoadStoreMove() const {
- return HasStdExtZfh || HasStdExtZfhmin || HasStdExtZfbfmin ||
- HasStdExtZvfbfwma;
+ return hasStdExtZfhOrZfhmin() || HasStdExtZfbfmin;
}
bool is64Bit() const { return IsRV64; }
- MVT getXLenVT() const { return XLenVT; }
- unsigned getXLen() const { return XLen; }
+ MVT getXLenVT() const {
+ return is64Bit() ? MVT::i64 : MVT::i32;
+ }
+ unsigned getXLen() const {
+ return is64Bit() ? 64 : 32;
+ }
unsigned getFLen() const {
if (HasStdExtD)
return 64;
@@ -139,7 +170,7 @@ public:
return 0;
}
- unsigned getELEN() const {
+ unsigned getELen() const {
assert(hasVInstructions() && "Expected V extension");
return hasVInstructionsI64() ? 64 : 32;
}
@@ -162,16 +193,21 @@ public:
return UserReservedRegister[i];
}
- bool hasMacroFusion() const { return hasLUIADDIFusion(); }
+ bool hasMacroFusion() const {
+ return hasLUIADDIFusion() || hasAUIPCADDIFusion() ||
+ hasShiftedZExtFusion() || hasLDADDFusion();
+ }
// Vector codegen related methods.
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
+ bool hasVInstructionsF16Minimal() const {
+ return HasStdExtZvfhmin || HasStdExtZvfh;
+ }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
- // FIXME: Consider Zfinx in the future
- bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
- // FIXME: Consider Zdinx in the future
- bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
+ bool hasVInstructionsBF16() const { return HasStdExtZvfbfmin; }
+ bool hasVInstructionsF32() const { return HasStdExtZve32f; }
+ bool hasVInstructionsF64() const { return HasStdExtZve64d; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
@@ -222,6 +258,26 @@ public:
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
&Mutations) const override;
+
+ bool useAA() const override;
+
+ unsigned getCacheLineSize() const override {
+ return TuneInfo->CacheLineSize;
+ };
+ unsigned getPrefetchDistance() const override {
+ return TuneInfo->PrefetchDistance;
+ };
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const override {
+ return TuneInfo->MinPrefetchStride;
+ };
+ unsigned getMaxPrefetchIterationsAhead() const override {
+ return TuneInfo->MaxPrefetchIterationsAhead;
+ };
+
+ unsigned getMinimumJumpTableEntries() const;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 59dac5c7b57d..3abdb6003659 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
@@ -34,6 +35,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
#include <optional>
using namespace llvm;
@@ -71,21 +73,55 @@ static cl::opt<bool> EnableRISCVCopyPropagation(
cl::desc("Enable the copy propagation with RISC-V copy instr"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableRISCVDeadRegisterElimination(
+ "riscv-enable-dead-defs", cl::Hidden,
+ cl::desc("Enable the pass that removes dead"
+             " definitions and replaces stores to"
+ " them with stores to x0"),
+ cl::init(true));
+
+static cl::opt<bool>
+ EnableSinkFold("riscv-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ EnableLoopDataPrefetch("riscv-enable-loop-data-prefetch", cl::Hidden,
+ cl::desc("Enable the loop data prefetch pass"),
+ cl::init(true));
+
+static cl::opt<bool>
+ EnableSplitRegAlloc("riscv-split-regalloc", cl::Hidden,
+ cl::desc("Enable Split RegisterAlloc for RVV"),
+ cl::init(true));
+
+static cl::opt<bool> EnableMISchedLoadClustering(
+ "riscv-misched-load-clustering", cl::Hidden,
+ cl::desc("Enable load clustering in the machine scheduler"),
+ cl::init(false));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeRISCVO0PreLegalizerCombinerPass(*PR);
+ initializeRISCVPreLegalizerCombinerPass(*PR);
+ initializeRISCVPostLegalizerCombinerPass(*PR);
initializeKCFIPass(*PR);
+ initializeRISCVDeadRegisterDefinitionsPass(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVCodeGenPreparePass(*PR);
+ initializeRISCVPostRAExpandPseudoPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVOptWInstrsPass(*PR);
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
+ initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
+ initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
initializeRISCVInitUndefPass(*PR);
initializeRISCVMoveMergePass(*PR);
@@ -109,7 +145,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
+ CodeGenOptLevel OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
@@ -177,13 +213,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
llvm::bit_floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
SmallString<512> Key;
- Key += "RVVMin";
- Key += std::to_string(RVVBitsMin);
- Key += "RVVMax";
- Key += std::to_string(RVVBitsMax);
- Key += CPU;
- Key += TuneCPU;
- Key += FS;
+ raw_svector_ostream(Key) << "RVVMin" << RVVBitsMin << "RVVMax" << RVVBitsMax
+ << CPU << TuneCPU << FS;
auto &I = SubtargetMap[Key];
if (!I) {
// This needs to be done before we create a new subtarget since any
@@ -228,10 +259,84 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
}
namespace {
+
+class RVVRegisterRegAlloc : public RegisterRegAllocBase<RVVRegisterRegAlloc> {
+public:
+ RVVRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ : RegisterRegAllocBase(N, D, C) {}
+};
+
+static bool onlyAllocateRVVReg(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC) {
+ return RISCV::VRRegClass.hasSubClassEq(&RC) ||
+ RISCV::VRM2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRM4RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRM8RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN2M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN2M2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN2M4RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN3M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN3M2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN4M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN4M2RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN5M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN6M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN7M1RegClass.hasSubClassEq(&RC) ||
+ RISCV::VRN8M1RegClass.hasSubClassEq(&RC);
+}
+
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+
+static llvm::once_flag InitializeDefaultRVVRegisterAllocatorFlag;
+
+/// -riscv-rvv-regalloc=<fast|basic|greedy> command line option.
+/// This option selects the register allocator used only for RVV registers.
+/// For example: -riscv-rvv-regalloc=basic
+static cl::opt<RVVRegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RVVRegisterRegAlloc>>
+ RVVRegAlloc("riscv-rvv-regalloc", cl::Hidden,
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use for RVV register."));
+
+static void initializeDefaultRVVRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RVVRegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RVVRegAlloc;
+ RVVRegisterRegAlloc::setDefault(RVVRegAlloc);
+ }
+}
+
+static FunctionPass *createBasicRVVRegisterAllocator() {
+ return createBasicRegisterAllocator(onlyAllocateRVVReg);
+}
+
+static FunctionPass *createGreedyRVVRegisterAllocator() {
+ return createGreedyRegisterAllocator(onlyAllocateRVVReg);
+}
+
+static FunctionPass *createFastRVVRegisterAllocator() {
+ return createFastRegisterAllocator(onlyAllocateRVVReg, false);
+}
+
+static RVVRegisterRegAlloc basicRegAllocRVVReg("basic",
+ "basic register allocator",
+ createBasicRVVRegisterAllocator);
+static RVVRegisterRegAlloc
+ greedyRegAllocRVVReg("greedy", "greedy register allocator",
+ createGreedyRVVRegisterAllocator);
+
+static RVVRegisterRegAlloc fastRegAllocRVVReg("fast", "fast register allocator",
+ createFastRVVRegisterAllocator);
+
class RISCVPassConfig : public TargetPassConfig {
public:
RISCVPassConfig(RISCVTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ if (TM.getOptLevel() != CodeGenOptLevel::None)
+ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+ setEnableSinkAndFold(EnableSinkFold);
+ }
RISCVTargetMachine &getRISCVTargetMachine() const {
return getTM<RISCVTargetMachine>();
@@ -240,12 +345,16 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ ScheduleDAGMILive *DAG = nullptr;
+ if (EnableMISchedLoadClustering) {
+ DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ }
if (ST.hasMacroFusion()) {
- ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG = DAG ? DAG : createGenericSchedLive(C);
DAG->addMutation(createRISCVMacroFusionDAGMutation());
- return DAG;
}
- return nullptr;
+ return DAG;
}
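
The reworked createMachineScheduler only builds a DAG when at least one mutation is wanted and returns null otherwise, which keeps the default scheduler; when both load clustering and macro fusion are enabled, both mutations attach to the same DAG. A stand-alone C++ sketch of that control flow (the types here are placeholders, not LLVM API):

    #include <memory>
    #include <vector>

    struct SchedDAG {
      std::vector<int> Mutations;
      void addMutation(int M) { Mutations.push_back(M); }
    };

    // Create the DAG lazily and attach whichever mutations are requested.
    std::unique_ptr<SchedDAG> buildScheduler(bool LoadClustering,
                                             bool MacroFusion) {
      std::unique_ptr<SchedDAG> DAG;
      if (LoadClustering) {
        DAG = std::make_unique<SchedDAG>();
        DAG->addMutation(/*load clustering*/ 0);
      }
      if (MacroFusion) {
        if (!DAG)
          DAG = std::make_unique<SchedDAG>();
        DAG->addMutation(/*macro fusion*/ 1);
      }
      return DAG; // null means "use the default scheduler"
    }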
ScheduleDAGInstrs *
@@ -263,16 +372,22 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
+ void addPreRegBankSelect() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreSched2() override;
void addMachineSSAOptimization() override;
+ FunctionPass *createRVVRegAllocPass(bool Optimized);
+ bool addRegAssignAndRewriteFast() override;
+ bool addRegAssignAndRewriteOptimized() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addOptimizedRegAlloc() override;
+ void addFastRegAlloc() override;
};
} // namespace
@@ -280,10 +395,42 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
return new RISCVPassConfig(*this, PM);
}
+FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
+ // Initialize the global default.
+ llvm::call_once(InitializeDefaultRVVRegisterAllocatorFlag,
+ initializeDefaultRVVRegisterAllocatorOnce);
+
+ RegisterRegAlloc::FunctionPassCtor Ctor = RVVRegisterRegAlloc::getDefault();
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ if (Optimized)
+ return createGreedyRVVRegisterAllocator();
+
+ return createFastRVVRegisterAllocator();
+}
+
+bool RISCVPassConfig::addRegAssignAndRewriteFast() {
+ if (EnableSplitRegAlloc)
+ addPass(createRVVRegAllocPass(false));
+ return TargetPassConfig::addRegAssignAndRewriteFast();
+}
+
+bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
+ if (EnableSplitRegAlloc) {
+ addPass(createRVVRegAllocPass(true));
+ addPass(createVirtRegRewriter(false));
+ }
+ return TargetPassConfig::addRegAssignAndRewriteOptimized();
+}
+
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOptLevel::None) {
+ if (EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
+
addPass(createRISCVGatherScatterLoweringPass());
addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
@@ -293,7 +440,7 @@ void RISCVPassConfig::addIRPasses() {
}
bool RISCVPassConfig::addPreISel() {
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
// Add a barrier before instruction selection so that we will not get
// deleted block address after enabling default outlining. See D99707 for
// more details.
@@ -320,11 +467,24 @@ bool RISCVPassConfig::addIRTranslator() {
return false;
}
+void RISCVPassConfig::addPreLegalizeMachineIR() {
+ if (getOptLevel() == CodeGenOptLevel::None) {
+ addPass(createRISCVO0PreLegalizerCombiner());
+ } else {
+ addPass(createRISCVPreLegalizerCombiner());
+ }
+}
+
bool RISCVPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
+void RISCVPassConfig::addPreRegBankSelect() {
+ if (getOptLevel() != CodeGenOptLevel::None)
+ addPass(createRISCVPostLegalizerCombiner());
+}
+
bool RISCVPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
@@ -336,6 +496,8 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
}
void RISCVPassConfig::addPreSched2() {
+ addPass(createRISCVPostRAExpandPseudoPass());
+
// Emit KCFI checks for indirect calls.
addPass(createKCFIPass());
}
@@ -349,12 +511,13 @@ void RISCVPassConfig::addPreEmitPass() {
// propagation after the machine outliner (which runs after addPreEmitPass)
// currently leads to incorrect code-gen, where copies to registers within
// outlined functions are removed erroneously.
- if (TM->getOptLevel() >= CodeGenOpt::Default && EnableRISCVCopyPropagation)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default &&
+ EnableRISCVCopyPropagation)
addPass(createMachineCopyPropagationPass(true));
}
void RISCVPassConfig::addPreEmitPass2() {
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMoveMergePass());
// Schedule PushPop Optimization before expansion of Pseudo instruction,
// ensuring return instruction is detected correctly.
@@ -374,32 +537,45 @@ void RISCVPassConfig::addPreEmitPass2() {
}
void RISCVPassConfig::addMachineSSAOptimization() {
+ addPass(createRISCVFoldMasksPass());
+
TargetPassConfig::addMachineSSAOptimization();
+
if (EnableMachineCombiner)
addPass(&MachineCombinerID);
- if (TM->getTargetTriple().getArch() == Triple::riscv64) {
+ if (TM->getTargetTriple().isRISCV64()) {
addPass(createRISCVOptWInstrsPass());
}
}
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVPreRAExpandPseudoPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVInsertVSETVLIPass());
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableRISCVDeadRegisterElimination)
+ addPass(createRISCVDeadRegisterDefinitionsPass());
addPass(createRISCVInsertReadWriteCSRPass());
+ addPass(createRISCVInsertWriteVXRMPass());
}
void RISCVPassConfig::addOptimizedRegAlloc() {
- if (getOptimizeRegAlloc())
- insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
+ insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
TargetPassConfig::addOptimizedRegAlloc();
}
+void RISCVPassConfig::addFastRegAlloc() {
+ addPass(createRISCVInitUndefPass());
+ TargetPassConfig::addFastRegAlloc();
+}
+
+
void RISCVPassConfig::addPostRegAlloc() {
- if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableRedundantCopyElimination)
addPass(createRISCVRedundantCopyEliminationPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 775422075314..68dfb3c81f2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -29,7 +29,7 @@ public:
RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
bool JIT);
const RISCVSubtarget *getSubtargetImpl(const Function &F) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 62883e962b4c..4614446b2150 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,28 +34,6 @@ static cl::opt<unsigned> SLPMaxVF(
"exclusively by SLP vectorizer."),
cl::Hidden);
-InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
- // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
- // implementation-defined.
- if (!VT.isVector())
- return InstructionCost::getInvalid();
- unsigned DLenFactor = ST->getDLenFactor();
- unsigned Cost;
- if (VT.isScalableVector()) {
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) =
- RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
- if (Fractional)
- Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
- else
- Cost = (LMul * DLenFactor);
- } else {
- Cost = divideCeil(VT.getSizeInBits(), ST->getRealMinVLen() / DLenFactor);
- }
- return Cost;
-}
-
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
@@ -67,8 +45,7 @@ InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
// Otherwise, we check how many instructions it will take to materialise.
const DataLayout &DL = getDataLayout();
- return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
- getST()->getFeatureBits());
+ return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *getST());
}
// Look for patterns of shift followed by AND that can be turned into a pair of
@@ -149,6 +126,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
// Power of 2 is a shift. Negated power of 2 is a shift and a negate.
if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
return TTI::TCC_Free;
+ // One more or less than a power of 2 can use SLLI+ADD/SUB.
+ if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
+ return TTI::TCC_Free;
// FIXME: There is no MULI instruction.
Takes12BitImm = true;
break;
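
The new TCC_Free case reflects a standard strength reduction: a multiply by 2^k + 1 or 2^k - 1 needs only a shift plus one add or sub, so the immediate costs nothing extra to materialize. A stand-alone C++ sketch of the lowering the cost model assumes (illustrative only, not the actual instruction selection code):

    #include <cstdint>

    // x * (2^k + 1) lowers to SLLI + ADD, e.g. x * 9 == (x << 3) + x.
    uint64_t mulByPow2Plus1(uint64_t X, unsigned K) { return (X << K) + X; }

    // x * (2^k - 1) lowers to SLLI + SUB, e.g. x * 7 == (x << 3) - x.
    uint64_t mulByPow2Minus1(uint64_t X, unsigned K) { return (X << K) - X; }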
@@ -192,7 +172,9 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
+ return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
+ ? TTI::PSK_FastHardware
+ : TTI::PSK_Software;
}
bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
@@ -263,19 +245,12 @@ static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
}
-/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
-/// is generally quadratic in the number of vreg implied by LMUL. Note that
-/// operand (index and possibly mask) are handled separately.
-InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) {
- return getLMULCost(VT) * getLMULCost(VT);
-}
-
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- Kind = improveShuffleKindFromMask(Kind, Mask);
+ Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
@@ -292,52 +267,86 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// If the size of the element is < ELEN then shuffles of interleaves and
// deinterleaves of 2 vectors can be lowered into the following
// sequences
- if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
+ if (EltTp.getScalarSizeInBits() < ST->getELen()) {
// Example sequence:
// vsetivli zero, 4, e8, mf4, ta, ma (ignored)
// vwaddu.vv v10, v8, v9
// li a0, -1 (ignored)
// vwmaccu.vx v10, a0, v9
if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
- return 2 * LT.first * getLMULCost(LT.second);
+ return 2 * LT.first * TLI->getLMULCost(LT.second);
if (Mask[0] == 0 || Mask[0] == 1) {
auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
// Example sequence:
// vnsrl.wi v10, v8, 0
if (equal(DeinterleaveMask, Mask))
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getLMULCost(LT.second);
}
}
-
- // vrgather + cost of generating the mask constant.
- // We model this for an unknown mask with a single vrgather.
- if (LT.first == 1 &&
- (LT.second.getScalarSizeInBits() != 8 ||
- LT.second.getVectorNumElements() <= 256)) {
- VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
- InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
- return IndexCost + getVRGatherVVCost(LT.second);
- }
}
- break;
+ // vrgather + cost of generating the mask constant.
+ // We model this for an unknown mask with a single vrgather.
+ if (LT.second.isFixedLengthVector() && LT.first == 1 &&
+ (LT.second.getScalarSizeInBits() != 8 ||
+ LT.second.getVectorNumElements() <= 256)) {
+ VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
+ InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+ return IndexCost + TLI->getVRGatherVVCost(LT.second);
+ }
+ [[fallthrough]];
}
case TTI::SK_Transpose:
case TTI::SK_PermuteTwoSrc: {
- if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
- // 2 x (vrgather + cost of generating the mask constant) + cost of mask
- // register for the second vrgather. We model this for an unknown
- // (shuffle) mask.
- if (LT.first == 1 &&
- (LT.second.getScalarSizeInBits() != 8 ||
- LT.second.getVectorNumElements() <= 256)) {
- auto &C = Tp->getContext();
- auto EC = Tp->getElementCount();
- VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
- VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
- InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
- InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
- return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
+ // 2 x (vrgather + cost of generating the mask constant) + cost of mask
+ // register for the second vrgather. We model this for an unknown
+ // (shuffle) mask.
+ if (LT.second.isFixedLengthVector() && LT.first == 1 &&
+ (LT.second.getScalarSizeInBits() != 8 ||
+ LT.second.getVectorNumElements() <= 256)) {
+ auto &C = Tp->getContext();
+ auto EC = Tp->getElementCount();
+ VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
+ VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
+ InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+ InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
+ return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
+ }
+ [[fallthrough]];
+ }
+ case TTI::SK_Select: {
+ // We are going to permute multiple sources and the result will be in
+    // multiple destinations. We provide an accurate cost only for splits
+    // where the element type remains the same.
+ if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
+ LT.second.isFixedLengthVector() &&
+ LT.second.getVectorElementType().getSizeInBits() ==
+ Tp->getElementType()->getPrimitiveSizeInBits() &&
+ LT.second.getVectorNumElements() <
+ cast<FixedVectorType>(Tp)->getNumElements() &&
+ divideCeil(Mask.size(),
+ cast<FixedVectorType>(Tp)->getNumElements()) ==
+ static_cast<unsigned>(*LT.first.getValue())) {
+ unsigned NumRegs = *LT.first.getValue();
+ unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
+ unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
+ auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
+
+ InstructionCost Cost = 0;
+ for (unsigned I = 0; I < NumRegs; ++I) {
+ bool IsSingleVector = true;
+ SmallVector<int> SubMask(SubVF, PoisonMaskElem);
+ transform(Mask.slice(I * SubVF,
+ I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
+ SubMask.begin(), [&](int I) {
+ bool SingleSubVector = I / VF == 0;
+ IsSingleVector &= SingleSubVector;
+ return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
+ });
+ Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
+ : TTI::SK_PermuteTwoSrc,
+ SubVecTy, SubMask, CostKind, 0, nullptr);
+ return Cost;
}
}
break;
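
The SK_Select path above splits one wide two-source shuffle into per-register sub-shuffles: each SubVF-wide chunk of the mask is remapped into indices for a SubVF-wide sub-problem, and a chunk that reads only one source is costed as a single-source permute. A stand-alone C++ sketch of that remapping (VF, SubVF and the mask are made-up example values):

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned VF = 8, SubVF = 4, NumRegs = 2;
      // Interleave of two 8-element sources; entries >= VF read the second one.
      const int Mask[VF] = {0, 8, 1, 9, 2, 10, 3, 11};
      for (unsigned I = 0; I < NumRegs; ++I) {
        bool SingleSource = true;
        std::vector<int> SubMask;
        for (unsigned J = 0; J < SubVF; ++J) {
          int M = Mask[I * SubVF + J];
          bool FromFirst = M / (int)VF == 0; // which source this lane reads
          SingleSource &= FromFirst;
          SubMask.push_back((FromFirst ? 0 : 1) * (int)SubVF + M % (int)VF);
        }
        std::printf("reg %u (%s source):", I, SingleSource ? "one" : "two");
        for (int M : SubMask)
          std::printf(" %d", M);
        std::printf("\n");
      }
      return 0;
    }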
@@ -356,19 +365,19 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslidedown.vi v8, v9, 2
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getVSlideCost(LT.second);
case TTI::SK_InsertSubvector:
// Example sequence:
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
// vslideup.vi v8, v9, 2
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getVSlideCost(LT.second);
case TTI::SK_Select: {
// Example sequence:
// li a0, 90
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
// vmv.s.x v0, a0
// vmerge.vvm v8, v9, v8, v0
- return LT.first * 3 * getLMULCost(LT.second);
+ return LT.first * 3 * TLI->getLMULCost(LT.second);
}
case TTI::SK_Broadcast: {
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -380,7 +389,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
- return LT.first * getLMULCost(LT.second) * 3;
+ return LT.first * TLI->getLMULCost(LT.second) * 3;
}
// Example sequence:
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -391,26 +400,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
- return LT.first * getLMULCost(LT.second) * 6;
+ return LT.first * TLI->getLMULCost(LT.second) * 6;
}
if (HasScalar) {
// Example sequence:
// vmv.v.x v8, a0
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getLMULCost(LT.second);
}
// Example sequence:
// vrgather.vi v9, v8, 0
- // TODO: vrgather could be slower than vmv.v.x. It is
- // implementation-dependent.
- return LT.first * getLMULCost(LT.second);
+ return LT.first * TLI->getVRGatherVICost(LT.second);
}
case TTI::SK_Splice:
// vslidedown+vslideup.
// TODO: Multiplying by LT.first implies this legalizes into multiple copies
// of similar code, but I think we expand through memory.
- return 2 * LT.first * getLMULCost(LT.second);
+ return 2 * LT.first * TLI->getVSlideCost(LT.second);
case TTI::SK_Reverse: {
// TODO: Cases to improve here:
// * Illegal vector types
@@ -430,7 +437,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (LT.second.isFixedLengthVector())
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
- InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second);
+ InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
// Mask operation additionally required extend and truncate
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -495,7 +502,7 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
InstructionCost Cost = MemCost;
for (unsigned Index : Indices) {
FixedVectorType *SubVecTy =
- FixedVectorType::get(FVTy->getElementType(), VF);
+ FixedVectorType::get(FVTy->getElementType(), VF * Factor);
auto Mask = createStrideMask(Index, Factor, VF);
InstructionCost ShuffleCost =
getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask,
@@ -662,6 +669,31 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
{Intrinsic::rint, MVT::nxv2f64, 7},
{Intrinsic::rint, MVT::nxv4f64, 7},
{Intrinsic::rint, MVT::nxv8f64, 7},
+ {Intrinsic::lrint, MVT::v2i32, 1},
+ {Intrinsic::lrint, MVT::v4i32, 1},
+ {Intrinsic::lrint, MVT::v8i32, 1},
+ {Intrinsic::lrint, MVT::v16i32, 1},
+ {Intrinsic::lrint, MVT::nxv1i32, 1},
+ {Intrinsic::lrint, MVT::nxv2i32, 1},
+ {Intrinsic::lrint, MVT::nxv4i32, 1},
+ {Intrinsic::lrint, MVT::nxv8i32, 1},
+ {Intrinsic::lrint, MVT::nxv16i32, 1},
+ {Intrinsic::lrint, MVT::v2i64, 1},
+ {Intrinsic::lrint, MVT::v4i64, 1},
+ {Intrinsic::lrint, MVT::v8i64, 1},
+ {Intrinsic::lrint, MVT::v16i64, 1},
+ {Intrinsic::lrint, MVT::nxv1i64, 1},
+ {Intrinsic::lrint, MVT::nxv2i64, 1},
+ {Intrinsic::lrint, MVT::nxv4i64, 1},
+ {Intrinsic::lrint, MVT::nxv8i64, 1},
+ {Intrinsic::llrint, MVT::v2i64, 1},
+ {Intrinsic::llrint, MVT::v4i64, 1},
+ {Intrinsic::llrint, MVT::v8i64, 1},
+ {Intrinsic::llrint, MVT::v16i64, 1},
+ {Intrinsic::llrint, MVT::nxv1i64, 1},
+ {Intrinsic::llrint, MVT::nxv2i64, 1},
+ {Intrinsic::llrint, MVT::nxv4i64, 1},
+ {Intrinsic::llrint, MVT::nxv8i64, 1},
{Intrinsic::nearbyint, MVT::v2f32, 9},
{Intrinsic::nearbyint, MVT::v4f32, 9},
{Intrinsic::nearbyint, MVT::v8f32, 9},
@@ -1045,6 +1077,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::floor:
case Intrinsic::trunc:
case Intrinsic::rint:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
case Intrinsic::round:
case Intrinsic::roundeven: {
// These all use the same code.
@@ -1074,6 +1108,12 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first;
break;
}
+ case Intrinsic::ctpop: {
+ auto LT = getTypeLegalizationCost(RetTy);
+ if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
+ return LT.first;
+ break;
+ }
case Intrinsic::abs: {
auto LT = getTypeLegalizationCost(RetTy);
if (ST->hasVInstructions() && LT.second.isVector()) {
@@ -1142,8 +1182,8 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
// Skip if element size of Dst or Src is bigger than ELEN.
- if (Src->getScalarSizeInBits() > ST->getELEN() ||
- Dst->getScalarSizeInBits() > ST->getELEN())
+ if (Src->getScalarSizeInBits() > ST->getELen() ||
+ Dst->getScalarSizeInBits() > ST->getELen())
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1226,7 +1266,7 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
// Skip if scalar size of Ty is bigger than ELEN.
- if (Ty->getScalarSizeInBits() > ST->getELEN())
+ if (Ty->getScalarSizeInBits() > ST->getELen())
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
@@ -1253,7 +1293,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
// Skip if scalar size of Ty is bigger than ELEN.
- if (Ty->getScalarSizeInBits() > ST->getELEN())
+ if (Ty->getScalarSizeInBits() > ST->getELen())
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -1288,7 +1328,7 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost(
FMF, CostKind);
// Skip if scalar size of ResTy is bigger than ELEN.
- if (ResTy->getScalarSizeInBits() > ST->getELEN())
+ if (ResTy->getScalarSizeInBits() > ST->getELen())
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
FMF, CostKind);
@@ -1349,7 +1389,7 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// handles the LT.first term for us.
if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
LT.second.isVector())
- BaseCost *= getLMULCost(LT.second);
+ BaseCost *= TLI->getLMULCost(LT.second);
return Cost + BaseCost;
}
@@ -1368,7 +1408,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
I);
// Skip if scalar size of ValTy is bigger than ELEN.
- if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELEN())
+ if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
@@ -1437,6 +1477,15 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
+InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Opcode == Instruction::PHI ? 0 : 1;
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
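
The new getCFInstrCost hook encodes the comment above: under the reciprocal-throughput model branches are assumed to be predicted and cost nothing, while for the other cost kinds a PHI is free and any other control-flow instruction is charged 1. A small standalone sketch of that behaviour; the enum names are local stand-ins, not the TTI enums.

  // Illustrative mirror of the control-flow costing above; not the LLVM API.
  #include <cstdio>

  enum class Kind { RecipThroughput, Latency, CodeSize, SizeAndLatency };
  enum class CFOp { PHI, Br, Switch, Ret };

  static unsigned cfInstrCost(CFOp Op, Kind K) {
    if (K != Kind::RecipThroughput)
      return Op == CFOp::PHI ? 0 : 1; // size/latency: PHIs are free, branches cost 1
    return 0;                         // throughput: branches are assumed predicted
  }

  int main() {
    std::printf("br  (throughput): %u\n", cfInstrCost(CFOp::Br, Kind::RecipThroughput));
    std::printf("br  (code size):  %u\n", cfInstrCost(CFOp::Br, Kind::CodeSize));
    std::printf("phi (code size):  %u\n", cfInstrCost(CFOp::PHI, Kind::CodeSize));
  }
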
@@ -1451,8 +1500,26 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
// This type is legalized to a scalar type.
- if (!LT.second.isVector())
- return 0;
+ if (!LT.second.isVector()) {
+ auto *FixedVecTy = cast<FixedVectorType>(Val);
+ // If Index is a known constant, cost is zero.
+ if (Index != -1U)
+ return 0;
+ // Extract/InsertElement with non-constant index is very costly when
+ // scalarized; estimate cost of loads/stores sequence via the stack:
+ // ExtractElement cost: store vector to stack, load scalar;
+ // InsertElement cost: store vector to stack, store scalar, load vector.
+ Type *ElemTy = FixedVecTy->getElementType();
+ auto NumElems = FixedVecTy->getNumElements();
+ auto Align = DL.getPrefTypeAlign(ElemTy);
+ InstructionCost LoadCost =
+ getMemoryOpCost(Instruction::Load, ElemTy, Align, 0, CostKind);
+ InstructionCost StoreCost =
+ getMemoryOpCost(Instruction::Store, ElemTy, Align, 0, CostKind);
+ return Opcode == Instruction::ExtractElement
+ ? StoreCost * NumElems + LoadCost
+ : (StoreCost + LoadCost) * NumElems + StoreCost;
+ }
// For unsupported scalable vector.
if (LT.second.isScalableVector() && !LT.first.isValid())
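
Worked example of the scalarization path added above: for a hypothetical 4-element vector whose type legalizes to a scalar, with unit scalar load/store costs (assumed here rather than computed via getMemoryOpCost), a variable-index extractelement is costed as 4 stores + 1 load = 5 and a variable-index insertelement as 4 stores + 1 store + 4 loads = 9. A standalone sketch of just that arithmetic:

  // Illustrative mirror of the stack-based scalarization cost above.
  #include <cstdio>

  // ExtractElement: spill the vector (NumElems scalar stores), load one scalar.
  static unsigned extractCost(unsigned NumElems, unsigned Store, unsigned Load) {
    return Store * NumElems + Load;
  }

  // InsertElement: spill the vector, store the new scalar, reload the vector.
  static unsigned insertCost(unsigned NumElems, unsigned Store, unsigned Load) {
    return (Store + Load) * NumElems + Store;
  }

  int main() {
    // Assumed 4-element vector with unit memory-op costs.
    std::printf("extractelement, variable index: %u\n", extractCost(4, 1, 1)); // 5
    std::printf("insertelement,  variable index: %u\n", insertCost(4, 1, 1));  // 9
  }
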
@@ -1461,6 +1528,31 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (!isTypeLegal(Val))
return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
+ // Mask vector extract/insert is expanded via e8.
+ if (Val->getScalarSizeInBits() == 1) {
+ VectorType *WideTy =
+ VectorType::get(IntegerType::get(Val->getContext(), 8),
+ cast<VectorType>(Val)->getElementCount());
+ if (Opcode == Instruction::ExtractElement) {
+ InstructionCost ExtendCost
+ = getCastInstrCost(Instruction::ZExt, WideTy, Val,
+ TTI::CastContextHint::None, CostKind);
+ InstructionCost ExtractCost
+ = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
+ return ExtendCost + ExtractCost;
+ }
+ InstructionCost ExtendCost
+ = getCastInstrCost(Instruction::ZExt, WideTy, Val,
+ TTI::CastContextHint::None, CostKind);
+ InstructionCost InsertCost
+ = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
+ InstructionCost TruncCost
+ = getCastInstrCost(Instruction::Trunc, Val, WideTy,
+ TTI::CastContextHint::None, CostKind);
+ return ExtendCost + InsertCost + TruncCost;
+ }
+
+
// In RVV, we could use vslidedown + vmv.x.s to extract element from vector
// and vslideup + vmv.s.x to insert element to vector.
unsigned BaseCost = 1;
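
For the i1 (mask) path added above, the cost is now composed from the widened e8 operations instead of the hard-coded constants removed in the next hunk: extract is zext-to-i8 plus an e8 extract, insert is zext plus an e8 insert plus a trunc back to i1. A standalone sketch of that composition; the sub-costs below are placeholders for illustration, not values the patch computes.

  // Illustrative composition of the mask extract/insert costs above.
  #include <cstdio>

  struct MaskCosts {
    unsigned ZExtToI8;  // cast <n x i1> -> <n x i8>
    unsigned E8Extract; // extract from the widened <n x i8> vector
    unsigned E8Insert;  // insert into the widened <n x i8> vector
    unsigned TruncToI1; // cast <n x i8> -> <n x i1>
  };

  static unsigned maskExtractCost(const MaskCosts &C) {
    return C.ZExtToI8 + C.E8Extract;              // zext, then extract via e8
  }

  static unsigned maskInsertCost(const MaskCosts &C) {
    return C.ZExtToI8 + C.E8Insert + C.TruncToI1; // zext, insert via e8, trunc back
  }

  int main() {
    MaskCosts C{1, 2, 2, 1}; // assumed sub-costs, for illustration only
    std::printf("mask extractelement: %u\n", maskExtractCost(C)); // 3
    std::printf("mask insertelement:  %u\n", maskInsertCost(C));  // 4
  }
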
@@ -1482,30 +1574,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
SlideCost = 1; // With a constant index, we do not need to use addi.
}
- // Mask vector extract/insert element is different from normal case.
- if (Val->getScalarSizeInBits() == 1) {
- // For extractelement, we need the following instructions:
- // vmv.v.i v8, 0
- // vmerge.vim v8, v8, 1, v0
- // vsetivli zero, 1, e8, m2, ta, mu (not count)
- // vslidedown.vx v8, v8, a0
- // vmv.x.s a0, v8
-
- // For insertelement, we need the following instructions:
- // vsetvli a2, zero, e8, m1, ta, mu (not count)
- // vmv.s.x v8, a0
- // vmv.v.i v9, 0
- // vmerge.vim v9, v9, 1, v0
- // addi a0, a1, 1
- // vsetvli zero, a0, e8, m1, tu, mu (not count)
- // vslideup.vx v9, v8, a1
- // vsetvli a0, zero, e8, m1, ta, mu (not count)
- // vand.vi v8, v9, 1
- // vmsne.vi v0, v8, 0
-
- // TODO: should we count these special vsetvlis?
- BaseCost = Opcode == Instruction::InsertElement ? 5 : 3;
- }
// Extract i64 in the target that has XLEN=32 need more instruction.
if (Val->getScalarType()->isIntegerTy() &&
ST->getXLen() < Val->getScalarSizeInBits()) {
@@ -1547,7 +1615,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
Args, CxtI);
// Skip if scalar size of Ty is bigger than ELEN.
- if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELEN())
+ if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
@@ -1596,7 +1664,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
case ISD::FSUB:
case ISD::FMUL:
case ISD::FNEG: {
- return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
+ return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
}
default:
return ConstantMatCost +
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7ffcb4828d0c..efc8350064a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -48,9 +48,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// actual target hardware.
unsigned getEstimatedVLFor(VectorType *Ty);
- /// Return the cost of LMUL. The larger the LMUL, the higher the cost.
- InstructionCost getLMULCost(MVT VT);
-
/// Return the cost of accessing a constant pool entry of the specified
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
@@ -123,8 +120,6 @@ public:
return ST->useRVVForFixedLengthVectors() ? 16 : 0;
}
- InstructionCost getVRGatherVVCost(MVT VT);
-
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
@@ -174,6 +169,9 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
using BaseT::getVectorInstrCost;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
@@ -201,7 +199,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
+ if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
@@ -226,7 +224,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
+ if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
@@ -288,9 +286,9 @@ public:
case RecurKind::UMax:
case RecurKind::FMin:
case RecurKind::FMax:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
case RecurKind::FMulAdd:
+ case RecurKind::IAnyOf:
+ case RecurKind::FAnyOf:
return true;
default:
return false;
@@ -359,6 +357,10 @@ public:
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2);
+
+ bool shouldFoldTerminatingConditionAfterLSR() const {
+ return true;
+ }
};
} // end namespace llvm