Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/VE')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp | 103
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp | 55
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp | 137
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VE.h | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VE.td | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td | 138
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp | 402
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp | 40
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp | 2238
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h | 150
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h | 41
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td | 89
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp | 534
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td | 845
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td | 1604
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td | 64
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td | 91
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td | 1510
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp | 105
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td | 104
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp | 64
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td | 46
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td | 71
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def | 41
35 files changed, 7853 insertions, 833 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 7a899b4b38e2..a3309a68c76d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -125,6 +125,9 @@ static const MCPhysReg F128Regs[32] = {
VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23,
VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31};
+static const MCPhysReg VM512Regs[8] = {VE::VMP0, VE::VMP1, VE::VMP2, VE::VMP3,
+ VE::VMP4, VE::VMP5, VE::VMP6, VE::VMP7};
+
static const MCPhysReg MISCRegs[31] = {
VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister,
VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR,
@@ -277,6 +280,17 @@ public:
}
return false;
}
+ bool isUImm4() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<4>(Value);
+ }
+ return false;
+ }
bool isUImm6() {
if (!isImm())
return false;
@@ -476,6 +490,10 @@ public:
addImmOperands(Inst, N);
}
+ void addUImm4Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
void addUImm6Operands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
@@ -648,6 +666,15 @@ public:
return true;
}
+ static bool MorphToVM512Reg(VEOperand &Op) {
+ unsigned Reg = Op.getReg();
+ unsigned regIdx = Reg - VE::VM0;
+ if (regIdx % 2 || regIdx > 15)
+ return false;
+ Op.Reg.RegNum = VM512Regs[regIdx / 2];
+ return true;
+ }
+
static bool MorphToMISCReg(VEOperand &Op) {
const auto *ConstExpr = dyn_cast<MCConstantExpr>(Op.getImm());
if (!ConstExpr)
@@ -902,6 +929,24 @@ StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
Mnemonic = parseRD(Name, 10, NameLoc, Operands);
} else if (Name.startswith("cvt.l.d")) {
Mnemonic = parseRD(Name, 7, NameLoc, Operands);
+ } else if (Name.startswith("vcvt.w.d.sx") || Name.startswith("vcvt.w.d.zx") ||
+ Name.startswith("vcvt.w.s.sx") || Name.startswith("vcvt.w.s.zx")) {
+ Mnemonic = parseRD(Name, 11, NameLoc, Operands);
+ } else if (Name.startswith("vcvt.l.d")) {
+ Mnemonic = parseRD(Name, 8, NameLoc, Operands);
+ } else if (Name.startswith("pvcvt.w.s.lo") ||
+ Name.startswith("pvcvt.w.s.up")) {
+ Mnemonic = parseRD(Name, 12, NameLoc, Operands);
+ } else if (Name.startswith("pvcvt.w.s")) {
+ Mnemonic = parseRD(Name, 9, NameLoc, Operands);
+ } else if (Name.startswith("vfmk.l.") || Name.startswith("vfmk.w.") ||
+ Name.startswith("vfmk.d.") || Name.startswith("vfmk.s.")) {
+ bool ICC = Name[5] == 'l' || Name[5] == 'w' ? true : false;
+ Mnemonic = parseCC(Name, 7, Name.size(), ICC, true, NameLoc, Operands);
+ } else if (Name.startswith("pvfmk.w.lo.") || Name.startswith("pvfmk.w.up.") ||
+ Name.startswith("pvfmk.s.lo.") || Name.startswith("pvfmk.s.up.")) {
+ bool ICC = Name[6] == 'l' || Name[6] == 'w' ? true : false;
+ Mnemonic = parseCC(Name, 11, Name.size(), ICC, true, NameLoc, Operands);
} else {
Operands->push_back(VEOperand::CreateToken(Mnemonic, NameLoc));
}
@@ -1362,9 +1407,38 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
return ResTy;
switch (getLexer().getKind()) {
- case AsmToken::LParen:
- // FIXME: Parsing "(" + %vreg + ", " + %vreg + ")"
- // FALLTHROUGH
+ case AsmToken::LParen: {
+ // Parsing "(" + %vreg + ", " + %vreg + ")"
+ const AsmToken Tok1 = Parser.getTok();
+ Parser.Lex(); // Eat the '('.
+
+ unsigned RegNo1;
+ SMLoc S1, E1;
+ if (tryParseRegister(RegNo1, S1, E1) != MatchOperand_Success) {
+ getLexer().UnLex(Tok1);
+ return MatchOperand_NoMatch;
+ }
+
+ if (!Parser.getTok().is(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex(); // Eat the ','.
+
+ unsigned RegNo2;
+ SMLoc S2, E2;
+ if (tryParseRegister(RegNo2, S2, E2) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ if (!Parser.getTok().is(AsmToken::RParen))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(VEOperand::CreateToken(Tok1.getString(), Tok1.getLoc()));
+ Operands.push_back(VEOperand::CreateReg(RegNo1, S1, E1));
+ Operands.push_back(VEOperand::CreateReg(RegNo2, S2, E2));
+ Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the ')'.
+ break;
+ }
default: {
std::unique_ptr<VEOperand> Op;
ResTy = parseVEAsmOperand(Op);
@@ -1377,7 +1451,24 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
if (!Parser.getTok().is(AsmToken::LParen))
break;
- // FIXME: Parsing %vec-reg + "(" + %sclar-reg/number + ")"
+ // Parsing %vec-reg + "(" + %scalar-reg/number + ")"
+ std::unique_ptr<VEOperand> Op1 = VEOperand::CreateToken(
+ Parser.getTok().getString(), Parser.getTok().getLoc());
+ Parser.Lex(); // Eat the '('.
+
+ std::unique_ptr<VEOperand> Op2;
+ ResTy = parseVEAsmOperand(Op2);
+ if (ResTy != MatchOperand_Success || !Op2)
+ return MatchOperand_ParseFail;
+
+ if (!Parser.getTok().is(AsmToken::RParen))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(std::move(Op1));
+ Operands.push_back(std::move(Op2));
+ Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the ')'.
break;
}
}
@@ -1445,6 +1536,10 @@ unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
if (Op.isReg() && VEOperand::MorphToF128Reg(Op))
return MCTargetAsmParser::Match_Success;
break;
+ case MCK_VM512:
+ if (Op.isReg() && VEOperand::MorphToVM512Reg(Op))
+ return MCTargetAsmParser::Match_Success;
+ break;
case MCK_MISC:
if (Op.isImm() && VEOperand::MorphToMISCReg(Op))
return MCTargetAsmParser::Match_Success;
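
Aside: MorphToVM512Reg above maps an even-numbered vector-mask register %vmN onto the 512-bit pair register VMP(N/2) and rejects odd or out-of-range indices; the disassembler change later in this diff applies the same even-index rule when decoding VM512 operands. Below is a tiny standalone sketch of that mapping; vmPairIndex and its return convention are hypothetical names for illustration only, not part of the backend.

#include <cstdio>

// Hypothetical stand-in: returns the VMP pair index for an even VM index
// 0..14, or -1 if the register cannot form a 512-bit pair.
static int vmPairIndex(unsigned VMIndex) {
  if (VMIndex % 2 != 0 || VMIndex > 15)
    return -1;                 // odd or out-of-range: no VMP register
  return (int)(VMIndex / 2);   // VM0/VM1 -> VMP0, VM2/VM3 -> VMP1, ...
}

int main() {
  std::printf("%d %d %d\n", vmPairIndex(0), vmPairIndex(6), vmPairIndex(7));
  // prints: 0 3 -1
  return 0;
}
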
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
index 35885a4e3cae..20d609bc6b32 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -47,7 +47,7 @@ static MCDisassembler *createVEDisassembler(const Target &T,
return new VEDisassembler(STI, Ctx);
}
-extern "C" void LLVMInitializeVEDisassembler() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheVETarget(),
createVEDisassembler);
@@ -95,6 +95,25 @@ static const unsigned F128RegDecoderTable[] = {
VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23,
VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31};
+static const unsigned V64RegDecoderTable[] = {
+ VE::V0, VE::V1, VE::V2, VE::V3, VE::V4, VE::V5, VE::V6, VE::V7,
+ VE::V8, VE::V9, VE::V10, VE::V11, VE::V12, VE::V13, VE::V14, VE::V15,
+ VE::V16, VE::V17, VE::V18, VE::V19, VE::V20, VE::V21, VE::V22, VE::V23,
+ VE::V24, VE::V25, VE::V26, VE::V27, VE::V28, VE::V29, VE::V30, VE::V31,
+ VE::V32, VE::V33, VE::V34, VE::V35, VE::V36, VE::V37, VE::V38, VE::V39,
+ VE::V40, VE::V41, VE::V42, VE::V43, VE::V44, VE::V45, VE::V46, VE::V47,
+ VE::V48, VE::V49, VE::V50, VE::V51, VE::V52, VE::V53, VE::V54, VE::V55,
+ VE::V56, VE::V57, VE::V58, VE::V59, VE::V60, VE::V61, VE::V62, VE::V63};
+
+static const unsigned VMRegDecoderTable[] = {
+ VE::VM0, VE::VM1, VE::VM2, VE::VM3, VE::VM4, VE::VM5,
+ VE::VM6, VE::VM7, VE::VM8, VE::VM9, VE::VM10, VE::VM11,
+ VE::VM12, VE::VM13, VE::VM14, VE::VM15};
+
+static const unsigned VM512RegDecoderTable[] = {VE::VMP0, VE::VMP1, VE::VMP2,
+ VE::VMP3, VE::VMP4, VE::VMP5,
+ VE::VMP6, VE::VMP7};
+
static const unsigned MiscRegDecoderTable[] = {
VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister,
VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR,
@@ -145,6 +164,40 @@ static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Reg = VE::NoRegister;
+ if (RegNo == 255)
+ Reg = VE::VIX;
+ else if (RegNo > 63)
+ return MCDisassembler::Fail;
+ else
+ Reg = V64RegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = VMRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo % 2 || RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = VM512RegDecoderTable[RegNo / 2];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp
new file mode 100644
index 000000000000..c4588926af9e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp
@@ -0,0 +1,137 @@
+//===-- LVLGen.cpp - LVL instruction generator ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VE.h"
+#include "VESubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lvl-gen"
+
+namespace {
+struct LVLGen : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ static char ID;
+ LVLGen() : MachineFunctionPass(ID) {}
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ unsigned getVL(const MachineInstr &MI);
+ int getVLIndex(unsigned Opcode);
+};
+char LVLGen::ID = 0;
+
+} // end of anonymous namespace
+
+FunctionPass *llvm::createLVLGenPass() { return new LVLGen; }
+
+int LVLGen::getVLIndex(unsigned Opcode) {
+ const MCInstrDesc &MCID = TII->get(Opcode);
+
+ // If an instruction has VLIndex information, return it.
+ if (HAS_VLINDEX(MCID.TSFlags))
+ return GET_VLINDEX(MCID.TSFlags);
+
+ return -1;
+}
+
+// returns a register holding a vector length. NoRegister is returned when
+// this MI does not have a vector length.
+unsigned LVLGen::getVL(const MachineInstr &MI) {
+ int Index = getVLIndex(MI.getOpcode());
+ if (Index >= 0)
+ return MI.getOperand(Index).getReg();
+
+ return VE::NoRegister;
+}
+
+bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+#define RegName(no) \
+ (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no))
+
+ bool Changed = false;
+ bool HasRegForVL = false;
+ unsigned RegForVL;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ MachineBasicBlock::iterator MI = I;
+
+ // Check whether MI uses a vector length operand. If so, we prepare the VL
+ // register. We would like to reuse the VL register as much as possible,
+ // and to keep the number of LEA instructions as low as possible.
+ // Therefore, we use a regular scalar register to hold the immediate value
+ // loaded into the VL register, and we try to reuse identical scalar
+ // registers to avoid emitting new LVLr instructions as much as possible.
+ unsigned Reg = getVL(*MI);
+ if (Reg != VE::NoRegister) {
+ LLVM_DEBUG(dbgs() << "Vector instruction found: ");
+ LLVM_DEBUG(MI->dump());
+ LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". ");
+ LLVM_DEBUG(dbgs() << "Current VL is "
+ << (HasRegForVL ? RegName(RegForVL) : "unknown")
+ << ". ");
+
+ if (!HasRegForVL || RegForVL != Reg) {
+ // Use VL, but a different value in a different scalar register.
+ // So, generate a new LVL instruction just before the current instruction.
+ LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
+ << RegName(Reg) << ".\n");
+ BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
+ HasRegForVL = true;
+ RegForVL = Reg;
+ Changed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
+ }
+ }
+ // Check whether the scalar register holding the value for the VL register
+ // has been updated. Also, a call doesn't preserve the VL register.
+ if (HasRegForVL) {
+ if (MI->definesRegister(RegForVL, TRI) ||
+ MI->modifiesRegister(RegForVL, TRI) ||
+ MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+ // The cached VL needs to be updated, so clear HasRegForVL.
+ LLVM_DEBUG(dbgs() << RegName(RegForVL) << " needs to be updated: ");
+ LLVM_DEBUG(MI->dump());
+ HasRegForVL = false;
+ }
+ }
+
+ ++I;
+ }
+ return Changed;
+}
+
+bool LVLGen::runOnMachineFunction(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n');
+ LLVM_DEBUG(F.dump());
+
+ bool Changed = false;
+
+ const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>();
+ TII = Subtarget.getInstrInfo();
+ TRI = Subtarget.getRegisterInfo();
+
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(F.dump());
+ }
+ LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n");
+ return Changed;
+}
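
Aside: the new LVLGen pass above scans each basic block, inserts an LVLr before any vector instruction whose vector-length register differs from the one currently loaded, and invalidates the cached value whenever that scalar register is redefined or a call is made. A minimal standalone sketch of that reuse/invalidation scan follows; the Inst struct and countLVLInserts are hypothetical placeholders, not the real MachineInstr API.

#include <cstdio>
#include <vector>

// Hypothetical, simplified model of one instruction for illustration only.
struct Inst {
  int VLReg;   // scalar register holding the vector length, -1 if none
  int DefReg;  // register this instruction defines, -1 if none
  bool IsCall; // calls do not preserve the VL register
};

// Counts the "LVL" set-ups that would be inserted, mirroring the
// reuse/invalidation logic of LVLGen::runOnMachineBasicBlock.
static int countLVLInserts(const std::vector<Inst> &Block) {
  int Inserted = 0;
  bool HasRegForVL = false;
  int RegForVL = -1;
  for (const Inst &I : Block) {
    if (I.VLReg != -1 && (!HasRegForVL || RegForVL != I.VLReg)) {
      ++Inserted;              // would emit "LVLr %VLReg" here
      HasRegForVL = true;
      RegForVL = I.VLReg;
    }
    // A redefinition of the cached register or a call invalidates the VL.
    if (HasRegForVL && (I.DefReg == RegForVL || I.IsCall))
      HasRegForVL = false;
  }
  return Inserted;
}

int main() {
  // Two vector ops sharing the same VL register need only one LVL; the call
  // before the third op forces a reload.
  std::vector<Inst> Block = {
      {/*VLReg=*/7, /*DefReg=*/-1, /*IsCall=*/false},
      {7, -1, false},
      {-1, -1, true},
      {7, -1, false},
  };
  std::printf("LVL instructions inserted: %d\n", countLVLInserts(Block)); // 2
  return 0;
}
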
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
index 657cc513b3c5..6995007c6dc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
@@ -29,6 +29,7 @@ public:
const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
bool printAliasInstr(const MCInst *, uint64_t Address,
const MCSubtargetInfo &, raw_ostream &);
void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &,
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index a39cffc8f4a6..4c480c050274 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -56,8 +56,8 @@ static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU,
StringRef FS) {
if (CPU.empty())
- CPU = "ve";
- return createVEMCSubtargetInfoImpl(TT, CPU, FS);
+ CPU = "generic";
+ return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS);
}
static MCTargetStreamer *
@@ -80,7 +80,7 @@ static MCInstPrinter *createVEMCInstPrinter(const Triple &T,
return new VEInstPrinter(MAI, MII, MRI);
}
-extern "C" void LLVMInitializeVETargetMC() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo);
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
index 65bd142fe0db..a95a299def88 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
@@ -16,7 +16,7 @@ Target &llvm::getTheVETarget() {
return TheVETarget;
}
-extern "C" void LLVMInitializeVETargetInfo() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetInfo() {
RegisterTarget<Triple::ve, /*HasJIT=*/false> X(getTheVETarget(), "ve",
"VE", "VE");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
index 7ed7797cbb83..8c1fa840f19c 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
@@ -29,6 +29,7 @@ class MachineInstr;
FunctionPass *createVEISelDag(VETargetMachine &TM);
FunctionPass *createVEPromoteToI1Pass();
+FunctionPass *createLVLGenPass();
void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
@@ -333,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) {
return true;
}
// (m)1 patterns
- return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+ return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val);
}
inline static bool isMImm32Val(uint32_t Val) {
@@ -346,7 +347,25 @@ inline static bool isMImm32Val(uint32_t Val) {
return true;
}
// (m)1 patterns
- return (Val & (1 << 31)) && isShiftedMask_32(Val);
+ return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val);
+}
+
+/// val2MImm - Convert an integer immediate value to target MImm immediate.
+inline static uint64_t val2MImm(uint64_t Val) {
+ if (Val == 0)
+ return 0; // (0)1
+ if (Val & (UINT64_C(1) << 63))
+ return countLeadingOnes(Val); // (m)1
+ return countLeadingZeros(Val) | 0x40; // (m)0
+}
+
+/// mimm2Val - Convert a target MImm immediate to an integer immediate value.
+inline static uint64_t mimm2Val(uint64_t Val) {
+ if (Val == 0)
+ return 0; // (0)1
+ if ((Val & 0x40) == 0)
+ return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1
+ return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0
}
inline unsigned M0(unsigned Val) { return Val + 64; }
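
Aside: the MImm helpers added above encode the two families of mask immediates the VE ISA accepts: "(m)1" is m consecutive 1-bits starting at the MSB, encoded as m, and "(m)0" is m consecutive 0-bits starting at the MSB (with 1s below), encoded as m | 0x40, which is what the M0 helper produces. The sketch below re-derives the same encoding with the C++20 <bit> counting functions instead of LLVM's MathExtras; encodeMImm is an illustrative stand-in, and it assumes the value has already passed an isMImmVal-style check.

#include <bit>
#include <cstdint>
#include <cstdio>

// Standalone sketch of the MImm encoding (not the LLVM helpers themselves).
// Assumes Val is a valid mask immediate: all-zero, a run of leading ones,
// or a run of leading zeros followed by ones.
static uint64_t encodeMImm(uint64_t Val) {
  if (Val == 0)
    return 0;                            // (0)1
  if (Val & (UINT64_C(1) << 63))
    return std::countl_one(Val);         // (m)1
  return std::countl_zero(Val) | 0x40;   // (m)0
}

int main() {
  // 0xFFFFFFFF00000000 has 32 leading ones  -> (32)1, encoded as 32.
  std::printf("%llu\n", (unsigned long long)encodeMImm(0xFFFFFFFF00000000ULL));
  // 0x00000000FFFFFFFF has 32 leading zeros -> (32)0, encoded as 32|0x40 = 96.
  std::printf("%llu\n", (unsigned long long)encodeMImm(0x00000000FFFFFFFFULL));
  return 0;
}
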
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
index 617a6ea458b6..9e8adcd42077 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
@@ -18,6 +18,9 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// VE Subtarget features.
//
+def FeatureEnableVPU
+ : SubtargetFeature<"vpu", "EnableVPU", "true",
+ "Enable the VPU">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
@@ -43,7 +46,7 @@ def VEAsmParser : AsmParser {
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"ve", []>;
+def : Proc<"generic", []>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 86e3aa3d3fa1..08a75b6b8c55 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -60,6 +60,9 @@ public:
static const char *getRegisterName(unsigned RegNo) {
return VEInstPrinter::getRegisterName(RegNo);
}
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &OS);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
};
} // end of anonymous namespace
@@ -203,7 +206,7 @@ void VEAsmPrinter::lowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
// lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24)
// and %got, %got, (32)0
// sic %plt
- // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt)
+ // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%plt, %got)
MCOperand cim24 = MCOperand::createImm(-24);
MCOperand loImm =
createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext);
@@ -248,10 +251,10 @@ void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
- // lea %dst, %plt_lo(func)(-24)
+ // lea %dst, func@plt_lo(-24)
// and %dst, %dst, (32)0
// sic %plt ; FIXME: is it safe to use %plt here?
- // lea.sl %dst, %plt_hi(func)(%dst, %plt)
+ // lea.sl %dst, func@plt_hi(%plt, %dst)
MCOperand cim24 = MCOperand::createImm(-24);
MCOperand loImm =
createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext);
@@ -295,7 +298,7 @@ void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
// lea %s0, sym@tls_gd_lo(-24)
// and %s0, %s0, (32)0
// sic %lr
- // lea.sl %s0, sym@tls_gd_hi(%s0, %lr)
+ // lea.sl %s0, sym@tls_gd_hi(%lr, %s0)
// lea %s12, __tls_get_addr@plt_lo(8)
// and %s12, %s12, (32)0
// lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr)
@@ -349,7 +352,42 @@ void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
} while ((++I != E) && I->isInsideBundle()); // Delay slot check.
}
+void VEAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+// PrintAsmOperand - Print out an operand for an inline asm expression.
+bool VEAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
+ case 'r':
+ case 'v':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
// Force static initialization.
-extern "C" void LLVMInitializeVEAsmPrinter() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmPrinter() {
RegisterAsmPrinter<VEAsmPrinter> X(getTheVETarget());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
index 4f04dae884ab..93899c2cae3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
@@ -14,71 +14,133 @@
// Aurora VE
//===----------------------------------------------------------------------===//
def CC_VE_C_Stack: CallingConv<[
- // float --> need special handling like below.
- // 0 4
- // +------+------+
- // | empty| float|
- // +------+------+
- CCIfType<[f32], CCCustom<"allocateFloat">>,
+ // F128 are assigned to the stack in 16-byte aligned units
+ CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>,
// All of the rest are assigned to the stack in 8-byte aligned units.
CCAssignToStack<0, 8>
]>;
-def CC_VE : CallingConv<[
+///// C Calling Convention (VE ABI v2.1) /////
+//
+// Reference: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v2.1.pdf
+//
+def CC_VE_C : CallingConv<[
// All arguments get passed in generic registers if there is space.
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-
- // bool, char, int, enum, long --> generic integer 32 bit registers
- CCIfType<[i32], CCAssignToRegWithShadow<
- [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // Promote i1/i8/i16/i32 arguments to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>,
- // float --> generic floating point 32 bit registers
- CCIfType<[f32], CCAssignToRegWithShadow<
- [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // Convert float arguments to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ CCIfType<[f32], CCBitConvertToType<i64>>,
- // long long/double --> generic 64 bit registers
+ // bool, char, int, enum, long, long long, float, double
+ // --> generic 64 bit registers
CCIfType<[i64, f64],
CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // long double --> pair of generic 64 bit registers
+ //
+ // NOTE: If Q1 is allocated while SX1 is free, llvm tries to allocate SX1 for
+ // following operands, this masks SX1 to avoid such behavior.
+ CCIfType<[f128],
+ CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3],
+ [SX0, SX1, SX3, SX5]>>,
+
// Alternatively, they are assigned to the stack in 8-byte aligned units.
CCDelegateTo<CC_VE_C_Stack>
]>;
+///// Standard vararg C Calling Convention (VE ABI v2.1) /////
// All arguments get passed in stack for varargs function or non-prototyped
// function.
def CC_VE2 : CallingConv<[
- // float --> need special handling like below.
- // 0 4
+ // Promote i1/i8/i16/i32 arguments to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Convert float arguments to i64 with padding.
+ // 63 31 0
// +------+------+
- // | empty| float|
+ // | float| 0 |
// +------+------+
- CCIfType<[f32], CCCustom<"allocateFloat">>,
+ CCIfType<[f32], CCBitConvertToType<i64>>,
+
+ // F128 are assigned to the stack in 16-byte aligned units
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
CCAssignToStack<0, 8>
]>;
-def RetCC_VE : CallingConv<[
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+def RetCC_VE_C : CallingConv<[
+ // Promote i1/i8/i16/i32 return values to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>,
- // bool, char, int, enum, long --> generic integer 32 bit registers
- CCIfType<[i32], CCAssignToRegWithShadow<
- [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
-
- // float --> generic floating point 32 bit registers
- CCIfType<[f32], CCAssignToRegWithShadow<
- [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
- [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+ // Convert float return values to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ CCIfType<[f32], CCBitConvertToType<i64>>,
- // long long/double --> generic 64 bit registers
+ // bool, char, int, enum, long, long long, float, double
+ // --> generic 64 bit registers
CCIfType<[i64, f64],
CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // long double --> pair of generic 64 bit registers
+ CCIfType<[f128],
+ CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3],
+ [SX0, SX1, SX3, SX5]>>,
+]>;
+
+///// Custom fastcc /////
+//
+// This passes vector params and return values in registers. Scalar values are
+// handled conforming to the standard cc.
+def CC_VE_Fast : CallingConv<[
+ // vector --> generic vector registers
+ CCIfType<[v256i32, v256f32, v256i64, v256f64],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+ // TODO: make this conditional on packed mode
+ CCIfType<[v512i32, v512f32],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+
+ // vector mask --> generic vector mask registers
+ CCIfType<[v256i1],
+ CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>,
+
+ // pair of vector mask --> generic vector mask registers
+ CCIfType<[v512i1],
+ CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
+ [VM1, VM3, VM5]>>,
+
+ // Follow the standard C CC for scalars.
+ CCDelegateTo<CC_VE_C>
+]>;
+
+def RetCC_VE_Fast : CallingConv<[
+ // vector --> generic vector registers
+ CCIfType<[v256i32, v256f32, v256i64, v256f64],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+ // TODO: make this conditional on packed mode
+ CCIfType<[v512i32, v512f32],
+ CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>,
+
+ // vector mask --> generic vector mask registers
+ CCIfType<[v256i1],
+ CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>,
+
+ // pair of vector mask --> generic vector mask registers
+ CCIfType<[v512i1],
+ CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
+ [VM1, VM3, VM5]>>,
+
+ // Follow the standard C CC for scalars.
+ CCDelegateTo<RetCC_VE_C>
]>;
// Callee-saved registers
@@ -86,4 +148,6 @@ def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
// PreserveAll (clobbers s62,s63) - used for ve_grow_stack
-def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;
+def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61),
+ (sequence "V%u", 0, 63),
+ (sequence "VM%u", 1, 15))>;
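
Aside: the calling-convention rewrite above no longer assigns f32 to separate 32-bit float registers; it bit-converts the float to i64 so that the value sits in bits 63..32 of a generic 64-bit register with zeros below, as the diagrams in the .td comments show. The sketch below only illustrates that register-image layout, assuming plain IEEE-754 bit patterns; packF32/unpackF32 are illustrative helpers and not part of the backend.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative only: build the 64-bit register image the CC comments describe,
//   63      31      0
//   +-------+-------+
//   | float |   0   |
//   +-------+-------+
static uint64_t packF32(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // raw IEEE-754 bits of the float
  return (uint64_t)Bits << 32;          // float in bits 63..32, low half zero
}

static float unpackF32(uint64_t RegImage) {
  uint32_t Bits = (uint32_t)(RegImage >> 32);
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  uint64_t Image = packF32(1.0f); // 0x3f80000000000000
  std::printf("%016llx -> %g\n", (unsigned long long)Image, unpackF32(Image));
  return 0;
}
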
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
index 8b10e6466123..9e97d0eca833 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -8,6 +8,105 @@
//
// This file contains the VE implementation of TargetFrameLowering class.
//
+// On VE, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |----------------------------------------------|
+// | Parameter area for this function |
+// |----------------------------------------------|
+// | Register save area (RSA) for this function |
+// |----------------------------------------------|
+// | Return address for this function |
+// |----------------------------------------------|
+// | Frame pointer for this function |
+// |----------------------------------------------| <- sp
+// | | Lower address
+//
+// VE doesn't use on-demand stack allocation, so user code generated by LLVM
+// needs to call VEOS to allocate the stack frame. The VE ABI wants to reduce
+// the number of VEOS calls, so it requires the caller's prologue to allocate
+// not only the RSA (in general the CSR, callee-saved register, area) but
+// also the call frame for callees.
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that technically the last frame area (VLAs) doesn't get created until
+// in the main function body, after the prologue is run. However, it's depicted
+// here for completeness.
+//
+// | | Higher address
+// |----------------------------------------------|
+// | Parameter area for this function |
+// |----------------------------------------------|
+// | Register save area (RSA) for this function |
+// |----------------------------------------------|
+// | Return address for this function |
+// |----------------------------------------------|
+// | Frame pointer for this function |
+// |----------------------------------------------| <- fp(=old sp)
+// |.empty.space.to.make.part.below.aligned.in....|
+// |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
+// |.alignment....................................| unknown at compile time)
+// |----------------------------------------------|
+// | Local variables of fixed size including spill|
+// | slots |
+// |----------------------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17)
+// |..............................................| (size of this area is
+// |..............................................| unknown at compile time)
+// |----------------------------------------------| <- stack top (returned by
+// | Parameter area for callee | alloca)
+// |----------------------------------------------|
+// | Register save area (RSA) for callee |
+// |----------------------------------------------|
+// | Return address for callee |
+// |----------------------------------------------|
+// | Frame pointer for callee |
+// |----------------------------------------------| <- sp
+// | | Lower address
+//
+// To access data in a frame at compile time, a constant offset from one of
+// the pointers (fp, bp, sp) must be computable. The size of the areas with
+// a dotted background cannot be computed at compile time if they are
+// present, so all three of fp, bp, and sp must be set up in order to be
+// able to access all contents in the frame areas, assuming all of the
+// frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In addition, VE ABI defines RSA frame, return address, and frame pointer
+// as follows:
+//
+// |----------------------------------------------| <- sp+176
+// | %s18...%s33 |
+// |----------------------------------------------| <- sp+48
+// | Linkage area register (%s17) |
+// |----------------------------------------------| <- sp+40
+// | Procedure linkage table register (%plt=%s16) |
+// |----------------------------------------------| <- sp+32
+// | Global offset table register (%got=%s15) |
+// |----------------------------------------------| <- sp+24
+// | Thread pointer register (%tp=%s14) |
+// |----------------------------------------------| <- sp+16
+// | Return address |
+// |----------------------------------------------| <- sp+8
+// | Frame pointer |
+// |----------------------------------------------| <- sp+0
+//
+// NOTE: This description is based on VE ABI and description in
+// AArch64FrameLowering.cpp. Thanks a lot.
//===----------------------------------------------------------------------===//
#include "VEFrameLowering.h"
@@ -38,48 +137,47 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
MachineBasicBlock::iterator MBBI,
uint64_t NumBytes,
bool RequireFPUpdate) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Insert following codes here as prologue
//
- // st %fp, 0(,%sp)
- // st %lr, 8(,%sp)
- // st %got, 24(,%sp)
- // st %plt, 32(,%sp)
- // st %s17, 40(,%sp) iff this function is using s17 as BP
- // or %fp, 0, %sp
-
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(0)
- .addReg(VE::SX9);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(8)
- .addReg(VE::SX10);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(24)
- .addReg(VE::SX15);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(32)
- .addReg(VE::SX16);
+ // st %fp, 0(, %sp) iff !isLeafProc
+ // st %lr, 8(, %sp) iff !isLeafProc
+ // st %got, 24(, %sp) iff hasGOT
+ // st %plt, 32(, %sp) iff hasGOT
+ // st %s17, 40(, %sp) iff hasBP
+ if (!FuncInfo->isLeafProc()) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(0)
+ .addReg(VE::SX9);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(8)
+ .addReg(VE::SX10);
+ }
+ if (hasGOT(MF)) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(24)
+ .addReg(VE::SX15);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(32)
+ .addReg(VE::SX16);
+ }
if (hasBP(MF))
- BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+ BuildMI(MBB, MBBI, DL, TII.get(VE::STrii))
.addReg(VE::SX11)
.addImm(0)
.addImm(40)
.addReg(VE::SX17);
- BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
- .addReg(VE::SX11)
- .addImm(0);
}
void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
@@ -87,43 +185,42 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
MachineBasicBlock::iterator MBBI,
uint64_t NumBytes,
bool RequireFPUpdate) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Insert following codes here as epilogue
//
- // or %sp, 0, %fp
- // ld %s17, 40(,%sp) iff this function is using s17 as BP
- // ld %got, 32(,%sp)
- // ld %plt, 24(,%sp)
- // ld %lr, 8(,%sp)
- // ld %fp, 0(,%sp)
-
- BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
- .addReg(VE::SX9)
- .addImm(0);
+ // ld %s17, 40(, %sp) iff hasBP
+ // ld %plt, 32(, %sp) iff hasGOT
+ // ld %got, 24(, %sp) iff hasGOT
+ // ld %lr, 8(, %sp) iff !isLeafProc
+ // ld %fp, 0(, %sp) iff !isLeafProc
if (hasBP(MF))
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX17)
.addReg(VE::SX11)
.addImm(0)
.addImm(40);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(32);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX15)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(24);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX10)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(8);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX9)
- .addReg(VE::SX11)
- .addImm(0)
- .addImm(0);
+ if (hasGOT(MF)) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX16)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(32);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX15)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(24);
+ }
+ if (!FuncInfo->isLeafProc()) {
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX10)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(8);
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX9)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(0);
+ }
}
void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
@@ -131,37 +228,44 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock::iterator MBBI,
int64_t NumBytes,
MaybeAlign MaybeAlign) const {
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
- if (NumBytes >= -64 && NumBytes < 63) {
- BuildMI(MBB, MBBI, dl, TII.get(VE::ADDSLri), VE::SX11)
+ if (NumBytes == 0) {
+ // Nothing to do here.
+ } else if (isInt<7>(NumBytes)) {
+ // adds.l %s11, NumBytes@lo, %s11
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ADDSLri), VE::SX11)
.addReg(VE::SX11)
.addImm(NumBytes);
- return;
+ } else if (isInt<32>(NumBytes)) {
+ // lea %s11, NumBytes@lo(, %s11)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LEArii), VE::SX11)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(Lo_32(NumBytes));
+ } else {
+ // Emit following codes. This clobbers SX13 which we always know is
+ // available here.
+ // lea %s13, NumBytes@lo
+ // and %s13, %s13, (32)0
+ // lea.sl %sp, NumBytes@hi(%s13, %sp)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LEAzii), VE::SX13)
+ .addImm(0)
+ .addImm(0)
+ .addImm(Lo_32(NumBytes));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX13)
+ .addReg(VE::SX13)
+ .addImm(M0(32));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::LEASLrri), VE::SX11)
+ .addReg(VE::SX11)
+ .addReg(VE::SX13)
+ .addImm(Hi_32(NumBytes));
}
- // Emit following codes. This clobbers SX13 which we always know is
- // available here.
- // lea %s13,%lo(NumBytes)
- // and %s13,%s13,(32)0
- // lea.sl %sp,%hi(NumBytes)(%sp, %s13)
- BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzii), VE::SX13)
- .addImm(0)
- .addImm(0)
- .addImm(Lo_32(NumBytes));
- BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX13)
- .addReg(VE::SX13)
- .addImm(M0(32));
- BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11)
- .addReg(VE::SX11)
- .addReg(VE::SX13)
- .addImm(Hi_32(NumBytes));
-
if (MaybeAlign) {
// and %sp, %sp, Align-1
- BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX11)
.addReg(VE::SX11)
.addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
}
@@ -169,9 +273,8 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
- DebugLoc dl;
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ DebugLoc DL;
+ const VEInstrInfo &TII = *STI.getInstrInfo();
// Emit following codes. It is not possible to insert multiple
// BasicBlocks in PEI pass, so we emit two pseudo instructions here.
@@ -198,22 +301,23 @@ void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
// EXTEND_STACK_GUARD pseudo will be simply eliminated by ExpandPostRA
// pass. This pseudo is required to be at the next of EXTEND_STACK
// pseudo in order to protect iteration loop in ExpandPostRA.
-
- BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK));
- BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK_GUARD));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK));
+ BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK_GUARD));
}
void VEFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo &MFI = MF.getFrameInfo();
const VEInstrInfo &TII = *STI.getInstrInfo();
const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
+ bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
- DebugLoc dl;
- bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+ DebugLoc DL;
// FIXME: unfortunately, returning false from canRealignStack
// actually just causes needsStackRealignment to return false,
@@ -226,12 +330,17 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
"stack re-alignment, but LLVM couldn't handle it "
"(probably because it has a dynamic alloca).");
- // Get the number of bytes to allocate from the FrameInfo
+ // Get the number of bytes to allocate from the FrameInfo.
+ // This number of bytes is already aligned to ABI stack alignment.
uint64_t NumBytes = MFI.getStackSize();
- // The VE ABI requires a reserved 176 bytes area at the top
- // of stack as described in VESubtarget.cpp. So, we adjust it here.
- NumBytes = STI.getAdjustedFrameSize(NumBytes);
+ // Adjust stack size if this function is not a leaf function since the
+ // VE ABI requires a reserved area at the top of stack as described in
+ // VEFrameLowering.cpp.
+ if (!FuncInfo->isLeafProc()) {
+ // NOTE: The number is aligned to ABI stack alignment after adjustment.
+ NumBytes = STI.getAdjustedFrameSize(NumBytes);
+ }
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
@@ -240,36 +349,34 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
// Update stack size with corrected value.
MFI.setStackSize(NumBytes);
- // Emit Prologue instructions to save %lr
+ // Emit Prologue instructions to save multiple registers.
emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
+ // Emit instructions to save SP in FP as follows if this is not a leaf
+ // function:
+ // or %fp, 0, %sp
+ if (!FuncInfo->isLeafProc())
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX9)
+ .addReg(VE::SX11)
+ .addImm(0);
+
// Emit stack adjust instructions
MaybeAlign RuntimeAlign =
NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+ assert((RuntimeAlign == None || !FuncInfo->isLeafProc()) &&
+ "SP has to be saved in order to align variable sized stack object!");
emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
if (hasBP(MF)) {
// Copy SP to BP.
- BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX17)
.addReg(VE::SX11)
.addImm(0);
}
// Emit stack extend instructions
- emitSPExtend(MF, MBB, MBBI);
-
- Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
-
- // Emit ".cfi_def_cfa_register 30".
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Emit ".cfi_window_save".
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (NumBytes != 0)
+ emitSPExtend(MF, MBB, MBBI);
}
MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
@@ -289,21 +396,33 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
void VEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ DebugLoc DL;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VEInstrInfo &TII = *STI.getInstrInfo();
uint64_t NumBytes = MFI.getStackSize();
- // Emit Epilogue instructions to restore %lr
+ // Emit instructions to retrieve original SP.
+ if (!FuncInfo->isLeafProc()) {
+ // If SP is saved in FP, retrieve it as follows:
+ // or %sp, 0, %fp iff !isLeafProc
+ BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11)
+ .addReg(VE::SX9)
+ .addImm(0);
+ } else {
+ // Emit stack adjust instructions.
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, None);
+ }
+
+ // Emit Epilogue instructions to restore multiple registers.
emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
}
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas
-// or if frame pointer elimination is disabled. For the case of VE, we don't
-// implement FP eliminator yet, but we returns false from this function to
-// not refer fp from generated code.
+// or if frame pointer elimination is disabled.
bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -320,34 +439,41 @@ bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
}
-int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+bool VEFrameLowering::hasGOT(const MachineFunction &MF) const {
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+
+ // If a global base register is assigned (!= 0), GOT is used.
+ return FuncInfo->getGlobalBaseReg() != 0;
+}
+
+StackOffset VEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const VERegisterInfo *RegInfo = STI.getRegisterInfo();
- const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
bool isFixed = MFI.isFixedObjectIndex(FI);
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
- if (FuncInfo->isLeafProc()) {
- // If there's a leaf proc, all offsets need to be %sp-based,
- // because we haven't caused %fp to actually point to our frame.
+ if (!hasFP(MF)) {
+ // If FP is not used, frame indices are based on the %sp register.
FrameReg = VE::SX11; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+ return StackOffset::getFixed(FrameOffset +
+ MF.getFrameInfo().getStackSize());
}
if (RegInfo->needsStackRealignment(MF) && !isFixed) {
- // If there is dynamic stack realignment, all local object
- // references need to be via %sp or %s17 (bp), to take account
- // of the re-alignment.
+ // If data on the stack requires realignment, frame indices are based on the
+ // %sp or %s17 (bp) register. If there is a variable-sized object, bp is used.
if (hasBP(MF))
FrameReg = VE::SX17; // %bp
else
FrameReg = VE::SX11; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+ return StackOffset::getFixed(FrameOffset +
+ MF.getFrameInfo().getStackSize());
}
- // Finally, default to using %fp.
+ // Use %fp by default.
FrameReg = RegInfo->getFrameRegister(MF);
- return FrameOffset;
+ return StackOffset::getFixed(FrameOffset);
}
bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
@@ -367,8 +493,10 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- if (isLeafProc(MF)) {
- VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>();
- MFI->setLeafProc(true);
+ // Functions that have a BP need to emit a prologue and epilogue to allocate
+ // local buffers on the stack even if the function is a leaf function.
+ if (isLeafProc(MF) && !hasBP(MF)) {
+ VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ FuncInfo->setLeafProc(true);
}
}
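
Aside: the reworked emitSPAdjustment above picks one of three sequences depending on how many bits the stack adjustment needs: a single adds.l for 7-bit signed immediates, a single lea for 32-bit ones, and a lea / and / lea.sl triple (clobbering %s13) for full 64-bit offsets. The sketch below shows only that size-based selection; SPAdjustSeq and isIntN are hypothetical stand-ins for the emitted sequences and for LLVM's isInt<N> helpers.

#include <cstdint>
#include <cstdio>

// Hypothetical classification of the sequences emitSPAdjustment chooses from.
enum class SPAdjustSeq { None, AddImm7, Lea32, Lea64 };

static bool isIntN(int64_t V, unsigned N) { // fits in an N-bit signed field?
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

static SPAdjustSeq classify(int64_t NumBytes) {
  if (NumBytes == 0)
    return SPAdjustSeq::None;    // nothing to do
  if (isIntN(NumBytes, 7))
    return SPAdjustSeq::AddImm7; // adds.l %s11, NumBytes, %s11
  if (isIntN(NumBytes, 32))
    return SPAdjustSeq::Lea32;   // lea %s11, NumBytes(, %s11)
  return SPAdjustSeq::Lea64;     // lea %s13, lo / and %s13, (32)0 / lea.sl
}

int main() {
  std::printf("%d %d %d\n", (int)classify(-48), (int)classify(-4096),
              (int)classify(-(INT64_C(1) << 33))); // AddImm7 Lea32 Lea64
  return 0;
}
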
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
index b548d663c504..99eb41189b25 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
@@ -15,6 +15,7 @@
#include "VE.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -38,8 +39,10 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
- bool hasBP(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
+ bool hasGOT(const MachineFunction &MF) const;
+
// VE reserves argument space always for call sites in the function
// immediately on entry of the current function.
bool hasReservedCallFrame(const MachineFunction &MF) const override {
@@ -48,8 +51,8 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index f3d067d55fdb..761baa79b4ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -113,15 +113,6 @@ inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
return Val;
}
-/// convMImmVal - Convert a mimm integer immediate value to target immediate.
-inline static uint64_t convMImmVal(uint64_t Val) {
- if (Val == 0)
- return 0; // (0)1
- if (Val & (1UL << 63))
- return countLeadingOnes(Val); // (m)1
- return countLeadingZeros(Val) | 0x40; // (m)0
-}
-
//===--------------------------------------------------------------------===//
/// VEDAGToDAGISel - VE specific code to select VE machine
/// instructions for SelectionDAG operations.
@@ -148,6 +139,7 @@ public:
bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+ bool selectADDRzi(SDValue N, SDValue &Base, SDValue &Offset);
StringRef getPassName() const override {
return "VE DAG->DAG Pattern Instruction Selection";
@@ -183,6 +175,14 @@ bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index,
return false;
}
if (matchADDRrr(Addr, LHS, RHS)) {
+ // If the input is a pair of a frame index and a register, move the
+ // frame index to LHS. This generates an MI with the following operands:
+ //   %dest, #FI, %reg, offset
+ // In eliminateFrameIndex, the above MI is converted to the following:
+ // %dest, %fp, %reg, fi_offset + offset
+ if (dyn_cast<FrameIndexSDNode>(RHS))
+ std::swap(LHS, RHS);
+
if (matchADDRri(RHS, Index, Offset)) {
Base = LHS;
return true;
@@ -228,7 +228,7 @@ bool VEDAGToDAGISel::selectADDRzii(SDValue Addr, SDValue &Base, SDValue &Index,
Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
return false; // direct calls.
- if (ConstantSDNode *CN = cast<ConstantSDNode>(Addr)) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) {
if (isInt<32>(CN->getSExtValue())) {
Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
@@ -250,6 +250,26 @@ bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base,
return true;
}
+bool VEDAGToDAGISel::selectADDRzi(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (dyn_cast<FrameIndexSDNode>(Addr))
+ return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false; // direct calls.
+
+ if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) {
+ if (isInt<32>(CN->getSExtValue())) {
+ Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ Offset =
+ CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) {
if (dyn_cast<FrameIndexSDNode>(Addr))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
index ab720545dd83..d377f8e27cfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -13,6 +13,7 @@
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
+#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
@@ -21,6 +22,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -38,39 +40,280 @@ using namespace llvm;
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- switch (LocVT.SimpleTy) {
- case MVT::f32: {
- // Allocate stack like below
- // 0 4
- // +------+------+
- // | empty| float|
- // +------+------+
- // Use align=8 for dummy area to align the beginning of these 2 area.
- State.AllocateStack(4, Align(8)); // for empty area
- // Use align=4 for value to place it at just after the dummy area.
- unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return true;
- }
+#include "VEGenCallingConv.inc"
+
+CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
+ switch (CallConv) {
default:
- return false;
+ return RetCC_VE_C;
+ case CallingConv::Fast:
+ return RetCC_VE_Fast;
}
}
-#include "VEGenCallingConv.inc"
+CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
+ if (IsVarArg)
+ return CC_VE2;
+ switch (CallConv) {
+ default:
+ return CC_VE_C;
+ case CallingConv::Fast:
+ return CC_VE_Fast;
+ }
+}
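+
+// Illustrative sketch (not part of the upstream change): callers select the
+// calling-convention analysis through these helpers, e.g.
+//   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
+//   CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, /*IsVarArg=*/false));
+// so fastcc uses CC_VE_Fast/RetCC_VE_Fast, while varargs argument passing
+// falls back to CC_VE2 for the combined register-and-stack assignment.
+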
bool VETargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- CCAssignFn *RetCC = RetCC_VE;
+ CCAssignFn *RetCC = getReturnCC(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
+static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
+ MVT::v256f32, MVT::v512f32, MVT::v256f64};
+
+static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
+
+void VETargetLowering::initRegisterClasses() {
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &VE::I32RegClass);
+ addRegisterClass(MVT::i64, &VE::I64RegClass);
+ addRegisterClass(MVT::f32, &VE::F32RegClass);
+ addRegisterClass(MVT::f64, &VE::I64RegClass);
+ addRegisterClass(MVT::f128, &VE::F128RegClass);
+
+ if (Subtarget->enableVPU()) {
+ for (MVT VecVT : AllVectorVTs)
+ addRegisterClass(VecVT, &VE::V64RegClass);
+ addRegisterClass(MVT::v256i1, &VE::VMRegClass);
+ addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
+ }
+}
+
+void VETargetLowering::initSPUActions() {
+ const auto &TM = getTargetMachine();
+ /// Load & Store {
+
+ // VE doesn't have i1 sign extending load.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
+
+ // VE doesn't have floating point extload/truncstore, so expand them.
+ for (MVT FPVT : MVT::fp_valuetypes()) {
+ for (MVT OtherFPVT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
+ setTruncStoreAction(FPVT, OtherFPVT, Expand);
+ }
+ }
+
+ // VE doesn't have fp128 load/store, so expand them in custom lower.
+ setOperationAction(ISD::LOAD, MVT::f128, Custom);
+ setOperationAction(ISD::STORE, MVT::f128, Custom);
+
+ /// } Load & Store
+
+ // Custom legalize address nodes into LO/HI parts.
+ MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+ setOperationAction(ISD::JumpTable, PtrVT, Custom);
+
+ /// VAARG handling {
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  // VAARG needs to be lowered to an access with 8-byte alignment.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ /// } VAARG handling
+
+ /// Stack {
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ /// } Stack
+
+ /// Branch {
+
+ // VE doesn't have BRCOND
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ // BR_JT is not implemented yet.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ /// } Branch
+
+ /// Int Ops {
+ for (MVT IntVT : {MVT::i32, MVT::i64}) {
+ // VE has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, IntVT, Expand);
+ setOperationAction(ISD::SREM, IntVT, Expand);
+ setOperationAction(ISD::SDIVREM, IntVT, Expand);
+ setOperationAction(ISD::UDIVREM, IntVT, Expand);
+
+ // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
+ setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
+ setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
+ setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
+
+ // VE has no MULHU/S or U/SMUL_LOHI operations.
+ // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
+ setOperationAction(ISD::MULHU, IntVT, Expand);
+ setOperationAction(ISD::MULHS, IntVT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
+
+ // VE has no CTTZ, ROTL, ROTR operations.
+ setOperationAction(ISD::CTTZ, IntVT, Expand);
+ setOperationAction(ISD::ROTL, IntVT, Expand);
+ setOperationAction(ISD::ROTR, IntVT, Expand);
+
+    // VE has a 64-bit instruction which works as the i64 BSWAP operation.
+    // The same instruction works fine as the i32 BSWAP operation with an
+    // additional parameter.  Use isel patterns to lower BSWAP.
+ setOperationAction(ISD::BSWAP, IntVT, Legal);
+
+    // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
+    // operations.  Use isel patterns for i64, promote for i32.
+ LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
+ setOperationAction(ISD::BITREVERSE, IntVT, Act);
+ setOperationAction(ISD::CTLZ, IntVT, Act);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
+ setOperationAction(ISD::CTPOP, IntVT, Act);
+
+    // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
+ // Use isel patterns for i64, promote for i32.
+ setOperationAction(ISD::AND, IntVT, Act);
+ setOperationAction(ISD::OR, IntVT, Act);
+ setOperationAction(ISD::XOR, IntVT, Act);
+ }
+ /// } Int Ops
+
+ /// Conversion {
+  // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+
+ // fp16 not supported
+ for (MVT FPVT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
+ setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
+ }
+ /// } Conversion
+
+ /// Floating-point Ops {
+ /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
+ /// and fcmp.
+
+  // VE doesn't have the following floating-point operations.
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ }
+
+  // VE doesn't have an fdiv instruction for f128.
+ setOperationAction(ISD::FDIV, MVT::f128, Expand);
+
+ for (MVT FPVT : {MVT::f32, MVT::f64}) {
+    // f32 and f64 use ConstantFP; f128 uses ConstantPool.
+ setOperationAction(ISD::ConstantFP, FPVT, Legal);
+ }
+ /// } Floating-point Ops
+
+ /// Floating-point math functions {
+
+  // VE doesn't have the following floating-point math functions.
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ }
+
+ /// } Floating-point math functions
+
+ /// Atomic instructions {
+
+ setMaxAtomicSizeInBitsSupported(64);
+ setMinCmpXchgSizeInBits(32);
+ setSupportsUnalignedAtomics(false);
+
+ // Use custom inserter for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Other atomic instructions.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ // Support i8/i16 atomic swap.
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
+
+ // FIXME: Support "atmam" instructions.
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+
+    // VE doesn't have the following instructions.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ }
+
+ /// } Atomic instructions
+
+ /// SJLJ instructions {
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+ if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
+ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+ /// } SJLJ instructions
+
+ // Intrinsic instructions
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+}
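+
+// Illustrative sketch (not part of the upstream change): with the Promote
+// actions above, a 32-bit logic operation such as
+//   %r = and i32 %a, %b
+// is widened to i64 by the legalizer and selected with the 64-bit AND
+// instruction, matching the "only 64-bit instructions" comments above.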
+
+void VETargetLowering::initVPUActions() {
+ for (MVT LegalVecVT : AllVectorVTs) {
+ setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
+ // Translate all vector instructions with legal element types to VVP_*
+ // nodes.
+    // TODO: We will custom-widen into VVP_* nodes in the future. While we are
+    // building the infrastructure for this, we only do this for legal vector
+    // VTs.
+#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
+ setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
+#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
+ setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
+#include "VVPNodes.def"
+ }
+
+ for (MVT LegalPackedVT : AllPackedVTs) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
+ }
+}
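+
+// Illustrative sketch (not part of the upstream change): assuming VVPNodes.def
+// contains an entry such as ADD_VVP_OP(VVP_ADD, ADD), the macros above expand,
+// for every legal vector VT, to
+//   setOperationAction(ISD::ADD, LegalVecVT, Custom);
+// which routes the generic node to LowerOperation(), and from there into
+// lowerToVVP().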
+
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -85,7 +328,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext());
// Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_VE);
+ CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -94,6 +337,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
SDValue OutVal = OutVals[i];
// Integer return values must be sign or zero extended by the callee.
@@ -109,12 +353,26 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
case CCValAssign::AExt:
OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
break;
+ case CCValAssign::BCvt: {
+ // Convert a float return value to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Undef = SDValue(
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i64, Undef, OutVal, Sub_f32),
+ 0);
+ break;
+ }
default:
llvm_unreachable("Unknown loc info!");
}
- assert(!VA.needsCustom() && "Unexpected custom lowering");
-
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
// Guarantee that all emitted copies are stuck together with flags.
@@ -138,7 +396,7 @@ SDValue VETargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
// Get the base offset of the incoming arguments stack space.
- unsigned ArgsBaseOffset = 176;
+ unsigned ArgsBaseOffset = Subtarget->getRsaSize();
// Get the size of the preserved arguments area
unsigned ArgsPreserved = 64;
@@ -150,10 +408,11 @@ SDValue VETargetLowering::LowerFormalArguments(
CCInfo.AllocateStack(ArgsPreserved, Align(8));
// We already allocated the preserved area, so the stack offset computed
// by CC_VE would be correct now.
- CCInfo.AnalyzeFormalArguments(Ins, CC_VE);
+ CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
if (VA.isRegLoc()) {
// This argument is passed in a register.
// All integer register arguments are promoted by the caller to i64.
@@ -163,11 +422,6 @@ SDValue VETargetLowering::LowerFormalArguments(
MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
- // Get the high bits for i32 struct elements.
- if (VA.getValVT() == MVT::i32 && VA.needsCustom())
- Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
- DAG.getConstant(32, DL, MVT::i32));
-
// The caller promoted the argument, so insert an Assert?ext SDNode so we
// won't promote the value again in this function.
switch (VA.getLocInfo()) {
@@ -179,6 +433,20 @@ SDValue VETargetLowering::LowerFormalArguments(
Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
DAG.getValueType(VA.getValVT()));
break;
+ case CCValAssign::BCvt: {
+ // Extract a float argument from i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::f32, Arg, Sub_f32),
+ 0);
+ break;
+ }
default:
break;
}
@@ -194,9 +462,23 @@ SDValue VETargetLowering::LowerFormalArguments(
// The registers are exhausted. This argument was passed on the stack.
assert(VA.isMemLoc());
// The CC_VE_Full/Half functions compute stack offsets relative to the
- // beginning of the arguments area at %fp+176.
+    // beginning of the arguments area at %fp + the size of the reserved area.
unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+
+    // Adjust the offset for a float argument by adding 4, since the argument
+    // is stored in an 8-byte buffer with the layout shown below.  LLVM
+    // generates a 4-byte load instruction, so the offset needs adjusting
+    // here.  This adjustment is required only in LowerFormalArguments.  In
+    // LowerCall, a float argument is converted to i64 first and stored as
+    // 8 bytes of data, as required by the ABI, so no adjustment is needed.
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ if (VA.getValVT() == MVT::f32)
+ Offset += 4;
+
int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
InVals.push_back(
DAG.getLoad(VA.getValVT(), DL, Chain,
@@ -215,7 +497,7 @@ SDValue VETargetLowering::LowerFormalArguments(
// TODO: need to calculate offset correctly once we support f128.
unsigned ArgOffset = ArgLocs.size() * 8;
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
- // Skip the 176 bytes of register save area.
+  // Skip the reserved area at the top of the stack.
FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
return Chain;
@@ -258,7 +540,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CLI.IsTailCall = false;
// Get the base offset of the outgoing arguments stack space.
- unsigned ArgsBaseOffset = 176;
+ unsigned ArgsBaseOffset = Subtarget->getRsaSize();
// Get the size of the preserved arguments area
unsigned ArgsPreserved = 8 * 8u;
@@ -270,7 +552,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AllocateStack(ArgsPreserved, Align(8));
// We already allocated the preserved area, so the stack offset computed
// by CC_VE would be correct now.
- CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
+ CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
// VE requires to use both register and stack for varargs or no-prototyped
// functions.
@@ -281,7 +563,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
ArgLocs2, *DAG.getContext());
if (UseBoth)
- CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);
+ CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
@@ -371,6 +653,22 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::BCvt: {
+ // Convert a float argument to i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Undef = SDValue(
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i64, Undef, Arg, Sub_f32),
+ 0);
+ break;
+ }
}
if (VA.isRegLoc()) {
@@ -384,8 +682,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
- // The argument area starts at %fp+176 in the callee frame,
- // %sp+176 in ours.
+      // The argument area starts at %fp/%sp + the size of the reserved area.
SDValue PtrOff =
DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
@@ -450,11 +747,12 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
CLI.Ins[0].Flags.setInReg();
- RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
+ RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
unsigned Reg = VA.getLocReg();
// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
@@ -472,11 +770,6 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InGlue = Chain.getValue(2);
}
- // Get the high bits for i32 struct elements.
- if (VA.getValVT() == MVT::i32 && VA.needsCustom())
- RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
- DAG.getConstant(32, DL, MVT::i32));
-
// The callee promoted the return value, so insert an Assert?ext SDNode so
// we won't promote the value again in this function.
switch (VA.getLocInfo()) {
@@ -488,6 +781,20 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
DAG.getValueType(VA.getValVT()));
break;
+ case CCValAssign::BCvt: {
+ // Extract a float return value from i64 with padding.
+ // 63 31 0
+ // +------+------+
+ // | float| 0 |
+ // +------+------+
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT() == MVT::f32);
+ SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+ RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::f32, RV, Sub_f32),
+ 0);
+ break;
+ }
default:
break;
}
@@ -502,6 +809,15 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return Chain;
}
+bool VETargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+  // VE uses 64-bit addressing, so we need multiple instructions to generate
+  // an address.  Folding an address with an offset increases the number of
+  // instructions, so we disable it here.  Offsets will be folded in the DAG
+  // combiner later if it is worth doing so.
+ return false;
+}
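+
+// Illustrative sketch (not part of the upstream change): as a consequence, an
+// access such as
+//   load i64, i64* getelementptr (i64, i64* @g, i64 2)
+// keeps the +16 as a separate add around the lowered address of @g instead of
+// folding the offset into the TargetGlobalAddress node.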
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -531,30 +847,6 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
-bool VETargetLowering::hasAndNot(SDValue Y) const {
- EVT VT = Y.getValueType();
-
- // VE doesn't have vector and not instruction.
- if (VT.isVector())
- return false;
-
- // VE allows different immediate values for X and Y where ~X & Y.
- // Only simm7 works for X, and only mimm works for Y on VE. However, this
- // function is used to check whether an immediate value is OK for and-not
- // instruction as both X and Y. Generating additional instruction to
- // retrieve an immediate value is no good since the purpose of this
- // function is to convert a series of 3 instructions to another series of
- // 3 instructions with better parallelism. Therefore, we return false
- // for all immediate values now.
- // FIXME: Change hasAndNot function to have two operands to make it work
- // correctly with Aurora VE.
- if (isa<ConstantSDNode>(Y))
- return false;
-
- // It's ok for generic registers.
- return true;
-}
-
VETargetLowering::VETargetLowering(const TargetMachine &TM,
const VESubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -566,91 +858,15 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
- // Set up the register classes.
- addRegisterClass(MVT::i32, &VE::I32RegClass);
- addRegisterClass(MVT::i64, &VE::I64RegClass);
- addRegisterClass(MVT::f32, &VE::F32RegClass);
- addRegisterClass(MVT::f64, &VE::I64RegClass);
-
- /// Load & Store {
- for (MVT FPVT : MVT::fp_valuetypes()) {
- for (MVT OtherFPVT : MVT::fp_valuetypes()) {
- // Turn FP extload into load/fpextend
- setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
-
- // Turn FP truncstore into trunc + store.
- setTruncStoreAction(FPVT, OtherFPVT, Expand);
- }
- }
-
- // VE doesn't have i1 sign extending load
- for (MVT VT : MVT::integer_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setTruncStoreAction(VT, MVT::i1, Expand);
- }
- /// } Load & Store
-
- // Custom legalize address nodes into LO/HI parts.
- MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
- setOperationAction(ISD::BlockAddress, PtrVT, Custom);
- setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
- setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
-
- /// VAARG handling {
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- // VAARG needs to be lowered to access with 8 bytes alignment.
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- // Use the default implementation.
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- /// } VAARG handling
-
- /// Stack {
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
- /// } Stack
-
- /// Int Ops {
- for (MVT IntVT : {MVT::i32, MVT::i64}) {
- // VE has no REM or DIVREM operations.
- setOperationAction(ISD::UREM, IntVT, Expand);
- setOperationAction(ISD::SREM, IntVT, Expand);
- setOperationAction(ISD::SDIVREM, IntVT, Expand);
- setOperationAction(ISD::UDIVREM, IntVT, Expand);
-
- setOperationAction(ISD::CTTZ, IntVT, Expand);
- setOperationAction(ISD::ROTL, IntVT, Expand);
- setOperationAction(ISD::ROTR, IntVT, Expand);
-
- // Use isel patterns for i32 and i64
- setOperationAction(ISD::BSWAP, IntVT, Legal);
- setOperationAction(ISD::CTLZ, IntVT, Legal);
- setOperationAction(ISD::CTPOP, IntVT, Legal);
-
- // Use isel patterns for i64, Promote i32
- LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
- setOperationAction(ISD::BITREVERSE, IntVT, Act);
- }
- /// } Int Ops
-
- /// Conversion {
- // VE doesn't have instructions for fp<->uint, so expand them by llvm
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
-
- // fp16 not supported
- for (MVT FPVT : MVT::fp_valuetypes()) {
- setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
- setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
- }
- /// } Conversion
+ initRegisterClasses();
+ initSPUActions();
+ initVPUActions();
setStackPointerRegisterToSaveRestore(VE::SX11);
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::TRUNCATE);
+
// Set function alignment to 16 bytes
setMinFunctionAlignment(Align(16));
@@ -667,14 +883,24 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((VEISD::NodeType)Opcode) {
case VEISD::FIRST_NUMBER:
break;
- TARGET_NODE_CASE(Lo)
- TARGET_NODE_CASE(Hi)
+ TARGET_NODE_CASE(CALL)
+ TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
+ TARGET_NODE_CASE(EH_SJLJ_SETJMP)
+ TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
TARGET_NODE_CASE(GETFUNPLT)
TARGET_NODE_CASE(GETSTACKTOP)
TARGET_NODE_CASE(GETTLSADDR)
- TARGET_NODE_CASE(CALL)
- TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(GLOBAL_BASE_REG)
+ TARGET_NODE_CASE(Hi)
+ TARGET_NODE_CASE(Lo)
+ TARGET_NODE_CASE(MEMBARRIER)
+ TARGET_NODE_CASE(RET_FLAG)
+ TARGET_NODE_CASE(TS1AM)
+ TARGET_NODE_CASE(VEC_BROADCAST)
+
+ // Register the VVP_* SDNodes.
+#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
+#include "VVPNodes.def"
}
#undef TARGET_NODE_CASE
return nullptr;
@@ -696,10 +922,17 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
0, TF);
+ if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+ return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
+ CP->getAlign(), CP->getOffset(), TF);
+
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
TF);
+ if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
+ return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
+
llvm_unreachable("Unhandled address SDNode");
}
@@ -722,32 +955,24 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
// Handle PIC mode first. VE needs a got load for every variable!
if (isPositionIndependent()) {
- // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
- // function has calls.
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- MFI.setHasCalls(true);
auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
- if (isa<ConstantPoolSDNode>(Op) ||
+ if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
// Create following instructions for local linkage PIC code.
- // lea %s35, %gotoff_lo(.LCPI0_0)
- // and %s35, %s35, (32)0
- // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
- // adds.l %s35, %s15, %s35 ; %s15 is GOT
- // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+ // lea %reg, label@gotoff_lo
+ // and %reg, %reg, (32)0
+ // lea.sl %reg, label@gotoff_hi(%reg, %got)
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
}
// Create following instructions for not local linkage PIC code.
- // lea %s35, %got_lo(.LCPI0_0)
- // and %s35, %s35, (32)0
- // lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
- // adds.l %s35, %s15, %s35 ; %s15 is GOT
- // ld %s35, (,%s35)
- // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+ // lea %reg, label@got_lo
+ // and %reg, %reg, (32)0
+ // lea.sl %reg, label@got_hi(%reg)
+ // ld %reg, (%reg, %got)
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
VEMCExpr::VK_VE_GOT_LO32, DAG);
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
@@ -770,20 +995,222 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
/// Custom Lower {
-SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
+// The mappings for emitLeading/TrailingFence for VE are designed following
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction *VETargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ switch (Ord) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Acquire:
+ return nullptr; // Nothing to do
+ case AtomicOrdering::Release:
+ case AtomicOrdering::AcquireRelease:
+ return Builder.CreateFence(AtomicOrdering::Release);
+ case AtomicOrdering::SequentiallyConsistent:
+ if (!Inst->hasAtomicStore())
+ return nullptr; // Nothing to do
+ return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+ }
+ llvm_unreachable("Unknown fence ordering in emitLeadingFence");
+}
+
+Instruction *VETargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ switch (Ord) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ llvm_unreachable("Invalid fence: unordered/not-atomic");
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Release:
+ return nullptr; // Nothing to do
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ return Builder.CreateFence(AtomicOrdering::Acquire);
+ case AtomicOrdering::SequentiallyConsistent:
+ return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+ }
+ llvm_unreachable("Unknown fence ordering in emitTrailingFence");
+}
+
+SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // VE uses Release consistency, so we need a fence instruction if it is a
+  // cross-thread fence.
+ if (FenceSSID == SyncScope::System) {
+ switch (FenceOrdering) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ // No need to generate fencem instruction here.
+ break;
+ case AtomicOrdering::Acquire:
+ // Generate "fencem 2" as acquire fence.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(2, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ case AtomicOrdering::Release:
+ // Generate "fencem 1" as release fence.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(1, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ // Generate "fencem 3" as acq_rel and seq_cst fence.
+      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
+      // so seq_cst may require more instructions for them.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(3, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ }
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
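+
+// Illustrative sketch (not part of the upstream change): with the lowering
+// above, cross-thread IR fences map onto the VE barrier as
+//   fence acquire  -> fencem 2
+//   fence release  -> fencem 1
+//   fence acq_rel  -> fencem 3
+//   fence seq_cst  -> fencem 3
+// while monotonic and single-thread fences become the VEISD::MEMBARRIER
+// compiler barrier, which codegens to a no-op.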
+
+TargetLowering::AtomicExpansionKind
+VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
+ if (AI->getOperation() == AtomicRMWInst::Xchg) {
+ return AtomicExpansionKind::None;
+ }
+ // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
+
+ // Otherwise, expand it using compare and exchange instruction to not call
+ // __sync_fetch_and_* functions.
+ return AtomicExpansionKind::CmpXChg;
+}
+
+static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
+ SDValue &Bits) {
+ SDLoc DL(Op);
+ AtomicSDNode *N = cast<AtomicSDNode>(Op);
+ SDValue Ptr = N->getOperand(1);
+ SDValue Val = N->getOperand(2);
+ EVT PtrVT = Ptr.getValueType();
+ bool Byte = N->getMemoryVT() == MVT::i8;
+ // Remainder = AND Ptr, 3
+ // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
+ // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
+ // Bits = Remainder << 3
+ // NewVal = Val << Bits
+ SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
+ SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
+ SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
+ : DAG.getConstant(3, DL, MVT::i32);
+ Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
+ Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
+ return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
+}
+
+static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
+ SDValue Bits) {
+ SDLoc DL(Op);
+ EVT VT = Data.getValueType();
+ bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
+ // NewData = Data >> Bits
+ // Result = NewData & 0xff ; If Byte is true (1 byte)
+ // Result = NewData & 0xffff ; If Byte is false (2 bytes)
+
+ SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
+ return DAG.getNode(ISD::AND, DL, VT,
+ {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
+}
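+
+// Illustrative worked example (not part of the upstream change): for an i8
+// swap where (Ptr & 3) == 2, the helpers above compute
+//   Flag   = 1 << 2 = 4    ; byte-enable flag passed to TS1AM
+//   Bits   = 2 << 3 = 16   ; bit position of the byte within the i32 word
+//   NewVal = Val << 16
+// and, after TS1AM runs on the 4-byte-aligned address, the old byte is
+// recovered as (Data >> 16) & 0xff.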
+
+SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicSDNode *N = cast<AtomicSDNode>(Op);
+
+ if (N->getMemoryVT() == MVT::i8) {
+ // For i8, use "ts1am"
+ // Input:
+ // ATOMIC_SWAP Ptr, Val, Order
+ //
+ // Output:
+ // Remainder = AND Ptr, 3
+ // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
+ // Bits = Remainder << 3
+ // NewVal = Val << Bits
+ //
+ // Aligned = AND Ptr, -4
+ // Data = TS1AM Aligned, Flag, NewVal
+ //
+ // NewData = Data >> Bits
+ // Result = NewData & 0xff ; 1 byte result
+ SDValue Flag;
+ SDValue Bits;
+ SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
+
+ SDValue Ptr = N->getOperand(1);
+ SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
+ {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
+ SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
+ DAG.getVTList(Op.getNode()->getValueType(0),
+ Op.getNode()->getValueType(1)),
+ {N->getChain(), Aligned, Flag, NewVal},
+ N->getMemOperand());
+
+ SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
+ SDValue Chain = TS1AM.getValue(1);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
+ if (N->getMemoryVT() == MVT::i16) {
+ // For i16, use "ts1am"
+ SDValue Flag;
+ SDValue Bits;
+ SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
+
+ SDValue Ptr = N->getOperand(1);
+ SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
+ {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
+ SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
+ DAG.getVTList(Op.getNode()->getValueType(0),
+ Op.getNode()->getValueType(1)),
+ {N->getChain(), Aligned, Flag, NewVal},
+ N->getMemOperand());
+
+ SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
+ SDValue Chain = TS1AM.getValue(1);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
+ // Otherwise, let llvm legalize it.
+ return Op;
+}
+
+SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
-SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
+SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ return makeAddress(Op, DAG);
+}
+
+SDValue VETargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue
-VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
+VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc dl(Op);
+ SDLoc DL(Op);
// Generate the following code:
// t1: ch,glue = callseq_start t0, 0, 0
@@ -799,13 +1226,13 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
DAG.getMachineFunction(), CallingConv::C);
- Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
+ Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
- Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
- DAG.getIntPtrConstant(0, dl, true),
- Chain.getValue(1), dl);
- Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));
+ Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
+ DAG.getIntPtrConstant(0, DL, true),
+ Chain.getValue(1), DL);
+ Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
// GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -820,17 +1247,133 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
return Chain;
}
-SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
+SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
// The current implementation of nld (2.26) doesn't allow local exec model
// code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
// generate the general dynamic model code sequence.
//
// *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
- return LowerToTLSGeneralDynamicModel(Op, DAG);
+ return lowerToTLSGeneralDynamicModel(Op, DAG);
+}
+
+SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ return makeAddress(Op, DAG);
+}
+
+// Lower a f128 load into two f64 loads.
+static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
+ assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
+ unsigned Alignment = LdNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+
+ SDValue Lo64 =
+ DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ EVT AddrVT = LdNode->getBasePtr().getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
+ DAG.getConstant(8, DL, AddrVT));
+ SDValue Hi64 =
+ DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+
+ SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
+
+ // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
+ SDNode *InFP128 =
+ DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
+ SDValue(InFP128, 0), Hi64, SubRegEven);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
+ SDValue(InFP128, 0), Lo64, SubRegOdd);
+ SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
+ SDValue(Hi64.getNode(), 1)};
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
+ return DAG.getMergeValues(Ops, DL);
}
-SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+
+ SDValue BasePtr = LdNode->getBasePtr();
+ if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
+    // Do not expand a load instruction with a frame index here because of
+    // dependency problems.  We expand it later in eliminateFrameIndex().
+ return Op;
+ }
+
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return lowerLoadF128(Op, DAG);
+
+ return Op;
+}
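+
+// Illustrative sketch (not part of the upstream change): for
+//   %v = load fp128, fp128* %p
+// the f128 path above emits two f64 loads, Lo64 from 0(%p) and Hi64 from
+// 8(%p), and rebuilds the value by inserting Hi64 into sub_even and Lo64 into
+// sub_odd of an IMPLICIT_DEF f128 register pair; lowerStoreF128 below is the
+// mirror image.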
+
+// Lower a f128 store into two f64 stores.
+static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
+
+ SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
+ StNode->getValue(), SubRegEven);
+ SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
+ StNode->getValue(), SubRegOdd);
+
+ unsigned Alignment = StNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+
+ // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
+ SDValue OutChains[2];
+ OutChains[0] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
+ StNode->getBasePtr(), MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ EVT AddrVT = StNode->getBasePtr().getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
+ DAG.getConstant(8, DL, AddrVT));
+ OutChains[1] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
+ MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+}
+
+SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue BasePtr = StNode->getBasePtr();
+ if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
+    // Do not expand a store instruction with a frame index here because of
+    // dependency problems.  We expand it later in eliminateFrameIndex().
+ return Op;
+ }
+
+ EVT MemVT = StNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return lowerStoreF128(Op, DAG);
+
+ // Otherwise, ask llvm to expand it.
+ return SDValue();
+}
+
+SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -849,7 +1392,7 @@ SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
SDValue InChain = Node->getOperand(0);
@@ -862,7 +1405,19 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = VAList.getValue(1);
SDValue NextPtr;
- if (VT == MVT::f32) {
+ if (VT == MVT::f128) {
+    // VE f128 values must be stored with 16-byte alignment.  We don't know
+    // the actual alignment of VAList, so we enforce the alignment
+    // dynamically.
+ int Align = 16;
+ VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Align - 1, DL, PtrVT));
+ VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
+ DAG.getConstant(-Align, DL, PtrVT));
+ // Increment the pointer, VAList, by 16 to the next vaarg.
+ NextPtr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
+ } else if (VT == MVT::f32) {
// float --> need special handling like below.
// 0 4
// +------+------+
@@ -955,22 +1510,1325 @@ SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
+ Op.getOperand(0));
+}
+
+static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
+ const VETargetLowering &TLI,
+ const VESubtarget *Subtarget) {
+ SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
+
+ unsigned Depth = Op.getConstantOperandVal(0);
+ const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg = RegInfo->getFrameRegister(MF);
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo());
+ return FrameAddr;
+}
+
+static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
+ const VETargetLowering &TLI,
+ const VESubtarget *Subtarget) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
+
+ if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
+
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Offset = DAG.getConstant(8, DL, VT);
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
+ MachinePointerInfo());
+}
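+
+// Illustrative sketch (not part of the upstream change): with the lowering
+// above, __builtin_return_address(0) becomes a load from 8 bytes past the
+// frame address, and deeper depths first walk the chain of saved frame
+// pointers via lowerFRAMEADDR().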
+
+SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: // Don't custom lower most intrinsics.
+ return SDValue();
+ case Intrinsic::eh_sjlj_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT VT = Op.getSimpleValueType();
+ const VETargetMachine *TM =
+ static_cast<const VETargetMachine *>(&DAG.getTarget());
+
+    // Create the GCC_except_tableXX string.  The real symbol for it will be
+    // generated in EHStreamer::emitExceptionTable() later, so we just
+    // borrow its name here.
+ TM->getStrList()->push_back(std::string(
+ (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
+ SDValue Addr =
+ DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
+ if (isPositionIndependent()) {
+ Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
+ VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
+ SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
+ }
+ return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ }
+ }
+}
+
+static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
+ if (!isa<BuildVectorSDNode>(N))
+ return false;
+ const auto *BVN = cast<BuildVectorSDNode>(N);
+
+ // Find first non-undef insertion.
+ unsigned Idx;
+ for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
+ auto ElemV = BVN->getOperand(Idx);
+ if (!ElemV->isUndef())
+ break;
+ }
+ // Catch the (hypothetical) all-undef case.
+ if (Idx == BVN->getNumOperands())
+ return false;
+ // Remember insertion.
+ UniqueIdx = Idx++;
+ // Verify that all other insertions are undef.
+ for (; Idx < BVN->getNumOperands(); ++Idx) {
+ auto ElemV = BVN->getOperand(Idx);
+ if (!ElemV->isUndef())
+ return false;
+ }
+ return true;
+}
+
+static SDValue getSplatValue(SDNode *N) {
+ if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
+ return BuildVec->getSplatValue();
+ }
+ return SDValue();
+}
+
+SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ unsigned NumEls = Op.getValueType().getVectorNumElements();
+ MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+
+  // If there is just one non-undef element, expand to INSERT_VECTOR_ELT.
+ unsigned UniqueIdx;
+ if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
+ SDValue AccuV = DAG.getUNDEF(Op.getValueType());
+ auto ElemV = Op->getOperand(UniqueIdx);
+ SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
+ ElemV, IdxV);
+ }
+
+ // Else emit a broadcast.
+ if (SDValue ScalarV = getSplatValue(Op.getNode())) {
+ // lower to VEC_BROADCAST
+ MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
+
+ auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
+ return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
+ AVL);
+ }
+
+ // Expand
+ return SDValue();
+}
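+
+// Illustrative sketch (not part of the upstream change): a splat like
+//   (v256i64 build_vector %x, %x, ..., %x)
+// takes the broadcast path above and becomes (VEC_BROADCAST %x, AVL=256),
+// while a build_vector with a single non-undef lane is turned into a plain
+// INSERT_VECTOR_ELT into an undef vector.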
+
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
+ unsigned Opcode = Op.getOpcode();
+ if (ISD::isVPOpcode(Opcode))
+ return lowerToVVP(Op, DAG);
+
+ switch (Opcode) {
default:
llvm_unreachable("Should not custom lower this!");
+ case ISD::ATOMIC_FENCE:
+ return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::ATOMIC_SWAP:
+ return lowerATOMIC_SWAP(Op, DAG);
case ISD::BlockAddress:
- return LowerBlockAddress(Op, DAG);
+ return lowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool:
+ return lowerConstantPool(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP:
+ return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP:
+ return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
+ return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
+ case ISD::FRAMEADDR:
+ return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
+ return lowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
- return LowerGlobalTLSAddress(Op, DAG);
+ return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::JumpTable:
+ return lowerJumpTable(Op, DAG);
+ case ISD::LOAD:
+ return lowerLOAD(Op, DAG);
+ case ISD::RETURNADDR:
+ return lowerRETURNADDR(Op, DAG, *this, Subtarget);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::STORE:
+ return lowerSTORE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG);
+ return lowerVASTART(Op, DAG);
case ISD::VAARG:
- return LowerVAARG(Op, DAG);
+ return lowerVAARG(Op, DAG);
+
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+
+#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#include "VVPNodes.def"
+ return lowerToVVP(Op, DAG);
}
}
/// } Custom Lower
+
+void VETargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ case ISD::ATOMIC_SWAP:
+ // Let LLVM expand atomic swap instruction through LowerOperation.
+ return;
+ default:
+ LLVM_DEBUG(N->dumpr(&DAG));
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ }
+}
+
+/// JumpTable for VE.
+///
+/// VE cannot generate a relocatable symbol in a jump table.  VE cannot
+/// generate expressions using symbols in both the text segment and the data
+/// segment like below.
+/// .4byte .LBB0_2-.LJTI0_0
+/// So, we generate an offset from the top of the function like below as
+/// a custom label.
+/// .4byte .LBB0_2-<function name>
+
+unsigned VETargetLowering::getJumpTableEncoding() const {
+ // Use custom label for PIC.
+ if (isPositionIndependent())
+ return MachineJumpTableInfo::EK_Custom32;
+
+ // Otherwise, use the normal jump table encoding heuristics.
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
+ const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
+ unsigned Uid, MCContext &Ctx) const {
+ assert(isPositionIndependent());
+
+  // Generate a custom label for PIC like below.
+  //    .4byte  .LBB0_2-<function name>
+ const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
+ const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
+ return MCBinaryExpr::createSub(Value, Base, Ctx);
+}
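+
+// Illustrative sketch (not part of the upstream change): with this encoding,
+// the PIC jump table of a function "fun" contains entries such as
+//   .4byte .LBB0_2-fun
+// and getPICJumpTableRelocBase() below supplies the PIC address of "fun"
+// itself, so adding the two yields the address of .LBB0_2.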
+
+SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ assert(isPositionIndependent());
+ SDLoc DL(Table);
+ Function *Function = &DAG.getMachineFunction().getFunction();
+ assert(Function != nullptr);
+ auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
+
+  // In the jump table, we have the following values in PIC mode.
+  //    .4byte  .LBB0_2-<function name>
+  // We need to add this value and the address of this function to generate
+  // the .LBB0_2 label correctly under PIC mode.  So, we want to generate the
+  // following instructions:
+ // lea %reg, fun@gotoff_lo
+ // and %reg, %reg, (32)0
+ // lea.sl %reg, fun@gotoff_hi(%reg, %got)
+  // In order to do so, we need to generate a correctly marked DAG node using
+  // makeHiLoPair.
+ SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
+ SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
+ VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
+ SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
+ return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
+}
+
+Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *TargetBB,
+ const DebugLoc &DL) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+
+ const TargetRegisterClass *RC = &VE::I64RegClass;
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ Register Result = MRI.createVirtualRegister(RC);
+
+ if (isPositionIndependent()) {
+    // Create the following instructions for local linkage PIC code.
+ // lea %Tmp1, TargetBB@gotoff_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
+ } else {
+    // Create the following instructions for non-PIC code.
+ // lea %Tmp1, TargetBB@lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, TargetBB@hi(%Tmp2)
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
+ .addReg(Tmp2, getKillRegState(true))
+ .addImm(0)
+ .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
+ }
+ return Result;
+}
+
+Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ StringRef Symbol, const DebugLoc &DL,
+ bool IsLocal = false,
+ bool IsCall = false) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+
+ const TargetRegisterClass *RC = &VE::I64RegClass;
+ Register Result = MRI.createVirtualRegister(RC);
+
+ if (isPositionIndependent()) {
+ if (IsCall && !IsLocal) {
+      // Create the following instructions for non-local linkage PIC function
+      // calls.  These instructions use the IC and the magic number -24, so we
+      // expand them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
+ // lea %Reg, Symbol@plt_lo(-24)
+ // and %Reg, %Reg, (32)0
+ // sic %s16
+ // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
+ BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
+ .addExternalSymbol("abort");
+ } else if (IsLocal) {
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+      // Create the following instructions for local linkage PIC code.
+ // lea %Tmp1, Symbol@gotoff_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
+ } else {
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ // Create the following instructions for non-local linkage PIC code.
+ // lea %Tmp1, Symbol@got_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ // ld %Result, 0(%Tmp3)
+ Register Tmp3 = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
+ BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
+ .addReg(Tmp3, getKillRegState(true))
+ .addImm(0)
+ .addImm(0);
+ }
+ } else {
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+ // Create the following instructions for non-PIC code.
+ // lea %Tmp1, Symbol@lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %Result, Symbol@hi(%Tmp2)
+ BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
+ BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
+ .addReg(Tmp2, getKillRegState(true))
+ .addImm(0)
+ .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
+ }
+ return Result;
+}
+
+void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB,
+ int FI, int Offset) const {
+ DebugLoc DL = MI.getDebugLoc();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+
+ Register LabelReg =
+ prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
+
+ // Store the address of DispatchBB into the given jmpbuf[1], which holds the
+ // next IC referenced by longjmp (throw) later.
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ addFrameReference(MIB, FI, Offset); // jmpbuf[1]
+ MIB.addReg(LabelReg, getKillRegState(true));
+}
+
+MachineBasicBlock *
+VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = ++MBB->getIterator();
+
+ // Memory Reference.
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
+ Register BufReg = MI.getOperand(1).getReg();
+
+ Register DstReg;
+
+ DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ (void)TRI;
+ Register MainDestReg = MRI.createVirtualRegister(RC);
+ Register RestoreDestReg = MRI.createVirtualRegister(RC);
+
+ // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
+ // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
+ //
+ // ThisMBB:
+ // buf[3] = %s17 iff %s17 is used as BP
+ // buf[1] = RestoreMBB as IC after longjmp
+ // # SjLjSetup RestoreMBB
+ //
+ // MainMBB:
+ // v_main = 0
+ //
+ // SinkMBB:
+ // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
+ // ...
+ //
+ // RestoreMBB:
+ // %s17 = buf[3] iff %s17 is used as BP
+ // v_restore = 1
+ // goto SinkMBB
+
+ MachineBasicBlock *ThisMBB = MBB;
+ MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, MainMBB);
+ MF->insert(I, SinkMBB);
+ MF->push_back(RestoreMBB);
+ RestoreMBB->setHasAddressTaken();
+
+ // Transfer the remainder of BB and its successor edges to SinkMBB.
+ SinkMBB->splice(SinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // ThisMBB:
+ Register LabelReg =
+ prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
+
+ // Store BP in buf[3] iff this function is using BP.
+ const VEFrameLowering *TFI = Subtarget->getFrameLowering();
+ if (TFI->hasBP(*MF)) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(24);
+ MIB.addReg(VE::SX17);
+ MIB.setMemRefs(MMOs);
+ }
+
+ // Store IP in buf[1].
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
+ MIB.addImm(0);
+ MIB.addImm(8);
+ MIB.addReg(LabelReg, getKillRegState(true));
+ MIB.setMemRefs(MMOs);
+
+ // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
+
+ // Insert setup.
+ MIB =
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
+
+ const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ ThisMBB->addSuccessor(MainMBB);
+ ThisMBB->addSuccessor(RestoreMBB);
+
+ // MainMBB:
+ BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MainMBB->addSuccessor(SinkMBB);
+
+ // SinkMBB:
+ BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
+ .addReg(MainDestReg)
+ .addMBB(MainMBB)
+ .addReg(RestoreDestReg)
+ .addMBB(RestoreMBB);
+
+ // RestoreMBB:
+ // Restore BP from buf[3] iff this function is using BP. The address of
+ // buf is in SX10.
+ // FIXME: Better to not use SX10 here
+ if (TFI->hasBP(*MF)) {
+ MachineInstrBuilder MIB =
+ BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
+ MIB.addReg(VE::SX10);
+ MIB.addImm(0);
+ MIB.addImm(24);
+ MIB.setMemRefs(MMOs);
+ }
+ BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1);
+ BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
+ RestoreMBB->addSuccessor(SinkMBB);
+
+ MI.eraseFromParent();
+ return SinkMBB;
+}
+
+MachineBasicBlock *
+VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference.
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
+ Register BufReg = MI.getOperand(0).getReg();
+
+ Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ Register FP = VE::SX9;
+ Register SP = VE::SX11;
+
+ MachineInstrBuilder MIB;
+
+ MachineBasicBlock *ThisMBB = MBB;
+
+ // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
+ //
+ // ThisMBB:
+ // %fp = load buf[0]
+ // %jmp = load buf[1]
+ // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
+ // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
+ // jmp %jmp
+
+ // Reload FP.
+ MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(0);
+ MIB.setMemRefs(MMOs);
+
+ // Reload IP.
+ MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(8);
+ MIB.setMemRefs(MMOs);
+
+ // Copy BufReg to SX10 for later use in setjmp.
+ // FIXME: Better to not use SX10 here
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
+ .addReg(BufReg)
+ .addImm(0);
+
+ // Reload SP.
+ MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
+ MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
+ MIB.addImm(0);
+ MIB.addImm(16);
+ MIB.setMemRefs(MMOs);
+
+ // Jump.
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
+ .addReg(Tmp, getKillRegState(true))
+ .addImm(0);
+
+ MI.eraseFromParent();
+ return ThisMBB;
+}
+
+MachineBasicBlock *
+VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const VEInstrInfo *TII = Subtarget->getInstrInfo();
+ int FI = MFI.getFunctionContextIndex();
+
+ // Get a mapping of the call site numbers to all of the landing pads they're
+ // associated with.
+ DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
+ unsigned MaxCSNum = 0;
+ for (auto &MBB : *MF) {
+ if (!MBB.isEHPad())
+ continue;
+
+ MCSymbol *Sym = nullptr;
+ for (const auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+
+ assert(MI.isEHLabel() && "expected EH_LABEL");
+ Sym = MI.getOperand(0).getMCSymbol();
+ break;
+ }
+
+ if (!MF->hasCallSiteLandingPad(Sym))
+ continue;
+
+ for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
+ CallSiteNumToLPad[CSI].push_back(&MBB);
+ MaxCSNum = std::max(MaxCSNum, CSI);
+ }
+ }
+
+ // Get an ordered list of the machine basic blocks for the jump table.
+ std::vector<MachineBasicBlock *> LPadList;
+ SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
+ LPadList.reserve(CallSiteNumToLPad.size());
+
+ for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
+ for (auto &LP : CallSiteNumToLPad[CSI]) {
+ LPadList.push_back(LP);
+ InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
+ }
+ }
+
+ assert(!LPadList.empty() &&
+ "No landing pad destinations for the dispatch jump table!");
+
+ // The %fn_context is allocated like below (from --print-after=sjljehprepare):
+ // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
+ //
+ // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
+ // First `i64` is callsite, so callsite is FI+8.
+ static const int OffsetIC = 72;
+ static const int OffsetCS = 8;
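+ // For illustration, the layout those offsets assume (field names are
+ // hypothetical; the types come from the alloca above, assuming 8-byte
+ // pointers and i64):
+ //   struct FnContext {     // offset
+ //     i8 *Prev;            //  0
+ //     i64 CallSite;        //  8  -> OffsetCS
+ //     i64 Data[4];         // 16
+ //     i8 *Personality;     // 48
+ //     i8 *LSDA;            // 56
+ //     i8 *JmpBuf[5];       // 64; JmpBuf[1] is at 72 -> OffsetIC
+ //   };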
+
+ // Create the MBBs for the dispatch code as follows:
+ //
+ // ThisMBB:
+ // Prepare DispatchBB address and store it to buf[1].
+ // ...
+ //
+ // DispatchBB:
+ // %s15 = GETGOT iff isPositionIndependent
+ // %callsite = load callsite
+ // brgt.l.t #size of callsites, %callsite, DispContBB
+ //
+ // TrapBB:
+ // Call abort.
+ //
+ // DispContBB:
+ // %breg = address of jump table
+ // %pc = load and calculate next pc from %breg and %callsite
+ // jmp %pc
+
+ // Shove the dispatch's address into the return slot in the function context.
+ MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+ DispatchBB->setIsEHPad(true);
+
+ // TrapBB causes a trap like `assert(0)`.
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(TrapBB);
+
+ MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+ DispatchBB->addSuccessor(DispContBB);
+
+ // Insert MBBs.
+ MF->push_back(DispatchBB);
+ MF->push_back(DispContBB);
+ MF->push_back(TrapBB);
+
+ // Insert code to call abort in the TrapBB.
+ Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
+ /* Local */ false, /* Call */ true);
+ BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
+ .addReg(Abort, getKillRegState(true))
+ .addImm(0)
+ .addImm(0);
+
+ // Insert code into the entry block that creates and registers the function
+ // context.
+ setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
+
+ // Create the jump table and associated information
+ unsigned JTE = getJumpTableEncoding();
+ MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
+ unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+
+ const VERegisterInfo &RI = TII->getRegisterInfo();
+ // Add a register mask with no preserved registers. This results in all
+ // registers being marked as clobbered.
+ BuildMI(DispatchBB, DL, TII->get(VE::NOP))
+ .addRegMask(RI.getNoPreservedMask());
+
+ if (isPositionIndependent()) {
+ // Force generation of GETGOT, since the current implementation doesn't
+ // store the GOT register.
+ BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
+ }
+
+ // IReg is used as an index in a memory operand and therefore can't be SP
+ const TargetRegisterClass *RC = &VE::I64RegClass;
+ Register IReg = MRI.createVirtualRegister(RC);
+ addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
+ OffsetCS);
+ if (LPadList.size() < 64) {
+ BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
+ .addImm(VECC::CC_ILE)
+ .addImm(LPadList.size())
+ .addReg(IReg)
+ .addMBB(TrapBB);
+ } else {
+ assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
+ Register TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(LPadList.size());
+ BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
+ .addImm(VECC::CC_ILE)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(IReg)
+ .addMBB(TrapBB);
+ }
+
+ Register BReg = MRI.createVirtualRegister(RC);
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+ Register Tmp2 = MRI.createVirtualRegister(RC);
+
+ if (isPositionIndependent()) {
+ // Create the following instructions for local linkage PIC code.
+ // lea %Tmp1, .LJTI0_0@gotoff_lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
+ BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
+ BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
+ .addReg(VE::SX15)
+ .addReg(Tmp2, getKillRegState(true))
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
+ } else {
+ // Create the following instructions for non-PIC code.
+ // lea %Tmp1, .LJTI0_0@lo
+ // and %Tmp2, %Tmp1, (32)0
+ // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
+ BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
+ .addImm(0)
+ .addImm(0)
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
+ BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(M0(32));
+ BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
+ .addReg(Tmp2, getKillRegState(true))
+ .addImm(0)
+ .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
+ }
+
+ switch (JTE) {
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // Generate simple block address code for the non-PIC model.
+ // sll %Tmp1, %IReg, 3
+ // lds %TReg, 0(%Tmp1, %BReg)
+ // bcfla %TReg
+
+ Register TReg = MRI.createVirtualRegister(RC);
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+
+ BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
+ .addReg(IReg, getKillRegState(true))
+ .addImm(3);
+ BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
+ .addReg(BReg, getKillRegState(true))
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(0);
+ BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
+ .addReg(TReg, getKillRegState(true))
+ .addImm(0);
+ break;
+ }
+ case MachineJumpTableInfo::EK_Custom32: {
+ // Generate block address code using differences from the function pointer
+ // for the PIC model.
+ // sll %Tmp1, %IReg, 2
+ // ldl.zx %OReg, 0(%Tmp1, %BReg)
+ // Prepare function address in BReg2.
+ // adds.l %TReg, %BReg2, %OReg
+ // bcfla %TReg
+
+ assert(isPositionIndependent());
+ Register OReg = MRI.createVirtualRegister(RC);
+ Register TReg = MRI.createVirtualRegister(RC);
+ Register Tmp1 = MRI.createVirtualRegister(RC);
+
+ BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
+ .addReg(IReg, getKillRegState(true))
+ .addImm(2);
+ BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
+ .addReg(BReg, getKillRegState(true))
+ .addReg(Tmp1, getKillRegState(true))
+ .addImm(0);
+ Register BReg2 =
+ prepareSymbol(*DispContBB, DispContBB->end(),
+ DispContBB->getParent()->getName(), DL, /* Local */ true);
+ BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
+ .addReg(OReg, getKillRegState(true))
+ .addReg(BReg2, getKillRegState(true));
+ BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
+ .addReg(TReg, getKillRegState(true))
+ .addImm(0);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected jump table encoding");
+ }
+
+ // Add the jump table entries as successors to the MBB.
+ SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
+ for (auto &LP : LPadList)
+ if (SeenMBBs.insert(LP).second)
+ DispContBB->addSuccessor(LP);
+
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
+ SmallVector<MachineBasicBlock *, 64> MBBLPads;
+ const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
+ for (MachineBasicBlock *MBB : InvokeBBs) {
+ // Remove the landing pad successor from the invoke block and replace it
+ // with the new dispatch block.
+ // Keep a copy of Successors since it's modified inside the loop.
+ SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
+ MBB->succ_rend());
+ // FIXME: Avoid quadratic complexity.
+ for (auto MBBS : Successors) {
+ if (MBBS->isEHPad()) {
+ MBB->removeSuccessor(MBBS);
+ MBBLPads.push_back(MBBS);
+ }
+ }
+
+ MBB->addSuccessor(DispatchBB);
+
+ // Find the invoke call and mark all of the callee-saved registers as
+ // 'implicit defined' so that they're spilled. This prevents code from
+ // moving instructions to before the EH block, where they will never be
+ // executed.
+ for (auto &II : reverse(*MBB)) {
+ if (!II.isCall())
+ continue;
+
+ DenseMap<Register, bool> DefRegs;
+ for (auto &MOp : II.operands())
+ if (MOp.isReg())
+ DefRegs[MOp.getReg()] = true;
+
+ MachineInstrBuilder MIB(*MF, &II);
+ for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
+ Register Reg = SavedRegs[RI];
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
+ }
+
+ break;
+ }
+ }
+
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (auto &LP : MBBLPads)
+ LP->setIsEHPad(false);
+
+ // The instruction is gone now.
+ MI.eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *
+VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown Custom Instruction!");
+ case VE::EH_SjLj_LongJmp:
+ return emitEHSjLjLongJmp(MI, BB);
+ case VE::EH_SjLj_SetJmp:
+ return emitEHSjLjSetJmp(MI, BB);
+ case VE::EH_SjLj_Setup_Dispatch:
+ return emitSjLjDispatchBlock(MI, BB);
+ }
+}
+
+static bool isI32Insn(const SDNode *User, const SDNode *N) {
+ switch (User->getOpcode()) {
+ default:
+ return false;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SETCC:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::BSWAP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::BR_CC:
+ case ISD::BITCAST:
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ return true;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() != ISD::SRL)
+ return true;
+ // (srl (trunc (srl ...))) may be optimized by combining srl, so don't
+ // optimize the trunc here.
+ return false;
+ case ISD::SELECT_CC:
+ if (User->getOperand(2).getNode() != N &&
+ User->getOperand(3).getNode() != N)
+ return true;
+ LLVM_FALLTHROUGH;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SELECT:
+ case ISD::CopyToReg:
+ // Check all uses of selections, bit operations, and copies. If all of them
+ // are safe, optimize truncate to extract_subreg.
+ for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
+ UI != UE; ++UI) {
+ switch ((*UI)->getOpcode()) {
+ default:
+ // If the use is an instruction which treats the source operand as i32,
+ // it is safe to avoid truncate here.
+ if (isI32Insn(*UI, N))
+ continue;
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ // Special optimizations for the combination of ext and trunc.
+ // (ext ... (select ... (trunc ...))) is safe to avoid a truncate here
+ // since this truncate instruction clears the upper 32 bits, which are
+ // filled by one of the ext instructions later.
+ assert(N->getValueType(0) == MVT::i32 &&
+ "find truncate to not i32 integer");
+ if (User->getOpcode() == ISD::SELECT_CC ||
+ User->getOpcode() == ISD::SELECT)
+ continue;
+ break;
+ }
+ }
+ return false;
+ }
+ return true;
+ }
+}
+
+// Optimize TRUNCATE in DAG combining. Optimizing it in custom lowering is
+// sometimes too early, and optimizing it in DAG pattern matching in
+// VEInstrInfo.td is sometimes too late. So, do it here.
+SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::TRUNCATE &&
+ "Should be called with a TRUNCATE node");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // We prefer to do this when all types are legal.
+ if (!DCI.isAfterLegalizeDAG())
+ return SDValue();
+
+ // Skip combining TRUNCATE for now if the operand of TRUNCATE might be a
+ // constant.
+ if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
+ isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+ return SDValue();
+
+ // Check all use of this TRUNCATE.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
+ ++UI) {
+ SDNode *User = *UI;
+
+ // Make sure that we're not going to replace TRUNCATE for non-i32
+ // instructions.
+ //
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (isI32Insn(User, N))
+ continue;
+
+ return SDValue();
+ }
+
+ SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
+ return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
+ N->getOperand(0), SubI32),
+ 0);
+}
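+// For illustration (a hypothetical example, not taken from an existing test):
+// if %a is an i64 whose i32 truncation feeds only i32 arithmetic such as
+// ISD::ADD, the combine above rewrites
+//   %t = truncate i64 %a to i32
+// into
+//   %t = EXTRACT_SUBREG %a, sub_i32
+// so no explicit truncate instruction is emitted.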
+
+SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::TRUNCATE:
+ return combineTRUNCATE(N, DCI);
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// VE Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+VETargetLowering::ConstraintType
+VETargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'v': // vector registers
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint,
+ MVT VT) const {
+ const TargetRegisterClass *RC = nullptr;
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ case 'r':
+ RC = &VE::I64RegClass;
+ break;
+ case 'v':
+ RC = &VE::V64RegClass;
+ break;
+ }
+ return std::make_pair(0U, RC);
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// VE Target Optimization Support
+//===----------------------------------------------------------------------===//
+
+unsigned VETargetLowering::getMinimumJumpTableEntries() const {
+ // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
+ if (isJumpTableRelative())
+ return 8;
+
+ return TargetLowering::getMinimumJumpTableEntries();
+}
+
+bool VETargetLowering::hasAndNot(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+ // VE doesn't have a vector and-not instruction.
+ if (VT.isVector())
+ return false;
+
+ // VE allows different immediate values for X and Y in ~X & Y.
+ // Only simm7 works for X, and only mimm works for Y on VE. However, this
+ // function is used to check whether an immediate value is OK as both X and
+ // Y of an and-not instruction. Generating an additional instruction to
+ // materialize an immediate value is not worthwhile, since the purpose of
+ // this function is to convert a series of 3 instructions to another series
+ // of 3 instructions with better parallelism. Therefore, we return false
+ // for all immediate values for now.
+ // FIXME: Change hasAndNot function to have two operands to make it work
+ // correctly with Aurora VE.
+ if (isa<ConstantSDNode>(Y))
+ return false;
+
+ // It's ok for generic registers.
+ return true;
+}
+
+/// \returns the VVP_* SDNode opcode corresponding to \p Opcode.
+static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
+ case ISD::VPOPC: \
+ return VEISD::VVPNAME;
+#define ADD_VVP_OP(VVPNAME, SDNAME) \
+ case VEISD::VVPNAME: \
+ case ISD::SDNAME: \
+ return VEISD::VVPNAME;
+#include "VVPNodes.def"
+ }
+ return None;
+}
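+// For illustration (assuming VVPNodes.def pairs ISD::ADD with VEISD::VVP_ADD;
+// that file is not part of this hunk), the ADD_VVP_OP entry expands to:
+//   case VEISD::VVP_ADD:
+//   case ISD::ADD:
+//     return VEISD::VVP_ADD;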
+
+SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
+ // Can we represent this as a VVP node?
+ const unsigned Opcode = Op->getOpcode();
+ auto VVPOpcodeOpt = getVVPOpcode(Opcode);
+ if (!VVPOpcodeOpt.hasValue())
+ return SDValue();
+ unsigned VVPOpcode = VVPOpcodeOpt.getValue();
+ const bool FromVP = ISD::isVPOpcode(Opcode);
+
+ // The representative and legalized vector type of this operation.
+ SDLoc DL(Op);
+ MVT MaskVT = MVT::v256i1; // TODO: packed mode.
+ EVT OpVecVT = Op.getValueType();
+ EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+
+ SDValue AVL;
+ SDValue Mask;
+
+ if (FromVP) {
+ // All upstream VP SDNodes always have a mask and avl.
+ auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
+ auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
+ Mask = Op->getOperand(MaskIdx);
+ AVL = Op->getOperand(AVLIdx);
+
+ } else {
+ // Materialize the VL parameter.
+ AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
+ SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
+ Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
+ ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+ }
+
+ // Categories we are interested in.
+ bool IsBinaryOp = false;
+
+ switch (VVPOpcode) {
+#define ADD_BINARY_VVP_OP(VVPNAME, ...) \
+ case VEISD::VVPNAME: \
+ IsBinaryOp = true; \
+ break;
+#include "VVPNodes.def"
+ }
+
+ if (IsBinaryOp) {
+ assert(LegalVecVT.isSimple());
+ return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
+ Op->getOperand(1), Mask, AVL);
+ }
+ llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+}
+
+SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+
+ // Special treatment for packed V64 types.
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32);
+ // Example of generated code:
+ // %packed_v = extractelt %vr, %idx / 2
+ // %v = %packed_v >> (((%idx & 1) ^ 1) * 32)
+ // %res = %v & 0xffffffff
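+ // For example (derived from the shift computation below): with %idx = 2 the
+ // element lives in the upper half of %vr[1], so the shift is
+ // ((2 & 1) ^ 1) * 32 = 32; with %idx = 3 the shift is 0 and the low half is
+ // selected.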
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ SDLoc DL(Op);
+ SDValue Result = Op;
+ if (0 /* Idx->isConstant() */) {
+ // TODO: optimized implementation using constant values
+ } else {
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
+ SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
+ SDValue PackedElt =
+ SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
+ SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
+ SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
+ SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
+ Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
+ PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
+ SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
+ PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
+ SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
+ Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, PackedElt, SubI32),
+ 0);
+
+ if (Op.getSimpleValueType() == MVT::f32) {
+ Result = DAG.getBitcast(MVT::f32, Result);
+ } else {
+ assert(Op.getSimpleValueType() == MVT::i32);
+ }
+ }
+ return Result;
+}
+
+SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+
+ // Special treatment for packed V64 types.
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32);
+ // The v512i32 and v512f32 types start from the upper bits (0..31). These
+ // "upper bits" require `val << 32` from a C implementation's point of view.
+ //
+ // Example of generated code:
+ // %packed_elt = extractelt %vr, (%idx >> 1)
+ // %shift = ((%idx & 1) ^ 1) << 5
+ // %packed_elt &= 0xffffffff00000000 >> shift
+ // %packed_elt |= (zext %val) << shift
+ // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
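+ // For example (derived from the sequence above): with %idx = 4 the shift is
+ // ((4 & 1) ^ 1) << 5 = 32, so the mask keeps the low half of the packed
+ // element and (zext %val) << 32 places the new value into the upper half.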
+
+ SDLoc DL(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ if (Idx.getSimpleValueType() == MVT::i32)
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
+ if (Val.getSimpleValueType() == MVT::f32)
+ Val = DAG.getBitcast(MVT::i32, Val);
+ assert(Val.getSimpleValueType() == MVT::i32);
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+
+ SDValue Result = Op;
+ if (0 /* Idx->isConstant()*/) {
+ // TODO: optimized implementation using constant values
+ } else {
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
+ SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
+ SDValue PackedElt =
+ SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
+ SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
+ SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
+ SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
+ Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
+ SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
+ Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
+ PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
+ Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
+ PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
+ Result =
+ SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
+ {HalfIdx, PackedElt, Vec}),
+ 0);
+ }
+ return Result;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
index 4633220efaa1..a6e1bf396035 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
@@ -24,23 +24,36 @@ namespace VEISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- Hi,
- Lo, // Hi/Lo operations, typically on a global address.
-
- GETFUNPLT, // load function address through %plt insturction
- GETTLSADDR, // load address for TLS access
- GETSTACKTOP, // retrieve address of stack top (first address of
- // locals and temporaries)
-
- CALL, // A call instruction.
- RET_FLAG, // Return with a flag operand.
- GLOBAL_BASE_REG, // Global base reg for PIC.
+ CALL, // A call instruction.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
+ GETFUNPLT, // Load function address through %plt instruction.
+ GETTLSADDR, // Load address for TLS access.
+ GETSTACKTOP, // Retrieve address of stack top (first address of
+ // locals and temporaries).
+ GLOBAL_BASE_REG, // Global base reg for PIC.
+ Hi, // Hi/Lo operations, typically on a global address.
+ Lo, // Hi/Lo operations, typically on a global address.
+ MEMBARRIER, // Compiler barrier only; generate a no-op.
+ RET_FLAG, // Return with a flag operand.
+ TS1AM, // A TS1AM instruction used for 1- and 2-byte atomic swaps.
+ VEC_BROADCAST, // A vector broadcast instruction.
+ // 0: scalar value, 1: VL
+
+// VVP_* nodes.
+#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
+#include "VVPNodes.def"
};
}
class VETargetLowering : public TargetLowering {
const VESubtarget *Subtarget;
+ void initRegisterClasses();
+ void initSPUActions();
+ void initVPUActions();
+
public:
VETargetLowering(const TargetMachine &TM, const VESubtarget &STI);
@@ -74,23 +87,98 @@ public:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
SelectionDAG &DAG) const override;
+ /// Helper functions for atomic operations.
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override {
+ // VE uses release consistency, so we need a fence for each atomic operation.
+ return true;
+ }
+ Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
/// Custom Lower {
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+ unsigned getJumpTableEncoding() const override;
+ const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned Uid,
+ MCContext &Ctx) const override;
+ SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const override;
+ // VE doesn't need getPICJumpTableRelocBaseExpr since it is only used for
+ // EK_LabelDifference32.
+
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
+ /// Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
+ /// Custom Inserter {
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const override;
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
+ void setupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB, int FI,
+ int Offset) const;
+ // Setup basic block address.
+ Register prepareMBB(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock *TargetBB, const DebugLoc &DL) const;
+ // Prepare function/variable address.
+ Register prepareSymbol(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ StringRef Symbol, const DebugLoc &DL, bool IsLocal,
+ bool IsCall) const;
+ /// } Custom Inserter
+
+ /// VVP Lowering {
+ SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
+ /// } VVPLowering
+
+ /// Custom DAGCombine {
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+ /// } Custom DAGCombine
+
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
/// Returns true if the target allows unaligned memory accesses of the
@@ -99,10 +187,32 @@ public:
MachineMemOperand::Flags Flags,
bool *Fast) const override;
- // Block s/udiv lowering for now
- bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+ /// Inline Assembly {
+
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
+ /// } Inline Assembly
+ /// Target Optimization {
+
+ // Return the lower limit for the number of blocks in a jump table.
+ unsigned getMinimumJumpTableEntries() const override;
+
+ // SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
+ bool isIntDivCheap(EVT, AttributeList) const override { return false; }
+ // VE doesn't have rem.
+ bool hasStandaloneRem(EVT) const override { return false; }
+ // VE LDZ instruction returns 64 if the input is zero.
+ bool isCheapToSpeculateCtlz() const override { return true; }
+ // VE LDZ instruction is fast.
+ bool isCtlzFast() const override { return true; }
+ // VE has NND instruction.
bool hasAndNot(SDValue Y) const override;
+
+ /// } Target Optimization
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h
new file mode 100644
index 000000000000..1b0e07546931
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h
@@ -0,0 +1,41 @@
+//===-- VEInstrBuilder.h - Aides for building VE insts ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H
+#define LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool ThreeOp = true) {
+ if (ThreeOp)
+ return MIB.addFrameIndex(FI).addImm(0).addImm(Offset);
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
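+// A typical use, mirroring how the SjLj custom inserters in VEISelLowering.cpp
+// call this helper (the surrounding MBB/MI/DL/TII/FI/Offset/SrcReg values are
+// assumed to be in scope):
+//   MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII->get(VE::STrii));
+//   addFrameReference(MIB, FI, Offset); // adds FrameIndex, 0, Offset
+//   MIB.addReg(SrcReg);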
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
index 0c02411ff916..f43c9755f1b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
@@ -35,6 +35,25 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern>
let AsmString = asmstr;
let Pattern = pattern;
+ bits<1> VE_Vector = 0;
+ bits<1> VE_VLInUse = 0;
+ bits<3> VE_VLIndex = 0;
+ bits<1> VE_VLWithMask = 0;
+
+ /// These fields correspond to the fields in VEInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ ///
+ /// VLIndex is the index of the VL register in MI's operands. The HW
+ /// instruction doesn't have that field, but we add it in MI for the ease of
+ /// optimization. For example, the index of VL of (VST $sy, $sz, $sx, $vl)
+ /// is 3 (beginning from 0), and the index of VL of
+ /// (VST $sy, $sz, $sx, $vm, $vl) is 4. We define vector instructions
+ /// hierarchically, so we use VE_VLIndex, which is defined by the type of
+ /// instruction, and VE_VLWithMask, which indicates whether the instruction
+ /// uses a mask or not.
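+ /// For example (illustration only): an unmasked VST-style instruction with
+ /// VE_VLIndex = 3 stores 3 in TSFlags{4-2}, while its masked variant with
+ /// VE_VLWithMask = 1 stores 3 + 1 = 4, matching the operand positions above.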
+ let TSFlags{0} = VE_Vector;
+ let TSFlags{1} = VE_VLInUse;
+ let TSFlags{4-2} = !add(VE_VLIndex, VE_VLWithMask);
+
let DecoderNamespace = "VE";
field bits<64> SoftFail = 0;
}
@@ -179,12 +198,82 @@ class RRFENCE<bits<8>opVal, dag outs, dag ins, string asmstr,
//-----------------------------------------------------------------------------
// Section 5.6 RVM Type
+//
+// RVM type is for vector transfer instructions.
//-----------------------------------------------------------------------------
+class RVM<bits<8>opVal, dag outs, dag ins, string asmstr,
+ list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<1> vc = 0;
+ bits<1> cs = 0;
+ bits<4> m = 0;
+ bits<1> cy = 1;
+ bits<7> sy;
+ bits<1> cz = 1;
+ bits<7> sz;
+ bits<8> vx;
+ bits<8> vy = 0;
+ bits<7> sw = 0;
+ let op = opVal;
+ let Inst{55} = cx;
+ let Inst{54} = vc;
+ let Inst{53} = cs;
+ let Inst{52} = 0;
+ let Inst{51-48} = m;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-24} = vx;
+ let Inst{23-16} = vy;
+ let Inst{15-8} = 0;
+ let Inst{7} = 0;
+ let Inst{6-0} = sw;
+
+ let VE_Vector = 1;
+}
+
//-----------------------------------------------------------------------------
// Section 5.7 RV Type
+//
+// RV type is for vector instructions.
//-----------------------------------------------------------------------------
+class RV<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<1> cx2 = 0;
+ bits<1> cs = 0;
+ bits<1> cs2 = 0;
+ bits<4> m = 0;
+ bits<1> cy = 1;
+ bits<7> sy;
+ bits<1> cz = 0;
+ bits<7> sz = 0;
+ bits<8> vx = 0;
+ bits<8> vy = 0;
+ bits<8> vz = 0;
+ bits<8> vw = 0;
+ let op = opVal;
+ let Inst{55} = cx;
+ let Inst{54} = cx2;
+ let Inst{53} = cs;
+ let Inst{52} = cs2;
+ let Inst{51-48} = m;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-24} = vx;
+ let Inst{23-16} = vy;
+ let Inst{15-8} = vz;
+ let Inst{7-0} = vw;
+
+ let VE_Vector = 1;
+}
+
// Pseudo instructions.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstVE<outs, ins, asmstr, pattern> {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
index 86b2ac2078b1..9770052ff913 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -92,38 +92,46 @@ static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) {
llvm_unreachable("Invalid cond code");
}
-// Treat br.l [BRCF AT] as unconditional branch
+// Treat a branch relative long always instruction as an unconditional branch.
+// For example, br.l.t and br.l.
static bool isUncondBranchOpcode(int Opc) {
- return Opc == VE::BRCFLa || Opc == VE::BRCFWa ||
- Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt ||
- Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t ||
- Opc == VE::BRCFDa || Opc == VE::BRCFSa ||
- Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt ||
- Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t;
+ using namespace llvm::VE;
+
+#define BRKIND(NAME) (Opc == NAME##a || Opc == NAME##a_nt || Opc == NAME##a_t)
+ // VE has other branch relative always instructions for word/double/float,
+ // but we use only long branches in our lowering. So, sanity check it here.
+ assert(!BRKIND(BRCFW) && !BRKIND(BRCFD) && !BRKIND(BRCFS) &&
+ "Branch relative word/double/float always instructions should not be "
+ "used!");
+ return BRKIND(BRCFL);
+#undef BRKIND
}
+// Treat branch relative conditional instructions as conditional branches.
+// For example, brgt.l.t and brle.s.nt.
static bool isCondBranchOpcode(int Opc) {
- return Opc == VE::BRCFLrr || Opc == VE::BRCFLir ||
- Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt ||
- Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t ||
- Opc == VE::BRCFWrr || Opc == VE::BRCFWir ||
- Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt ||
- Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t ||
- Opc == VE::BRCFDrr || Opc == VE::BRCFDir ||
- Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt ||
- Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t ||
- Opc == VE::BRCFSrr || Opc == VE::BRCFSir ||
- Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt ||
- Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t;
+ using namespace llvm::VE;
+
+#define BRKIND(NAME) \
+ (Opc == NAME##rr || Opc == NAME##rr_nt || Opc == NAME##rr_t || \
+ Opc == NAME##ir || Opc == NAME##ir_nt || Opc == NAME##ir_t)
+ return BRKIND(BRCFL) || BRKIND(BRCFW) || BRKIND(BRCFD) || BRKIND(BRCFS);
+#undef BRKIND
}
+// Treat branch long always instructions as indirect branches.
+// For example, b.l.t and b.l.
static bool isIndirectBranchOpcode(int Opc) {
- return Opc == VE::BCFLari || Opc == VE::BCFLari ||
- Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt ||
- Opc == VE::BCFLari_t || Opc == VE::BCFLari_t ||
- Opc == VE::BCFLari || Opc == VE::BCFLari ||
- Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt ||
- Opc == VE::BCFLari_t || Opc == VE::BCFLari_t;
+ using namespace llvm::VE;
+
+#define BRKIND(NAME) \
+ (Opc == NAME##ari || Opc == NAME##ari_nt || Opc == NAME##ari_t)
+ // VE has other branch always instructions for word/double/float, but
+ // we use only long branches in our lowering. So, sanity check it here.
+ assert(!BRKIND(BCFW) && !BRKIND(BCFD) && !BRKIND(BCFS) &&
+ "Branch word/double/float always instructions should not be used!");
+ return BRKIND(BCFL);
+#undef BRKIND
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
@@ -311,11 +319,43 @@ bool VEInstrInfo::reverseBranchCondition(
}
static bool IsAliasOfSX(Register Reg) {
- return VE::I8RegClass.contains(Reg) || VE::I16RegClass.contains(Reg) ||
- VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) ||
+ return VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) ||
VE::F32RegClass.contains(Reg);
}
+static void copyPhysSubRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
+ const MCInstrDesc &MCID, unsigned int NumSubRegs,
+ const unsigned *SubRegIdx,
+ const TargetRegisterInfo *TRI) {
+ MachineInstr *MovMI = nullptr;
+
+ for (unsigned Idx = 0; Idx != NumSubRegs; ++Idx) {
+ Register SubDest = TRI->getSubReg(DestReg, SubRegIdx[Idx]);
+ Register SubSrc = TRI->getSubReg(SrcReg, SubRegIdx[Idx]);
+ assert(SubDest && SubSrc && "Bad sub-register");
+
+ if (MCID.getOpcode() == VE::ORri) {
+ // generate "ORri, dest, src, 0" instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0);
+ MovMI = MIB.getInstr();
+ } else if (MCID.getOpcode() == VE::ANDMmm) {
+ // generate "ANDM, dest, vm0, src" instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, DL, MCID, SubDest).addReg(VE::VM0).addReg(SubSrc);
+ MovMI = MIB.getInstr();
+ } else {
+ llvm_unreachable("Unexpected reg-to-reg copy instruction");
+ }
+ }
+ // Add implicit super-register defs and kills to the last MovMI.
+ MovMI->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ MovMI->addRegisterKilled(SrcReg, TRI, true);
+}
+
void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
MCRegister DestReg, MCRegister SrcReg,
@@ -325,6 +365,41 @@ void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0);
+ } else if (VE::V64RegClass.contains(DestReg, SrcReg)) {
+ // Generate the following instructions:
+ // %sw16 = LEA32zii 256
+ // VORmvl %dest, (0)1, %src, %sw16
+ // TODO: reuse a register if vl is already assigned to a register
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving SX16 all of the time.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ Register TmpReg = VE::SX16;
+ Register SubTmp = TRI->getSubReg(TmpReg, VE::sub_i32);
+ BuildMI(MBB, I, DL, get(VE::LEAzii), TmpReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(256);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(VE::VORmvl), DestReg)
+ .addImm(M1(0)) // Represent (0)1.
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SubTmp, getKillRegState(true));
+ MIB.getInstr()->addRegisterKilled(TmpReg, TRI, true);
+ } else if (VE::VMRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(VE::ANDMmm), DestReg)
+ .addReg(VE::VM0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (VE::VM512RegClass.contains(DestReg, SrcReg)) {
+ // Use two instructions.
+ const unsigned SubRegIdx[] = {VE::sub_vm_even, VE::sub_vm_odd};
+ unsigned int NumSubRegs = 2;
+ copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ANDMmm),
+ NumSubRegs, SubRegIdx, &getRegisterInfo());
+ } else if (VE::F128RegClass.contains(DestReg, SrcReg)) {
+ // Use two instructions.
+ const unsigned SubRegIdx[] = {VE::sub_even, VE::sub_odd};
+ unsigned int NumSubRegs = 2;
+ copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri),
+ NumSubRegs, SubRegIdx, &getRegisterInfo());
} else {
const TargetRegisterInfo *TRI = &getRegisterInfo();
dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI)
@@ -342,7 +417,8 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
if (MI.getOpcode() == VE::LDrii || // I64
MI.getOpcode() == VE::LDLSXrii || // I32
- MI.getOpcode() == VE::LDUrii // F32
+ MI.getOpcode() == VE::LDUrii || // F32
+ MI.getOpcode() == VE::LDQrii // F128 (pseudo)
) {
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() &&
@@ -363,7 +439,8 @@ unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
if (MI.getOpcode() == VE::STrii || // I64
MI.getOpcode() == VE::STLrii || // I32
- MI.getOpcode() == VE::STUrii // F32
+ MI.getOpcode() == VE::STUrii || // F32
+ MI.getOpcode() == VE::STQrii // F128 (pseudo)
) {
if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() &&
@@ -412,6 +489,13 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addImm(0)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO);
+ } else if (VE::F128RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(VE::STQrii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
} else
report_fatal_error("Can't store this register to stack slot");
}
@@ -449,10 +533,194 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0)
.addImm(0)
.addMemOperand(MMO);
+ } else if (VE::F128RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(VE::LDQrii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
} else
report_fatal_error("Can't load this register from stack slot");
}
+bool VEInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg, MachineRegisterInfo *MRI) const {
+ LLVM_DEBUG(dbgs() << "FoldImmediate\n");
+
+ LLVM_DEBUG(dbgs() << "checking DefMI\n");
+ int64_t ImmVal;
+ switch (DefMI.getOpcode()) {
+ default:
+ return false;
+ case VE::ORim:
+ // General move small immediate instruction on VE.
+ LLVM_DEBUG(dbgs() << "checking ORim\n");
+ LLVM_DEBUG(DefMI.dump());
+ // FIXME: We may need to support FPImm too.
+ assert(DefMI.getOperand(1).isImm());
+ assert(DefMI.getOperand(2).isImm());
+ ImmVal =
+ DefMI.getOperand(1).getImm() + mimm2Val(DefMI.getOperand(2).getImm());
+ LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n");
+ break;
+ case VE::LEAzii:
+ // General move immediate instruction on VE.
+ LLVM_DEBUG(dbgs() << "checking LEAzii\n");
+ LLVM_DEBUG(DefMI.dump());
+ // FIXME: We may need to support FPImm too.
+ assert(DefMI.getOperand(2).isImm());
+ if (!DefMI.getOperand(3).isImm())
+ // LEAzii may refer to a label.
+ return false;
+ ImmVal = DefMI.getOperand(2).getImm() + DefMI.getOperand(3).getImm();
+ LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n");
+ break;
+ }
+
+ // Try to fold like below:
+ // %1:i64 = ORim 0, 0(1)
+ // %2:i64 = CMPSLrr %0, %1
+ // To
+ // %2:i64 = CMPSLrm %0, 0(1)
+ //
+ // Another example:
+ // %1:i64 = ORim 6, 0(1)
+ // %2:i64 = CMPSLrr %1, %0
+ // To
+ // %2:i64 = CMPSLir 6, %0
+ //
+ // Support commutable instructions like below:
+ // %1:i64 = ORim 6, 0(1)
+ // %2:i64 = ADDSLrr %1, %0
+ // To
+ // %2:i64 = ADDSLri %0, 6
+ //
+ // FIXME: Need to support i32. The current implementation requires
+ // EXTRACT_SUBREG, so the input has the following COPY and it avoids folding:
+ // %1:i64 = ORim 6, 0(1)
+ // %2:i32 = COPY %1.sub_i32
+ // %3:i32 = ADDSWSXrr %0, %2
+ // FIXME: Need to support shift, cmov, and more instructions.
+ // FIXME: Need to support lvl too, but LVLGen runs after peephole-opt.
+
+ LLVM_DEBUG(dbgs() << "checking UseMI\n");
+ LLVM_DEBUG(UseMI.dump());
+ unsigned NewUseOpcSImm7;
+ unsigned NewUseOpcMImm;
+ enum InstType {
+ rr2ri_rm, // rr -> ri or rm, commutable
+ rr2ir_rm, // rr -> ir or rm
+ } InstType;
+
+ using namespace llvm::VE;
+#define INSTRKIND(NAME) \
+ case NAME##rr: \
+ NewUseOpcSImm7 = NAME##ri; \
+ NewUseOpcMImm = NAME##rm; \
+ InstType = rr2ri_rm; \
+ break
+#define NCINSTRKIND(NAME) \
+ case NAME##rr: \
+ NewUseOpcSImm7 = NAME##ir; \
+ NewUseOpcMImm = NAME##rm; \
+ InstType = rr2ir_rm; \
+ break
+
+ switch (UseMI.getOpcode()) {
+ default:
+ return false;
+
+ INSTRKIND(ADDUL);
+ INSTRKIND(ADDSWSX);
+ INSTRKIND(ADDSWZX);
+ INSTRKIND(ADDSL);
+ NCINSTRKIND(SUBUL);
+ NCINSTRKIND(SUBSWSX);
+ NCINSTRKIND(SUBSWZX);
+ NCINSTRKIND(SUBSL);
+ INSTRKIND(MULUL);
+ INSTRKIND(MULSWSX);
+ INSTRKIND(MULSWZX);
+ INSTRKIND(MULSL);
+ NCINSTRKIND(DIVUL);
+ NCINSTRKIND(DIVSWSX);
+ NCINSTRKIND(DIVSWZX);
+ NCINSTRKIND(DIVSL);
+ NCINSTRKIND(CMPUL);
+ NCINSTRKIND(CMPSWSX);
+ NCINSTRKIND(CMPSWZX);
+ NCINSTRKIND(CMPSL);
+ INSTRKIND(MAXSWSX);
+ INSTRKIND(MAXSWZX);
+ INSTRKIND(MAXSL);
+ INSTRKIND(MINSWSX);
+ INSTRKIND(MINSWZX);
+ INSTRKIND(MINSL);
+ INSTRKIND(AND);
+ INSTRKIND(OR);
+ INSTRKIND(XOR);
+ INSTRKIND(EQV);
+ NCINSTRKIND(NND);
+ NCINSTRKIND(MRG);
+ }
+
+#undef INSTRKIND
+
+ unsigned NewUseOpc;
+ unsigned UseIdx;
+ bool Commute = false;
+ LLVM_DEBUG(dbgs() << "checking UseMI operands\n");
+ switch (InstType) {
+ case rr2ri_rm:
+ UseIdx = 2;
+ if (UseMI.getOperand(1).getReg() == Reg) {
+ Commute = true;
+ } else {
+ assert(UseMI.getOperand(2).getReg() == Reg);
+ }
+ if (isInt<7>(ImmVal)) {
+ // This ImmVal fits in the SImm7 slot, so change UseOpc to an instruction
+ // that holds a simm7 slot.
+ NewUseOpc = NewUseOpcSImm7;
+ } else if (isMImmVal(ImmVal)) {
+ // Similarly, change UseOpc to an instruction that holds a mimm slot.
+ NewUseOpc = NewUseOpcMImm;
+ ImmVal = val2MImm(ImmVal);
+ } else
+ return false;
+ break;
+ case rr2ir_rm:
+ if (UseMI.getOperand(1).getReg() == Reg) {
+ // Check whether the immediate value matches the UseMI instruction.
+ if (!isInt<7>(ImmVal))
+ return false;
+ NewUseOpc = NewUseOpcSImm7;
+ UseIdx = 1;
+ } else {
+ assert(UseMI.getOperand(2).getReg() == Reg);
+ // Check whether the immediate value matches the UseMI instruction.
+ if (!isMImmVal(ImmVal))
+ return false;
+ NewUseOpc = NewUseOpcMImm;
+ ImmVal = val2MImm(ImmVal);
+ UseIdx = 2;
+ }
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << "modifying UseMI\n");
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI.setDesc(get(NewUseOpc));
+ if (Commute) {
+ UseMI.getOperand(1).setReg(UseMI.getOperand(UseIdx).getReg());
+ }
+ UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
+ if (DeleteDef)
+ DefMI.eraseFromParent();
+
+ return true;
+}
+
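For reference on the immediate forms this fold relies on: the defined constant is only folded when it fits a signed 7-bit immediate (simm7) or an "M" immediate, written (m)0 for m leading zero bits followed by ones and (m)1 for m leading one bits followed by zeros, with the (m)0 form encoded as m plus 64 (so "(63)0" is 1 and "(32)0" is 0xffffffff, as the patterns later in this diff note). The in-tree checks are the isMImmVal/val2MImm helpers used above; the standalone C++ sketch below only illustrates that encoding rule under these assumptions and is not the in-tree implementation.

#include <cassert>
#include <cstdint>

// Sketch only: does Val have the shape of a VE "M" immediate?
// (m)0 form: m leading 0 bits followed by 1 bits, e.g. 0x00000000000000ff.
// (m)1 form: m leading 1 bits followed by 0 bits, e.g. 0xffffffff00000000.
static bool isMImmSketch(uint64_t Val) {
  if (Val == 0)
    return true;                   // (0)1: all bits zero
  if (((Val + 1) & Val) == 0)
    return true;                   // (m)0: a low-order mask
  uint64_t Inv = ~Val;
  return ((Inv + 1) & Inv) == 0;   // (m)1: the complement is a low-order mask
}

// Sketch only: encode an M immediate in the 7-bit form used by the patterns
// in this backend, where m + 64 denotes (m)0 and m alone denotes (m)1.
static unsigned toMImmSketch(uint64_t Val) {
  unsigned Lead = 0;
  if (Val == 0)
    return 0;                      // (0)1
  if (Val >> 63) {                 // leading-ones form
    while (Lead < 64 && ((Val >> (63 - Lead)) & 1))
      ++Lead;
    return Lead;                   // (m)1
  }
  while (Lead < 64 && !((Val >> (63 - Lead)) & 1))
    ++Lead;
  return Lead + 64;                // (m)0
}

int main() {
  assert(isMImmSketch(0xffffffffULL) && toMImmSketch(0xffffffffULL) == 32 + 64); // (32)0
  assert(isMImmSketch(1) && toMImmSketch(1) == 63 + 64);                         // (63)0 == 1
  assert(!isMImmSketch(6)); // 6 is not an M immediate, but it does fit simm7
  return 0;
}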
Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>();
Register GlobalBaseReg = VEFI->getGlobalBaseReg();
@@ -472,6 +740,106 @@ Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
return GlobalBaseReg;
}
+static Register getVM512Upper(Register reg) {
+ return (reg - VE::VMP0) * 2 + VE::VM0;
+}
+
+static Register getVM512Lower(Register reg) { return getVM512Upper(reg) + 1; }
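Each VM512 (VMP) register aliases a pair of VM mask registers, and the arithmetic above maps VMPn to VM(2n) for the upper half and VM(2n+1) for the lower half. A toy model of that mapping, using made-up enum values and assuming the VM and VMP registers are numbered consecutively as defined in VERegisterInfo.td:

#include <cassert>

// Made-up values; only the relative numbering matters for the mapping.
enum ToyReg { VM0 = 100, VMP0 = 200 };

constexpr int vm512Upper(int VMP) { return (VMP - VMP0) * 2 + VM0; }
constexpr int vm512Lower(int VMP) { return vm512Upper(VMP) + 1; }

int main() {
  assert(vm512Upper(VMP0 + 3) == VM0 + 6); // VMP3 -> VM6 holds the upper half
  assert(vm512Lower(VMP0 + 3) == VM0 + 7); // VMP3 -> VM7 holds the lower half
  return 0;
}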
+
+// Expand pseudo logical vector instructions for VM512 registers.
+static void expandPseudoLogM(MachineInstr &MI, const MCInstrDesc &MCID) {
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register VMXu = getVM512Upper(MI.getOperand(0).getReg());
+ Register VMXl = getVM512Lower(MI.getOperand(0).getReg());
+ Register VMYu = getVM512Upper(MI.getOperand(1).getReg());
+ Register VMYl = getVM512Lower(MI.getOperand(1).getReg());
+
+ switch (MI.getOpcode()) {
+ default: {
+ Register VMZu = getVM512Upper(MI.getOperand(2).getReg());
+ Register VMZl = getVM512Lower(MI.getOperand(2).getReg());
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu).addUse(VMZu);
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl).addUse(VMZl);
+ break;
+ }
+ case VE::NEGMy:
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu);
+ BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl);
+ break;
+ }
+ MI.eraseFromParent();
+}
+
+static void addOperandsForVFMK(MachineInstrBuilder &MIB, MachineInstr &MI,
+ bool Upper) {
+ // VM512
+ MIB.addReg(Upper ? getVM512Upper(MI.getOperand(0).getReg())
+ : getVM512Lower(MI.getOperand(0).getReg()));
+
+ switch (MI.getNumExplicitOperands()) {
+ default:
+ report_fatal_error("unexpected number of operands for pvfmk");
+ case 2: // _Ml: VM512, VL
+ // VL
+ MIB.addReg(MI.getOperand(1).getReg());
+ break;
+ case 4: // _Mvl: VM512, CC, VR, VL
+ // CC
+ MIB.addImm(MI.getOperand(1).getImm());
+ // VR
+ MIB.addReg(MI.getOperand(2).getReg());
+ // VL
+ MIB.addReg(MI.getOperand(3).getReg());
+ break;
+ case 5: // _MvMl: VM512, CC, VR, VM512, VL
+ // CC
+ MIB.addImm(MI.getOperand(1).getImm());
+ // VR
+ MIB.addReg(MI.getOperand(2).getReg());
+ // VM512
+ MIB.addReg(Upper ? getVM512Upper(MI.getOperand(3).getReg())
+ : getVM512Lower(MI.getOperand(3).getReg()));
+ // VL
+ MIB.addReg(MI.getOperand(4).getReg());
+ break;
+ }
+}
+
+static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) {
+  // Replace a pseudo vfmk instruction with pvfmk.w.up and pvfmk.w.lo, or
+  // with pvfmk.s.up and pvfmk.s.lo.
+
+ static std::map<unsigned, std::pair<unsigned, unsigned>> VFMKMap = {
+ {VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}},
+ {VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}},
+ {VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}},
+ {VE::VFMKWyvyl, {VE::PVFMKWUPvml, VE::PVFMKWLOvml}},
+ {VE::VFMKSyvl, {VE::PVFMKSUPvl, VE::PVFMKSLOvl}},
+ {VE::VFMKSyvyl, {VE::PVFMKSUPvml, VE::PVFMKSLOvml}},
+ };
+
+ unsigned Opcode = MI.getOpcode();
+
+ auto Found = VFMKMap.find(Opcode);
+ if (Found == VFMKMap.end())
+ report_fatal_error("unexpected opcode for pseudo vfmk");
+
+ unsigned OpcodeUpper = (*Found).second.first;
+ unsigned OpcodeLower = (*Found).second.second;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineInstrBuilder Bu = BuildMI(*MBB, MI, DL, TI.get(OpcodeUpper));
+ addOperandsForVFMK(Bu, MI, /* Upper */ true);
+ MachineInstrBuilder Bl = BuildMI(*MBB, MI, DL, TI.get(OpcodeLower));
+ addOperandsForVFMK(Bl, MI, /* Upper */ false);
+
+ MI.eraseFromParent();
+}
+
bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case VE::EXTEND_STACK: {
@@ -484,6 +852,110 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case VE::GETSTACKTOP: {
return expandGetStackTopPseudo(MI);
}
+
+ case VE::ANDMyy:
+ expandPseudoLogM(MI, get(VE::ANDMmm));
+ return true;
+ case VE::ORMyy:
+ expandPseudoLogM(MI, get(VE::ORMmm));
+ return true;
+ case VE::XORMyy:
+ expandPseudoLogM(MI, get(VE::XORMmm));
+ return true;
+ case VE::EQVMyy:
+ expandPseudoLogM(MI, get(VE::EQVMmm));
+ return true;
+ case VE::NNDMyy:
+ expandPseudoLogM(MI, get(VE::NNDMmm));
+ return true;
+ case VE::NEGMy:
+ expandPseudoLogM(MI, get(VE::NEGMm));
+ return true;
+
+ case VE::LVMyir:
+ case VE::LVMyim:
+ case VE::LVMyir_y:
+ case VE::LVMyim_y: {
+ Register VMXu = getVM512Upper(MI.getOperand(0).getReg());
+ Register VMXl = getVM512Lower(MI.getOperand(0).getReg());
+ int64_t Imm = MI.getOperand(1).getImm();
+ bool IsSrcReg =
+ MI.getOpcode() == VE::LVMyir || MI.getOpcode() == VE::LVMyir_y;
+ Register Src = IsSrcReg ? MI.getOperand(2).getReg() : VE::NoRegister;
+ int64_t MImm = IsSrcReg ? 0 : MI.getOperand(2).getImm();
+ bool KillSrc = IsSrcReg ? MI.getOperand(2).isKill() : false;
+ Register VMX = VMXl;
+ if (Imm >= 4) {
+ VMX = VMXu;
+ Imm -= 4;
+ }
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ switch (MI.getOpcode()) {
+ case VE::LVMyir:
+ BuildMI(*MBB, MI, DL, get(VE::LVMir))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addReg(Src, getKillRegState(KillSrc));
+ break;
+ case VE::LVMyim:
+ BuildMI(*MBB, MI, DL, get(VE::LVMim))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addImm(MImm);
+ break;
+ case VE::LVMyir_y:
+ assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() &&
+ "LVMyir_y has different register in 3rd operand");
+ BuildMI(*MBB, MI, DL, get(VE::LVMir_m))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addReg(Src, getKillRegState(KillSrc))
+ .addReg(VMX);
+ break;
+ case VE::LVMyim_y:
+ assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() &&
+ "LVMyim_y has different register in 3rd operand");
+ BuildMI(*MBB, MI, DL, get(VE::LVMim_m))
+ .addDef(VMX)
+ .addImm(Imm)
+ .addImm(MImm)
+ .addReg(VMX);
+ break;
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+ case VE::SVMyi: {
+ Register Dest = MI.getOperand(0).getReg();
+ Register VMZu = getVM512Upper(MI.getOperand(1).getReg());
+ Register VMZl = getVM512Lower(MI.getOperand(1).getReg());
+ bool KillSrc = MI.getOperand(1).isKill();
+ int64_t Imm = MI.getOperand(2).getImm();
+ Register VMZ = VMZl;
+ if (Imm >= 4) {
+ VMZ = VMZu;
+ Imm -= 4;
+ }
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm);
+ MachineInstr *Inst = MIB.getInstr();
+ MI.eraseFromParent();
+ if (KillSrc) {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true);
+ }
+ return true;
+ }
+ case VE::VFMKyal:
+ case VE::VFMKynal:
+ case VE::VFMKWyvl:
+ case VE::VFMKWyvyl:
+ case VE::VFMKSyvl:
+ case VE::VFMKSyvyl:
+ expandPseudoVFMK(*this, MI);
}
return false;
}
@@ -586,8 +1058,8 @@ bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const VEFrameLowering &TFL = *STI.getFrameLowering();
- // The VE ABI requires a reserved 176 bytes area at the top
- // of stack as described in VESubtarget.cpp. So, we adjust it here.
+ // The VE ABI requires a reserved area at the top of stack as described
+ // in VEFrameLowering.cpp. So, we adjust it here.
unsigned NumBytes = STI.getAdjustedFrameSize(0);
// Also adds the size of parameter area.
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
index 7b6662df1d60..ed1f49182150 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
@@ -23,6 +23,31 @@ namespace llvm {
class VESubtarget;
+/// VEII - This namespace holds all of the Aurora VE target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// VEInstrFormats.td.
+namespace VEII {
+enum {
+ // Aurora VE Instruction Flags. These flags describe the characteristics of
+ // the Aurora VE instructions for vector handling.
+
+ /// VE_Vector - This instruction is Vector Instruction.
+ VE_Vector = 0x1,
+
+ /// VE_VLInUse - This instruction has a vector register in its operands.
+ VE_VLInUse = 0x2,
+
+  /// VE_VLMask/Shift - This is a bitmask that selects the operand index where
+  /// an instruction holds its vector length information (0 to 6; 7 means undef).
+ VE_VLShift = 2,
+ VE_VLMask = 0x07 << VE_VLShift,
+};
+
+#define HAS_VLINDEX(TSF) ((TSF)&VEII::VE_VLInUse)
+#define GET_VLINDEX(TSF) \
+ (HAS_VLINDEX(TSF) ? (int)(((TSF)&VEII::VE_VLMask) >> VEII::VE_VLShift) : -1)
+} // end namespace VEII
+
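As a rough illustration of how a later pass such as LVLGen (mentioned in the FoldImmediate FIXME above) might consume these flags, the self-contained sketch below mirrors the enum and the GET_VLINDEX logic with a made-up flag combination; it is not code from the tree.

#include <cassert>
#include <cstdint>

// Mirrors the VEII flag layout above so the example stands alone.
enum {
  VE_Vector = 0x1,
  VE_VLInUse = 0x2,
  VE_VLShift = 2,
  VE_VLMask = 0x07 << VE_VLShift,
};

// Same behaviour as GET_VLINDEX: the operand index that carries the vector
// length, or -1 when the instruction has no such operand.
static int getVLIndex(uint64_t TSF) {
  return (TSF & VE_VLInUse) ? int((TSF & VE_VLMask) >> VE_VLShift) : -1;
}

int main() {
  // Hypothetical instruction whose operand 3 carries the vector length.
  uint64_t TSFlags = VE_Vector | VE_VLInUse | (3u << VE_VLShift);
  assert(getVLIndex(TSFlags) == 3);
  assert(getVLIndex(VE_Vector) == -1); // no VL operand recorded
  return 0;
}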
class VEInstrInfo : public VEGenInstrInfo {
const VERegisterInfo RI;
virtual void anchor();
@@ -75,6 +100,13 @@ public:
const TargetRegisterInfo *TRI) const override;
/// } Stack Spill & Reload
+ /// Optimization {
+
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
+ MachineRegisterInfo *MRI) const override;
+
+ /// } Optimization
+
Register getGlobalBaseReg(MachineFunction *MF) const;
// Lower pseudo instructions after register allocation.
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
index 8500f8ef1292..b6862cf7b30d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -48,7 +48,7 @@ def LO7 : SDNodeXForm<imm, [{
SDLoc(N), MVT::i32);
}]>;
def MIMM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)),
+ return CurDAG->getTargetConstant(val2MImm(getImmVal(N)),
SDLoc(N), MVT::i32);
}]>;
def LO32 : SDNodeXForm<imm, [{
@@ -66,7 +66,7 @@ def LO7FP : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32);
}]>;
def MIMMFP : SDNodeXForm<fpimm, [{
- return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)),
+ return CurDAG->getTargetConstant(val2MImm(getFpImmVal(N)),
SDLoc(N), MVT::i32);
}]>;
def LOFP32 : SDNodeXForm<fpimm, [{
@@ -157,6 +157,15 @@ def uimm3 : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = UImm3AsmOperand;
}
+// uimm4 - Generic immediate value.
+def UImm4AsmOperand : AsmOperandClass {
+ let Name = "UImm4";
+}
+def uimm4 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<4>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm4AsmOperand;
+}
+
// uimm6 - Generic immediate value.
def UImm6AsmOperand : AsmOperandClass {
let Name = "UImm6";
@@ -196,6 +205,12 @@ def mimm : Operand<i32>, PatLeaf<(imm), [{
let PrintMethod = "printMImmOperand";
}
+// zerofp - Generic fp immediate zero value.
+def zerofp : Operand<i32>, PatLeaf<(fpimm), [{
+ return getFpImmVal(N) == 0; }]> {
+ let ParserMatchClass = ZeroAsmOperand;
+}
+
// simm7fp - Generic fp immediate value.
def simm7fp : Operand<i32>, PatLeaf<(fpimm), [{
return isInt<7>(getFpImmVal(N));
@@ -230,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000)
== 0; }]>;
def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
== 0; }]>;
+def nonzero : PatLeaf<(imm), [{ return N->getSExtValue() != 0; }]>;
def CCSIOp : PatLeaf<(cond), [{
switch (N->get()) {
@@ -430,6 +446,17 @@ def retflag : SDNode<"VEISD::RET_FLAG", SDTNone,
def getGOT : Operand<iPTR>;
+def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def VEeh_sjlj_setup_dispatch: SDNode<"VEISD::EH_SJLJ_SETUP_DISPATCH",
+ SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
// GETFUNPLT for PIC
def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>;
@@ -442,6 +469,16 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
+// MEMBARRIER
+def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+// TS1AM
+def SDT_TS1AM : SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>, SDTCisInt<3>]>;
+def ts1am : SDNode<"VEISD::TS1AM", SDT_TS1AM,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// VE Flag Conditions
@@ -497,7 +534,8 @@ multiclass RRbm<string opcStr, bits<8>opc,
RegisterClass RCo, ValueType Tyo,
RegisterClass RCi, ValueType Tyi,
SDPatternOperator OpNode = null_frag,
- Operand immOp = simm7, Operand mOp = mimm> {
+ Operand immOp = simm7, Operand mOp = mimm,
+ bit MoveImm = 0> {
def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
!strconcat(opcStr, " $sx, $sy, $sz"),
[(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>;
@@ -514,7 +552,12 @@ multiclass RRbm<string opcStr, bits<8>opc,
let cy = 0, cz = 0 in
def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz),
!strconcat(opcStr, " $sx, $sy, $sz"),
- [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>;
+ [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]> {
+ // VE uses ORim as a move immediate instruction, so declare it here.
+ // An instruction declared as MoveImm will be optimized in FoldImmediate
+ // later.
+ let isMoveImm = MoveImm;
+ }
}
// Multiclass for non-commutative RR type instructions
@@ -546,8 +589,8 @@ multiclass RRNCbm<string opcStr, bits<8>opc,
multiclass RRm<string opcStr, bits<8>opc,
RegisterClass RC, ValueType Ty,
SDPatternOperator OpNode = null_frag,
- Operand immOp = simm7, Operand mOp = mimm> :
- RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>;
+ Operand immOp = simm7, Operand mOp = mimm, bit MoveImm = 0> :
+ RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp, MoveImm>;
// Generic RR multiclass for non-commutative instructions with 2 arguments.
// e.g. SUBUL, SUBUW, SUBSWSX, and etc.
@@ -775,10 +818,10 @@ multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond,
let bpf = 0 /* NONE */ in
def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
!strconcat(opcStr, " ", cmpStr, "$addr")>;
- let bpf = 2 /* NOT TaKEN */ in
+ let bpf = 2 /* NOT TAKEN */ in
def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
!strconcat(opcStr, ".nt ", cmpStr, "$addr")>;
- let bpf = 3 /* TaKEN */ in
+ let bpf = 3 /* TAKEN */ in
def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
!strconcat(opcStr, ".t ", cmpStr, "$addr")>;
}
@@ -807,18 +850,25 @@ multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
let bpf = 0 /* NONE */ in
def "" : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
!strconcat(opcStr, " ", cmpStr, "$imm32")>;
- let bpf = 2 /* NOT TaKEN */ in
+ let bpf = 2 /* NOT TAKEN */ in
def _nt : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
!strconcat(opcStr, ".nt ", cmpStr, "$imm32")>;
- let bpf = 3 /* TaKEN */ in
+ let bpf = 3 /* TAKEN */ in
def _t : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
!strconcat(opcStr, ".t ", cmpStr, "$imm32")>;
}
multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
- RegisterClass RC, Operand immOp> {
+ RegisterClass RC, Operand immOp, Operand zeroOp> {
defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>;
let cy = 0 in
- defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, RC:$sz)>;
+ defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy,
+ RC:$sz)>;
+ let cz = 0 in
+ defm rz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy,
+ zeroOp:$sz)>;
+ let cy = 0, cz = 0 in
+ defm iz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy,
+ zeroOp:$sz)>;
let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in
defm a : BCRbpfm<opcStrAt, "", opc, (ins)>;
let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in
@@ -898,7 +948,7 @@ multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
//-----------------------------------------------------------------------------
// Multiclass for generic RM instructions
-multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
+multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC, bit MoveImm = 0> {
def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
!strconcat(opcStr, " $dest, $addr"), []>;
let cy = 0 in
@@ -909,36 +959,27 @@ multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
!strconcat(opcStr, " $dest, $addr"), []>;
let cy = 0, cz = 0 in
def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
- !strconcat(opcStr, " $dest, $addr"), []>;
+ !strconcat(opcStr, " $dest, $addr"), []> {
+ // VE uses LEAzii and LEASLzii as a move immediate instruction, so declare
+ // it here. An instruction declared as MoveImm will be optimized in
+ // FoldImmediate later.
+ let isMoveImm = MoveImm;
+ }
}
// Section 8.2.1 - LEA
-let cx = 0, DecoderMethod = "DecodeLoadI64" in
-defm LEA : RMm<"lea", 0x06, I64>;
-let cx = 1, DecoderMethod = "DecodeLoadI64" in
-defm LEASL : RMm<"lea.sl", 0x06, I64>;
-let cx = 0, DecoderMethod = "DecodeLoadI32", isCodeGenOnly = 1 in
-defm LEA32 : RMm<"lea", 0x06, I32>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ DecoderMethod = "DecodeLoadI64" in {
+ let cx = 0 in defm LEA : RMm<"lea", 0x06, I64, /* MoveImm */ 1>;
+ let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64, /* MoveImm */ 1>;
+}
+// LEA basic patterns.
+//   These need to be defined here to prioritize LEA over ADX.
def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
-def : Pat<(add I32:$base, simm32:$disp),
- (LEA32rii (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $base, sub_i32), 0,
- (LO32 $disp))>;
-
-def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
- [(add (add node:$base, node:$idx), node:$disp),
- (add (add node:$base, node:$disp), node:$idx)]>;
-def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
- (LEArii $base, (LO7 $idx), (LO32 $disp))>;
-def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
- (LEArri $base, $idx, (LO32 $disp))>;
-def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
- (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
-def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
- (LEASLrri $base, $idx, (HI32 $disp))>;
// Multiclass for load instructions.
let mayLoad = 1, hasSideEffects = 0 in
@@ -991,6 +1032,13 @@ defm LD1BSX : LOADm<"ld1b.sx", 0x05, I32, i32, sextloadi8>;
let cx = 1, DecoderMethod = "DecodeLoadI32" in
defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>;
+// LDQ pseudo instructions
+let mayLoad = 1, hasSideEffects = 0 in {
+ def LDQrii : Pseudo<(outs F128:$dest), (ins MEMrii:$addr),
+ "# pseudo ldq $dest, $addr",
+ [(set f128:$dest, (load ADDRrii:$addr))]>;
+}
+
// Multiclass for store instructions.
let mayStore = 1 in
multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
@@ -1036,6 +1084,13 @@ defm ST2B : STOREm<"st2b", 0x14, I32, i32, truncstorei16>;
let DecoderMethod = "DecodeStoreI32" in
defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>;
+// STQ pseudo instructions
+let mayStore = 1, hasSideEffects = 0 in {
+ def STQrii : Pseudo<(outs), (ins MEMrii:$addr, F128:$sx),
+ "# pseudo stq $sx, $addr",
+ [(store f128:$sx, ADDRrii:$addr)]>;
+}
+
// Section 8.2.12 - DLDS
let DecoderMethod = "DecodeLoadI64" in
defm DLD : LOADm<"dld", 0x09, I64, i64, load>;
@@ -1074,9 +1129,9 @@ defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>;
// Section 8.2.20 - CAS (Compare and Swap)
let DecoderMethod = "DecodeCASI64" in
-defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>;
+defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7, atomic_cmp_swap_64>;
let DecoderMethod = "DecodeCASI32", cx = 1 in
-defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>;
+defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7, atomic_cmp_swap_32>;
//-----------------------------------------------------------------------------
// Section 8.3 - Transfer Control Instructions
@@ -1106,6 +1161,8 @@ def SVOB : RR<0x30, (outs), (ins), "svob">;
// Section 8.4 - Fixed-point Operation Instructions
//-----------------------------------------------------------------------------
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.4.1 - ADD (Add)
defm ADDUL : RRm<"addu.l", 0x48, I64, i64>;
let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>;
@@ -1128,6 +1185,8 @@ let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>;
// Section 8.4.6 - SBX (Subtract)
defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>;
+} // isReMaterializable, isAsCheapAsAMove
+
// Section 8.4.7 - MPY (Multiply)
defm MULUL : RRm<"mulu.l", 0x49, I64, i64>;
let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>;
@@ -1153,6 +1212,8 @@ let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>;
// Section 8.4.13 - DVX (Divide)
defm DIVSL : RRNCm<"divs.l", 0x7F, I64, i64, sdiv>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.4.14 - CMP (Compare)
defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>;
let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>;
@@ -1175,45 +1236,66 @@ let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>;
defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>;
let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>;
+} // isReMaterializable, isAsCheapAsAMove
+
//-----------------------------------------------------------------------------
// Section 8.5 - Logical Operation Instructions
//-----------------------------------------------------------------------------
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.5.1 - AND (AND)
defm AND : RRm<"and", 0x44, I64, i64, and>;
-let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>;
// Section 8.5.2 - OR (OR)
-defm OR : RRm<"or", 0x45, I64, i64, or>;
-let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>;
+defm OR : RRm<"or", 0x45, I64, i64, or, simm7, mimm, /* MoveImm */ 1>;
// Section 8.5.3 - XOR (Exclusive OR)
defm XOR : RRm<"xor", 0x46, I64, i64, xor>;
-let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>;
// Section 8.5.4 - EQV (Equivalence)
defm EQV : RRm<"eqv", 0x47, I64, i64>;
+} // isReMaterializable, isAsCheapAsAMove
+
// Section 8.5.5 - NND (Negate AND)
def and_not : PatFrags<(ops node:$x, node:$y),
[(and (not node:$x), node:$y)]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
// Section 8.5.6 - MRG (Merge)
defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
// Section 8.5.7 - LDZ (Leading Zero Count)
-defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>;
+def ctlz_pat : PatFrags<(ops node:$src),
+ [(ctlz node:$src),
+ (ctlz_zero_undef node:$src)]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>;
// Section 8.5.8 - PCNT (Population Count)
defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>;
// Section 8.5.9 - BRV (Bit Reverse)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>;
// Section 8.5.10 - BSWP (Byte Swap)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>;
+def : Pat<(i64 (bswap i64:$src)),
+ (BSWPri $src, 0)>;
+def : Pat<(i64 (bswap (i64 mimm:$src))),
+ (BSWPmi (MIMM $src), 0)>;
+def : Pat<(i32 (bswap i32:$src)),
+ (EXTRACT_SUBREG
+ (BSWPri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), 1),
+ sub_i32)>;
+def : Pat<(i32 (bswap (i32 mimm:$src))),
+ (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>;
+
// Section 8.5.11 - CMOV (Conditional Move)
let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>;
let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>;
@@ -1229,17 +1311,21 @@ def : MnemonicAlias<"cmov.s", "cmov.s.at">;
//-----------------------------------------------------------------------------
// Section 8.6.1 - SLL (Shift Left Logical)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SLL : RRIm<"sll", 0x65, I64, i64, shl>;
// Section 8.6.2 - SLD (Shift Left Double)
defm SLD : RRILDm<"sld", 0x64, I64, i64>;
// Section 8.6.3 - SRL (Shift Right Logical)
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SRL : RRIm<"srl", 0x75, I64, i64, srl>;
// Section 8.6.4 - SRD (Shift Right Double)
defm SRD : RRIRDm<"srd", 0x74, I64, i64>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+
// Section 8.6.5 - SLA (Shift Left Arithmetic)
defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>;
let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>;
@@ -1254,6 +1340,8 @@ let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>;
// Section 8.6.8 - SRAX (Shift Right Arithmetic)
defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>;
+} // isReMaterializable, isAsCheapAsAMove
+
def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))),
(EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
$src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>;
@@ -1302,13 +1390,13 @@ let cw = 1, cx = 1 in
defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>;
// Section 8.7.7 - FAQ (Floating Add Quadruple)
-defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>;
+defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128, fadd>;
// Section 8.7.8 - FSQ (Floating Subtract Quadruple)
-defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>;
+defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128, fsub>;
// Section 8.7.9 - FMQ (Floating Subtract Quadruple)
-defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>;
+defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128, fmul>;
// Section 8.7.10 - FCQ (Floating Compare Quadruple)
defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp,
@@ -1339,17 +1427,17 @@ defm CVTDL : CVTm<"cvt.d.l", 0x5F, I64, f64, I64, i64, sint_to_fp>;
// Section 8.7.15 - CVS (Convert to Single-format)
defm CVTSD : CVTm<"cvt.s.d", 0x1F, F32, f32, I64, f64, fpround>;
let cx = 1 in
-defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128>;
+defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128, fpround>;
// Section 8.7.16 - CVD (Convert to Double-format)
defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>;
let cx = 1 in
-defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128>;
+defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128, fpround>;
// Section 8.7.17 - CVQ (Convert to Single-format)
-defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64>;
+defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64, fpextend>;
let cx = 1 in
-defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32>;
+defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32, fpextend>;
//-----------------------------------------------------------------------------
// Section 8.8 - Branch instructions
@@ -1378,13 +1466,13 @@ defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>;
// Section 8.8.4 - BCR (Branch on Condition Relative)
let cx = 0, cx2 = 0 in
-defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>;
+defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7, zero>;
let cx = 1, cx2 = 0 in
-defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>;
+defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7, zero>;
let cx = 0, cx2 = 1 in
-defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>;
+defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp, zerofp>;
let cx = 1, cx2 = 1 in
-defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>;
+defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp, zerofp>;
// Section 8.8.5 - BSIC (Branch and Save IC)
let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in
@@ -1481,11 +1569,23 @@ defm SHMB : SHMm<"shm.b", 0x31, I64>;
// Pattern Matchings
//===----------------------------------------------------------------------===//
+// Basic casts between registers. These are often used in ISel patterns, so
+// define them as OutPatFrags.
+def i2l : OutPatFrag<(ops node:$exp),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>;
+def l2i : OutPatFrag<(ops node:$exp),
+ (EXTRACT_SUBREG $exp, sub_i32)>;
+def f2l : OutPatFrag<(ops node:$exp),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_f32)>;
+def l2f : OutPatFrag<(ops node:$exp),
+ (EXTRACT_SUBREG $exp, sub_f32)>;
+
// Small immediates.
-def : Pat<(i32 simm7:$val), (OR32im (LO7 $val), 0)>;
+def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
// Medium immediates.
-def : Pat<(i32 simm32:$val), (LEA32zii 0, 0, (LO32 $val))>;
+def : Pat<(i32 simm32:$val),
+ (EXTRACT_SUBREG (LEAzii 0, 0, (LO32 $val)), sub_i32)>;
def : Pat<(i64 simm32:$val), (LEAzii 0, 0, (LO32 $val))>;
def : Pat<(i64 uimm32:$val), (ANDrm (LEAzii 0, 0, (LO32 $val)), !add(32, 64))>;
// Arbitrary immediates.
@@ -1497,6 +1597,54 @@ def : Pat<(i64 imm:$val),
(LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0,
(HI32 imm:$val))>;
+// LEA patterns
+def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
+ [(add (add node:$base, node:$idx), node:$disp),
+ (add (add node:$base, node:$disp), node:$idx),
+ (add node:$base, (add $idx, $disp))]>;
+def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
+ (LEArii $base, (LO7 $idx), (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
+ (LEArri $base, $idx, (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
+ (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
+ (LEASLrri $base, $idx, (HI32 $disp))>;
+
+// Address calculation patterns and optimizations
+//
+// Generate the following instructions:
+// 1. LEA %reg, label@LO32
+// AND %reg, %reg, (32)0
+// 2. LEASL %reg, label@HI32
+// 3. (LEA %reg, label@LO32)
+// (AND %reg, %reg, (32)0)
+// LEASL %reg, label@HI32(, %reg)
+// 4. (LEA %reg, label@LO32)
+// (AND %reg, %reg, (32)0)
+// LEASL %reg, label@HI32(%reg, %got)
+//
+def velo_only : OutPatFrag<(ops node:$lo),
+ (ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>;
+def vehi_only : OutPatFrag<(ops node:$hi),
+ (LEASLzii 0, 0, $hi)>;
+def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
+ (LEASLrii $lo, 0, $hi)>;
+def vehi_lo_imm : OutPatFrag<(ops node:$hi, node:$lo, node:$idx),
+ (LEASLrii $lo, $idx, $hi)>;
+def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
+ (LEASLrri $base, $lo, $hi)>;
+foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
+ "tglobaltlsaddr", "tjumptable" ] in {
+ def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>;
+ def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>;
+ def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;
+ def : Pat<(add (add (VEhi !cast<SDNode>(type):$hi), I64:$lo), simm7:$val),
+ (vehi_lo_imm $hi, $lo, (LO7 $val))>;
+ def : Pat<(add I64:$base, (add (VEhi !cast<SDNode>(type):$hi), I64:$lo)),
+ (vehi_baselo $base, $hi, $lo)>;
+}
+
// floating point
def : Pat<(f32 fpimm:$val),
(EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>;
@@ -1526,8 +1674,8 @@ def : Pat<(sext_inreg I64:$src, i8),
(SRALri (SLLri $src, 56), 56)>;
def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8),
(EXTRACT_SUBREG (SRALri (SLLri $src, 56), 56), sub_i32)>;
-def : Pat<(and (trunc i64:$src), 0xff),
- (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(56, 64))>;
+def : Pat<(i32 (and (trunc i64:$src), 0xff)),
+ (EXTRACT_SUBREG (ANDrm $src, !add(56, 64)), sub_i32)>;
// Cast to i16
def : Pat<(sext_inreg I32:$src, i16),
@@ -1536,28 +1684,34 @@ def : Pat<(sext_inreg I64:$src, i16),
(SRALri (SLLri $src, 48), 48)>;
def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16),
(EXTRACT_SUBREG (SRALri (SLLri $src, 48), 48), sub_i32)>;
-def : Pat<(and (trunc i64:$src), 0xffff),
- (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(48, 64))>;
+def : Pat<(i32 (and (trunc i64:$src), 0xffff)),
+ (EXTRACT_SUBREG (ANDrm $src, !add(48, 64)), sub_i32)>;
// Cast to i32
def : Pat<(i32 (trunc i64:$src)),
- (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0)>;
-def : Pat<(i32 (fp_to_sint I64:$reg)), (CVTWDSXr RD_RZ, $reg)>;
-def : Pat<(i32 (fp_to_sint F32:$reg)), (CVTWSSXr RD_RZ, $reg)>;
+ (EXTRACT_SUBREG (ANDrm $src, !add(32, 64)), sub_i32)>;
+def : Pat<(i32 (fp_to_sint f32:$src)), (CVTWSSXr RD_RZ, $src)>;
+def : Pat<(i32 (fp_to_sint f64:$src)), (CVTWDSXr RD_RZ, $src)>;
+def : Pat<(i32 (fp_to_sint f128:$src)), (CVTWDSXr RD_RZ, (CVTDQr $src))>;
// Cast to i64
-def : Pat<(sext_inreg I64:$src, i32),
+def : Pat<(sext_inreg i64:$src, i32),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>;
-def : Pat<(i64 (sext i32:$sy)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $sy, 0), sub_i32)>;
-def : Pat<(i64 (zext i32:$sy)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $sy, 0), sub_i32)>;
-def : Pat<(i64 (fp_to_sint f32:$sy)), (CVTLDr RD_RZ, (CVTDSr $sy))>;
-def : Pat<(i64 (fp_to_sint I64:$reg)), (CVTLDr RD_RZ, $reg)>;
+def : Pat<(i64 (sext i32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $src, 0), sub_i32)>;
+def : Pat<(i64 (zext i32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $src, 0), sub_i32)>;
+def : Pat<(i64 (fp_to_sint f32:$src)), (CVTLDr RD_RZ, (CVTDSr $src))>;
+def : Pat<(i64 (fp_to_sint f64:$src)), (CVTLDr RD_RZ, $src)>;
+def : Pat<(i64 (fp_to_sint f128:$src)), (CVTLDr RD_RZ, (CVTDQr $src))>;
// Cast to f32
-def : Pat<(f32 (sint_to_fp i64:$sy)), (CVTSDr (CVTDLr i64:$sy))>;
+def : Pat<(f32 (sint_to_fp i64:$src)), (CVTSDr (CVTDLr i64:$src))>;
+
+// Cast to f128
+def : Pat<(f128 (sint_to_fp i32:$src)), (CVTQDr (CVTDWr $src))>;
+def : Pat<(f128 (sint_to_fp i64:$src)), (CVTQDr (CVTDLr $src))>;
def : Pat<(i64 (anyext i32:$sy)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>;
@@ -1625,29 +1779,150 @@ defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, ST1Bzii>;
-// Address calculation and its optimization
-def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
-def : Pat<(VElo tglobaladdr:$in),
- (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
- (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
- (tglobaladdr:$in1))>;
-
-// GlobalTLS address calculation and its optimization
-def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>;
-def : Pat<(VElo tglobaltlsaddr:$in),
- (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>;
-def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
- (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0,
- (tglobaltlsaddr:$in1))>;
-
-// Address calculation and its optimization
-def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>;
-def : Pat<(VElo texternalsym:$in),
- (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>;
-def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
- (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0,
- (texternalsym:$in1))>;
+// Atomic loads
+multiclass ATMLDm<SDPatternOperator from,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>;
+ def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>;
+ def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>;
+ def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>;
+}
+defm : ATMLDm<atomic_load_8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+defm : ATMLDm<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
+defm : ATMLDm<atomic_load_64, LDrri, LDrii, LDzri, LDzii>;
+
+// Optimized atomic loads with sext
+multiclass SXATMLDm<SDPatternOperator from, Operand TY,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrri:$addr))), TY)),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrii:$addr))), TY)),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzri:$addr))), TY)),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzii:$addr))), TY)),
+ (i2l (tozii MEMzii:$addr))>;
+}
+multiclass SXATMLD32m<SDPatternOperator from,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (sext (from ADDRrri:$addr))),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (sext (from ADDRrii:$addr))),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (sext (from ADDRzri:$addr))),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (sext (from ADDRzii:$addr))),
+ (i2l (tozii MEMzii:$addr))>;
+}
+defm : SXATMLDm<atomic_load_8, i8, LD1BSXrri, LD1BSXrii, LD1BSXzri, LD1BSXzii>;
+defm : SXATMLDm<atomic_load_16, i16, LD2BSXrri, LD2BSXrii, LD2BSXzri,
+ LD2BSXzii>;
+defm : SXATMLD32m<atomic_load_32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>;
+
+// Optimized atomic loads with zext
+multiclass ZXATMLDm<SDPatternOperator from, Operand VAL,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (and (anyext (from ADDRrri:$addr)), VAL)),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (and (anyext (from ADDRrii:$addr)), VAL)),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (and (anyext (from ADDRzri:$addr)), VAL)),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (and (anyext (from ADDRzii:$addr)), VAL)),
+ (i2l (tozii MEMzii:$addr))>;
+}
+multiclass ZXATMLD32m<SDPatternOperator from, Operand VAL,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(i64 (zext (from ADDRrri:$addr))),
+ (i2l (torri MEMrri:$addr))>;
+ def : Pat<(i64 (zext (from ADDRrii:$addr))),
+ (i2l (torii MEMrii:$addr))>;
+ def : Pat<(i64 (zext (from ADDRzri:$addr))),
+ (i2l (tozri MEMzri:$addr))>;
+ def : Pat<(i64 (zext (from ADDRzii:$addr))),
+ (i2l (tozii MEMzii:$addr))>;
+}
+defm : ZXATMLDm<atomic_load_8, 0xFF, LD1BZXrri, LD1BZXrii, LD1BZXzri,
+ LD1BZXzii>;
+defm : ZXATMLDm<atomic_load_16, 0xFFFF, LD2BZXrri, LD2BZXrii, LD2BZXzri,
+ LD2BZXzii>;
+defm : ZXATMLD32m<atomic_load_32, 0xFFFFFFFF, LDLZXrri, LDLZXrii, LDLZXzri,
+ LDLZXzii>;
+
+// Atomic stores
+multiclass ATMSTm<SDPatternOperator from, ValueType ty,
+ SDPatternOperator torri, SDPatternOperator torii,
+ SDPatternOperator tozri, SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr, ty:$src), (torri MEMrri:$addr, $src)>;
+ def : Pat<(from ADDRrii:$addr, ty:$src), (torii MEMrii:$addr, $src)>;
+ def : Pat<(from ADDRzri:$addr, ty:$src), (tozri MEMzri:$addr, $src)>;
+ def : Pat<(from ADDRzii:$addr, ty:$src), (tozii MEMzii:$addr, $src)>;
+}
+defm : ATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : ATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : ATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
+defm : ATMSTm<atomic_store_64, i64, STrri, STrii, STzri, STzii>;
+
+// Optimized atomic stores with truncate
+multiclass TRATMSTm<SDPatternOperator from,
+ ValueType ty,
+ SDPatternOperator torri,
+ SDPatternOperator torii,
+ SDPatternOperator tozri,
+ SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr, (i32 (trunc i64:$src))),
+ (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from ADDRrii:$addr, (i32 (trunc i64:$src))),
+ (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from ADDRzri:$addr, (i32 (trunc i64:$src))),
+ (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))),
+ (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+}
+defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
+
+// Atomic swaps
+def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)),
+ (TS1AMWrir $src, 0, $flag, $new)>;
+def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)),
+ (TS1AMWrii MEMriRRM:$src, 15, $new)>;
+def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)),
+ (TS1AMLrir MEMriRRM:$src, (LEAzii 0, 0, 255), i64:$new)>;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling patterns
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp : Pseudo<(outs), (ins I64:$buf),
+ "# EH_SJLJ_LONGJMP",
+ [(VEeh_sjlj_longjmp I64:$buf)]>;
+
+ def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf),
+ "# EH_SJLJ_SETJMP",
+ [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>;
+
+ def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH",
+ [(VEeh_sjlj_setup_dispatch)]>;
+}
+
+let isTerminator = 1, isBranch = 1, isCodeGenOnly = 1 in
+ def EH_SjLj_Setup : Pseudo<(outs), (ins brtarget32:$dst),
+                             "# EH_SJLJ_SETUP $dst">;
+
+//===----------------------------------------------------------------------===//
+// Branch related patterns
+//===----------------------------------------------------------------------===//
// Branches
def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;
@@ -1681,6 +1956,8 @@ multiclass BRCCFm<ValueType ty, SDPatternOperator BrOpNode1,
}
defm : BRCCFm<f32, BRCFSrr, BRCFSir>;
defm : BRCCFm<f64, BRCFDrr, BRCFDir>;
+def : Pat<(brcc cond:$cond, f128:$l, f128:$r, bb:$addr),
+ (BRCFDir (fcond2cc $cond), 0, (FCMPQrr $r, $l), bb:$addr)>;
//===----------------------------------------------------------------------===//
// Pseudo Instructions
@@ -1737,53 +2014,42 @@ let Uses = [SX11], hasSideEffects = 1 in
def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
"# GET STACK TOP",
[(set iPTR:$dst, (GetStackTop))]>;
+
+// MEMBARRIER
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
+
+//===----------------------------------------------------------------------===//
+// Other patterns
+//===----------------------------------------------------------------------===//
+
// SETCC pattern matches
//
// CMP %tmp, lhs, rhs ; compare lhs and rhs
// or %res, 0, (0)1 ; initialize by 0
// CMOV %res, (63)0, %tmp ; set 1 if %tmp is true
-def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrm (icond2cc $cond),
- (CMPSLrr i64:$LHS, i64:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrm (icond2cc $cond),
- (CMPULrr i64:$LHS, i64:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrm (icond2cc $cond),
- (CMPSWSXrr i32:$LHS, i32:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrm (icond2cc $cond),
- (CMPUWrr i32:$LHS, i32:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVDrm (fcond2cc $cond),
- (FCMPDrr f64:$LHS, f64:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
-
-def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVSrm (fcond2cc $cond),
- (FCMPSrr f32:$LHS, f32:$RHS),
- !add(63, 64),
- (ORim 0, 0)), sub_i32)>;
+class setccrr<Instruction INSN> :
+ OutPatFrag<(ops node:$cond, node:$comp),
+ (EXTRACT_SUBREG
+ (INSN $cond, $comp,
+ !add(63, 64), // means (63)0 == 1
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i32:$l, i32:$r, CCSIOp:$cond)),
+ (setccrr<CMOVWrm> (icond2cc $cond), (CMPSWSXrr $l, $r))>;
+def : Pat<(i32 (setcc i32:$l, i32:$r, CCUIOp:$cond)),
+ (setccrr<CMOVWrm> (icond2cc $cond), (CMPUWrr $l, $r))>;
+def : Pat<(i32 (setcc i64:$l, i64:$r, CCSIOp:$cond)),
+ (setccrr<CMOVLrm> (icond2cc $cond), (CMPSLrr $l, $r))>;
+def : Pat<(i32 (setcc i64:$l, i64:$r, CCUIOp:$cond)),
+ (setccrr<CMOVLrm> (icond2cc $cond), (CMPULrr $l, $r))>;
+def : Pat<(i32 (setcc f32:$l, f32:$r, cond:$cond)),
+ (setccrr<CMOVSrm> (fcond2cc $cond), (FCMPSrr $l, $r))>;
+def : Pat<(i32 (setcc f64:$l, f64:$r, cond:$cond)),
+ (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPDrr $l, $r))>;
+def : Pat<(i32 (setcc f128:$l, f128:$r, cond:$cond)),
+ (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPQrr $l, $r))>;
// Special SELECTCC pattern matches
// Use min/max for better performance.
@@ -1824,152 +2090,171 @@ def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)),
def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)),
(MINSWSXrr $LHS, $RHS)>;
+// Helper classes to construct cmov patterns easily.
+//
+// These hide the INSERT_SUBREG/EXTRACT_SUBREG boilerplate.
+
+class cmovrr<Instruction INSN> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (INSN $cond, $comp, $t, $f)>;
+class cmovrm<Instruction INSN, SDNodeXForm MOP = MIMM> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (INSN $cond, $comp, (MOP $t), $f)>;
+class cmov32rr<Instruction INSN, SubRegIndex sub_oty> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (EXTRACT_SUBREG
+ (INSN $cond, $comp,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_oty),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)),
+ sub_oty)>;
+class cmov32rm<Instruction INSN, SubRegIndex sub_oty, SDNodeXForm MOP = MIMM> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (EXTRACT_SUBREG
+ (INSN $cond, $comp,
+ (MOP $t),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)),
+ sub_oty)>;
+class cmov128rr<Instruction INSN> :
+ OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+ (INSN $cond, $comp,
+ (EXTRACT_SUBREG $t, sub_odd),
+ (EXTRACT_SUBREG $f, sub_odd)), sub_odd),
+ (INSN $cond, $comp,
+ (EXTRACT_SUBREG $t, sub_even),
+ (EXTRACT_SUBREG $f, sub_even)), sub_even)>;
+
// Generic SELECTCC pattern matches
//
// CMP %tmp, %l, %r ; compare %l and %r
// or %res, %f, (0)1 ; initialize by %f
// CMOV %res, %t, %tmp ; set %t if %tmp is true
-// selectcc for i64 result
-def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)),
- (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
-def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)),
- (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
-
-// selectcc for i32 result
def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPSWSXrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPSWSXrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPUWrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPUWrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPSLrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPSLrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPULrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPULrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVSrr (fcond2cc $cond),
- (FCMPSrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVSrr, sub_i32> (fcond2cc $cond), (FCMPSrr $l, $r),
+ $t, $f)>;
def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVDrr (fcond2cc $cond),
- (FCMPDrr $l, $r),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+ (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPDrr $l, $r),
+ $t, $f)>;
+def : Pat<(i32 (selectcc f128:$l, f128:$r, i32:$t, i32:$f, cond:$cond)),
+ (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPQrr $l, $r),
+ $t, $f)>;
-// selectcc for f64 result
-def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)),
- (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)),
- (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)),
- (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
-def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)),
- (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)),
+ (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f128:$l, f128:$r, i64:$t, i64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
-// selectcc for f32 result
def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPSWSXrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPSWSXrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVWrr (icond2cc $cond),
- (CMPUWrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPUWrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPSLrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPSLrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)),
- (EXTRACT_SUBREG
- (CMOVLrr (icond2cc $cond),
- (CMPULrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPULrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVSrr (fcond2cc $cond),
- (FCMPSrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVSrr, sub_f32> (fcond2cc $cond), (FCMPSrr $l, $r),
+ $t, $f)>;
def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)),
- (EXTRACT_SUBREG
- (CMOVDrr (fcond2cc $cond),
- (FCMPDrr $l, $r),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+ (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPDrr $l, $r),
+ $t, $f)>;
+def : Pat<(f32 (selectcc f128:$l, f128:$r, f32:$t, f32:$f, cond:$cond)),
+ (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPQrr $l, $r),
+ $t, $f)>;
+
+def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)),
+ (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)),
+ (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f128:$l, f128:$r, f64:$t, f64:$f, cond:$cond)),
+ (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
+
+def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCSIOp:$cond)),
+ (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCUIOp:$cond)),
+ (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCSIOp:$cond)),
+ (cmov128rr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCUIOp:$cond)),
+ (cmov128rr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc f32:$l, f32:$r, f128:$t, f128:$f, cond:$cond)),
+ (cmov128rr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)),
+ (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)),
+ (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
// Generic SELECT pattern matches
// Use cmov.w for all cases since %pred holds i32.
//
// CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true
+def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
+ (cmov32rr<CMOVWrr, sub_i32> CC_INE, $pred, $t, $f)>;
+def : Pat<(i32 (select i32:$pred, (i32 mimm:$t), i32:$f)),
+ (cmov32rm<CMOVWrm, sub_i32> CC_INE, $pred, $t, $f)>;
+def : Pat<(i32 (select i32:$pred, i32:$t, (i32 mimm:$f))),
+ (cmov32rm<CMOVWrm, sub_i32> CC_IEQ, $pred, $f, $t)>;
+
def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)),
- (CMOVWrr CC_INE, $pred, $t, $f)>;
+ (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>;
+def : Pat<(i64 (select i32:$pred, (i64 mimm:$t), i64:$f)),
+ (cmovrm<CMOVWrm, MIMM> CC_INE, $pred, $t, $f)>;
+def : Pat<(i64 (select i32:$pred, i64:$t, (i64 mimm:$f))),
+ (cmovrm<CMOVWrm, MIMM> CC_IEQ, $pred, $f, $t)>;
-def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
- (EXTRACT_SUBREG
- (CMOVWrr CC_INE, $pred,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
- sub_i32)>;
+def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
+ (cmov32rr<CMOVWrr, sub_f32> CC_INE, $pred, $t, $f)>;
+def : Pat<(f32 (select i32:$pred, (f32 mimmfp:$t), f32:$f)),
+ (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_INE, $pred, $t, $f)>;
+def : Pat<(f32 (select i32:$pred, f32:$t, (f32 mimmfp:$f))),
+ (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_IEQ, $pred, $f, $t)>;
def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)),
- (CMOVWrr CC_INE, $pred, $t, $f)>;
+ (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>;
+def : Pat<(f64 (select i32:$pred, (f64 mimmfp:$t), f64:$f)),
+ (cmovrm<CMOVWrm, MIMMFP> CC_INE, $pred, $t, $f)>;
+def : Pat<(f64 (select i32:$pred, f64:$t, (f64 mimmfp:$f))),
+ (cmovrm<CMOVWrm, MIMMFP> CC_IEQ, $pred, $f, $t)>;
-def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
- (EXTRACT_SUBREG
- (CMOVWrr CC_INE, $pred,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)),
- sub_f32)>;
+def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)),
+ (cmov128rr<CMOVWrr> CC_INE, $pred, $t, $f)>;
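
For illustration, a minimal C sketch of the kind of select these patterns are
meant to cover; the function name is hypothetical and no exact machine code is
claimed here. Per the comment above, the f64 case is matched with CMOVWrr and
CC_INE on the predicate itself, with no separate compare instruction:

    /* Hypothetical input: an i32 predicate selecting between f64 values.  */
    /* The (f64 (select i32:$pred, ...)) pattern above covers this shape.  */
    double select_f64(int pred, double t, double f) {
      return pred ? t : f;
    }
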
// bitconvert
def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>;
@@ -1982,24 +2267,48 @@ def : Pat<(f32 (bitconvert i32:$op)),
(EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
$op, sub_i32), 32), sub_f32)>;
-// Bits operations pattern matchings.
-def : Pat<(i32 (ctpop i32:$src)),
- (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>;
-def : Pat<(i32 (ctlz i32:$src)),
- (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>;
-def : Pat<(i64 (bswap i64:$src)),
- (BSWPri $src, 0)>;
-def : Pat<(i32 (bswap i32:$src)),
- (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>;
+// Optimize code sequence A, generated for `(unsigned char)c << 5`, into B
+// (a C sketch follows these patterns):
+//   A) sla.w.sx %s0, %s0, 5
+//      lea %s1, 224       ; 0xE0
+//      and %s0, %s0, %s1
+//   B) sla.w.sx %s0, %s0, 5
+//      and %s0, %s0, (56)0
+
+def : Pat<(i32 (and i32:$val, 0xff)),
+ (EXTRACT_SUBREG
+ (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32),
+ !add(56, 64)), sub_i32)>;
+def : Pat<(i32 (and i32:$val, 0xffff)),
+ (EXTRACT_SUBREG
+ (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32),
+ !add(48, 64)), sub_i32)>;
+def : Pat<(i64 (and i64:$val, 0xffffffff)),
+ (ANDrm $val, !add(32, 64))>;
+
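A minimal C sketch of the kind of source behind sequences A and B above; the
function name is hypothetical. Masking with 0xff (the encodable (56)0 form
used by the ANDrm patterns above) gives the same result as masking with 0xe0,
since `c << 5` always has its low five bits clear:

    /* Hypothetical input producing the shift-then-mask shape above.       */
    /* (c << 5) has its low five bits zero, so the 0xe0 mask of sequence A */
    /* and the 0xff mask ((56)0) of sequence B select the same bits.       */
    unsigned char shl5(unsigned char c) {
      return (unsigned char)(c << 5);
    }
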
+//===----------------------------------------------------------------------===//
+// Vector Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+// Custom intermediate ISDs.
+class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
+def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, IsVLVT<2>]>>;
+
+// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
+def true_mask : PatLeaf<
+ (vec_broadcast (i32 nonzero), (i32 srcvalue))>;
+// Match any broadcast (ignoring VL).
+def any_broadcast : PatFrag<(ops node:$sx),
+ (vec_broadcast node:$sx, (i32 srcvalue))>;
+
+// Vector instructions.
+include "VEInstrVec.td"
+
+// The ve_vl intrinsic patterns (vevlintrin).
+include "VEInstrIntrinsicVL.td"
-// Several special pattern matches to optimize code
+// Patterns and intermediate SD nodes (VEC_*).
+include "VEInstrPatternsVec.td"
-def : Pat<(i32 (and i32:$lhs, 0xff)),
- (AND32rm $lhs, !add(56, 64))>;
-def : Pat<(i32 (and i32:$lhs, 0xffff)),
- (AND32rm $lhs, !add(48, 64))>;
-def : Pat<(i32 (and i32:$lhs, 0xffffffff)),
- (AND32rm $lhs, !add(32, 64))>;
+// Patterns and intermediate SD nodes (VVP_*).
+include "VVPInstrPatternsVec.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
new file mode 100644
index 000000000000..9ec10838db05
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -0,0 +1,1604 @@
+def : Pat<(int_ve_vl_vld_vssl i64:$sy, i64:$sz, i32:$vl), (VLDrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld_vssl simm7:$I, i64:$sz, i32:$vl), (VLDirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldunc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldunc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldunc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldunc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldlzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2Drrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2Dirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2DNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vld2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2DNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vld2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2Drrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2Dirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2DNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2DNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldu2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldl2dzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchv_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchv_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchvnc_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVNCrrl i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_pfchvnc_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_lvm_mmss v256i1:$ptm, uimm6:$N, i64:$sz), (LVMir_m (ULO7 $N), i64:$sz, v256i1:$ptm)>;
+def : Pat<(int_ve_vl_lvm_MMss v512i1:$ptm, uimm6:$N, i64:$sz), (LVMyir_y (ULO7 $N), i64:$sz, v512i1:$ptm)>;
+def : Pat<(int_ve_vl_svm_sms v256i1:$vmz, uimm6:$N), (SVMmi v256i1:$vmz, (ULO7 $N))>;
+def : Pat<(int_ve_vl_svm_sMs v512i1:$vmz, uimm6:$N), (SVMyi v512i1:$vmz, (ULO7 $N))>;
+def : Pat<(int_ve_vl_vbrdd_vsl f64:$sy, i32:$vl), (VBRDrl f64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdd_vsvl f64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v f64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdd_vsmvl f64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v f64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsl i64:$sy, i32:$vl), (VBRDrl i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdl_vsvl i64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsmvl i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsl simm7:$I, i32:$vl), (VBRDil (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdl_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDil_v (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdl_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrds_vsl f32:$sy, i32:$vl), (VBRDUrl f32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrds_vsvl f32:$sy, v256f64:$pt, i32:$vl), (VBRDUrl_v f32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrds_vsmvl f32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDUrml_v f32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsl i32:$sy, i32:$vl), (VBRDLrl i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdw_vsvl i32:$sy, v256f64:$pt, i32:$vl), (VBRDLrl_v i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsmvl i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLrml_v i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsl simm7:$I, i32:$vl), (VBRDLil (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vbrdw_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDLil_v (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrdw_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrd_vsl i64:$sy, i32:$vl), (PVBRDrl i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvbrd_vsvl i64:$sy, v256f64:$pt, i32:$vl), (PVBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrd_vsMvl i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRDrml_v i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmv_vsvl uimm7:$N, v256f64:$vz, i32:$vl), (VMVivl (ULO7 $N), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmv_vsvvl uimm7:$N, v256f64:$vz, v256f64:$pt, i32:$vl), (VMVivl_v (ULO7 $N), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmv_vsvmvl uimm7:$N, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMVivml_v (ULO7 $N), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vadduw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vadduw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vadduw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vadduw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvaddu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDUrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvaddu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvaddu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvadds_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvadds_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vaddsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vaddsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBUrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsubs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsubs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsubsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsubsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmuluw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmuluw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmuluw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmuluw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulslw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulslw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulslw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSLWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulslw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmulslw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmulslw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, i64:$sy, i32:$vl), (VDIVULvrl v256f64:$vy, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVULvrl_v v256f64:$vy, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVULvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVULvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVUWvrl v256f64:$vy, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVUWvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVUWvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVUWvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWSXvrl v256f64:$vy, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWSXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWSXvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWSXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWZXvrl v256f64:$vy, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWZXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWZXvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWZXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, i64:$sy, i32:$vl), (VDIVSLvrl v256f64:$vy, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVSLvrl_v v256f64:$vy, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSLvil v256f64:$vy, (LO7 $I), i32:$vl)>;
+def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSLvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPULivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPUWrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPUWivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmpu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVCMPUrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmpu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmpu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVCMPSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcmps_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcmps_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcmpsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMAXSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmaxs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmaxs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMINSrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvmins_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvmins_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMAXSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmaxsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMINSLrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSLivl (LO7 $I), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vminsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vminsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VANDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vand_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVANDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvand_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VXORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vxor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVXORrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvxor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_veqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_veqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_veqv_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pveqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>;
+def : Pat<(int_ve_vl_vseq_vvl v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>;
+def : Pat<(int_ve_vl_pvseqlo_vvl v256f64:$pt, i32:$vl), (PVSEQLOl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsequp_vl i32:$vl), (PVSEQUPl i32:$vl)>;
+def : Pat<(int_ve_vl_pvsequp_vvl v256f64:$pt, i32:$vl), (PVSEQUPl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvseq_vl i32:$vl), (PVSEQl i32:$vl)>;
+def : Pat<(int_ve_vl_pvseq_vvl v256f64:$pt, i32:$vl), (PVSEQl_v i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLLvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsll_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRLvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsrl_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsla_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLAvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsla_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsla_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslal_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLALvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLALvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsra_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRAvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvsra_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvsra_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsral_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRALvrl v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRALvil v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl), (VSFAvrrl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvrrl_v v256f64:$vz, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, simm7:$I, i64:$sz, i32:$vl), (VSFAvirl v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvirl_v v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvrrml_v v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvirml_v v256f64:$vz, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfaddd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFADDDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfaddd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFADDSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfadds_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFADDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFADDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfadd_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFSUBDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFSUBSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFSUBvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfsub_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFSUBrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfsub_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuld_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMULDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuld_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuls_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMULSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuls_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFDIVDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFDIVSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsqrtd_vvl v256f64:$vy, i32:$vl), (VFSQRTDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsqrts_vvl v256f64:$vy, i32:$vl), (VFSQRTSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFCMPDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFCMPSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmps_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFCMPvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFCMPrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMAXDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMAXSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMAXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmax_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMAXrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmax_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmind_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMINDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmind_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMINSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmins_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMINvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmin_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMINrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmin_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbs_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFNMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFNMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfnmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfnmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrcpd_vvl v256f64:$vy, i32:$vl), (VRCPDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrcpd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrcps_vvl v256f64:$vy, i32:$vl), (VRCPSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrcps_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrcp_vvl v256f64:$vy, i32:$vl), (PVRCPvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrcp_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRCPvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtd_vvl v256f64:$vy, i32:$vl), (VRSQRTDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrts_vvl v256f64:$vy, i32:$vl), (VRSQRTSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrsqrt_vvl v256f64:$vy, i32:$vl), (PVRSQRTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrsqrt_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtdnex_vvl v256f64:$vy, i32:$vl), (VRSQRTDNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtdnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtsnex_vvl v256f64:$vy, i32:$vl), (VRSQRTSNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtsnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrsqrtnex_vvl v256f64:$vy, i32:$vl), (PVRSQRTNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrsqrtnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtws_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtws_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtws_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_NONE, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_RZ, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtld_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtld_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtld_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtdw_vvl v256f64:$vy, i32:$vl), (VCVTDWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtdw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtsw_vvl v256f64:$vy, i32:$vl), (VCVTSWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtsw_vvl v256f64:$vy, i32:$vl), (PVCVTSWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtdl_vvl v256f64:$vy, i32:$vl), (VCVTDLvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtdl_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDLvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtds_vvl v256f64:$vy, i32:$vl), (VCVTDSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtds_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtsd_vvl v256f64:$vy, i32:$vl), (VCVTSDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtsd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vsvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGrvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vsvml simm7:$I, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGivml (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrgw_vvvMl v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl), (VMRGWvvml v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrgw_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (VMRGWvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl), (VSHFvvrl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSHFvvrl_v v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, uimm6:$N, i32:$vl), (VSHFvvil v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSHFvvil_v v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcp_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vex_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEXvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmklat_ml i32:$vl), (VFMKLal i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklaf_ml i32:$vl), (VFMKLnal i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkat_Ml i32:$vl), (VFMKyal i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkaf_Ml i32:$vl), (VFMKynal i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklne_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleq_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklge_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklle_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnum_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgtnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklltnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleqnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkleqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmklgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkllenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwne_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweq_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwge_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwle_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnum_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgtnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwltnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweqnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkweqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkwlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlone_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupne_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeq_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeq_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloge_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupge_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlole_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuple_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonum_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnum_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IG, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IG, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IL, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IL, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwne_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_INE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_INE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweq_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IEQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IEQ, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwge_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IGE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IGE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwle_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_ILE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_ILE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnum_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwltnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweqnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkweqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkwlenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdne_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeq_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdge_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdle_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnum_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgtnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdltnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeqnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkdlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksne_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseq_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksge_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksle_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnum_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgtnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksltnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseqnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkseqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmksgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmkslenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslone_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupne_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeq_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeq_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloge_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupge_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslole_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuple_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonum_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnum_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_G, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_G, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_L, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_L, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksne_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseq_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQ, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQ, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksge_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksle_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LE, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LE, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnum_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NUM, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksltnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseqnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQNAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkseqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmksgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LENAN, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmkslenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwsx_vvl v256f64:$vy, i32:$vl), (VSUMWSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwsx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWSXvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwzx_vvl v256f64:$vy, i32:$vl), (VSUMWZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsumwzx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWZXvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsuml_vvl v256f64:$vy, i32:$vl), (VSUMLvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vsuml_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMLvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsumd_vvl v256f64:$vy, i32:$vl), (VFSUMDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsumd_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMDvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsums_vvl v256f64:$vy, i32:$vl), (VFSUMSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsums_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMSvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswfstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswfstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxswlstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxswfstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxswlstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswfstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswfstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswlstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTSXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswfstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminswlstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTZXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxslfst_vvl v256f64:$vy, i32:$vl), (VRMAXSLFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrmaxsllst_vvl v256f64:$vy, i32:$vl), (VRMAXSLLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrmaxsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminslfst_vvl v256f64:$vy, i32:$vl), (VRMINSLFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrminsllst_vvl v256f64:$vy, i32:$vl), (VRMINSLLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrminsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxdfst_vvl v256f64:$vy, i32:$vl), (VFRMAXDFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxdfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxdlst_vvl v256f64:$vy, i32:$vl), (VFRMAXDLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxdlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxsfst_vvl v256f64:$vy, i32:$vl), (VFRMAXSFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmaxslst_vvl v256f64:$vy, i32:$vl), (VFRMAXSLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmaxslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmindfst_vvl v256f64:$vy, i32:$vl), (VFRMINDFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmindfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrmindlst_vvl v256f64:$vy, i32:$vl), (VFRMINDLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrmindlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrminsfst_vvl v256f64:$vy, i32:$vl), (VFRMINSFSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrminsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfrminslst_vvl v256f64:$vy, i32:$vl), (VFRMINSLSTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfrminslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrand_vvl v256f64:$vy, i32:$vl), (VRANDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrand_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRANDvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vror_vvl v256f64:$vy, i32:$vl), (VRORvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vror_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRORvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vrxor_vvl v256f64:$vy, i32:$vl), (VRXORvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrxor_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRXORvml v256f64:$vy, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_andm_mmm v256i1:$vmy, v256i1:$vmz), (ANDMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_andm_MMM v512i1:$vmy, v512i1:$vmz), (ANDMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_orm_mmm v256i1:$vmy, v256i1:$vmz), (ORMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_orm_MMM v512i1:$vmy, v512i1:$vmz), (ORMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_xorm_mmm v256i1:$vmy, v256i1:$vmz), (XORMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_xorm_MMM v512i1:$vmy, v512i1:$vmz), (XORMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_eqvm_mmm v256i1:$vmy, v256i1:$vmz), (EQVMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_eqvm_MMM v512i1:$vmy, v512i1:$vmz), (EQVMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_nndm_mmm v256i1:$vmy, v256i1:$vmz), (NNDMmm v256i1:$vmy, v256i1:$vmz)>;
+def : Pat<(int_ve_vl_nndm_MMM v512i1:$vmy, v512i1:$vmz), (NNDMyy v512i1:$vmy, v512i1:$vmz)>;
+def : Pat<(int_ve_vl_negm_mm v256i1:$vmy), (NEGMm v256i1:$vmy)>;
+def : Pat<(int_ve_vl_negm_MM v512i1:$vmy), (NEGMy v512i1:$vmy)>;
+def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>;
+def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>;
+def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>;
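The generated records above all follow one shape: an int_ve_vl_* intrinsic is rewritten into the corresponding VE machine instruction, the comparison variants fold their condition into a CC_* immediate (CC_G, CC_L, CC_NAN, and so on), and the masked forms simply forward the extra v256i1 or v512i1 mask plus any passthrough operand. As a rough sketch of what the unmasked "greater" form of VFMK computes (a per-element compare against zero over the first vl elements, one result bit per element), the C++ model below may help; the function name, container types, and the treatment of inactive elements are assumptions made for illustration, not taken from the VE ISA manual.

// Hedged model of the VFMKDvl / VFMKSvl patterns with CC_G above:
// compare each active element against zero and record the outcome as
// one bit of a 256-bit mask.  Illustration only; inactive-element
// behaviour is an assumption.
#include <array>
#include <bitset>
#include <cstdint>

std::bitset<256> vfmk_gt_model(const std::array<double, 256> &vz,
                               uint32_t vl) {
  std::bitset<256> vm;
  for (uint32_t i = 0; i < vl && i < 256; ++i)
    vm[i] = vz[i] > 0.0;   // CC_G: element is greater (than zero)
  return vm;
}

The _mvml and _MvMl records differ only in that they carry an existing mask register through to the VFMK*vml / VFMKSyvyl instruction, which is why these patterns can be produced mechanically for every condition code.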
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
new file mode 100644
index 000000000000..69ea133ceed0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
@@ -0,0 +1,64 @@
+/// Pattern matching for VEL intrinsic instructions.
+
+/// Intrinsic patterns written by hand.
+
+// SVOB pattern.
+def : Pat<(int_ve_vl_svob), (SVOB)>;
+
+// Pack patterns.
+def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)),
+ (ORrr (f2l (LDUrii MEMrii:$addr0)),
+ (i2l (LDLZXrii MEMrii:$addr1)))>;
+
+def : Pat<(i64 (int_ve_vl_pack_f32a ADDRrii:$addr)),
+ (MULULrr
+ (i2l (LDLZXrii MEMrii:$addr)),
+ (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 (i64 0x0000000100000001))),
+ !add(32, 64)), 0,
+ (HI32 (i64 0x0000000100000001))))>;
+
+// The extract/insert patterns.
+def : Pat<(v256i1 (int_ve_vl_extract_vm512u v512i1:$vm)),
+ (EXTRACT_SUBREG v512i1:$vm, sub_vm_even)>;
+
+def : Pat<(v256i1 (int_ve_vl_extract_vm512l v512i1:$vm)),
+ (EXTRACT_SUBREG v512i1:$vm, sub_vm_odd)>;
+
+def : Pat<(v512i1 (int_ve_vl_insert_vm512u v512i1:$vmx, v256i1:$vmy)),
+ (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_even)>;
+
+def : Pat<(v512i1 (int_ve_vl_insert_vm512l v512i1:$vmx, v256i1:$vmy)),
+ (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_odd)>;
+
+// VMRG patterns.
+def : Pat<(int_ve_vl_vmrgw_vsvMl i32:$sy, v256f64:$vz, v512i1:$vm, i32:$vl),
+ (VMRGWrvml (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrgw_vsvMvl i32:$sy, v256f64:$vz, v512i1:$vm,
+ v256f64:$pt, i32:$vl),
+ (VMRGWrvml_v (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl,
+ v256f64:$pt)>;
+
+// VMV patterns.
+def : Pat<(int_ve_vl_vmv_vsvl i32:$sy, v256f64:$vz, i32:$vl),
+ (VMVrvl (i2l i32:$sy), v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vmv_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl),
+ (VMVrvl_v (i2l i32:$sy), v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmv_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt,
+ i32:$vl),
+ (VMVrvml_v (i2l i32:$sy), v256f64:$vz, v256i1:$vm, i32:$vl,
+ v256f64:$pt)>;
+
+// LSV patterns.
+def : Pat<(int_ve_vl_lsv_vvss v256f64:$pt, i32:$sy, i64:$sz),
+ (LSVrr_v (i2l i32:$sy), i64:$sz, v256f64:$pt)>;
+
+// LVS patterns.
+def : Pat<(int_ve_vl_lvsl_svs v256f64:$vx, i32:$sy),
+ (LVSvr v256f64:$vx, (i2l i32:$sy))>;
+def : Pat<(int_ve_vl_lvsd_svs v256f64:$vx, i32:$sy),
+ (LVSvr v256f64:$vx, (i2l i32:$sy))>;
+def : Pat<(int_ve_vl_lvss_svs v256f64:$vx, i32:$sy),
+ (l2f (LVSvr v256f64:$vx, (i2l i32:$sy)))>;
+
+/// Intrinsic patterns automatically generated.
+include "VEInstrIntrinsicVL.gen.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td
new file mode 100644
index 000000000000..0084876f9f1b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -0,0 +1,91 @@
+//===-- VEInstrPatternsVec.td - VEC_-type SDNodes and isel for VE Target --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VEC_* prefixed intermediate SDNodes and their
+// isel patterns.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern multiclasses for vector broadcast and element extract/insert
+//===----------------------------------------------------------------------===//
+
+multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
+ SDNodeXForm ImmCast, SDNodeXForm SuperRegCast> {
+ // VBRDil
+ def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
+ (VBRDil (ImmCast $sy), i32:$vl)>;
+
+ // VBRDrl
+ def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)),
+ (VBRDrl (SuperRegCast $sy), i32:$vl)>;
+}
+
+multiclass vbrd_elem64<ValueType v64, ValueType s64,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ // VBRDil
+ def : Pat<(v64 (vec_broadcast (s64 ImmOp:$sy), i32:$vl)),
+ (VBRDil (ImmCast $sy), i32:$vl)>;
+
+ // VBRDrl
+ def : Pat<(v64 (vec_broadcast s64:$sy, i32:$vl)),
+ (VBRDrl s64:$sy, i32:$vl)>;
+}
+
+multiclass extract_insert_elem32<ValueType v32, ValueType s32,
+ SDNodeXForm SubRegCast,
+ SDNodeXForm SuperRegCast> {
+ // LVSvi
+ def: Pat<(s32 (extractelt v32:$vec, uimm7:$idx)),
+ (SubRegCast (LVSvi v32:$vec, (ULO7 $idx)))>;
+ // LVSvr
+ def: Pat<(s32 (extractelt v32:$vec, i64:$idx)),
+ (SubRegCast (LVSvr v32:$vec, $idx))>;
+
+ // LSVir
+ def: Pat<(v32 (insertelt v32:$vec, s32:$val, uimm7:$idx)),
+ (LSVir_v (ULO7 $idx), (SuperRegCast $val), $vec)>;
+ // LSVrr
+ def: Pat<(v32 (insertelt v32:$vec, s32:$val, i64:$idx)),
+ (LSVrr_v $idx, (SuperRegCast $val), $vec)>;
+}
+
+multiclass extract_insert_elem64<ValueType v64, ValueType s64> {
+ // LVSvi
+ def: Pat<(s64 (extractelt v64:$vec, uimm7:$idx)),
+ (LVSvi v64:$vec, (ULO7 $idx))>;
+ // LVSvr
+ def: Pat<(s64 (extractelt v64:$vec, i64:$idx)),
+ (LVSvr v64:$vec, $idx)>;
+
+ // LSVir
+ def: Pat<(v64 (insertelt v64:$vec, s64:$val, uimm7:$idx)),
+ (LSVir_v (ULO7 $idx), $val, $vec)>;
+ // LSVrr
+ def: Pat<(v64 (insertelt v64:$vec, s64:$val, i64:$idx)),
+ (LSVrr_v $idx, $val, $vec)>;
+}
+
+multiclass patterns_elem32<ValueType v32, ValueType s32,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast,
+ SDNodeXForm SubRegCast, SDNodeXForm SuperRegCast> {
+ defm : vbrd_elem32<v32, s32, ImmOp, ImmCast, SuperRegCast>;
+ defm : extract_insert_elem32<v32, s32, SubRegCast, SuperRegCast>;
+}
+
+multiclass patterns_elem64<ValueType v64, ValueType s64,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ defm : vbrd_elem64<v64, s64, ImmOp, ImmCast>;
+ defm : extract_insert_elem64<v64, s64>;
+}
+
+defm : patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>;
+defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
+
+defm : patterns_elem64<v256i64, i64, simm7, LO7>;
+defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
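+
+// As an illustration, patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>
+// above is expected to yield, among others, a broadcast pattern roughly
+// equivalent to:
+//   def : Pat<(v256i32 (vec_broadcast i32:$sy, i32:$vl)),
+//             (VBRDrl (i2l $sy), i32:$vl)>;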
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td
new file mode 100644
index 000000000000..4a8476f7288a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td
@@ -0,0 +1,1510 @@
+//===----------------------------------------------------------------------===//
+// Vector Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for VM512 modifications
+//===----------------------------------------------------------------------===//
+
+// LVM/SVM instructions using VM512
+let hasSideEffects = 0, isCodeGenOnly = 1 in {
+ let Constraints = "$vx = $vd", DisableEncoding = "$vd" in {
+ def LVMyir_y : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz, VM512:$vd),
+ "# pseudo LVM $vx, $sy, $sz, $vd">;
+ def LVMyim_y : Pseudo<(outs VM512:$vx),
+ (ins uimm3:$sy, mimm:$sz, VM512:$vd),
+ "# pseudo LVM $vx, $sy, $sz, $vd">;
+ }
+ def LVMyir : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz),
+ "# pseudo LVM $vx, $sy, $sz">;
+ def LVMyim : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, mimm:$sz),
+ "# pseudo LVM $vx, $sy, $sz">;
+ def SVMyi : Pseudo<(outs I64:$sx), (ins VM512:$vz, uimm3:$sy),
+ "# pseudo SVM $sx, $vz, $sy">;
+}
+
+// VFMK/VFMKW/VFMKS instructions using VM512
+let hasSideEffects = 0, isCodeGenOnly = 1, DisableEncoding = "$vl" in {
+ def VFMKyal : Pseudo<(outs VM512:$vmx), (ins I32:$vl),
+ "# pseudo-vfmk.at $vmx">;
+ def VFMKynal : Pseudo<(outs VM512:$vmx), (ins I32:$vl),
+ "# pseudo-vfmk.af $vmx">;
+ def VFMKWyvl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, I32:$vl),
+ "# pseudo-vfmk.w.$cf $vmx, $vz">;
+ def VFMKWyvyl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl),
+ "# pseudo-vfmk.w.$cf $vmx, $vz, $vm">;
+ def VFMKSyvl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, I32:$vl),
+ "# pseudo-vfmk.s.$cf $vmx, $vz">;
+ def VFMKSyvyl : Pseudo<(outs VM512:$vmx),
+ (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl),
+ "# pseudo-vfmk.s.$cf $vmx, $vz, $vm">;
+}
+
+// ANDM/ORM/XORM/EQVM/NNDM/NEGM instructions using VM512
+let hasSideEffects = 0, isCodeGenOnly = 1 in {
+ def ANDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# andm $vmx, $vmy, $vmz">;
+ def ORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# orm $vmx, $vmy, $vmz">;
+ def XORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# xorm $vmx, $vmy, $vmz">;
+ def EQVMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# eqvm $vmx, $vmy, $vmz">;
+ def NNDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz),
+ "# nndm $vmx, $vmy, $vmz">;
+ def NEGMy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy),
+ "# negm $vmx, $vmy">;
+}
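+// These pseudos are presumably expanded later (after instruction selection /
+// register allocation) into the corresponding operations on the two VM
+// sub-registers that make up a VM512 pair (sub_vm_even / sub_vm_odd).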
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//
+// Define all vector instructions defined in the SX-Aurora TSUBASA Architecture
+// Guide here. For mnemonics, we use those defined in the Vector Engine
+// Assembly Language Reference Manual.
+//
+// Some instructions can update existing data by the following instruction
+// sequence.
+//
+// lea %s0, 256
+// lea %s1, 128
+// lvl %s0
+// vbrd %v0, 2 # v0 = { 2, 2, 2, ..., 2, 2, 2 }
+// lvl %s1
+// vbrd %v0, 3 # v0 = { 3, 3, 3, ..., 3, 2, 2, 2, ..., 2, 2, 2 }
+//
+// In order to represent the above with virtual registers, we define
+// instructions with an additional base register and a `_v` suffix in the
+// mnemonic.
+//
+// lea t0, 256
+// lea t1, 128
+//   lvl t0
+// vbrd tv0, 2
+// lvl t1
+//   vbrd_v tv1, 3, tv0
+//
+// We also have some instructions that use the VL register with a pseudo VL
+// value, indicated by the following suffixes in the mnemonic.
+//
+// l: have an additional I32 register to represent the VL value.
+// L: have an additional VL register to represent the VL value.
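+//
+// For example, the VLD multiclasses below are expected to produce variants
+// such as VLDrr (register $sy/$sz), VLDir (immediate $sy), VLDrrl / VLDrrL
+// (with an explicit I32 / VLS vector-length operand), VLDrr_v (with an extra
+// pass-through base register), and .nc counterparts such as VLDNCrr.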
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------------------------------------------------
+// Section 8.9 - Vector Load/Store and Move Instructions
+//-----------------------------------------------------------------------------
+
+// Multiclass for VLD instructions
+let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VLDbm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in,
+ string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RVM<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, $sy, $sz")>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, $sy, $sz")>;
+}
+multiclass VLDlm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in> {
+ defm "" : VLDbm<opcStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VLDbm<opcStr, opc, RC, !con(dag_in, (ins I32:$vl)), "$vl,">;
+ defm L : VLDbm<opcStr, opc, RC, !con(dag_in, (ins VLS:$vl)), "$vl,">;
+ }
+}
+let VE_VLIndex = 3 in
+multiclass VLDtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ defm rr : VLDlm<opcStr, opc, RC, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : VLDlm<opcStr, opc, RC, (ins simm7:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rz : VLDlm<opcStr, opc, RC, (ins I64:$sy, zero:$sz)>;
+ let cy = 0, cz = 0 in
+ defm iz : VLDlm<opcStr, opc, RC, (ins simm7:$sy, zero:$sz)>;
+}
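+// The rr/ir/rz/iz suffixes combine a register or 7-bit immediate $sy with a
+// register or zero $sz address operand.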
+multiclass VLDm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1 in defm "" : VLDtgm<opcStr, opc, RC>;
+ let vc = 0 in defm NC : VLDtgm<opcStr#".nc", opc, RC>;
+}
+
+// Section 8.9.1 - VLD (Vector Load)
+defm VLD : VLDm<"vld", 0x81, V64>;
+
+// Section 8.9.2 - VLDU (Vector Load Upper)
+defm VLDU : VLDm<"vldu", 0x82, V64>;
+
+// Section 8.9.3 - VLDL (Vector Load Lower)
+defm VLDLSX : VLDm<"vldl.sx", 0x83, V64>;
+let cx = 1 in defm VLDLZX : VLDm<"vldl.zx", 0x83, V64>;
+
+// Section 8.9.4 - VLD2D (Vector Load 2D)
+defm VLD2D : VLDm<"vld2d", 0xc1, V64>;
+
+// Section 8.9.5 - VLDU2D (Vector Load Upper 2D)
+defm VLDU2D : VLDm<"vldu2d", 0xc2, V64>;
+
+// Section 8.9.6 - VLDL2D (Vector Load Lower 2D)
+defm VLDL2DSX : VLDm<"vldl2d.sx", 0xc3, V64>;
+let cx = 1 in defm VLDL2DZX : VLDm<"vldl2d.zx", 0xc3, V64>;
+
+// Multiclass for VST instructions
+let mayStore = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VSTbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+multiclass VSTmm<string opcStr, bits<8>opc, dag dag_in> {
+ defm "" : VSTbm<opcStr, " $vx, $sy, $sz", opc, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VSTbm<opcStr, " $vx, $sy, $sz, $m", opc, !con(dag_in, (ins VM:$m))>;
+}
+let VE_VLIndex = 3 in
+multiclass VSTtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ defm rrv : VSTmm<opcStr, opc, (ins I64:$sy, I64:$sz, RC:$vx)>;
+ let cy = 0 in
+ defm irv : VSTmm<opcStr, opc, (ins simm7:$sy, I64:$sz, RC:$vx)>;
+ let cz = 0 in
+ defm rzv : VSTmm<opcStr, opc, (ins I64:$sy, zero:$sz, RC:$vx)>;
+ let cy = 0, cz = 0 in
+ defm izv : VSTmm<opcStr, opc, (ins simm7:$sy, zero:$sz, RC:$vx)>;
+}
+multiclass VSTm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1, cx = 0 in defm "" : VSTtgm<opcStr, opc, RC>;
+ let vc = 0, cx = 0 in defm NC : VSTtgm<opcStr#".nc", opc, RC>;
+ let vc = 1, cx = 1 in defm OT : VSTtgm<opcStr#".ot", opc, RC>;
+ let vc = 0, cx = 1 in defm NCOT : VSTtgm<opcStr#".nc.ot", opc, RC>;
+}
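+// The vc/cx settings above generate the plain, .nc, .ot, and .nc.ot mnemonic
+// variants (presumably non-cacheable and overtaking store modes).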
+
+// Section 8.9.7 - VST (Vector Store)
+defm VST : VSTm<"vst", 0x91, V64>;
+
+// Section 8.9.8 - VST (Vector Store Upper)
+defm VSTU : VSTm<"vstu", 0x92, V64>;
+
+// Section 8.9.9 - VSTL (Vector Store Lower)
+defm VSTL : VSTm<"vstl", 0x93, V64>;
+
+// Section 8.9.10 - VST2D (Vector Store 2D)
+defm VST2D : VSTm<"vst2d", 0xd1, V64>;
+
+// Section 8.9.11 - VSTU2D (Vector Store Upper 2D)
+defm VSTU2D : VSTm<"vstu2d", 0xd2, V64>;
+
+// Section 8.9.12 - VSTL2D (Vector Store Lower 2D)
+defm VSTL2D : VSTm<"vstl2d", 0xd3, V64>;
+
+// Multiclass for VGT instructions
+let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VGTbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RVM<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+}
+multiclass VGTlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VGTbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass VGTmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VGTlm<opcStr, argStr, opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VGTlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins VM:$m))>;
+}
+let VE_VLIndex = 4 in
+multiclass VGTlhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm rr : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins I64:$sy, I64:$sz))>;
+ let cy = 0 in
+ defm ir : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins simm7:$sy, I64:$sz))>;
+ let cz = 0 in
+ defm rz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins I64:$sy, zero:$sz))>;
+ let cy = 0, cz = 0 in
+ defm iz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC,
+ !con(dag_in, (ins simm7:$sy, zero:$sz))>;
+}
+multiclass VGTtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vy = ? in defm v : VGTlhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>;
+ let cs = 1, sw = ? in defm s : VGTlhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>;
+}
+multiclass VGTm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1 in defm "" : VGTtgm<opcStr, opc, RC>;
+ let vc = 0 in defm NC : VGTtgm<opcStr#".nc", opc, RC>;
+}
+
+// Section 8.9.13 - VGT (Vector Gather)
+defm VGT : VGTm<"vgt", 0xa1, V64>;
+
+// Section 8.9.14 - VGTU (Vector Gather Upper)
+defm VGTU : VGTm<"vgtu", 0xa2, V64>;
+
+// Section 8.9.15 - VGTL (Vector Gather Lower)
+defm VGTLSX : VGTm<"vgtl.sx", 0xa3, V64>;
+let cx = 1 in defm VGTLZX : VGTm<"vgtl.zx", 0xa3, V64>;
+def : MnemonicAlias<"vgtl", "vgtl.zx">;
+def : MnemonicAlias<"vgtl.nc", "vgtl.zx.nc">;
+
+// Multiclass for VSC instructions
+let mayStore = 1, hasSideEffects = 0, Uses = [VL] in
+multiclass VSCbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+multiclass VSCmm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ defm "" : VSCbm<opcStr, argStr, opc, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VSCbm<opcStr, argStr#", $m", opc, !con(dag_in, (ins VM:$m))>;
+}
+let VE_VLIndex = 4 in
+multiclass VSClhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm rrv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins I64:$sy, I64:$sz, RC:$vx))>;
+ let cy = 0 in
+ defm irv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins simm7:$sy, I64:$sz, RC:$vx))>;
+ let cz = 0 in
+ defm rzv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins I64:$sy, zero:$sz, RC:$vx))>;
+ let cy = 0, cz = 0 in
+ defm izv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc,
+ !con(dag_in, (ins simm7:$sy, zero:$sz, RC:$vx))>;
+}
+multiclass VSCtgm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vy = ? in defm v : VSClhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>;
+ let cs = 1, sw = ? in defm s : VSClhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>;
+}
+multiclass VSCm<string opcStr, bits<8>opc, RegisterClass RC> {
+ let vc = 1, cx = 0 in defm "" : VSCtgm<opcStr, opc, RC>;
+ let vc = 0, cx = 0 in defm NC : VSCtgm<opcStr#".nc", opc, RC>;
+ let vc = 1, cx = 1 in defm OT : VSCtgm<opcStr#".ot", opc, RC>;
+ let vc = 0, cx = 1 in defm NCOT : VSCtgm<opcStr#".nc.ot", opc, RC>;
+}
+
+// Section 8.9.16 - VSC (Vector Scatter)
+defm VSC : VSCm<"vsc", 0xb1, V64>;
+
+// Section 8.9.17 - VSCU (Vector Scatter Upper)
+defm VSCU : VSCm<"vscu", 0xb2, V64>;
+
+// Section 8.9.18 - VSCL (Vector Scatter Lower)
+defm VSCL : VSCm<"vscl", 0xb3, V64>;
+
+// Section 8.9.19 - PFCHV (Prefetch Vector)
+let Uses = [VL] in
+multiclass PFCHVbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+let VE_VLIndex = 2 in
+multiclass PFCHVm<string opcStr, bits<8>opc> {
+ defm rr : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, zero:$sz)>;
+ let cy = 0, cz = 0 in
+ defm iz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, zero:$sz)>;
+}
+let vc = 1, vx = 0 in defm PFCHV : PFCHVm<"pfchv", 0x80>;
+let vc = 0, vx = 0 in defm PFCHVNC : PFCHVm<"pfchv.nc", 0x80>;
+
+// Section 8.9.20 - LSV (Load S to V)
+let sx = 0, vx = ?, hasSideEffects = 0 in
+multiclass LSVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ def "" : RR<opc, (outs RC:$vx), dag_in, !strconcat(opcStr, " ${vx}", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = "$base",
+ isCodeGenOnly = 1 in
+ def _v : RR<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " ${vx}", argStr)>;
+}
+multiclass LSVm<string opcStr, bits<8>opc, RegisterClass RC> {
+ defm rr : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rm : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, mimm:$sz)>;
+ let cy = 0, cz = 0 in
+ defm im : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, mimm:$sz)>;
+}
+defm LSV : LSVm<"lsv", 0x8e, V64>;
+
+// Section 8.9.21 - LVS (Load V to S)
+let cz = 0, sz = 0, vx = ?, hasSideEffects = 0 in
+multiclass LVSm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def vr : RR<opc, (outs I64:$sx), (ins RC:$vx, I64:$sy),
+ opcStr#" $sx, ${vx}(${sy})">;
+ let cy = 0 in
+ def vi : RR<opc, (outs I64:$sx), (ins RC:$vx, uimm7:$sy),
+ opcStr#" $sx, ${vx}(${sy})">;
+}
+defm LVS : LVSm<"lvs", 0x9e, V64>;
+
+// Section 8.9.22 - LVM (Load VM)
+let sx = 0, vx = ?, hasSideEffects = 0 in
+multiclass LVMbm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM,
+ dag dag_in> {
+ def "" : RR<opc, (outs RCM:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = "$base",
+ isCodeGenOnly = 1 in {
+ def _m : RR<opc, (outs RCM:$vx), !con(dag_in, (ins RCM:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ }
+}
+multiclass LVMom<string opcStr, bits<8>opc, RegisterClass RCM> {
+ defm rr : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, I64:$sz)>;
+ let cy = 0 in
+ defm ir : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, I64:$sz)>;
+ let cz = 0 in
+ defm rm : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, mimm:$sz)>;
+ let cy = 0, cz = 0 in
+ defm im : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, mimm:$sz)>;
+}
+multiclass LVMm<string opcStr, bits<8>opc, RegisterClass RCM> {
+ defm "" : LVMom<opcStr, opc, RCM>;
+}
+defm LVM : LVMm<"lvm", 0xb7, VM>;
+
+// Section 8.9.23 - SVM (Save VM)
+let cz = 0, sz = 0, vz = ?, hasSideEffects = 0 in
+multiclass SVMm<string opcStr, bits<8>opc, RegisterClass RCM> {
+ def mr : RR<opc, (outs I64:$sx), (ins RCM:$vz, I64:$sy),
+ opcStr#" $sx, $vz, $sy">;
+ let cy = 0 in
+ def mi : RR<opc, (outs I64:$sx), (ins RCM:$vz, uimm2:$sy),
+ opcStr#" $sx, $vz, $sy">;
+}
+defm SVM : SVMm<"svm", 0xa7, VM>;
+
+// Section 8.9.24 - VBRD (Vector Broadcast)
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass VBRDbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+}
+multiclass VBRDlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VBRDbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass VBRDmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM, dag dag_in> {
+ defm "" : VBRDlm<opcStr, argStr, opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VBRDlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>;
+}
+let VE_VLIndex = 2 in
+multiclass VBRDm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC,
+ RegisterClass RCM> {
+ defm r : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins RC:$sy)>;
+ let cy = 0 in
+ defm i : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins simm7:$sy)>;
+}
+let cx = 0, cx2 = 0 in
+defm VBRD : VBRDm<"vbrd", 0x8c, V64, I64, VM>;
+let cx = 0, cx2 = 1 in
+defm VBRDL : VBRDm<"vbrdl", 0x8c, V64, I32, VM>;
+let cx = 1, cx2 = 0 in
+defm VBRDU : VBRDm<"vbrdu", 0x8c, V64, F32, VM>;
+let cx = 1, cx2 = 1 in
+defm PVBRD : VBRDm<"pvbrd", 0x8c, V64, I64, VM512>;
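+// The cx/cx2 settings appear to select the broadcast form: vbrd for 64-bit
+// elements, vbrdl for the lower 32-bit (integer) half, vbrdu for the upper
+// (float) half, and pvbrd for a packed broadcast to both halves under a
+// VM512 mask.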
+
+// Section 8.9.25 - VMV (Vector Move)
+let vx = ?, vz = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass VMVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx, ", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx, ", argStr)>;
+}
+multiclass VMVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : VMVbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass VMVmm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM, dag dag_in> {
+ defm "" : VMVlm<opcStr, "$sy, $vz", opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : VMVlm<opcStr, "$sy, $vz, $m", opc, RC, !con(dag_in, (ins RCM:$m))>;
+}
+let VE_VLIndex = 3 in
+multiclass VMVm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ defm rv : VMVmm<opcStr, opc, RC, RCM, (ins I64:$sy, RC:$vz)>;
+ let cy = 0 in
+ defm iv : VMVmm<opcStr, opc, RC, RCM, (ins uimm7:$sy, RC:$vz)>;
+}
+defm VMV : VMVm<"vmv", 0x9c, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.10 - Vector Fixed-Point Arithmetic Instructions
+//-----------------------------------------------------------------------------
+
+// Multiclass for generic vector calculation
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx", argStr)>;
+ let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base",
+ isCodeGenOnly = 1 in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx", argStr)>;
+}
+multiclass RVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : RVbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+multiclass RVmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM, dag dag_in> {
+ defm "" : RVlm<opcStr, argStr, opc, RC, dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : RVlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>;
+}
+// Generic RV multiclass with 2 arguments.
+// e.g. VADD, VSUB, VMPY, etc.
+let VE_VLIndex = 3 in
+multiclass RVm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC,
+ RegisterClass RCM, Operand SIMM = simm7> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>;
+ let cs = 1, vz = ? in
+ defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>;
+}
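+// The vv/rv/iv suffixes denote vector-vector, scalar(register)-vector, and
+// immediate-vector operand forms, respectively.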
+// Special RV multiclass with 2 arguments using cs2.
+// e.g. VDIV, VDVS, and VDVX.
+let VE_VLIndex = 3 in
+multiclass RVDIVm<string opcStr, bits<8>opc, RegisterClass VRC,
+ RegisterClass RC, RegisterClass RCM, Operand SIMM = simm7> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>;
+ let cs2 = 1, vy = ? in
+ defm vr : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, RC:$sy)>;
+ let cs2 = 1, cy = 0, vy = ? in
+ defm vi : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, SIMM:$sy)>;
+ let cs = 1, vz = ? in
+ defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>;
+}
+// Generic RV multiclass with 2 arguments for logical operations.
+// e.g. VAND, VOR, VXOR, etc.
+let VE_VLIndex = 3 in
+multiclass RVLm<string opcStr, bits<8>opc, RegisterClass ScaRC,
+ RegisterClass RC, RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vy, $vz", opc, RC, RCM, (ins RC:$vy, RC:$vz)>;
+ let cs = 1, vz = ? in
+ defm rv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins ScaRC:$sy, RC:$vz)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm mv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins mimm:$sy, RC:$vz)>;
+}
+// Generic RV multiclass with 1 argument.
+// e.g. VLDZ, VPCNT, and VBRV.
+let VE_VLIndex = 2 in
+multiclass RV1m<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0, vz = ? in
+ defm v : RVmm<opcStr, ", $vz", opc, RC, RCM, (ins RC:$vz)>;
+}
+// Generic RV multiclass with no argument.
+// e.g. VSEQ.
+let VE_VLIndex = 1 in
+multiclass RV0m<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0 in
+ defm "" : RVmm<opcStr, "", opc, RC, RCM, (ins)>;
+}
+// Generic RV multiclass with 2 arguments for shift operations.
+// e.g. VSLL, VSRL, VSLA, etc.
+let VE_VLIndex = 3 in
+multiclass RVSm<string opcStr, bits<8>opc, RegisterClass ScaRC,
+ RegisterClass RC, RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm vv : RVmm<opcStr, ", $vz, $vy", opc, RC, RCM, (ins RC:$vz, RC:$vy)>;
+ let cs = 1, vz = ? in
+ defm vr : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, ScaRC:$sy)>;
+ let cs = 1, cy = 0, vz = ? in
+ defm vi : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, uimm7:$sy)>;
+}
+// Generic RV multiclass with 3 arguments for shift operations.
+// e.g. VSLD and VSRD.
+let VE_VLIndex = 4 in
+multiclass RVSDm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let vy = ?, vz = ? in
+ defm vvr : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM,
+ (ins RC:$vy, RC:$vz, I64:$sy)>;
+ let cy = 0, vy = ?, vz = ? in
+ defm vvi : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM,
+ (ins RC:$vy, RC:$vz, uimm7:$sy)>;
+}
+// Special RV multiclass with 3 arguments.
+// e.g. VSFA
+let VE_VLIndex = 4 in
+multiclass RVSAm<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cz = 1, sz = ?, vz = ? in
+ defm vrr : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, I64:$sy, I64:$sz)>;
+ let cz = 0, sz = ?, vz = ? in
+ defm vrm : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, I64:$sy, mimm:$sz)>;
+ let cy = 0, cz = 1, sz = ?, vz = ? in
+ defm vir : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, uimm3:$sy, I64:$sz)>;
+ let cy = 0, cz = 0, sz = ?, vz = ? in
+ defm vim : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM,
+ (ins RC:$vz, uimm3:$sy, mimm:$sz)>;
+}
+// Generic RV multiclass with 1 argument using vy field.
+// e.g. VFSQRT, VRCP, and VRSQRT.
+let VE_VLIndex = 2 in
+multiclass RVF1m<string opcStr, bits<8>opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ? in
+ defm v : RVmm<opcStr, ", $vy", opc, RC, RCM, (ins RC:$vy)>;
+}
+// Special RV multiclass with 3 arguments using cs2.
+// e.g. VFMAD, VFMSB, VFNMAD, etc.
+let VE_VLIndex = 4 in
+multiclass RVMm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC,
+ RegisterClass RCM, Operand SIMM = simm7> {
+ let cy = 0, sy = 0, vy = ?, vz = ?, vw = ? in
+ defm vvv : RVmm<opcStr, ", $vy, $vz, $vw", opc, VRC, RCM,
+ (ins VRC:$vy, VRC:$vz, VRC:$vw)>;
+ let cs2 = 1, vy = ?, vw = ? in
+ defm vrv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM,
+ (ins VRC:$vy, RC:$sy, VRC:$vw)>;
+ let cs2 = 1, cy = 0, vy = ?, vw = ? in
+ defm viv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM,
+ (ins VRC:$vy, SIMM:$sy, VRC:$vw)>;
+ let cs = 1, vz = ?, vw = ? in
+ defm rvv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM,
+ (ins RC:$sy, VRC:$vz, VRC:$vw)>;
+ let cs = 1, cy = 0, vz = ?, vw = ? in
+ defm ivv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM,
+ (ins SIMM:$sy, VRC:$vz, VRC:$vw)>;
+}
+// Special RV multiclass with 2 arguments for floating point conversions.
+// e.g. VFIX and VFIXX
+let hasSideEffects = 0, VE_VLIndex = 3 in
+multiclass RVFIXm<string opcStr, bits<8> opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let cy = 0, sy = 0, vy = ?, vz = ? in
+ defm v : RVmm<opcStr#"$vz", ", $vy", opc, RC, RCM, (ins RDOp:$vz, RC:$vy)>;
+}
+// Multiclass for generic iterative vector calculation
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVIbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in, string disEnc = ""> {
+ let DisableEncoding = disEnc in
+ def "" : RV<opc, (outs RC:$vx), dag_in,
+ !strconcat(opcStr, " $vx", argStr)>;
+ let isCodeGenOnly = 1, Constraints = "$vx = $base", DisableEncoding = disEnc#"$base" in
+ def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)),
+ !strconcat(opcStr, " $vx", argStr)>;
+}
+multiclass RVIlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC,
+ dag dag_in> {
+ defm "" : RVIbm<opcStr, argStr, opc, RC, dag_in>;
+ let isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ defm l : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)),
+ "$vl,">;
+ defm L : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)),
+ "$vl,">;
+ }
+}
+// Generic RV multiclass for iterative operation with 2 arguments.
+// e.g. VFIA, VFIS, and VFIM
+let VE_VLIndex = 3 in
+multiclass RVI2m<string opcStr, bits<8>opc, RegisterClass VRC,
+ RegisterClass RC> {
+ let vy = ? in
+ defm vr : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, RC:$sy)>;
+ let cy = 0, vy = ? in
+ defm vi : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, simm7fp:$sy)>;
+}
+// Generic RV multiclass for iterative operation with 3 arguments.
+// e.g. VFIAM, VFISM, VFIMA, etc.
+let VE_VLIndex = 4 in
+multiclass RVI3m<string opcStr, bits<8>opc, RegisterClass VRC,
+ RegisterClass RC> {
+ let vy = ?, vz = ? in
+ defm vvr : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC,
+ (ins VRC:$vy, VRC:$vz, RC:$sy)>;
+ let cy = 0, vy = ?, vz = ? in
+ defm vvi : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC,
+ (ins VRC:$vy, VRC:$vz, simm7fp:$sy)>;
+}
+// Special RV multiclass with 3 arguments for VSHF.
+// e.g. VSHF
+let vy = ?, vz = ?, VE_VLIndex = 4 in
+multiclass RVSHFm<string opcStr, bits<8>opc, RegisterClass RC,
+ Operand SIMM = uimm4> {
+ defm vvr : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC,
+ (ins RC:$vy, RC:$vz, I64:$sy)>;
+ let cy = 0 in defm vvi : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC,
+ (ins RC:$vy, RC:$vz, SIMM:$sy)>;
+}
+// Multiclass for generic mask calculation
+let vx = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVMKbm<string opcStr, string argStr, bits<8>opc, dag dag_out,
+ dag dag_in> {
+ def "" : RV<opc, dag_out, dag_in, !strconcat(opcStr, argStr)>;
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RV<opc, dag_out, !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, argStr)>;
+ def L : RV<opc, dag_out, !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, argStr)>;
+ }
+}
+multiclass RVMKlm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM,
+ dag dag_in> {
+ defm "" : RVMKbm<opcStr, " $vx"#argStr, opc, (outs RCM:$vx), dag_in>;
+ let m = ?, VE_VLWithMask = 1 in
+ defm m : RVMKbm<opcStr, " $vx"#argStr#", $m", opc, (outs RCM:$vx),
+ !con(dag_in, (ins RCM:$m))>;
+}
+// Generic RV multiclass for mask calculation with a condition.
+// e.g. VFMK, VFMS, and VFMF
+let cy = 0, sy = 0 in
+multiclass RVMKom<string opcStr, bits<8> opc, RegisterClass RC,
+ RegisterClass RCM> {
+ let vy = ?, vz = ?, VE_VLIndex = 3 in
+ defm v : RVMKlm<opcStr#"$vy", ", $vz", opc, RCM, (ins CCOp:$vy, RC:$vz)>;
+ let vy = 15 /* AT */, VE_VLIndex = 1 in
+ defm a : RVMKlm<opcStr#"at", "", opc, RCM, (ins)>;
+ let vy = 0 /* AF */, VE_VLIndex = 1 in
+ defm na : RVMKlm<opcStr#"af", "", opc, RCM, (ins)>;
+}
+multiclass RVMKm<string opcStr, bits<8> opc, RegisterClass RC,
+ RegisterClass RCM> {
+ defm "" : RVMKom<opcStr, opc, RC, RCM>;
+}
+// Generic RV multiclass for mask calculation with 2 arguments.
+// e.g. ANDM, ORM, XORM, etc.
+let cy = 0, sy = 0, vx = ?, vy = ?, vz = ?, hasSideEffects = 0 in
+multiclass RVM2m<string opcStr, bits<8> opc, RegisterClass RCM> {
+ def mm : RV<opc, (outs RCM:$vx), (ins RCM:$vy, RCM:$vz),
+ !strconcat(opcStr, " $vx, $vy, $vz")>;
+}
+// Generic RV multiclass for mask calculation with 1 argument.
+// e.g. NEGM
+let cy = 0, sy = 0, vx = ?, vy = ?, hasSideEffects = 0 in
+multiclass RVM1m<string opcStr, bits<8> opc, RegisterClass RCM> {
+ def m : RV<opc, (outs RCM:$vx), (ins RCM:$vy),
+ !strconcat(opcStr, " $vx, $vy")>;
+}
+// Generic RV multiclass for scalar results calculated from a mask.
+// e.g. PCVM, LZVM, and TOVM
+let cy = 0, sy = 0, vy = ?, hasSideEffects = 0, Uses = [VL] in
+multiclass RVMSbm<string opcStr, string argStr, bits<8>opc, dag dag_in> {
+ def "" : RV<opc, (outs I64:$sx), dag_in,
+ !strconcat(opcStr, " $sx,", argStr)> {
+ bits<7> sx;
+ let Inst{54-48} = sx;
+ }
+ let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in {
+ def l : RV<opc, (outs I64:$sx), !con(dag_in, (ins I32:$vl)),
+ !strconcat(opcStr, " $sx,", argStr)> {
+ bits<7> sx;
+ let Inst{54-48} = sx;
+ }
+ def L : RV<opc, (outs I64:$sx), !con(dag_in, (ins VLS:$vl)),
+ !strconcat(opcStr, " $sx,", argStr)> {
+ bits<7> sx;
+ let Inst{54-48} = sx;
+ }
+ }
+}
+let VE_VLIndex = 2 in
+multiclass RVMSm<string opcStr, bits<8> opc, RegisterClass RCM> {
+ defm m : RVMSbm<opcStr, " $vy", opc, (ins RCM:$vy)>;
+}
+
+// Section 8.10.1 - VADD (Vector Add)
+let cx = 0, cx2 = 0 in
+defm VADDUL : RVm<"vaddu.l", 0xc8, V64, I64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVADDULO : RVm<"pvaddu.lo", 0xc8, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VADDUW : RVm<"vaddu.w", 0xc8, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVADDUUP : RVm<"pvaddu.up", 0xc8, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVADDU : RVm<"pvaddu", 0xc8, V64, I64, VM512>;
+def : MnemonicAlias<"vaddu.w", "pvaddu.lo">;
+
+// Section 8.10.2 - VADS (Vector Add Single)
+let cx = 0, cx2 = 0 in
+defm VADDSWSX : RVm<"vadds.w.sx", 0xca, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVADDSLO : RVm<"pvadds.lo", 0xca, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VADDSWZX : RVm<"vadds.w.zx", 0xca, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVADDSUP : RVm<"pvadds.up", 0xca, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVADDS : RVm<"pvadds", 0xca, V64, I64, VM512>;
+def : MnemonicAlias<"pvadds.lo.sx", "vadds.w.sx">;
+def : MnemonicAlias<"vadds.w.zx", "pvadds.lo">;
+def : MnemonicAlias<"vadds.w", "pvadds.lo">;
+def : MnemonicAlias<"pvadds.lo.zx", "pvadds.lo">;
+
+// Section 8.10.3 - VADX (Vector Add)
+defm VADDSL : RVm<"vadds.l", 0x8b, V64, I64, VM>;
+
+// Section 8.10.4 - VSUB (Vector Subtract)
+let cx = 0, cx2 = 0 in
+defm VSUBUL : RVm<"vsubu.l", 0xd8, V64, I64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSUBULO : RVm<"pvsubu.lo", 0xd8, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VSUBUW : RVm<"vsubu.w", 0xd8, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVSUBUUP : RVm<"pvsubu.up", 0xd8, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVSUBU : RVm<"pvsubu", 0xd8, V64, I64, VM512>;
+def : MnemonicAlias<"vsubu.w", "pvsubu.lo">;
+
+// Section 8.10.5 - VSBS (Vector Subtract Single)
+let cx = 0, cx2 = 0 in
+defm VSUBSWSX : RVm<"vsubs.w.sx", 0xda, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSUBSLO : RVm<"pvsubs.lo", 0xda, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VSUBSWZX : RVm<"vsubs.w.zx", 0xda, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVSUBSUP : RVm<"pvsubs.up", 0xda, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVSUBS : RVm<"pvsubs", 0xda, V64, I64, VM512>;
+def : MnemonicAlias<"pvsubs.lo.sx", "vsubs.w.sx">;
+def : MnemonicAlias<"vsubs.w.zx", "pvsubs.lo">;
+def : MnemonicAlias<"vsubs.w", "pvsubs.lo">;
+def : MnemonicAlias<"pvsubs.lo.zx", "pvsubs.lo">;
+
+// Section 8.10.6 - VSBX (Vector Subtract)
+defm VSUBSL : RVm<"vsubs.l", 0x9b, V64, I64, VM>;
+
+// Section 8.10.7 - VMPY (Vector Multiply)
+let cx2 = 0 in
+defm VMULUL : RVm<"vmulu.l", 0xc9, V64, I64, VM>;
+let cx2 = 1 in
+defm VMULUW : RVm<"vmulu.w", 0xc9, V64, I32, VM>;
+
+// Section 8.10.8 - VMPS (Vector Multiply Single)
+let cx2 = 0 in
+defm VMULSWSX : RVm<"vmuls.w.sx", 0xcb, V64, I32, VM>;
+let cx2 = 1 in
+defm VMULSWZX : RVm<"vmuls.w.zx", 0xcb, V64, I32, VM>;
+def : MnemonicAlias<"vmuls.w", "vmuls.w.zx">;
+
+// Section 8.10.9 - VMPX (Vector Multiply)
+defm VMULSL : RVm<"vmuls.l", 0xdb, V64, I64, VM>;
+
+// Section 8.10.10 - VMPD (Vector Multiply)
+defm VMULSLW : RVm<"vmuls.l.w", 0xd9, V64, I32, VM>;
+
+// Section 8.10.11 - VDIV (Vector Divide)
+let cx2 = 0 in
+defm VDIVUL : RVDIVm<"vdivu.l", 0xe9, V64, I64, VM>;
+let cx2 = 1 in
+defm VDIVUW : RVDIVm<"vdivu.w", 0xe9, V64, I32, VM>;
+
+// Section 8.10.12 - VDVS (Vector Divide Single)
+let cx2 = 0 in
+defm VDIVSWSX : RVDIVm<"vdivs.w.sx", 0xeb, V64, I32, VM>;
+let cx2 = 1 in
+defm VDIVSWZX : RVDIVm<"vdivs.w.zx", 0xeb, V64, I32, VM>;
+def : MnemonicAlias<"vdivs.w", "vdivs.w.zx">;
+
+// Section 8.10.13 - VDVX (Vector Divide)
+defm VDIVSL : RVDIVm<"vdivs.l", 0xfb, V64, I64, VM>;
+
+// Section 8.10.14 - VCMP (Vector Compare)
+let cx = 0, cx2 = 0 in
+defm VCMPUL : RVm<"vcmpu.l", 0xb9, V64, I64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVCMPULO : RVm<"pvcmpu.lo", 0xb9, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VCMPUW : RVm<"vcmpu.w", 0xb9, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVCMPUUP : RVm<"pvcmpu.up", 0xb9, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVCMPU : RVm<"pvcmpu", 0xb9, V64, I64, VM512>;
+def : MnemonicAlias<"vcmpu.w", "pvcmpu.lo">;
+
+// Section 8.10.15 - VCPS (Vector Compare Single)
+let cx = 0, cx2 = 0 in
+defm VCMPSWSX : RVm<"vcmps.w.sx", 0xfa, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVCMPSLO : RVm<"pvcmps.lo", 0xfa, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VCMPSWZX : RVm<"vcmps.w.zx", 0xfa, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVCMPSUP : RVm<"pvcmps.up", 0xfa, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVCMPS : RVm<"pvcmps", 0xfa, V64, I64, VM512>;
+def : MnemonicAlias<"pvcmps.lo.sx", "vcmps.w.sx">;
+def : MnemonicAlias<"vcmps.w.zx", "pvcmps.lo">;
+def : MnemonicAlias<"vcmps.w", "pvcmps.lo">;
+def : MnemonicAlias<"pvcmps.lo.zx", "pvcmps.lo">;
+
+// Section 8.10.16 - VCPX (Vector Compare)
+defm VCMPSL : RVm<"vcmps.l", 0xba, V64, I64, VM>;
+
+// Section 8.10.17 - VCMS (Vector Compare and Select Maximum/Minimum Single)
+let cx = 0, cx2 = 0 in
+defm VMAXSWSX : RVm<"vmaxs.w.sx", 0x8a, V64, I32, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVMAXSLO : RVm<"pvmaxs.lo", 0x8a, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VMAXSWZX : RVm<"vmaxs.w.zx", 0x8a, V64, I32, VM>;
+}
+let cx = 1, cx2 = 0 in
+defm PVMAXSUP : RVm<"pvmaxs.up", 0x8a, V64, I64, VM>;
+let cx = 1, cx2 = 1 in
+defm PVMAXS : RVm<"pvmaxs", 0x8a, V64, I64, VM512>;
+let cs2 = 1 in {
+ let cx = 0, cx2 = 0 in
+ defm VMINSWSX : RVm<"vmins.w.sx", 0x8a, V64, I32, VM>;
+ let cx = 0, cx2 = 1 in {
+ defm PVMINSLO : RVm<"pvmins.lo", 0x8a, V64, I32, VM>;
+ let isCodeGenOnly = 1 in
+ defm VMINSWZX : RVm<"vmins.w.zx", 0x8a, V64, I32, VM>;
+ }
+ let cx = 1, cx2 = 0 in
+ defm PVMINSUP : RVm<"pvmins.up", 0x8a, V64, I64, VM>;
+ let cx = 1, cx2 = 1 in
+ defm PVMINS : RVm<"pvmins", 0x8a, V64, I64, VM512>;
+}
+def : MnemonicAlias<"pvmaxs.lo.sx", "vmaxs.w.sx">;
+def : MnemonicAlias<"vmaxs.w.zx", "pvmaxs.lo">;
+def : MnemonicAlias<"vmaxs.w", "pvmaxs.lo">;
+def : MnemonicAlias<"pvmaxs.lo.zx", "pvmaxs.lo">;
+def : MnemonicAlias<"pvmins.lo.sx", "vmins.w.sx">;
+def : MnemonicAlias<"vmins.w.zx", "pvmins.lo">;
+def : MnemonicAlias<"vmins.w", "pvmins.lo">;
+def : MnemonicAlias<"pvmins.lo.zx", "pvmins.lo">;
+
+// Section 8.10.18 - VCMX (Vector Compare and Select Maximum/Minimum)
+defm VMAXSL : RVm<"vmaxs.l", 0x9a, V64, I64, VM>;
+let cs2 = 1 in
+defm VMINSL : RVm<"vmins.l", 0x9a, V64, I64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.11 - Vector Logical Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.11.1 - VAND (Vector And)
+let cx = 0, cx2 = 0 in defm VAND : RVLm<"vand", 0xc4, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVANDLO : RVLm<"pvand.lo", 0xc4, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVANDUP : RVLm<"pvand.up", 0xc4, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVAND : RVLm<"pvand", 0xc4, I64, V64, VM512>;
+
+// Section 8.11.2 - VOR (Vector Or)
+let cx = 0, cx2 = 0 in defm VOR : RVLm<"vor", 0xc5, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVORLO : RVLm<"pvor.lo", 0xc5, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVORUP : RVLm<"pvor.up", 0xc5, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVOR : RVLm<"pvor", 0xc5, I64, V64, VM512>;
+
+// Section 8.11.3 - VXOR (Vector Exclusive Or)
+let cx = 0, cx2 = 0 in defm VXOR : RVLm<"vxor", 0xc6, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVXORLO : RVLm<"pvxor.lo", 0xc6, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVXORUP : RVLm<"pvxor.up", 0xc6, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVXOR : RVLm<"pvxor", 0xc6, I64, V64, VM512>;
+
+// Section 8.11.4 - VEQV (Vector Equivalence)
+let cx = 0, cx2 = 0 in defm VEQV : RVLm<"veqv", 0xc7, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVEQVLO : RVLm<"pveqv.lo", 0xc7, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVEQVUP : RVLm<"pveqv.up", 0xc7, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVEQV : RVLm<"pveqv", 0xc7, I64, V64, VM512>;
+
+// Section 8.11.5 - VLDZ (Vector Leading Zero Count)
+let cx = 0, cx2 = 0 in defm VLDZ : RV1m<"vldz", 0xe7, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVLDZLO : RV1m<"pvldz.lo", 0xe7, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVLDZUP : RV1m<"pvldz.up", 0xe7, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVLDZ : RV1m<"pvldz", 0xe7, V64, VM512>;
+
+// Section 8.11.6 - VPCNT (Vector Population Count)
+let cx = 0, cx2 = 0 in defm VPCNT : RV1m<"vpcnt", 0xac, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVPCNTLO : RV1m<"pvpcnt.lo", 0xac, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVPCNTUP : RV1m<"pvpcnt.up", 0xac, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVPCNT : RV1m<"pvpcnt", 0xac, V64, VM512>;
+
+// Section 8.11.7 - VBRV (Vector Bit Reverse)
+let cx = 0, cx2 = 0 in defm VBRV : RV1m<"vbrv", 0xf7, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVBRVLO : RV1m<"pvbrv.lo", 0xf7, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVBRVUP : RV1m<"pvbrv.up", 0xf7, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVBRV : RV1m<"pvbrv", 0xf7, V64, VM512>;
+
+// Section 8.11.8 - VSEQ (Vector Sequential Number)
+let cx = 0, cx2 = 0 in defm VSEQ : RV0m<"vseq", 0x99, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVSEQLO : RV0m<"pvseq.lo", 0x99, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVSEQUP : RV0m<"pvseq.up", 0x99, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSEQ : RV0m<"pvseq", 0x99, V64, VM512>;
+
+//-----------------------------------------------------------------------------
+// Section 8.12 - Vector Shift Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.12.1 - VSLL (Vector Shift Left Logical)
+let cx = 0, cx2 = 0 in defm VSLL : RVSm<"vsll", 0xe5, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVSLLLO : RVSm<"pvsll.lo", 0xe5, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVSLLUP : RVSm<"pvsll.up", 0xe5, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSLL : RVSm<"pvsll", 0xe5, I64, V64, VM512>;
+
+// Section 8.12.2 - VSLD (Vector Shift Left Double)
+defm VSLD : RVSDm<"vsld", 0xe4, V64, VM>;
+
+// Section 8.12.3 - VSRL (Vector Shift Right Logical)
+let cx = 0, cx2 = 0 in defm VSRL : RVSm<"vsrl", 0xf5, I64, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVSRLLO : RVSm<"pvsrl.lo", 0xf5, I32, V64, VM>;
+let cx = 1, cx2 = 0 in defm PVSRLUP : RVSm<"pvsrl.up", 0xf5, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSRL : RVSm<"pvsrl", 0xf5, I64, V64, VM512>;
+
+// Section 8.12.4 - VSRD (Vector Shift Right Double)
+defm VSRD : RVSDm<"vsrd", 0xf4, V64, VM>;
+
+// Section 8.12.5 - VSLA (Vector Shift Left Arithmetic)
+let cx = 0, cx2 = 0 in defm VSLAWSX : RVSm<"vsla.w.sx", 0xe6, I32, V64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSLALO : RVSm<"pvsla.lo", 0xe6, I32, V64, VM>;
+ let isCodeGenOnly = 1 in defm VSLAWZX : RVSm<"vsla.w.zx", 0xe6, I32, V64, VM>;
+}
+let cx = 1, cx2 = 0 in defm PVSLAUP : RVSm<"pvsla.up", 0xe6, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSLA : RVSm<"pvsla", 0xe6, I64, V64, VM512>;
+def : MnemonicAlias<"pvsla.lo.sx", "vsla.w.sx">;
+def : MnemonicAlias<"vsla.w.zx", "pvsla.lo">;
+def : MnemonicAlias<"vsla.w", "pvsla.lo">;
+def : MnemonicAlias<"pvsla.lo.zx", "pvsla.lo">;
+
+// Section 8.12.6 - VSLAX (Vector Shift Left Arithmetic)
+defm VSLAL : RVSm<"vsla.l", 0xd4, I64, V64, VM>;
+
+// Section 8.12.7 - VSRA (Vector Shift Right Arithmetic)
+let cx = 0, cx2 = 0 in defm VSRAWSX : RVSm<"vsra.w.sx", 0xf6, I32, V64, VM>;
+let cx = 0, cx2 = 1 in {
+ defm PVSRALO : RVSm<"pvsra.lo", 0xf6, I32, V64, VM>;
+ let isCodeGenOnly = 1 in defm VSRAWZX : RVSm<"vsra.w.zx", 0xf6, I32, V64, VM>;
+}
+let cx = 1, cx2 = 0 in defm PVSRAUP : RVSm<"pvsra.up", 0xf6, F32, V64, VM>;
+let cx = 1, cx2 = 1 in defm PVSRA : RVSm<"pvsra", 0xf6, I64, V64, VM512>;
+def : MnemonicAlias<"pvsra.lo.sx", "vsra.w.sx">;
+def : MnemonicAlias<"vsra.w.zx", "pvsra.lo">;
+def : MnemonicAlias<"vsra.w", "pvsra.lo">;
+def : MnemonicAlias<"pvsra.lo.zx", "pvsra.lo">;
+
+// Section 8.12.8 - VSRAX (Vector Shift Right Arithmetic)
+defm VSRAL : RVSm<"vsra.l", 0xd5, I64, V64, VM>;
+
+// Section 8.12.9 - VSFA (Vector Shift Left and Add)
+defm VSFA : RVSAm<"vsfa", 0xd7, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.13 - Vector Floating-Point Arithmetic Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.13.1 - VFAD (Vector Floating Add)
+let cx = 0, cx2 = 0 in
+defm VFADDD : RVm<"vfadd.d", 0xcc, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFADDLO : RVm<"pvfadd.lo", 0xcc, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFADDUP : RVm<"pvfadd.up", 0xcc, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFADDS : RVm<"vfadd.s", 0xcc, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFADD : RVm<"pvfadd", 0xcc, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfadd.s", "pvfadd.up">;
+
+// Section 8.13.2 - VFSB (Vector Floating Subtract)
+let cx = 0, cx2 = 0 in
+defm VFSUBD : RVm<"vfsub.d", 0xdc, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFSUBLO : RVm<"pvfsub.lo", 0xdc, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFSUBUP : RVm<"pvfsub.up", 0xdc, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFSUBS : RVm<"vfsub.s", 0xdc, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFSUB : RVm<"pvfsub", 0xdc, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfsub.s", "pvfsub.up">;
+
+// Section 8.13.3 - VFMP (Vector Floating Multiply)
+let cx = 0, cx2 = 0 in
+defm VFMULD : RVm<"vfmul.d", 0xcd, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMULLO : RVm<"pvfmul.lo", 0xcd, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMULUP : RVm<"pvfmul.up", 0xcd, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMULS : RVm<"vfmul.s", 0xcd, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMUL : RVm<"pvfmul", 0xcd, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfmul.s", "pvfmul.up">;
+
+// Section 8.13.4 - VFDV (Vector Floating Divide)
+defm VFDIVD : RVDIVm<"vfdiv.d", 0xdd, V64, I64, VM, simm7fp>;
+let cx = 1 in
+defm VFDIVS : RVDIVm<"vfdiv.s", 0xdd, V64, F32, VM, simm7fp>;
+
+// Section 8.13.5 - VFSQRT (Vector Floating Square Root)
+defm VFSQRTD : RVF1m<"vfsqrt.d", 0xed, V64, VM>;
+let cx = 1 in
+defm VFSQRTS : RVF1m<"vfsqrt.s", 0xed, V64, VM>;
+
+// Section 8.13.6 - VFCP (Vector Floating Compare)
+let cx = 0, cx2 = 0 in
+defm VFCMPD : RVm<"vfcmp.d", 0xfc, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFCMPLO : RVm<"pvfcmp.lo", 0xfc, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFCMPUP : RVm<"pvfcmp.up", 0xfc, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFCMPS : RVm<"vfcmp.s", 0xfc, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFCMP : RVm<"pvfcmp", 0xfc, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfcmp.s", "pvfcmp.up">;
+
+// Section 8.13.7 - VFCM (Vector Floating Compare and Select Maximum/Minimum)
+let cx = 0, cx2 = 0 in
+defm VFMAXD : RVm<"vfmax.d", 0xbd, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMAXLO : RVm<"pvfmax.lo", 0xbd, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMAXUP : RVm<"pvfmax.up", 0xbd, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMAXS : RVm<"vfmax.s", 0xbd, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMAX : RVm<"pvfmax", 0xbd, V64, I64, VM512, simm7fp>;
+let cs2 = 1 in {
+ let cx = 0, cx2 = 0 in
+ defm VFMIND : RVm<"vfmin.d", 0xbd, V64, I64, VM, simm7fp>;
+ let cx = 0, cx2 = 1 in
+ defm PVFMINLO : RVm<"pvfmin.lo", 0xbd, V64, I64, VM, simm7fp>;
+ let cx = 1, cx2 = 0 in {
+ defm PVFMINUP : RVm<"pvfmin.up", 0xbd, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMINS : RVm<"vfmin.s", 0xbd, V64, F32, VM, simm7fp>;
+ }
+ let cx = 1, cx2 = 1 in
+ defm PVFMIN : RVm<"pvfmin", 0xbd, V64, I64, VM512, simm7fp>;
+}
+def : MnemonicAlias<"vfmax.s", "pvfmax.up">;
+def : MnemonicAlias<"vfmin.s", "pvfmin.up">;
+
+// Section 8.13.8 - VFMAD (Vector Floating Fused Multiply Add)
+let cx = 0, cx2 = 0 in
+defm VFMADD : RVMm<"vfmad.d", 0xe2, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMADLO : RVMm<"pvfmad.lo", 0xe2, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMADUP : RVMm<"pvfmad.up", 0xe2, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMADS : RVMm<"vfmad.s", 0xe2, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMAD : RVMm<"pvfmad", 0xe2, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfmad.s", "pvfmad.up">;
+
+// Section 8.13.9 - VFMSB (Vector Floating Fused Multiply Subtract)
+let cx = 0, cx2 = 0 in
+defm VFMSBD : RVMm<"vfmsb.d", 0xf2, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFMSBLO : RVMm<"pvfmsb.lo", 0xf2, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFMSBUP : RVMm<"pvfmsb.up", 0xf2, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFMSBS : RVMm<"vfmsb.s", 0xf2, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFMSB : RVMm<"pvfmsb", 0xf2, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfmsb.s", "pvfmsb.up">;
+
+// Section 8.13.10 - VFNMAD (Vector Floating Fused Negative Multiply Add)
+let cx = 0, cx2 = 0 in
+defm VFNMADD : RVMm<"vfnmad.d", 0xe3, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFNMADLO : RVMm<"pvfnmad.lo", 0xe3, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFNMADUP : RVMm<"pvfnmad.up", 0xe3, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFNMADS : RVMm<"vfnmad.s", 0xe3, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFNMAD : RVMm<"pvfnmad", 0xe3, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfnmad.s", "pvfnmad.up">;
+
+// Section 8.13.11 - VFNMSB (Vector Floating Fused Negative Multiply Subtract)
+let cx = 0, cx2 = 0 in
+defm VFNMSBD : RVMm<"vfnmsb.d", 0xf3, V64, I64, VM, simm7fp>;
+let cx = 0, cx2 = 1 in
+defm PVFNMSBLO : RVMm<"pvfnmsb.lo", 0xf3, V64, I64, VM, simm7fp>;
+let cx = 1, cx2 = 0 in {
+ defm PVFNMSBUP : RVMm<"pvfnmsb.up", 0xf3, V64, F32, VM, simm7fp>;
+ let isCodeGenOnly = 1 in
+ defm VFNMSBS : RVMm<"vfnmsb.s", 0xf3, V64, F32, VM, simm7fp>;
+}
+let cx = 1, cx2 = 1 in
+defm PVFNMSB : RVMm<"pvfnmsb", 0xf3, V64, I64, VM512, simm7fp>;
+def : MnemonicAlias<"vfnmsb.s", "pvfnmsb.up">;
+
+// Section 8.13.12 - VRCP (Vector Floating Reciprocal)
+let cx = 0, cx2 = 0 in defm VRCPD : RVF1m<"vrcp.d", 0xe1, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVRCPLO : RVF1m<"pvrcp.lo", 0xe1, V64, VM>;
+let cx = 1, cx2 = 0 in {
+ defm PVRCPUP : RVF1m<"pvrcp.up", 0xe1, V64, VM>;
+ let isCodeGenOnly = 1 in defm VRCPS : RVF1m<"vrcp.s", 0xe1, V64, VM>;
+}
+let cx = 1, cx2 = 1 in defm PVRCP : RVF1m<"pvrcp", 0xe1, V64, VM512>;
+def : MnemonicAlias<"vrcp.s", "pvrcp.up">;
+
+// Section 8.13.13 - VRSQRT (Vector Floating Reciprocal Square Root)
+let cx = 0, cx2 = 0 in defm VRSQRTD : RVF1m<"vrsqrt.d", 0xf1, V64, VM>;
+let cx = 0, cx2 = 1 in defm PVRSQRTLO : RVF1m<"pvrsqrt.lo", 0xf1, V64, VM>;
+let cx = 1, cx2 = 0 in {
+ defm PVRSQRTUP : RVF1m<"pvrsqrt.up", 0xf1, V64, VM>;
+ let isCodeGenOnly = 1 in
+ defm VRSQRTS : RVF1m<"vrsqrt.s", 0xf1, V64, VM>;
+}
+let cx = 1, cx2 = 1 in
+defm PVRSQRT : RVF1m<"pvrsqrt", 0xf1, V64, VM512>;
+let cs2 = 1 in {
+ let cx = 0, cx2 = 0 in
+ defm VRSQRTDNEX : RVF1m<"vrsqrt.d.nex", 0xf1, V64, VM>;
+ let cx = 0, cx2 = 1 in
+ defm PVRSQRTLONEX : RVF1m<"pvrsqrt.lo.nex", 0xf1, V64, VM>;
+ let cx = 1, cx2 = 0 in {
+ defm PVRSQRTUPNEX : RVF1m<"pvrsqrt.up.nex", 0xf1, V64, VM>;
+ let isCodeGenOnly = 1 in
+ defm VRSQRTSNEX : RVF1m<"vrsqrt.s.nex", 0xf1, V64, VM>;
+ }
+ let cx = 1, cx2 = 1 in
+ defm PVRSQRTNEX : RVF1m<"pvrsqrt.nex", 0xf1, V64, VM512>;
+}
+def : MnemonicAlias<"vrsqrt.s", "pvrsqrt.up">;
+def : MnemonicAlias<"vrsqrt.s.nex", "pvrsqrt.up.nex">;
+
+// Section 8.13.14 - VFIX (Vector Convert to Fixed Pointer)
+let cx = 0, cx2 = 0, cs2 = 0 in
+defm VCVTWDSX : RVFIXm<"vcvt.w.d.sx", 0xe8, V64, VM>;
+let cx = 0, cx2 = 1, cs2 = 0 in
+defm VCVTWDZX : RVFIXm<"vcvt.w.d.zx", 0xe8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 0 in
+defm VCVTWSSX : RVFIXm<"vcvt.w.s.sx", 0xe8, V64, VM>;
+let cx = 1, cx2 = 1, cs2 = 0 in
+defm VCVTWSZX : RVFIXm<"vcvt.w.s.zx", 0xe8, V64, VM>;
+let cx = 0, cx2 = 1, cs2 = 1 in
+defm PVCVTWSLO : RVFIXm<"pvcvt.w.s.lo", 0xe8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 1 in
+defm PVCVTWSUP : RVFIXm<"pvcvt.w.s.up", 0xe8, V64, VM>;
+let cx = 1, cx2 = 1, cs2 = 1 in
+defm PVCVTWS : RVFIXm<"pvcvt.w.s", 0xe8, V64, VM512>;
+
+// Section 8.13.15 - VFIXX (Vector Convert to Fixed Pointer)
+defm VCVTLD : RVFIXm<"vcvt.l.d", 0xa8, V64, VM>;
+
+// Section 8.13.16 - VFLT (Vector Convert to Floating Pointer)
+let cx = 0, cx2 = 0, cs2 = 0 in
+defm VCVTDW : RVF1m<"vcvt.d.w", 0xf8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 0 in
+defm VCVTSW : RVF1m<"vcvt.s.w", 0xf8, V64, VM>;
+let cx = 0, cx2 = 1, cs2 = 1 in
+defm PVCVTSWLO : RVF1m<"pvcvt.s.w.lo", 0xf8, V64, VM>;
+let cx = 1, cx2 = 0, cs2 = 1 in
+defm PVCVTSWUP : RVF1m<"pvcvt.s.w.up", 0xf8, V64, VM>;
+let cx = 1, cx2 = 1, cs2 = 1 in
+defm PVCVTSW : RVF1m<"pvcvt.s.w", 0xf8, V64, VM512>;
+
+// Section 8.13.17 - VFLTX (Vector Convert to Floating Pointer)
+defm VCVTDL : RVF1m<"vcvt.d.l", 0xb8, V64, VM>;
+
+// Section 8.13.18 - VCVS (Vector Convert to Single-format)
+defm VCVTSD : RVF1m<"vcvt.s.d", 0x9f, V64, VM>;
+
+// Section 8.13.19 - VCVD (Vector Convert to Double-format)
+defm VCVTDS : RVF1m<"vcvt.d.s", 0x8f, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.14 - Vector Reduction Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.14.1 - VSUMS (Vector Sum Single)
+defm VSUMWSX : RVF1m<"vsum.w.sx", 0xea, V64, VM>;
+let cx2 = 1 in defm VSUMWZX : RVF1m<"vsum.w.zx", 0xea, V64, VM>;
+
+// Section 8.14.2 - VSUMX (Vector Sum)
+defm VSUML : RVF1m<"vsum.l", 0xaa, V64, VM>;
+
+// Section 8.14.3 - VFSUM (Vector Floating Sum)
+defm VFSUMD : RVF1m<"vfsum.d", 0xec, V64, VM>;
+let cx = 1 in defm VFSUMS : RVF1m<"vfsum.s", 0xec, V64, VM>;
+
+// Section 8.14.4 - VMAXS (Vector Maximum/Minimum Single)
+let cx2 = 0 in defm VRMAXSWFSTSX : RVF1m<"vrmaxs.w.fst.sx", 0xbb, V64, VM>;
+let cx2 = 1 in defm VRMAXSWFSTZX : RVF1m<"vrmaxs.w.fst.zx", 0xbb, V64, VM>;
+let cs = 1 in {
+ let cx2 = 0 in
+ defm VRMAXSWLSTSX : RVF1m<"vrmaxs.w.lst.sx", 0xbb, V64, VM>;
+ let cx2 = 1 in
+ defm VRMAXSWLSTZX : RVF1m<"vrmaxs.w.lst.zx", 0xbb, V64, VM>;
+}
+let cs2 = 1 in {
+ let cx2 = 0 in
+ defm VRMINSWFSTSX : RVF1m<"vrmins.w.fst.sx", 0xbb, V64, VM>;
+ let cx2 = 1 in
+ defm VRMINSWFSTZX : RVF1m<"vrmins.w.fst.zx", 0xbb, V64, VM>;
+ let cs = 1 in {
+ let cx2 = 0 in
+ defm VRMINSWLSTSX : RVF1m<"vrmins.w.lst.sx", 0xbb, V64, VM>;
+ let cx2 = 1 in
+ defm VRMINSWLSTZX : RVF1m<"vrmins.w.lst.zx", 0xbb, V64, VM>;
+ }
+}
+
+// Section 8.14.5 - VMAXX (Vector Maximum/Minimum)
+let cs = 0 in defm VRMAXSLFST : RVF1m<"vrmaxs.l.fst", 0xab, V64, VM>;
+let cs = 1 in defm VRMAXSLLST : RVF1m<"vrmaxs.l.lst", 0xab, V64, VM>;
+let cs2 = 1 in {
+ let cs = 0 in defm VRMINSLFST : RVF1m<"vrmins.l.fst", 0xab, V64, VM>;
+ let cs = 1 in defm VRMINSLLST : RVF1m<"vrmins.l.lst", 0xab, V64, VM>;
+}
+
+// Section 8.14.6 - VFMAX (Vector Floating Maximum/Minimum)
+let cs = 0 in defm VFRMAXDFST : RVF1m<"vfrmax.d.fst", 0xad, V64, VM>;
+let cs = 1 in defm VFRMAXDLST : RVF1m<"vfrmax.d.lst", 0xad, V64, VM>;
+let cs2 = 1 in {
+ let cs = 0 in defm VFRMINDFST : RVF1m<"vfrmin.d.fst", 0xad, V64, VM>;
+ let cs = 1 in defm VFRMINDLST : RVF1m<"vfrmin.d.lst", 0xad, V64, VM>;
+}
+let cx = 1 in {
+ let cs = 0 in defm VFRMAXSFST : RVF1m<"vfrmax.s.fst", 0xad, V64, VM>;
+ let cs = 1 in defm VFRMAXSLST : RVF1m<"vfrmax.s.lst", 0xad, V64, VM>;
+ let cs2 = 1 in {
+ let cs = 0 in defm VFRMINSFST : RVF1m<"vfrmin.s.fst", 0xad, V64, VM>;
+ let cs = 1 in defm VFRMINSLST : RVF1m<"vfrmin.s.lst", 0xad, V64, VM>;
+ }
+}
+
+// Section 8.14.7 - VRAND (Vector Reduction And)
+defm VRAND : RVF1m<"vrand", 0x88, V64, VM>;
+
+// Section 8.14.8 - VROR (Vector Reduction Or)
+defm VROR : RVF1m<"vror", 0x98, V64, VM>;
+
+// Section 8.14.9 - VRXOR (Vector Reduction Exclusive Or)
+defm VRXOR : RVF1m<"vrxor", 0x89, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.15 - Vector Iterative Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.15.1 - VFIA (Vector Floating Iteration Add)
+let cx = 0 in defm VFIAD : RVI2m<"vfia.d", 0xce, V64, I64>;
+let cx = 1 in defm VFIAS : RVI2m<"vfia.s", 0xce, V64, F32>;
+
+// Section 8.15.2 - VFIS (Vector Floating Iteration Subtract)
+let cx = 0 in defm VFISD : RVI2m<"vfis.d", 0xde, V64, I64>;
+let cx = 1 in defm VFISS : RVI2m<"vfis.s", 0xde, V64, F32>;
+
+// Section 8.15.3 - VFIM (Vector Floating Iteration Multiply)
+let cx = 0 in defm VFIMD : RVI2m<"vfim.d", 0xcf, V64, I64>;
+let cx = 1 in defm VFIMS : RVI2m<"vfim.s", 0xcf, V64, F32>;
+
+// Section 8.15.4 - VFIAM (Vector Floating Iteration Add and Multiply)
+let cx = 0 in defm VFIAMD : RVI3m<"vfiam.d", 0xee, V64, I64>;
+let cx = 1 in defm VFIAMS : RVI3m<"vfiam.s", 0xee, V64, F32>;
+
+// Section 8.15.5 - VFISM (Vector Floating Iteration Subtract and Multiply)
+let cx = 0 in defm VFISMD : RVI3m<"vfism.d", 0xfe, V64, I64>;
+let cx = 1 in defm VFISMS : RVI3m<"vfism.s", 0xfe, V64, F32>;
+
+// Section 8.15.6 - VFIMA (Vector Floating Iteration Multiply and Add)
+let cx = 0 in defm VFIMAD : RVI3m<"vfima.d", 0xef, V64, I64>;
+let cx = 1 in defm VFIMAS : RVI3m<"vfima.s", 0xef, V64, F32>;
+
+// Section 8.15.7 - VFIMS (Vector Floating Iteration Multiply and Subtract)
+let cx = 0 in defm VFIMSD : RVI3m<"vfims.d", 0xff, V64, I64>;
+let cx = 1 in defm VFIMSS : RVI3m<"vfims.s", 0xff, V64, F32>;
+
+//-----------------------------------------------------------------------------
+// Section 8.16 - Vector Merger Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.16.1 - VMRG (Vector Merge)
+let cx = 0 in defm VMRG : RVm<"vmrg", 0xd6, V64, I64, VM>;
+// FIXME: vmrg.w should be named pvmrg, but we follow the assembly manual.
+let cx = 1 in defm VMRGW : RVm<"vmrg.w", 0xd6, V64, I64, VM512>;
+def : MnemonicAlias<"vmrg.l", "vmrg">;
+
+// Section 8.16.2 - VSHF (Vector Shuffle)
+defm VSHF : RVSHFm<"vshf", 0xbc, V64>;
+
+// Section 8.16.3 - VCP (Vector Compress)
+defm VCP : RV1m<"vcp", 0x8d, V64, VM>;
+
+// Section 8.16.4 - VEX (Vector Expand)
+defm VEX : RV1m<"vex", 0x9d, V64, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.17 - Vector Mask Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.17.1 - VFMK (Vector Form Mask)
+defm VFMKL : RVMKm<"vfmk.l.", 0xb4, V64, VM>;
+def : MnemonicAlias<"vfmk.l", "vfmk.l.at">;
+
+// Section 8.17.2 - VFMS (Vector Form Mask Single)
+defm VFMKW : RVMKm<"vfmk.w.", 0xb5, V64, VM>;
+let isCodeGenOnly = 1 in defm PVFMKWLO : RVMKm<"vfmk.w.", 0xb5, V64, VM>;
+let cx = 1 in defm PVFMKWUP : RVMKm<"pvfmk.w.up.", 0xb5, V64, VM>;
+def : MnemonicAlias<"vfmk.w", "vfmk.w.at">;
+def : MnemonicAlias<"pvfmk.w.up", "pvfmk.w.up.at">;
+def : MnemonicAlias<"pvfmk.w.lo", "vfmk.w.at">;
+foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at" ] in {
+ def : MnemonicAlias<"pvfmk.w.lo."#CC, "vfmk.w."#CC>;
+}
+
+// Section 8.17.3 - VFMF (Vector Form Mask Floating Point)
+defm VFMKD : RVMKm<"vfmk.d.", 0xb6, V64, VM>;
+let cx2 = 1 in defm PVFMKSLO : RVMKm<"pvfmk.s.lo.", 0xb6, V64, VM>;
+let cx = 1 in {
+ defm PVFMKSUP : RVMKm<"pvfmk.s.up.", 0xb6, V64, VM>;
+ let isCodeGenOnly = 1 in defm VFMKS : RVMKm<"vfmk.s.", 0xb6, V64, VM>;
+}
+def : MnemonicAlias<"vfmk.d", "vfmk.d.at">;
+def : MnemonicAlias<"pvfmk.s.lo", "pvfmk.s.lo.at">;
+def : MnemonicAlias<"pvfmk.s.up", "pvfmk.s.up.at">;
+def : MnemonicAlias<"vfmk.s", "pvfmk.s.up.at">;
+foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at", "num", "nan",
+ "gtnan", "ltnan", "nenan", "eqnan", "genan", "lenan" ] in {
+ def : MnemonicAlias<"vfmk.s."#CC, "pvfmk.s.up."#CC>;
+}
+
+// Section 8.17.4 - ANDM (And VM)
+defm ANDM : RVM2m<"andm", 0x84, VM>;
+
+// Section 8.17.5 - ORM (Or VM)
+defm ORM : RVM2m<"orm", 0x85, VM>;
+
+// Section 8.17.6 - XORM (Exclusive Or VM)
+defm XORM : RVM2m<"xorm", 0x86, VM>;
+
+// Section 8.17.7 - EQVM (Equivalence VM)
+defm EQVM : RVM2m<"eqvm", 0x87, VM>;
+
+// Section 8.17.8 - NNDM (Negate And VM)
+defm NNDM : RVM2m<"nndm", 0x94, VM>;
+
+// Section 8.17.9 - NEGM (Negate VM)
+defm NEGM : RVM1m<"negm", 0x95, VM>;
+
+// Section 8.17.10 - PCVM (Population Count of VM)
+defm PCVM : RVMSm<"pcvm", 0xa4, VM>;
+
+// Section 8.17.11 - LZVM (Leading Zero of VM)
+defm LZVM : RVMSm<"lzvm", 0xa5, VM>;
+
+// Section 8.17.12 - TOVM (Trailing One of VM)
+defm TOVM : RVMSm<"tovm", 0xa6, VM>;
+
+//-----------------------------------------------------------------------------
+// Section 8.18 - Vector Control Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.18.1 - LVL (Load VL)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VL] in {
+ def LVLr : RR<0xbf, (outs), (ins I64:$sy), "lvl $sy">;
+ let cy = 0 in def LVLi : RR<0xbf, (outs), (ins simm7:$sy), "lvl $sy">;
+}
+
+// Section 8.18.2 - SVL (Save VL)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0, Uses = [VL] in
+def SVL : RR<0x2f, (outs I64:$sx), (ins), "svl $sx">;
+
+// Section 8.18.3 - SMVL (Save Maximum Vector Length)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def SMVL : RR<0x2e, (outs I64:$sx), (ins), "smvl $sx">;
+
+// Section 8.18.4 - LVIX (Load Vector Data Index)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VIX] in {
+ def LVIXr : RR<0xaf, (outs), (ins I64:$sy), "lvix $sy">;
+ let cy = 0 in def LVIXi : RR<0xaf, (outs), (ins uimm6:$sy), "lvix $sy">;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
index 9815610510e1..bc5577ce4f97 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -51,6 +51,11 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
break;
return MCOperand::createReg(MO.getReg());
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(
+ MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP);
+ case MachineOperand::MO_ConstantPoolIndex:
+ return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP);
case MachineOperand::MO_ExternalSymbol:
return LowerSymbolOperand(
MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
@@ -58,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
case MachineOperand::MO_Immediate:
return MCOperand::createImm(MO.getImm());
-
+ case MachineOperand::MO_JumpTableIndex:
+ return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP);
case MachineOperand::MO_MachineBasicBlock:
return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP);
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
index 5783a8df69d2..d175ad26c742 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -35,6 +36,8 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
const MCPhysReg *
VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
switch (MF->getFunction().getCallingConv()) {
+ case CallingConv::Fast:
+ // Being explicit (same as standard CC).
default:
return CSR_SaveList;
case CallingConv::PreserveAll:
@@ -45,6 +48,8 @@ VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
switch (CC) {
+ case CallingConv::Fast:
+ // Being explicit (same as standard CC).
default:
return CSR_RegMask;
case CallingConv::PreserveAll:
@@ -82,10 +87,22 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const {
++ItAlias)
Reserved.set(*ItAlias);
+ // Reserve constant registers.
+ Reserved.set(VE::VM0);
+ Reserved.set(VE::VMP0);
+
return Reserved;
}
-bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; }
+bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
+ switch (PhysReg) {
+ case VE::VM0:
+ case VE::VMP0:
+ return true;
+ default:
+ return false;
+ }
+}
const TargetRegisterClass *
VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
@@ -93,6 +110,29 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &VE::I64RegClass;
}
+static unsigned offsetToDisp(MachineInstr &MI) {
+ // Default offset in instruction's operands (reg+reg+imm).
+ unsigned OffDisp = 2;
+
+#define RRCAS_multi_cases(NAME) NAME##rir : case NAME##rii
+
+ {
+ using namespace llvm::VE;
+ switch (MI.getOpcode()) {
+ case RRCAS_multi_cases(TS1AML):
+ case RRCAS_multi_cases(TS1AMW):
+ case RRCAS_multi_cases(CASL):
+ case RRCAS_multi_cases(CASW):
+ // These instructions use AS format (reg+imm).
+ OffDisp = 1;
+ break;
+ }
+ }
+#undef RRCAS_multi_cases
+
+ return OffDisp;
+}
+
static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
MachineInstr &MI, const DebugLoc &dl,
unsigned FIOperandNum, int Offset, Register FrameReg) {
@@ -100,7 +140,7 @@ static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
// VE has 32 bit offset field, so no need to expand a target instruction.
// Directly encode it.
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
- MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset);
}
void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
@@ -116,9 +156,41 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register FrameReg;
int Offset;
- Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
-
- Offset += MI.getOperand(FIOperandNum + 2).getImm();
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
+
+ Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
+
+ if (MI.getOpcode() == VE::STQrii) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ Register SrcReg = MI.getOperand(3).getReg();
+ Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
+ Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
+ // VE stores HiReg to 8(addr) and LoReg to 0(addr)
+ MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii))
+ .addReg(FrameReg)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcLoReg);
+ replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+ MI.setDesc(TII.get(VE::STrii));
+ MI.getOperand(3).setReg(SrcHiReg);
+ Offset += 8;
+ } else if (MI.getOpcode() == VE::LDQrii) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ Register DestReg = MI.getOperand(0).getReg();
+ Register DestHiReg = getSubReg(DestReg, VE::sub_even);
+ Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
+ // VE loads HiReg from 8(addr) and LoReg from 0(addr)
+ MachineInstr *StMI =
+ BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg)
+ .addReg(FrameReg)
+ .addImm(0)
+ .addImm(0);
+ replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+ MI.setDesc(TII.get(VE::LDrii));
+ MI.getOperand(0).setReg(DestHiReg);
+ Offset += 8;
+ }
replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
}
@@ -126,26 +198,3 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return VE::SX9;
}
-
-// VE has no architectural need for stack realignment support,
-// except that LLVM unfortunately currently implements overaligned
-// stack objects by depending upon stack realignment support.
-// If that ever changes, this can probably be deleted.
-bool VERegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (!TargetRegisterInfo::canRealignStack(MF))
- return false;
-
- // VE always has a fixed frame pointer register, so don't need to
- // worry about needing to reserve it. [even if we don't have a frame
- // pointer for our frame, it still cannot be used for other things,
- // or register window traps will be SADNESS.]
-
- // If there's a reserved call frame, we can use VE to access locals.
- if (getFrameLowering(MF)->hasReservedCallFrame(MF))
- return true;
-
- // Otherwise, we'd need a base pointer, but those aren't implemented
- // for VE at the moment.
-
- return false;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
index 9a32da16bea6..334fb965a986 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
@@ -40,8 +40,6 @@ public:
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
-
- bool canRealignStack(const MachineFunction &MF) const override;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
index 29708d35c730..70ff104b65b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
@@ -26,13 +26,33 @@ class VEMiscReg<bits<6> enc, string n>: Register<n> {
let Namespace = "VE";
}
+class VEVecReg<bits<8> enc, string n, list<Register> subregs = [],
+ list<string> altNames = [], list<Register> aliases = []>
+ : Register<n, altNames> {
+ let HWEncoding{15-8} = 0;
+ let HWEncoding{7-0} = enc;
+ let Namespace = "VE";
+ let SubRegs = subregs;
+ let Aliases = aliases;
+}
+
+class VEMaskReg<bits<4> enc, string n, list<Register> subregs = [],
+ list<string> altNames = [], list<Register> aliases = []>
+ : Register<n, altNames> {
+ let HWEncoding{15-4} = 0;
+ let HWEncoding{3-0} = enc;
+ let Namespace = "VE";
+ let SubRegs = subregs;
+ let Aliases = aliases;
+}
+
let Namespace = "VE" in {
- def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63)
- def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63)
def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63)
def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31)
def sub_even : SubRegIndex<64>; // High 64 bit (0..63)
def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127)
+ def sub_vm_even : SubRegIndex<256>; // High 256 bit (0..255)
+ def sub_vm_odd : SubRegIndex<256, 256>; // Low 256 bit (256..511)
def AsmName : RegAltNameIndex;
}
@@ -66,26 +86,23 @@ def MISC : RegisterClass<"VE", [i64], 64,
def IC : VEMiscReg<62, "ic">;
//-----------------------------------------------------------------------------
-// Gneric Registers
+// Vector Length Register
//-----------------------------------------------------------------------------
-let RegAltNameIndices = [AsmName] in {
+def VL : VEMiscReg<63, "vl">;
-// Generic integer registers - 8 bits wide
-foreach I = 0-63 in
- def SB#I : VEReg<I, "sb"#I, [], ["s"#I]>, DwarfRegNum<[I]>;
+// Register classes.
+def VLS : RegisterClass<"VE", [i32], 64, (add VL)>;
-// Generic integer registers - 16 bits wide
-let SubRegIndices = [sub_i8] in
-foreach I = 0-63 in
- def SH#I : VEReg<I, "sh"#I, [!cast<VEReg>("SB"#I)], ["s"#I]>,
- DwarfRegNum<[I]>;
+//-----------------------------------------------------------------------------
+// Generic Registers
+//-----------------------------------------------------------------------------
+
+let RegAltNameIndices = [AsmName] in {
// Generic integer registers - 32 bits wide
-let SubRegIndices = [sub_i16] in
foreach I = 0-63 in
- def SW#I : VEReg<I, "sw"#I, [!cast<VEReg>("SH"#I)], ["s"#I]>,
- DwarfRegNum<[I]>;
+ def SW#I : VEReg<I, "sw"#I, [], ["s"#I]>, DwarfRegNum<[I]>;
// Generic floating point registers - 32 bits wide
// NOTE: Mark SF#I as an alias of SW#I temporarily to avoid register allocation
@@ -95,10 +112,21 @@ foreach I = 0-63 in
DwarfRegNum<[I]>;
// Generic integer registers - 64 bits wide
-let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in
-foreach I = 0-63 in
- def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)],
- ["s"#I]>, DwarfRegNum<[I]>;
+let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in {
+  // Several registers have specific names, so add those names as aliases.
+ def SX8 : VEReg<8, "s8", [SW8, SF8], ["s8", "sl"]>, DwarfRegNum<[8]>;
+ def SX9 : VEReg<9, "s9", [SW9, SF9], ["s9", "fp"]>, DwarfRegNum<[9]>;
+ def SX10 : VEReg<10, "s10", [SW10, SF10], ["s10", "lr"]>, DwarfRegNum<[10]>;
+ def SX11 : VEReg<11, "s11", [SW11, SF11], ["s11", "sp"]>, DwarfRegNum<[11]>;
+ def SX14 : VEReg<14, "s14", [SW14, SF14], ["s14", "tp"]>, DwarfRegNum<[14]>;
+ def SX15 : VEReg<15, "s15", [SW15, SF15], ["s15", "got"]>, DwarfRegNum<[15]>;
+ def SX16 : VEReg<16, "s16", [SW16, SF16], ["s16", "plt"]>, DwarfRegNum<[16]>;
+
+ // Other generic registers.
+ foreach I = { 0-7, 12-13, 17-63 } in
+ def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)],
+ ["s"#I]>, DwarfRegNum<[I]>;
+}
// Aliases of the S* registers used to hold 128-bit values (long doubles).
// The following foreach represents something like:
@@ -112,20 +140,31 @@ foreach I = 0-31 in
!cast<VEReg>("SX"#!add(!shl(I,1),1))],
["s"#!shl(I,1)]>;
+// Vector registers - 64 bits wide, 256 elements
+foreach I = 0-63 in
+ def V#I : VEVecReg<I, "v"#I, [], ["v"#I]>, DwarfRegNum<[!add(64,I)]>;
+
+// Vector Index Register
+def VIX : VEVecReg<255, "vix", [], ["vix"]>;
+
+// Vector mask registers - 256 bits wide
+foreach I = 0-15 in
+ def VM#I : VEMaskReg<I, "vm"#I, [], ["vm"#I]>, DwarfRegNum<[!add(128,I)]>;
+
+// Aliases of VMs used as pairs of two VMs for packed instructions
+let SubRegIndices = [sub_vm_even, sub_vm_odd], CoveredBySubRegs = 1 in
+foreach I = 0-7 in
+ def VMP#I : VEMaskReg<!shl(I,1), "vmp"#I,
+ [!cast<VEMaskReg>("VM"#!shl(I,1)),
+ !cast<VEMaskReg>("VM"#!add(!shl(I,1),1))],
+ ["vm"#!shl(I,1)]>;
+
} // RegAltNameIndices = [AsmName]
// Register classes.
//
// The register order is defined in terms of the preferred
// allocation order.
-def I8 : RegisterClass<"VE", [i8], 8,
- (add (sequence "SB%u", 0, 7),
- (sequence "SB%u", 34, 63),
- (sequence "SB%u", 8, 33))>;
-def I16 : RegisterClass<"VE", [i16], 16,
- (add (sequence "SH%u", 0, 7),
- (sequence "SH%u", 34, 63),
- (sequence "SH%u", 8, 33))>;
def I32 : RegisterClass<"VE", [i32], 32,
(add (sequence "SW%u", 0, 7),
(sequence "SW%u", 34, 63),
@@ -142,3 +181,14 @@ def F128 : RegisterClass<"VE", [f128], 128,
(add (sequence "Q%u", 0, 3),
(sequence "Q%u", 17, 31),
(sequence "Q%u", 4, 16))>;
+
+def V64 : RegisterClass<"VE",
+ [v256f64, // default type for vector registers
+ v512i32, v512f32,
+ v256i64, v256i32, v256f32, /* v256f64, */], 64,
+ (add (sequence "V%u", 0, 63),
+ VIX)>;
+
+// vm0 is reserved as the always-true mask
+def VM : RegisterClass<"VE", [v256i1], 64, (sequence "VM%u", 0, 15)>;
+def VM512 : RegisterClass<"VE", [v512i1], 64, (sequence "VMP%u", 0, 7)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
index a0b78d95e3cf..daa6cfb8aa84 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
@@ -27,73 +27,35 @@ void VESubtarget::anchor() {}
VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
+ // Default feature settings
+ EnableVPU = false;
+
// Determine default and user specified characteristics
std::string CPUName = std::string(CPU);
if (CPUName.empty())
- CPUName = "ve";
+ CPUName = "generic";
// Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+ ParseSubtargetFeatures(CPUName, /*TuneCPU=*/CPU, FS);
return *this;
}
VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : VEGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
FrameLowering(*this) {}
-int VESubtarget::getAdjustedFrameSize(int frameSize) const {
-
- // VE stack frame:
- //
- // +----------------------------------------+
- // | Locals and temporaries |
- // +----------------------------------------+
- // | Parameter area for callee |
- // 176(fp) | |
- // +----------------------------------------+
- // | Register save area (RSA) for callee |
- // | |
- // 16(fp) | 20 * 8 bytes |
- // +----------------------------------------+
- // 8(fp) | Return address |
- // +----------------------------------------+
- // 0(fp) | Frame pointer of caller |
- // --------+----------------------------------------+--------
- // | Locals and temporaries for callee |
- // +----------------------------------------+
- // | Parameter area for callee of callee |
- // +----------------------------------------+
- // 16(sp) | RSA for callee of callee |
- // +----------------------------------------+
- // 8(sp) | Return address |
- // +----------------------------------------+
- // 0(sp) | Frame pointer of callee |
- // +----------------------------------------+
-
- // RSA frame:
- // +----------------------------------------------+
- // 168(fp) | %s33 |
- // +----------------------------------------------+
- // | %s19...%s32 |
- // +----------------------------------------------+
- // 48(fp) | %s18 |
- // +----------------------------------------------+
- // 40(fp) | Linkage area register (%s17) |
- // +----------------------------------------------+
- // 32(fp) | Procedure linkage table register (%plt=%s16) |
- // +----------------------------------------------+
- // 24(fp) | Global offset table register (%got=%s15) |
- // +----------------------------------------------+
- // 16(fp) | Thread pointer register (%tp=%s14) |
- // +----------------------------------------------+
+uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const {
+  // Calculate the adjusted frame size by adding the size of the RSA frame,
+  // return address, and frame pointer as described in VEFrameLowering.cpp.
+ const VEFrameLowering *TFL = getFrameLowering();
- frameSize += 176; // for RSA, RA, and FP
- frameSize = alignTo(frameSize, 16); // requires 16 bytes alignment
+ FrameSize += getRsaSize();
+ FrameSize = alignTo(FrameSize, TFL->getStackAlign());
- return frameSize;
+ return FrameSize;
}
bool VESubtarget::enableMachineScheduler() const { return true; }
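As an aside on the getAdjustedFrameSize() change above, here is a minimal standalone sketch of the arithmetic it performs, assuming getRsaSize() returns 176 and the stack alignment is 16 bytes, as the surrounding code indicates; the helper below is illustrative only and not part of the patch.

#include "llvm/Support/MathExtras.h"

// Illustrative only: a 100-byte frame grows by 176 bytes for the RSA,
// return address, and frame pointer, then rounds up to the 16-byte
// stack alignment: 100 + 176 = 276, which rounds up to 288.
static uint64_t exampleAdjustedFrameSize(uint64_t FrameSize = 100) {
  return llvm::alignTo(FrameSize + 176, 16); // 288 for the default argument
}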
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
index f3a2c206162e..213aca2ea3f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
@@ -32,6 +32,13 @@ class VESubtarget : public VEGenSubtargetInfo {
Triple TargetTriple;
virtual void anchor();
+ /// Features {
+
+ // Emit VPU instructions
+ bool EnableVPU;
+
+ /// } Features
+
VEInstrInfo InstrInfo;
VETargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
@@ -55,15 +62,21 @@ public:
bool enableMachineScheduler() const override;
+ bool enableVPU() const { return EnableVPU; }
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
VESubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
/// Given an actual stack size as determined by FrameInfo, this function
- /// returns adjusted framesize which includes space for register window
- /// spills and arguments.
- int getAdjustedFrameSize(int stackSize) const;
+  /// returns the adjusted frame size, which includes space for the RSA, return
+  /// address, and frame pointer.
+ uint64_t getAdjustedFrameSize(uint64_t FrameSize) const;
+
+ /// Get the size of RSA, return address, and frame pointer as described
+ /// in VEFrameLowering.cpp.
+  unsigned getRsaSize() const { return 176; }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
};
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
index 08b55eebbc98..414ae09431c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
#define DEBUG_TYPE "ve"
-extern "C" void LLVMInitializeVETarget() {
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETarget() {
// Register the target.
RegisterTargetMachine<VETargetMachine> X(getTheVETarget());
}
@@ -44,13 +44,24 @@ static std::string computeDataLayout(const Triple &T) {
// Stack alignment is 128 bits
Ret += "-S128";
+ // Vector alignments are 64 bits
+  // Need to define all of them; otherwise, each vector type's alignment
+  // defaults to the size of the type.
+ Ret += "-v64:64:64"; // for v2f32
+ Ret += "-v128:64:64";
+ Ret += "-v256:64:64";
+ Ret += "-v512:64:64";
+ Ret += "-v1024:64:64";
+ Ret += "-v2048:64:64";
+ Ret += "-v4096:64:64";
+ Ret += "-v8192:64:64";
+ Ret += "-v16384:64:64"; // for v256f64
+
return Ret;
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.getValueOr(Reloc::Static);
}
class VEELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -96,7 +107,9 @@ public:
return getTM<VETargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -104,7 +117,18 @@ TargetPassConfig *VETargetMachine::createPassConfig(PassManagerBase &PM) {
return new VEPassConfig(*this, PM);
}
+void VEPassConfig::addIRPasses() {
+ // VE requires atomic expand pass.
+ addPass(createAtomicExpandPass());
+ TargetPassConfig::addIRPasses();
+}
+
bool VEPassConfig::addInstSelector() {
addPass(createVEISelDag(getVETargetMachine()));
return false;
}
+
+void VEPassConfig::addPreEmitPass() {
+ // LVLGen should be called after scheduling and register allocation
+ addPass(createLVLGenPass());
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
index c267c4d9a578..68af66597485 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -33,16 +33,35 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
const VESubtarget *getST() const { return ST; }
const VETargetLowering *getTLI() const { return TLI; }
+ bool enableVPU() const { return getST()->enableVPU(); }
+
public:
explicit VETTIImpl(const VETargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; }
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool VectorRegs = (ClassID == 1);
+ if (VectorRegs) {
+ // TODO report vregs once vector isel is stable.
+ return 0;
+ }
+
+ return 64;
+ }
- unsigned getRegisterBitWidth(bool Vector) const { return 64; }
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ // TODO report vregs once vector isel is stable.
+ return 0;
+ }
+ return 64;
+ }
- unsigned getMinVectorRegisterBitWidth() const { return 64; }
+ unsigned getMinVectorRegisterBitWidth() const {
+ // TODO report vregs once vector isel is stable.
+ return 0;
+ }
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td
new file mode 100644
index 000000000000..2c88d5099a7b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -0,0 +1,46 @@
+//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP
+// SDNodes are an intermediate isel layer between the vector SDNodes emitted by
+// LLVM and the actual VE vector instructions. For example:
+//
+// ADD(x,y) --> VVP_ADD(x,y,mask,evl) --> VADDSWSXrvml(x,y,mask,evl)
+// ^ ^ ^
+// The standard The VVP layer SDNode. The VE vector instruction.
+// SDNode.
+//
+// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
+//===----------------------------------------------------------------------===//
+
+// Binary Operators {
+
+// BinaryOp(x,y,mask,vl)
+def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>,
+ SDTCisSameNumEltsAs<0, 3>,
+ IsVLVT<4>
+]>;
+
+// Binary operator commutative pattern.
+class vvp_commutative<SDNode RootOp> :
+ PatFrags<
+ (ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
+ [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
+ (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
+
+// VVP node definitions.
+def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
+def c_vvp_add : vvp_commutative<vvp_add>;
+
+def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>;
+def c_vvp_and : vvp_commutative<vvp_and>;
+
+// } Binary Operators
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td
new file mode 100644
index 000000000000..7003fb387670
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -0,0 +1,71 @@
+//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how VVP_* SDNodes are lowered to machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// VVP SDNode definitions.
+//
+//===----------------------------------------------------------------------===//
+include "VVPInstrInfo.td"
+
+multiclass VectorBinaryArith<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+ string OpBaseName,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ // No mask.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx),
+ DataVT:$vy, (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvl")
+ ScalarVT:$sx, $vy, $avl)>;
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvl")
+ $vx, $vy, $avl)>;
+
+ // Mask.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx),
+ DataVT:$vy, MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvml")
+ ScalarVT:$sx, $vy, $mask, $avl)>;
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvml")
+ $vx, $vy, $mask, $avl)>;
+
+ // TODO We do not specify patterns for the immediate variants here. There
+ // will be an immediate folding pass that takes care of switching to the
+ // immediate variant where applicable.
+
+ // TODO Fold vvp_select into passthru.
+}
+
+// Expand both the 64-bit and 32-bit variants (256 elements).
+multiclass VectorBinaryArith_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+ ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+ defm : VectorBinaryArith<OpNode,
+ LongScalarVT, LongDataVT, v256i1,
+ LongOpBaseName, simm7, LO7>;
+ defm : VectorBinaryArith<OpNode,
+ ShortScalarVT, ShortDataVT, v256i1,
+ ShortOpBaseName, simm7, LO7>;
+}
+
+
+defm : VectorBinaryArith_ShortLong<c_vvp_add,
+ i64, v256i64, "VADDSL",
+ i32, v256i32, "VADDSWSX">;
+defm : VectorBinaryArith_ShortLong<c_vvp_and,
+ i64, v256i64, "VAND",
+ i32, v256i32, "PVANDLO">;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def
new file mode 100644
index 000000000000..a68402e9ea10
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def
@@ -0,0 +1,41 @@
+//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all VVP_* SDNodes and their properties
+//
+//===----------------------------------------------------------------------===//
+
+/// HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+/// \p VPOPC is the VP_* SDNode opcode.
+/// \p VVPOPC is the VVP_* SDNode opcode.
+#ifndef HANDLE_VP_TO_VVP
+#define HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+#endif
+
+/// ADD_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP SDNode operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef ADD_VVP_OP
+#define ADD_VVP_OP(X, Y)
+#endif
+
+/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP binary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef ADD_BINARY_VVP_OP
+#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
+#endif
+
+// Integer arithmetic.
+ADD_BINARY_VVP_OP(VVP_ADD,ADD)
+
+ADD_BINARY_VVP_OP(VVP_AND,AND)
+
+#undef HANDLE_VP_TO_VVP
+#undef ADD_BINARY_VVP_OP
+#undef ADD_VVP_OP
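VVPNodes.def above follows LLVM's usual X-macro pattern: a consumer defines one or more of the macros before including the file and collects one entry per listed node. A minimal sketch of such a consumer follows; the enum name is hypothetical and the snippet only illustrates the expansion, it is not part of the patch.

// Expand VVPNodes.def into a list of opcode names. Defining ADD_VVP_OP is
// enough: ADD_BINARY_VVP_OP defaults to ADD_VVP_OP plus HANDLE_VP_TO_VVP,
// and HANDLE_VP_TO_VVP defaults to nothing. The .def file #undefs the
// macros itself, so no cleanup is needed here.
enum HypotheticalVVPOpcode {
#define ADD_VVP_OP(VVP_NAME, SD_NAME) VVP_NAME,
#include "VVPNodes.def" // expands to VVP_ADD, VVP_AND,
  NUM_VVP_OPS
};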