Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/VE')
35 files changed, 7853 insertions, 833 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp index 7a899b4b38e2..a3309a68c76d 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -125,6 +125,9 @@ static const MCPhysReg F128Regs[32] = { VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23, VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31}; +static const MCPhysReg VM512Regs[8] = {VE::VMP0, VE::VMP1, VE::VMP2, VE::VMP3, + VE::VMP4, VE::VMP5, VE::VMP6, VE::VMP7}; + static const MCPhysReg MISCRegs[31] = { VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR, @@ -277,6 +280,17 @@ public: } return false; } + bool isUImm4() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<4>(Value); + } + return false; + } bool isUImm6() { if (!isImm()) return false; @@ -476,6 +490,10 @@ public: addImmOperands(Inst, N); } + void addUImm4Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addUImm6Operands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } @@ -648,6 +666,15 @@ public: return true; } + static bool MorphToVM512Reg(VEOperand &Op) { + unsigned Reg = Op.getReg(); + unsigned regIdx = Reg - VE::VM0; + if (regIdx % 2 || regIdx > 15) + return false; + Op.Reg.RegNum = VM512Regs[regIdx / 2]; + return true; + } + static bool MorphToMISCReg(VEOperand &Op) { const auto *ConstExpr = dyn_cast<MCConstantExpr>(Op.getImm()); if (!ConstExpr) @@ -902,6 +929,24 @@ StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc, Mnemonic = parseRD(Name, 10, NameLoc, Operands); } else if (Name.startswith("cvt.l.d")) { Mnemonic = parseRD(Name, 7, NameLoc, Operands); + } else if (Name.startswith("vcvt.w.d.sx") || Name.startswith("vcvt.w.d.zx") || + Name.startswith("vcvt.w.s.sx") || Name.startswith("vcvt.w.s.zx")) { + Mnemonic = parseRD(Name, 11, NameLoc, Operands); + } else if (Name.startswith("vcvt.l.d")) { + Mnemonic = parseRD(Name, 8, NameLoc, Operands); + } else if (Name.startswith("pvcvt.w.s.lo") || + Name.startswith("pvcvt.w.s.up")) { + Mnemonic = parseRD(Name, 12, NameLoc, Operands); + } else if (Name.startswith("pvcvt.w.s")) { + Mnemonic = parseRD(Name, 9, NameLoc, Operands); + } else if (Name.startswith("vfmk.l.") || Name.startswith("vfmk.w.") || + Name.startswith("vfmk.d.") || Name.startswith("vfmk.s.")) { + bool ICC = Name[5] == 'l' || Name[5] == 'w' ? true : false; + Mnemonic = parseCC(Name, 7, Name.size(), ICC, true, NameLoc, Operands); + } else if (Name.startswith("pvfmk.w.lo.") || Name.startswith("pvfmk.w.up.") || + Name.startswith("pvfmk.s.lo.") || Name.startswith("pvfmk.s.up.")) { + bool ICC = Name[6] == 'l' || Name[6] == 'w' ? true : false; + Mnemonic = parseCC(Name, 11, Name.size(), ICC, true, NameLoc, Operands); } else { Operands->push_back(VEOperand::CreateToken(Mnemonic, NameLoc)); } @@ -1362,9 +1407,38 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands, return ResTy; switch (getLexer().getKind()) { - case AsmToken::LParen: - // FIXME: Parsing "(" + %vreg + ", " + %vreg + ")" - // FALLTHROUGH + case AsmToken::LParen: { + // Parsing "(" + %vreg + ", " + %vreg + ")" + const AsmToken Tok1 = Parser.getTok(); + Parser.Lex(); // Eat the '('. 
+ + unsigned RegNo1; + SMLoc S1, E1; + if (tryParseRegister(RegNo1, S1, E1) != MatchOperand_Success) { + getLexer().UnLex(Tok1); + return MatchOperand_NoMatch; + } + + if (!Parser.getTok().is(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); // Eat the ','. + + unsigned RegNo2; + SMLoc S2, E2; + if (tryParseRegister(RegNo2, S2, E2) != MatchOperand_Success) + return MatchOperand_ParseFail; + + if (!Parser.getTok().is(AsmToken::RParen)) + return MatchOperand_ParseFail; + + Operands.push_back(VEOperand::CreateToken(Tok1.getString(), Tok1.getLoc())); + Operands.push_back(VEOperand::CreateReg(RegNo1, S1, E1)); + Operands.push_back(VEOperand::CreateReg(RegNo2, S2, E2)); + Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(), + Parser.getTok().getLoc())); + Parser.Lex(); // Eat the ')'. + break; + } default: { std::unique_ptr<VEOperand> Op; ResTy = parseVEAsmOperand(Op); @@ -1377,7 +1451,24 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands, if (!Parser.getTok().is(AsmToken::LParen)) break; - // FIXME: Parsing %vec-reg + "(" + %sclar-reg/number + ")" + // Parsing %vec-reg + "(" + %sclar-reg/number + ")" + std::unique_ptr<VEOperand> Op1 = VEOperand::CreateToken( + Parser.getTok().getString(), Parser.getTok().getLoc()); + Parser.Lex(); // Eat the '('. + + std::unique_ptr<VEOperand> Op2; + ResTy = parseVEAsmOperand(Op2); + if (ResTy != MatchOperand_Success || !Op2) + return MatchOperand_ParseFail; + + if (!Parser.getTok().is(AsmToken::RParen)) + return MatchOperand_ParseFail; + + Operands.push_back(std::move(Op1)); + Operands.push_back(std::move(Op2)); + Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(), + Parser.getTok().getLoc())); + Parser.Lex(); // Eat the ')'. break; } } @@ -1445,6 +1536,10 @@ unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp, if (Op.isReg() && VEOperand::MorphToF128Reg(Op)) return MCTargetAsmParser::Match_Success; break; + case MCK_VM512: + if (Op.isReg() && VEOperand::MorphToVM512Reg(Op)) + return MCTargetAsmParser::Match_Success; + break; case MCK_MISC: if (Op.isImm() && VEOperand::MorphToMISCReg(Op)) return MCTargetAsmParser::Match_Success; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp index 35885a4e3cae..20d609bc6b32 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp @@ -47,7 +47,7 @@ static MCDisassembler *createVEDisassembler(const Target &T, return new VEDisassembler(STI, Ctx); } -extern "C" void LLVMInitializeVEDisassembler() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEDisassembler() { // Register the disassembler. 
TargetRegistry::RegisterMCDisassembler(getTheVETarget(), createVEDisassembler); @@ -95,6 +95,25 @@ static const unsigned F128RegDecoderTable[] = { VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23, VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31}; +static const unsigned V64RegDecoderTable[] = { + VE::V0, VE::V1, VE::V2, VE::V3, VE::V4, VE::V5, VE::V6, VE::V7, + VE::V8, VE::V9, VE::V10, VE::V11, VE::V12, VE::V13, VE::V14, VE::V15, + VE::V16, VE::V17, VE::V18, VE::V19, VE::V20, VE::V21, VE::V22, VE::V23, + VE::V24, VE::V25, VE::V26, VE::V27, VE::V28, VE::V29, VE::V30, VE::V31, + VE::V32, VE::V33, VE::V34, VE::V35, VE::V36, VE::V37, VE::V38, VE::V39, + VE::V40, VE::V41, VE::V42, VE::V43, VE::V44, VE::V45, VE::V46, VE::V47, + VE::V48, VE::V49, VE::V50, VE::V51, VE::V52, VE::V53, VE::V54, VE::V55, + VE::V56, VE::V57, VE::V58, VE::V59, VE::V60, VE::V61, VE::V62, VE::V63}; + +static const unsigned VMRegDecoderTable[] = { + VE::VM0, VE::VM1, VE::VM2, VE::VM3, VE::VM4, VE::VM5, + VE::VM6, VE::VM7, VE::VM8, VE::VM9, VE::VM10, VE::VM11, + VE::VM12, VE::VM13, VE::VM14, VE::VM15}; + +static const unsigned VM512RegDecoderTable[] = {VE::VMP0, VE::VMP1, VE::VMP2, + VE::VMP3, VE::VMP4, VE::VMP5, + VE::VMP6, VE::VMP7}; + static const unsigned MiscRegDecoderTable[] = { VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR, @@ -145,6 +164,40 @@ static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + unsigned Reg = VE::NoRegister; + if (RegNo == 255) + Reg = VE::VIX; + else if (RegNo > 63) + return MCDisassembler::Fail; + else + Reg = V64RegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + unsigned Reg = VMRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo % 2 || RegNo > 15) + return MCDisassembler::Fail; + unsigned Reg = VM512RegDecoderTable[RegNo / 2]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp new file mode 100644 index 000000000000..c4588926af9e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp @@ -0,0 +1,137 @@ +//===-- LVLGen.cpp - LVL instruction generator ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VE.h" +#include "VESubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "lvl-gen" + +namespace { +struct LVLGen : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + static char ID; + LVLGen() : MachineFunctionPass(ID) {} + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) override; + + unsigned getVL(const MachineInstr &MI); + int getVLIndex(unsigned Opcode); +}; +char LVLGen::ID = 0; + +} // end of anonymous namespace + +FunctionPass *llvm::createLVLGenPass() { return new LVLGen; } + +int LVLGen::getVLIndex(unsigned Opcode) { + const MCInstrDesc &MCID = TII->get(Opcode); + + // If an instruction has VLIndex information, return it. + if (HAS_VLINDEX(MCID.TSFlags)) + return GET_VLINDEX(MCID.TSFlags); + + return -1; +} + +// returns a register holding a vector length. NoRegister is returned when +// this MI does not have a vector length. +unsigned LVLGen::getVL(const MachineInstr &MI) { + int Index = getVLIndex(MI.getOpcode()); + if (Index >= 0) + return MI.getOperand(Index).getReg(); + + return VE::NoRegister; +} + +bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) { +#define RegName(no) \ + (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no)) + + bool Changed = false; + bool HasRegForVL = false; + unsigned RegForVL; + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + MachineBasicBlock::iterator MI = I; + + // Check whether MI uses a vector length operand. If so, we prepare for VL + // register. We would like to reuse VL register as much as possible. We + // also would like to keep the number of LEA instructions as fewer as + // possible. Therefore, we use a regular scalar register to hold immediate + // values to load VL register. And try to reuse identical scalar registers + // to avoid new LVLr instructions as much as possible. + unsigned Reg = getVL(*MI); + if (Reg != VE::NoRegister) { + LLVM_DEBUG(dbgs() << "Vector instruction found: "); + LLVM_DEBUG(MI->dump()); + LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". "); + LLVM_DEBUG(dbgs() << "Current VL is " + << (HasRegForVL ? RegName(RegForVL) : "unknown") + << ". "); + + if (!HasRegForVL || RegForVL != Reg) { + // Use VL, but a different value in a different scalar register. + // So, generate new LVL instruction just before the current instruction. + LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load " + << RegName(Reg) << ".\n"); + BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg); + HasRegForVL = true; + RegForVL = Reg; + Changed = true; + } else { + LLVM_DEBUG(dbgs() << "Reuse current VL.\n"); + } + } + // Check the update of a given scalar register holding an immediate value + // for VL register. Also, a call doesn't preserve VL register. + if (HasRegForVL) { + if (MI->definesRegister(RegForVL, TRI) || + MI->modifiesRegister(RegForVL, TRI) || + MI->killsRegister(RegForVL, TRI) || MI->isCall()) { + // The latest VL is needed to be updated, so disable HasRegForVL. 
+ LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: "); + LLVM_DEBUG(MI->dump()); + HasRegForVL = false; + } + } + + ++I; + } + return Changed; +} + +bool LVLGen::runOnMachineFunction(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n"); + LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n'); + LLVM_DEBUG(F.dump()); + + bool Changed = false; + + const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>(); + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + + if (Changed) { + LLVM_DEBUG(dbgs() << "\n"); + LLVM_DEBUG(F.dump()); + } + LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n"); + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h index 657cc513b3c5..6995007c6dc6 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h @@ -29,6 +29,7 @@ public: const MCSubtargetInfo &STI, raw_ostream &OS) override; // Autogenerated by tblgen. + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; bool printAliasInstr(const MCInst *, uint64_t Address, const MCSubtargetInfo &, raw_ostream &); void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &, diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp index a39cffc8f4a6..4c480c050274 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -56,8 +56,8 @@ static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) { static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty()) - CPU = "ve"; - return createVEMCSubtargetInfoImpl(TT, CPU, FS); + CPU = "generic"; + return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS); } static MCTargetStreamer * @@ -80,7 +80,7 @@ static MCInstPrinter *createVEMCInstPrinter(const Triple &T, return new VEInstPrinter(MAI, MII, MRI); } -extern "C" void LLVMInitializeVETargetMC() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetMC() { // Register the MC asm info. 
RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo); diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp index 65bd142fe0db..a95a299def88 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp @@ -16,7 +16,7 @@ Target &llvm::getTheVETarget() { return TheVETarget; } -extern "C" void LLVMInitializeVETargetInfo() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetInfo() { RegisterTarget<Triple::ve, /*HasJIT=*/false> X(getTheVETarget(), "ve", "VE", "VE"); } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h index 7ed7797cbb83..8c1fa840f19c 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h @@ -29,6 +29,7 @@ class MachineInstr; FunctionPass *createVEISelDag(VETargetMachine &TM); FunctionPass *createVEPromoteToI1Pass(); +FunctionPass *createLVLGenPass(); void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); @@ -333,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) { return true; } // (m)1 patterns - return (Val & (1UL << 63)) && isShiftedMask_64(Val); + return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val); } inline static bool isMImm32Val(uint32_t Val) { @@ -346,7 +347,25 @@ inline static bool isMImm32Val(uint32_t Val) { return true; } // (m)1 patterns - return (Val & (1 << 31)) && isShiftedMask_32(Val); + return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val); +} + +/// val2MImm - Convert an integer immediate value to target MImm immediate. +inline static uint64_t val2MImm(uint64_t Val) { + if (Val == 0) + return 0; // (0)1 + if (Val & (UINT64_C(1) << 63)) + return countLeadingOnes(Val); // (m)1 + return countLeadingZeros(Val) | 0x40; // (m)0 +} + +/// mimm2Val - Convert a target MImm immediate to an integer immediate value. +inline static uint64_t mimm2Val(uint64_t Val) { + if (Val == 0) + return 0; // (0)1 + if ((Val & 0x40) == 0) + return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1 + return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0 } inline unsigned M0(unsigned Val) { return Val + 64; } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td index 617a6ea458b6..9e8adcd42077 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VE.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td @@ -18,6 +18,9 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // VE Subtarget features. 
// +def FeatureEnableVPU + : SubtargetFeature<"vpu", "EnableVPU", "true", + "Enable the VPU">; //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions @@ -43,7 +46,7 @@ def VEAsmParser : AsmParser { class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"ve", []>; +def : Proc<"generic", []>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp index 86e3aa3d3fa1..08a75b6b8c55 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp @@ -60,6 +60,9 @@ public: static const char *getRegisterName(unsigned RegNo) { return VEInstPrinter::getRegisterName(RegNo); } + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &OS); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; }; } // end of anonymous namespace @@ -203,7 +206,7 @@ void VEAsmPrinter::lowerGETGOTAndEmitMCInsts(const MachineInstr *MI, // lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24) // and %got, %got, (32)0 // sic %plt - // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt) + // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%plt, %got) MCOperand cim24 = MCOperand::createImm(-24); MCOperand loImm = createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext); @@ -248,10 +251,10 @@ void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI, MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT - // lea %dst, %plt_lo(func)(-24) + // lea %dst, func@plt_lo(-24) // and %dst, %dst, (32)0 // sic %plt ; FIXME: is it safe to use %plt here? - // lea.sl %dst, %plt_hi(func)(%dst, %plt) + // lea.sl %dst, func@plt_hi(%plt, %dst) MCOperand cim24 = MCOperand::createImm(-24); MCOperand loImm = createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext); @@ -295,7 +298,7 @@ void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI, // lea %s0, sym@tls_gd_lo(-24) // and %s0, %s0, (32)0 // sic %lr - // lea.sl %s0, sym@tls_gd_hi(%s0, %lr) + // lea.sl %s0, sym@tls_gd_hi(%lr, %s0) // lea %s12, __tls_get_addr@plt_lo(8) // and %s12, %s12, (32)0 // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr) @@ -349,7 +352,42 @@ void VEAsmPrinter::emitInstruction(const MachineInstr *MI) { } while ((++I != E) && I->isInsideBundle()); // Delay slot check. } +void VEAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); + break; + default: + llvm_unreachable("<unknown operand type>"); + } +} + +// PrintAsmOperand - Print out an operand for an inline asm expression. +bool VEAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'r': + case 'v': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + // Force static initialization. 
-extern "C" void LLVMInitializeVEAsmPrinter() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmPrinter() { RegisterAsmPrinter<VEAsmPrinter> X(getTheVETarget()); } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td index 4f04dae884ab..93899c2cae3d 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td @@ -14,71 +14,133 @@ // Aurora VE //===----------------------------------------------------------------------===// def CC_VE_C_Stack: CallingConv<[ - // float --> need special handling like below. - // 0 4 - // +------+------+ - // | empty| float| - // +------+------+ - CCIfType<[f32], CCCustom<"allocateFloat">>, + // F128 are assigned to the stack in 16-byte aligned units + CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>, // All of the rest are assigned to the stack in 8-byte aligned units. CCAssignToStack<0, 8> ]>; -def CC_VE : CallingConv<[ +///// C Calling Convention (VE ABI v2.1) ///// +// +// Reference: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v2.1.pdf +// +def CC_VE_C : CallingConv<[ // All arguments get passed in generic registers if there is space. - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, - - // bool, char, int, enum, long --> generic integer 32 bit registers - CCIfType<[i32], CCAssignToRegWithShadow< - [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Promote i1/i8/i16/i32 arguments to i64. + CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, - // float --> generic floating point 32 bit registers - CCIfType<[f32], CCAssignToRegWithShadow< - [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Convert float arguments to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + CCIfType<[f32], CCBitConvertToType<i64>>, - // long long/double --> generic 64 bit registers + // bool, char, int, enum, long, long long, float, double + // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // long double --> pair of generic 64 bit registers + // + // NOTE: If Q1 is allocated while SX1 is free, llvm tries to allocate SX1 for + // following operands, this masks SX1 to avoid such behavior. + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, + // Alternatively, they are assigned to the stack in 8-byte aligned units. CCDelegateTo<CC_VE_C_Stack> ]>; +///// Standard vararg C Calling Convention (VE ABI v2.1) ///// // All arguments get passed in stack for varargs function or non-prototyped // function. def CC_VE2 : CallingConv<[ - // float --> need special handling like below. - // 0 4 + // Promote i1/i8/i16/i32 arguments to i64. + CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, + + // Convert float arguments to i64 with padding. + // 63 31 0 // +------+------+ - // | empty| float| + // | float| 0 | // +------+------+ - CCIfType<[f32], CCCustom<"allocateFloat">>, + CCIfType<[f32], CCBitConvertToType<i64>>, + + // F128 are assigned to the stack in 16-byte aligned units + CCIfType<[f128], CCAssignToStack<16, 16>>, CCAssignToStack<0, 8> ]>; -def RetCC_VE : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, +def RetCC_VE_C : CallingConv<[ + // Promote i1/i8/i16/i32 return values to i64. 
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, - // bool, char, int, enum, long --> generic integer 32 bit registers - CCIfType<[i32], CCAssignToRegWithShadow< - [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, - - // float --> generic floating point 32 bit registers - CCIfType<[f32], CCAssignToRegWithShadow< - [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Convert float return values to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + CCIfType<[f32], CCBitConvertToType<i64>>, - // long long/double --> generic 64 bit registers + // bool, char, int, enum, long, long long, float, double + // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long double --> pair of generic 64 bit registers + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, +]>; + +///// Custom fastcc ///// +// +// This passes vector params and return values in registers. Scalar values are +// handled conforming to the standard cc. +def CC_VE_Fast : CallingConv<[ + // vector --> generic vector registers + CCIfType<[v256i32, v256f32, v256i64, v256f64], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + // TODO: make this conditional on packed mode + CCIfType<[v512i32, v512f32], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + + // vector mask --> generic vector mask registers + CCIfType<[v256i1], + CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>, + + // pair of vector mask --> generic vector mask registers + CCIfType<[v512i1], + CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], + [VM1, VM3, VM5]>>, + + // Follow the standard C CC for scalars. + CCDelegateTo<CC_VE_C> +]>; + +def RetCC_VE_Fast : CallingConv<[ + // vector --> generic vector registers + CCIfType<[v256i32, v256f32, v256i64, v256f64], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + // TODO: make this conditional on packed mode + CCIfType<[v512i32, v512f32], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + + // vector mask --> generic vector mask registers + CCIfType<[v256i1], + CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>, + + // pair of vector mask --> generic vector mask registers + CCIfType<[v512i1], + CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], + [VM1, VM3, VM5]>>, + + // Follow the standard C CC for scalars. + CCDelegateTo<RetCC_VE_C> ]>; // Callee-saved registers @@ -86,4 +148,6 @@ def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>; def CSR_NoRegs : CalleeSavedRegs<(add)>; // PreserveAll (clobbers s62,s63) - used for ve_grow_stack -def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>; +def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61), + (sequence "V%u", 0, 63), + (sequence "VM%u", 1, 15))>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp index 8b10e6466123..9e97d0eca833 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -8,6 +8,105 @@ // // This file contains the VE implementation of TargetFrameLowering class. // +// On VE, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. 
+// +// At function entry, the "frame" looks as follows: +// +// | | Higher address +// |----------------------------------------------| +// | Parameter area for this function | +// |----------------------------------------------| +// | Register save area (RSA) for this function | +// |----------------------------------------------| +// | Return address for this function | +// |----------------------------------------------| +// | Frame pointer for this function | +// |----------------------------------------------| <- sp +// | | Lower address +// +// VE doesn't use on demand stack allocation, so user code generated by LLVM +// needs to call VEOS to allocate stack frame. VE's ABI want to reduce the +// number of VEOS calls, so ABI requires to allocate not only RSA (in general +// CSR, callee saved register) area but also call frame at the prologue of +// caller function. +// +// After the prologue has run, the frame has the following general structure. +// Note that technically the last frame area (VLAs) doesn't get created until +// in the main function body, after the prologue is run. However, it's depicted +// here for completeness. +// +// | | Higher address +// |----------------------------------------------| +// | Parameter area for this function | +// |----------------------------------------------| +// | Register save area (RSA) for this function | +// |----------------------------------------------| +// | Return address for this function | +// |----------------------------------------------| +// | Frame pointer for this function | +// |----------------------------------------------| <- fp(=old sp) +// |.empty.space.to.make.part.below.aligned.in....| +// |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is +// |.alignment....................................| unknown at compile time) +// |----------------------------------------------| +// | Local variables of fixed size including spill| +// | slots | +// |----------------------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17) +// |..............................................| (size of this area is +// |..............................................| unknown at compile time) +// |----------------------------------------------| <- stack top (returned by +// | Parameter area for callee | alloca) +// |----------------------------------------------| +// | Register save area (RSA) for callee | +// |----------------------------------------------| +// | Return address for callee | +// |----------------------------------------------| +// | Frame pointer for callee | +// |----------------------------------------------| <- sp +// | | Lower address +// +// To access the data in a frame, at-compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitely needed when there are both VLAs and local +// variables with more-than-default alignment requirements. 
+// * A frame pointer is definitely needed when there are local variables with +// more-than-default alignment requirements. +// +// In addition, VE ABI defines RSA frame, return address, and frame pointer +// as follows: +// +// |----------------------------------------------| <- sp+176 +// | %s18...%s33 | +// |----------------------------------------------| <- sp+48 +// | Linkage area register (%s17) | +// |----------------------------------------------| <- sp+40 +// | Procedure linkage table register (%plt=%s16) | +// |----------------------------------------------| <- sp+32 +// | Global offset table register (%got=%s15) | +// |----------------------------------------------| <- sp+24 +// | Thread pointer register (%tp=%s14) | +// |----------------------------------------------| <- sp+16 +// | Return address | +// |----------------------------------------------| <- sp+8 +// | Frame pointer | +// |----------------------------------------------| <- sp+0 +// +// NOTE: This description is based on VE ABI and description in +// AArch64FrameLowering.cpp. Thanks a lot. //===----------------------------------------------------------------------===// #include "VEFrameLowering.h" @@ -38,48 +137,47 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF, MachineBasicBlock::iterator MBBI, uint64_t NumBytes, bool RequireFPUpdate) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + DebugLoc DL; + const VEInstrInfo &TII = *STI.getInstrInfo(); - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); // Insert following codes here as prologue // - // st %fp, 0(,%sp) - // st %lr, 8(,%sp) - // st %got, 24(,%sp) - // st %plt, 32(,%sp) - // st %s17, 40(,%sp) iff this function is using s17 as BP - // or %fp, 0, %sp - - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(0) - .addReg(VE::SX9); - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(8) - .addReg(VE::SX10); - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(24) - .addReg(VE::SX15); - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(32) - .addReg(VE::SX16); + // st %fp, 0(, %sp) iff !isLeafProc + // st %lr, 8(, %sp) iff !isLeafProc + // st %got, 24(, %sp) iff hasGOT + // st %plt, 32(, %sp) iff hasGOT + // st %s17, 40(, %sp) iff hasBP + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(0) + .addReg(VE::SX9); + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(8) + .addReg(VE::SX10); + } + if (hasGOT(MF)) { + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(24) + .addReg(VE::SX15); + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(32) + .addReg(VE::SX16); + } if (hasBP(MF)) - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) .addReg(VE::SX11) .addImm(0) .addImm(40) .addReg(VE::SX17); - BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9) - .addReg(VE::SX11) - .addImm(0); } void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, @@ -87,43 +185,42 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock::iterator MBBI, uint64_t NumBytes, bool RequireFPUpdate) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + DebugLoc DL; + const 
VEInstrInfo &TII = *STI.getInstrInfo(); - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); // Insert following codes here as epilogue // - // or %sp, 0, %fp - // ld %s17, 40(,%sp) iff this function is using s17 as BP - // ld %got, 32(,%sp) - // ld %plt, 24(,%sp) - // ld %lr, 8(,%sp) - // ld %fp, 0(,%sp) - - BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11) - .addReg(VE::SX9) - .addImm(0); + // ld %s17, 40(, %sp) iff hasBP + // ld %plt, 32(, %sp) iff hasGOT + // ld %got, 24(, %sp) iff hasGOT + // ld %lr, 8(, %sp) iff !isLeafProc + // ld %fp, 0(, %sp) iff !isLeafProc if (hasBP(MF)) - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17) + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX17) .addReg(VE::SX11) .addImm(0) .addImm(40); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16) - .addReg(VE::SX11) - .addImm(0) - .addImm(32); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX15) - .addReg(VE::SX11) - .addImm(0) - .addImm(24); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX10) - .addReg(VE::SX11) - .addImm(0) - .addImm(8); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX9) - .addReg(VE::SX11) - .addImm(0) - .addImm(0); + if (hasGOT(MF)) { + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX16) + .addReg(VE::SX11) + .addImm(0) + .addImm(32); + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX15) + .addReg(VE::SX11) + .addImm(0) + .addImm(24); + } + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX10) + .addReg(VE::SX11) + .addImm(0) + .addImm(8); + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX9) + .addReg(VE::SX11) + .addImm(0) + .addImm(0); + } } void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, @@ -131,37 +228,44 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock::iterator MBBI, int64_t NumBytes, MaybeAlign MaybeAlign) const { - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); + DebugLoc DL; + const VEInstrInfo &TII = *STI.getInstrInfo(); - if (NumBytes >= -64 && NumBytes < 63) { - BuildMI(MBB, MBBI, dl, TII.get(VE::ADDSLri), VE::SX11) + if (NumBytes == 0) { + // Nothing to do here. + } else if (isInt<7>(NumBytes)) { + // adds.l %s11, NumBytes@lo, %s11 + BuildMI(MBB, MBBI, DL, TII.get(VE::ADDSLri), VE::SX11) .addReg(VE::SX11) .addImm(NumBytes); - return; + } else if (isInt<32>(NumBytes)) { + // lea %s11, NumBytes@lo(, %s11) + BuildMI(MBB, MBBI, DL, TII.get(VE::LEArii), VE::SX11) + .addReg(VE::SX11) + .addImm(0) + .addImm(Lo_32(NumBytes)); + } else { + // Emit following codes. This clobbers SX13 which we always know is + // available here. + // lea %s13, NumBytes@lo + // and %s13, %s13, (32)0 + // lea.sl %sp, NumBytes@hi(%s13, %sp) + BuildMI(MBB, MBBI, DL, TII.get(VE::LEAzii), VE::SX13) + .addImm(0) + .addImm(0) + .addImm(Lo_32(NumBytes)); + BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX13) + .addReg(VE::SX13) + .addImm(M0(32)); + BuildMI(MBB, MBBI, DL, TII.get(VE::LEASLrri), VE::SX11) + .addReg(VE::SX11) + .addReg(VE::SX13) + .addImm(Hi_32(NumBytes)); } - // Emit following codes. This clobbers SX13 which we always know is - // available here. 
- // lea %s13,%lo(NumBytes) - // and %s13,%s13,(32)0 - // lea.sl %sp,%hi(NumBytes)(%sp, %s13) - BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzii), VE::SX13) - .addImm(0) - .addImm(0) - .addImm(Lo_32(NumBytes)); - BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX13) - .addReg(VE::SX13) - .addImm(M0(32)); - BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11) - .addReg(VE::SX11) - .addReg(VE::SX13) - .addImm(Hi_32(NumBytes)); - if (MaybeAlign) { // and %sp, %sp, Align-1 - BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11) + BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX11) .addReg(VE::SX11) .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value()))); } @@ -169,9 +273,8 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); + DebugLoc DL; + const VEInstrInfo &TII = *STI.getInstrInfo(); // Emit following codes. It is not possible to insert multiple // BasicBlocks in PEI pass, so we emit two pseudo instructions here. @@ -198,22 +301,23 @@ void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, // EXTEND_STACK_GUARD pseudo will be simply eliminated by ExpandPostRA // pass. This pseudo is required to be at the next of EXTEND_STACK // pseudo in order to protect iteration loop in ExpandPostRA. - - BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK)); - BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK_GUARD)); + BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK)); + BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK_GUARD)); } void VEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo &MFI = MF.getFrameInfo(); const VEInstrInfo &TII = *STI.getInstrInfo(); const VERegisterInfo &RegInfo = *STI.getRegisterInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. - DebugLoc dl; - bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + DebugLoc DL; // FIXME: unfortunately, returning false from canRealignStack // actually just causes needsStackRealignment to return false, @@ -226,12 +330,17 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF, "stack re-alignment, but LLVM couldn't handle it " "(probably because it has a dynamic alloca)."); - // Get the number of bytes to allocate from the FrameInfo + // Get the number of bytes to allocate from the FrameInfo. + // This number of bytes is already aligned to ABI stack alignment. uint64_t NumBytes = MFI.getStackSize(); - // The VE ABI requires a reserved 176 bytes area at the top - // of stack as described in VESubtarget.cpp. So, we adjust it here. - NumBytes = STI.getAdjustedFrameSize(NumBytes); + // Adjust stack size if this function is not a leaf function since the + // VE ABI requires a reserved area at the top of stack as described in + // VEFrameLowering.cpp. + if (!FuncInfo->isLeafProc()) { + // NOTE: The number is aligned to ABI stack alignment after adjustment. + NumBytes = STI.getAdjustedFrameSize(NumBytes); + } // Finally, ensure that the size is sufficiently aligned for the // data on the stack. 
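As an aside (this sketch is not part of the diff above): the large-offset path of emitSPAdjustment builds the 64-bit stack adjustment from two 32-bit halves, "lea %s13, NumBytes@lo", then "and %s13, %s13, (32)0", then "lea.sl %sp, NumBytes@hi(%s13, %sp)". The minimal standalone C++ model below shows only the value-level arithmetic this relies on; it assumes lea sign-extends its 32-bit displacement (which the and with (32)0, i.e. 32 leading zeros followed by 32 ones, appears to compensate for) and that lea.sl adds both register operands plus the immediate shifted left by 32. The adjustSP helper name is hypothetical.

#include <cassert>
#include <cstdint>

// Model of: lea %s13, lo ; and %s13, %s13, (32)0 ; lea.sl %sp, hi(%s13, %sp)
static uint64_t adjustSP(uint64_t SP, int64_t NumBytes) {
  // lea %s13, NumBytes@lo : the 32-bit displacement is sign-extended.
  uint64_t S13 = static_cast<uint32_t>(NumBytes);
  if (S13 & UINT64_C(0x80000000))
    S13 |= UINT64_C(0xFFFFFFFF00000000);
  // and %s13, %s13, (32)0 : keep only the zero-extended low 32 bits again.
  S13 &= UINT64_C(0x00000000FFFFFFFF);
  // lea.sl %sp, NumBytes@hi(%s13, %sp) : add %s13, %sp and (high half << 32).
  uint64_t Hi = static_cast<uint64_t>(NumBytes) >> 32;
  return SP + S13 + (Hi << 32);
}

int main() {
  const int64_t Tests[] = {INT64_C(0x123456789AB0), -INT64_C(0x22345678900),
                           INT64_C(-176)};
  for (int64_t N : Tests)
    assert(adjustSP(UINT64_C(0x1000), N) ==
           UINT64_C(0x1000) + static_cast<uint64_t>(N));
  return 0;
}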
@@ -240,36 +349,34 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF, // Update stack size with corrected value. MFI.setStackSize(NumBytes); - // Emit Prologue instructions to save %lr + // Emit Prologue instructions to save multiple registers. emitPrologueInsns(MF, MBB, MBBI, NumBytes, true); + // Emit instructions to save SP in FP as follows if this is not a leaf + // function: + // or %fp, 0, %sp + if (!FuncInfo->isLeafProc()) + BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX9) + .addReg(VE::SX11) + .addImm(0); + // Emit stack adjust instructions MaybeAlign RuntimeAlign = NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None; + assert((RuntimeAlign == None || !FuncInfo->isLeafProc()) && + "SP has to be saved in order to align variable sized stack object!"); emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign); if (hasBP(MF)) { // Copy SP to BP. - BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17) + BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX17) .addReg(VE::SX11) .addImm(0); } // Emit stack extend instructions - emitSPExtend(MF, MBB, MBBI); - - Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true); - - // Emit ".cfi_def_cfa_register 30". - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Emit ".cfi_window_save". - CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (NumBytes != 0) + emitSPExtend(MF, MBB, MBBI); } MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr( @@ -289,21 +396,33 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr( void VEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + DebugLoc DL; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo &MFI = MF.getFrameInfo(); + const VEInstrInfo &TII = *STI.getInstrInfo(); uint64_t NumBytes = MFI.getStackSize(); - // Emit Epilogue instructions to restore %lr + // Emit instructions to retrieve original SP. + if (!FuncInfo->isLeafProc()) { + // If SP is saved in FP, retrieve it as follows: + // or %sp, 0, %fp iff !isLeafProc + BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11) + .addReg(VE::SX9) + .addImm(0); + } else { + // Emit stack adjust instructions. + emitSPAdjustment(MF, MBB, MBBI, NumBytes, None); + } + + // Emit Epilogue instructions to restore multiple registers. emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true); } // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas -// or if frame pointer elimination is disabled. For the case of VE, we don't -// implement FP eliminator yet, but we returns false from this function to -// not refer fp from generated code. +// or if frame pointer elimination is disabled. 
bool VEFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -320,34 +439,41 @@ bool VEFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); } -int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const { +bool VEFrameLowering::hasGOT(const MachineFunction &MF) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + + // If a global base register is assigned (!= 0), GOT is used. + return FuncInfo->getGlobalBaseReg() != 0; +} + +StackOffset VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const VERegisterInfo *RegInfo = STI.getRegisterInfo(); - const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); bool isFixed = MFI.isFixedObjectIndex(FI); int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI); - if (FuncInfo->isLeafProc()) { - // If there's a leaf proc, all offsets need to be %sp-based, - // because we haven't caused %fp to actually point to our frame. + if (!hasFP(MF)) { + // If FP is not used, frame indexies are based on a %sp regiter. FrameReg = VE::SX11; // %sp - return FrameOffset + MF.getFrameInfo().getStackSize(); + return StackOffset::getFixed(FrameOffset + + MF.getFrameInfo().getStackSize()); } if (RegInfo->needsStackRealignment(MF) && !isFixed) { - // If there is dynamic stack realignment, all local object - // references need to be via %sp or %s17 (bp), to take account - // of the re-alignment. + // If data on stack require realignemnt, frame indexies are based on a %sp + // or %s17 (bp) register. If there is a variable sized object, bp is used. if (hasBP(MF)) FrameReg = VE::SX17; // %bp else FrameReg = VE::SX11; // %sp - return FrameOffset + MF.getFrameInfo().getStackSize(); + return StackOffset::getFixed(FrameOffset + + MF.getFrameInfo().getStackSize()); } - // Finally, default to using %fp. + // Use %fp by default. FrameReg = RegInfo->getFrameRegister(MF); - return FrameOffset; + return StackOffset::getFixed(FrameOffset); } bool VEFrameLowering::isLeafProc(MachineFunction &MF) const { @@ -367,8 +493,10 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - if (isLeafProc(MF)) { - VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>(); - MFI->setLeafProc(true); + // Functions having BP need to emit prologue and epilogue to allocate local + // buffer on the stack even if the function is a leaf function. 
+ if (isLeafProc(MF) && !hasBP(MF)) { + VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + FuncInfo->setLeafProc(true); } } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h index b548d663c504..99eb41189b25 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h @@ -15,6 +15,7 @@ #include "VE.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/TypeSize.h" namespace llvm { @@ -38,8 +39,10 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; - bool hasBP(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const override; + bool hasBP(const MachineFunction &MF) const; + bool hasGOT(const MachineFunction &MF) const; + // VE reserves argument space always for call sites in the function // immediately on entry of the current function. bool hasReservedCallFrame(const MachineFunction &MF) const override { @@ -48,8 +51,8 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; - int getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index f3d067d55fdb..761baa79b4ab 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -113,15 +113,6 @@ inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) { return Val; } -/// convMImmVal - Convert a mimm integer immediate value to target immediate. -inline static uint64_t convMImmVal(uint64_t Val) { - if (Val == 0) - return 0; // (0)1 - if (Val & (1UL << 63)) - return countLeadingOnes(Val); // (m)1 - return countLeadingZeros(Val) | 0x40; // (m)0 -} - //===--------------------------------------------------------------------===// /// VEDAGToDAGISel - VE specific code to select VE machine /// instructions for SelectionDAG operations. @@ -148,6 +139,7 @@ public: bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset); + bool selectADDRzi(SDValue N, SDValue &Base, SDValue &Offset); StringRef getPassName() const override { return "VE DAG->DAG Pattern Instruction Selection"; @@ -183,6 +175,14 @@ bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index, return false; } if (matchADDRrr(Addr, LHS, RHS)) { + // If the input is a pair of a frame-index and a register, move a + // frame-index to LHS. This generates MI with following operands. + // %dest, #FI, %reg, offset + // In the eliminateFrameIndex, above MI is converted to the following. 
+ // %dest, %fp, %reg, fi_offset + offset + if (dyn_cast<FrameIndexSDNode>(RHS)) + std::swap(LHS, RHS); + if (matchADDRri(RHS, Index, Offset)) { Base = LHS; return true; @@ -228,7 +228,7 @@ bool VEDAGToDAGISel::selectADDRzii(SDValue Addr, SDValue &Base, SDValue &Index, Addr.getOpcode() == ISD::TargetGlobalTLSAddress) return false; // direct calls. - if (ConstantSDNode *CN = cast<ConstantSDNode>(Addr)) { + if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) { if (isInt<32>(CN->getSExtValue())) { Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); @@ -250,6 +250,26 @@ bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base, return true; } +bool VEDAGToDAGISel::selectADDRzi(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (dyn_cast<FrameIndexSDNode>(Addr)) + return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) { + if (isInt<32>(CN->getSExtValue())) { + Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32); + return true; + } + } + return false; +} + bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) { if (dyn_cast<FrameIndexSDNode>(Addr)) return false; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp index ab720545dd83..d377f8e27cfd 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp @@ -13,6 +13,7 @@ #include "VEISelLowering.h" #include "MCTargetDesc/VEMCExpr.h" +#include "VEInstrBuilder.h" #include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" #include "VETargetMachine.h" @@ -21,6 +22,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -38,39 +40,280 @@ using namespace llvm; // Calling Convention Implementation //===----------------------------------------------------------------------===// -static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::f32: { - // Allocate stack like below - // 0 4 - // +------+------+ - // | empty| float| - // +------+------+ - // Use align=8 for dummy area to align the beginning of these 2 area. - State.AllocateStack(4, Align(8)); // for empty area - // Use align=4 for value to place it at just after the dummy area. 
- unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return true; - } +#include "VEGenCallingConv.inc" + +CCAssignFn *getReturnCC(CallingConv::ID CallConv) { + switch (CallConv) { default: - return false; + return RetCC_VE_C; + case CallingConv::Fast: + return RetCC_VE_Fast; } } -#include "VEGenCallingConv.inc" +CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) { + if (IsVarArg) + return CC_VE2; + switch (CallConv) { + default: + return CC_VE_C; + case CallingConv::Fast: + return CC_VE_Fast; + } +} bool VETargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = RetCC_VE; + CCAssignFn *RetCC = getReturnCC(CallConv); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC); } +static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64, + MVT::v256f32, MVT::v512f32, MVT::v256f64}; + +static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32}; + +void VETargetLowering::initRegisterClasses() { + // Set up the register classes. + addRegisterClass(MVT::i32, &VE::I32RegClass); + addRegisterClass(MVT::i64, &VE::I64RegClass); + addRegisterClass(MVT::f32, &VE::F32RegClass); + addRegisterClass(MVT::f64, &VE::I64RegClass); + addRegisterClass(MVT::f128, &VE::F128RegClass); + + if (Subtarget->enableVPU()) { + for (MVT VecVT : AllVectorVTs) + addRegisterClass(VecVT, &VE::V64RegClass); + addRegisterClass(MVT::v256i1, &VE::VMRegClass); + addRegisterClass(MVT::v512i1, &VE::VM512RegClass); + } +} + +void VETargetLowering::initSPUActions() { + const auto &TM = getTargetMachine(); + /// Load & Store { + + // VE doesn't have i1 sign extending load. + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } + + // VE doesn't have floating point extload/truncstore, so expand them. + for (MVT FPVT : MVT::fp_valuetypes()) { + for (MVT OtherFPVT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); + setTruncStoreAction(FPVT, OtherFPVT, Expand); + } + } + + // VE doesn't have fp128 load/store, so expand them in custom lower. + setOperationAction(ISD::LOAD, MVT::f128, Custom); + setOperationAction(ISD::STORE, MVT::f128, Custom); + + /// } Load & Store + + // Custom legalize address nodes into LO/HI parts. + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); + + /// VAARG handling { + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // VAARG needs to be lowered to access with 8 bytes alignment. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + // Use the default implementation. 
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + /// } VAARG handling + + /// Stack { + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + + // Use the default implementation. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + /// } Stack + + /// Branch { + + // VE doesn't have BRCOND + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // BR_JT is not implemented yet. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + /// } Branch + + /// Int Ops { + for (MVT IntVT : {MVT::i32, MVT::i64}) { + // VE has no REM or DIVREM operations. + setOperationAction(ISD::UREM, IntVT, Expand); + setOperationAction(ISD::SREM, IntVT, Expand); + setOperationAction(ISD::SDIVREM, IntVT, Expand); + setOperationAction(ISD::UDIVREM, IntVT, Expand); + + // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations. + setOperationAction(ISD::SHL_PARTS, IntVT, Expand); + setOperationAction(ISD::SRA_PARTS, IntVT, Expand); + setOperationAction(ISD::SRL_PARTS, IntVT, Expand); + + // VE has no MULHU/S or U/SMUL_LOHI operations. + // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type. + setOperationAction(ISD::MULHU, IntVT, Expand); + setOperationAction(ISD::MULHS, IntVT, Expand); + setOperationAction(ISD::UMUL_LOHI, IntVT, Expand); + setOperationAction(ISD::SMUL_LOHI, IntVT, Expand); + + // VE has no CTTZ, ROTL, ROTR operations. + setOperationAction(ISD::CTTZ, IntVT, Expand); + setOperationAction(ISD::ROTL, IntVT, Expand); + setOperationAction(ISD::ROTR, IntVT, Expand); + + // VE has 64 bits instruction which works as i64 BSWAP operation. This + // instruction works fine as i32 BSWAP operation with an additional + // parameter. Use isel patterns to lower BSWAP. + setOperationAction(ISD::BSWAP, IntVT, Legal); + + // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP + // operations. Use isel patterns for i64, promote for i32. + LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; + setOperationAction(ISD::BITREVERSE, IntVT, Act); + setOperationAction(ISD::CTLZ, IntVT, Act); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act); + setOperationAction(ISD::CTPOP, IntVT, Act); + + // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations. + // Use isel patterns for i64, promote for i32. + setOperationAction(ISD::AND, IntVT, Act); + setOperationAction(ISD::OR, IntVT, Act); + setOperationAction(ISD::XOR, IntVT, Act); + } + /// } Int Ops + + /// Conversion { + // VE doesn't have instructions for fp<->uint, so expand them by llvm + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // fp16 not supported + for (MVT FPVT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); + setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); + } + /// } Conversion + + /// Floating-point Ops { + /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem, + /// and fcmp. + + // VE doesn't have following floating point operations. + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + } + + // VE doesn't have fdiv of f128. 
+ setOperationAction(ISD::FDIV, MVT::f128, Expand); + + for (MVT FPVT : {MVT::f32, MVT::f64}) { + // f32 and f64 uses ConstantFP. f128 uses ConstantPool. + setOperationAction(ISD::ConstantFP, FPVT, Legal); + } + /// } Floating-point Ops + + /// Floating-point math functions { + + // VE doesn't have following floating point math functions. + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSQRT, VT, Expand); + } + + /// } Floating-point math functions + + /// Atomic instructions { + + setMaxAtomicSizeInBitsSupported(64); + setMinCmpXchgSizeInBits(32); + setSupportsUnalignedAtomics(false); + + // Use custom inserter for ATOMIC_FENCE. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + // Other atomic instructions. + for (MVT VT : MVT::integer_valuetypes()) { + // Support i8/i16 atomic swap. + setOperationAction(ISD::ATOMIC_SWAP, VT, Custom); + + // FIXME: Support "atmam" instructions. + setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand); + + // VE doesn't have follwing instructions. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand); + } + + /// } Atomic instructions + + /// SJLJ instructions { + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); + if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) + setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); + /// } SJLJ instructions + + // Intrinsic instructions + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); +} + +void VETargetLowering::initVPUActions() { + for (MVT LegalVecVT : AllVectorVTs) { + setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal); + // Translate all vector instructions with legal element types to VVP_* + // nodes. + // TODO We will custom-widen into VVP_* nodes in the future. While we are + // buildling the infrastructure for this, we only do this for legal vector + // VTs. +#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \ + setOperationAction(ISD::VP_OPC, LegalVecVT, Custom); +#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \ + setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom); +#include "VVPNodes.def" + } + + for (MVT LegalPackedVT : AllPackedVTs) { + setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom); + } +} + SDValue VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -85,7 +328,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, *DAG.getContext()); // Analyze return values. 
- CCInfo.AnalyzeReturn(Outs, RetCC_VE); + CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv)); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -94,6 +337,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); + assert(!VA.needsCustom() && "Unexpected custom lowering"); SDValue OutVal = OutVals[i]; // Integer return values must be sign or zero extended by the callee. @@ -109,12 +353,26 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, case CCValAssign::AExt: OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); break; + case CCValAssign::BCvt: { + // Convert a float return value to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Undef = SDValue( + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, Undef, OutVal, Sub_f32), + 0); + break; + } default: llvm_unreachable("Unknown loc info!"); } - assert(!VA.needsCustom() && "Unexpected custom lowering"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); // Guarantee that all emitted copies are stuck together with flags. @@ -138,7 +396,7 @@ SDValue VETargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); // Get the base offset of the incoming arguments stack space. - unsigned ArgsBaseOffset = 176; + unsigned ArgsBaseOffset = Subtarget->getRsaSize(); // Get the size of the preserved arguments area unsigned ArgsPreserved = 64; @@ -150,10 +408,11 @@ SDValue VETargetLowering::LowerFormalArguments( CCInfo.AllocateStack(ArgsPreserved, Align(8)); // We already allocated the preserved area, so the stack offset computed // by CC_VE would be correct now. - CCInfo.AnalyzeFormalArguments(Ins, CC_VE); + CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false)); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + assert(!VA.needsCustom() && "Unexpected custom lowering"); if (VA.isRegLoc()) { // This argument is passed in a register. // All integer register arguments are promoted by the caller to i64. @@ -163,11 +422,6 @@ SDValue VETargetLowering::LowerFormalArguments( MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); - // Get the high bits for i32 struct elements. - if (VA.getValVT() == MVT::i32 && VA.needsCustom()) - Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, - DAG.getConstant(32, DL, MVT::i32)); - // The caller promoted the argument, so insert an Assert?ext SDNode so we // won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -179,6 +433,20 @@ SDValue VETargetLowering::LowerFormalArguments( Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, DAG.getValueType(VA.getValVT())); break; + case CCValAssign::BCvt: { + // Extract a float argument from i64 with padding. 
+ // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, Arg, Sub_f32), + 0); + break; + } default: break; } @@ -194,9 +462,23 @@ SDValue VETargetLowering::LowerFormalArguments( // The registers are exhausted. This argument was passed on the stack. assert(VA.isMemLoc()); // The CC_VE_Full/Half functions compute stack offsets relative to the - // beginning of the arguments area at %fp+176. + // beginning of the arguments area at %fp + the size of reserved area. unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + + // Adjust offset for a float argument by adding 4 since the argument is + // stored in 8 bytes buffer with offset like below. LLVM generates + // 4 bytes load instruction, so need to adjust offset here. This + // adjustment is required in only LowerFormalArguments. In LowerCall, + // a float argument is converted to i64 first, and stored as 8 bytes + // data, which is required by ABI, so no need for adjustment. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + if (VA.getValVT() == MVT::f32) + Offset += 4; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); InVals.push_back( DAG.getLoad(VA.getValVT(), DL, Chain, @@ -215,7 +497,7 @@ SDValue VETargetLowering::LowerFormalArguments( // TODO: need to calculate offset correctly once we support f128. unsigned ArgOffset = ArgLocs.size() * 8; VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); - // Skip the 176 bytes of register save area. + // Skip the reserved area at the top of stack. FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset); return Chain; @@ -258,7 +540,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CLI.IsTailCall = false; // Get the base offset of the outgoing arguments stack space. - unsigned ArgsBaseOffset = 176; + unsigned ArgsBaseOffset = Subtarget->getRsaSize(); // Get the size of the preserved arguments area unsigned ArgsPreserved = 8 * 8u; @@ -270,7 +552,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCInfo.AllocateStack(ArgsPreserved, Align(8)); // We already allocated the preserved area, so the stack offset computed // by CC_VE would be correct now. - CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); + CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false)); // VE requires to use both register and stack for varargs or no-prototyped // functions. @@ -281,7 +563,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs2, *DAG.getContext()); if (UseBoth) - CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2); + CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true)); // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); @@ -371,6 +653,22 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::BCvt: { + // Convert a float argument to i64 with padding. 
+ // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Undef = SDValue( + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, Undef, Arg, Sub_f32), + 0); + break; + } } if (VA.isRegLoc()) { @@ -384,8 +682,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Create a store off the stack pointer for this argument. SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); - // The argument area starts at %fp+176 in the callee frame, - // %sp+176 in ours. + // The argument area starts at %fp/%sp + the size of reserved area. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); @@ -450,11 +747,12 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB) CLI.Ins[0].Flags.setInReg(); - RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); + RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv)); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; + assert(!VA.needsCustom() && "Unexpected custom lowering"); unsigned Reg = VA.getLocReg(); // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can @@ -472,11 +770,6 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InGlue = Chain.getValue(2); } - // Get the high bits for i32 struct elements. - if (VA.getValVT() == MVT::i32 && VA.needsCustom()) - RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, - DAG.getConstant(32, DL, MVT::i32)); - // The callee promoted the return value, so insert an Assert?ext SDNode so // we won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -488,6 +781,20 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, DAG.getValueType(VA.getValVT())); break; + case CCValAssign::BCvt: { + // Extract a float return value from i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, RV, Sub_f32), + 0); + break; + } default: break; } @@ -502,6 +809,15 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, return Chain; } +bool VETargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // VE uses 64 bit addressing, so we need multiple instructions to generate + // an address. Folding address with offset increases the number of + // instructions, so that we disable it here. Offsets will be folded in + // the DAG combine later if it worth to do so. + return false; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
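The BCvt cases above move an f32 across the call boundary in the upper half of an i64 register, leaving the low 32 bits as padding; the INSERT_SUBREG/EXTRACT_SUBREG pairs with VE::sub_f32 implement exactly that placement. A standalone sketch of the same bit layout in plain host code (helper names are illustrative, not part of this patch):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>

// Pack an f32 into bits 63..32 of a 64-bit container, padding the low half,
// mirroring the "| float | 0 |" diagrams in the calling-convention code above.
static uint64_t packF32HighHalf(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));     // bit-cast float -> u32
  return static_cast<uint64_t>(Bits) << 32; // place in bits 63..32
}

static float unpackF32HighHalf(uint64_t Reg) {
  uint32_t Bits = static_cast<uint32_t>(Reg >> 32); // take bits 63..32
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  uint64_t Reg = packF32HighHalf(1.5f);
  assert(unpackF32HighHalf(Reg) == 1.5f);
  std::cout << std::hex << Reg << "\n"; // 0x3fc0000000000000 for 1.5f
  return 0;
}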
@@ -531,30 +847,6 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } -bool VETargetLowering::hasAndNot(SDValue Y) const { - EVT VT = Y.getValueType(); - - // VE doesn't have vector and not instruction. - if (VT.isVector()) - return false; - - // VE allows different immediate values for X and Y where ~X & Y. - // Only simm7 works for X, and only mimm works for Y on VE. However, this - // function is used to check whether an immediate value is OK for and-not - // instruction as both X and Y. Generating additional instruction to - // retrieve an immediate value is no good since the purpose of this - // function is to convert a series of 3 instructions to another series of - // 3 instructions with better parallelism. Therefore, we return false - // for all immediate values now. - // FIXME: Change hasAndNot function to have two operands to make it work - // correctly with Aurora VE. - if (isa<ConstantSDNode>(Y)) - return false; - - // It's ok for generic registers. - return true; -} - VETargetLowering::VETargetLowering(const TargetMachine &TM, const VESubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -566,91 +858,15 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); - // Set up the register classes. - addRegisterClass(MVT::i32, &VE::I32RegClass); - addRegisterClass(MVT::i64, &VE::I64RegClass); - addRegisterClass(MVT::f32, &VE::F32RegClass); - addRegisterClass(MVT::f64, &VE::I64RegClass); - - /// Load & Store { - for (MVT FPVT : MVT::fp_valuetypes()) { - for (MVT OtherFPVT : MVT::fp_valuetypes()) { - // Turn FP extload into load/fpextend - setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); - - // Turn FP truncstore into trunc + store. - setTruncStoreAction(FPVT, OtherFPVT, Expand); - } - } - - // VE doesn't have i1 sign extending load - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setTruncStoreAction(VT, MVT::i1, Expand); - } - /// } Load & Store - - // Custom legalize address nodes into LO/HI parts. - MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); - setOperationAction(ISD::BlockAddress, PtrVT, Custom); - setOperationAction(ISD::GlobalAddress, PtrVT, Custom); - setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); - - /// VAARG handling { - setOperationAction(ISD::VASTART, MVT::Other, Custom); - // VAARG needs to be lowered to access with 8 bytes alignment. - setOperationAction(ISD::VAARG, MVT::Other, Custom); - // Use the default implementation. - setOperationAction(ISD::VACOPY, MVT::Other, Expand); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - /// } VAARG handling - - /// Stack { - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); - /// } Stack - - /// Int Ops { - for (MVT IntVT : {MVT::i32, MVT::i64}) { - // VE has no REM or DIVREM operations. 
- setOperationAction(ISD::UREM, IntVT, Expand); - setOperationAction(ISD::SREM, IntVT, Expand); - setOperationAction(ISD::SDIVREM, IntVT, Expand); - setOperationAction(ISD::UDIVREM, IntVT, Expand); - - setOperationAction(ISD::CTTZ, IntVT, Expand); - setOperationAction(ISD::ROTL, IntVT, Expand); - setOperationAction(ISD::ROTR, IntVT, Expand); - - // Use isel patterns for i32 and i64 - setOperationAction(ISD::BSWAP, IntVT, Legal); - setOperationAction(ISD::CTLZ, IntVT, Legal); - setOperationAction(ISD::CTPOP, IntVT, Legal); - - // Use isel patterns for i64, Promote i32 - LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; - setOperationAction(ISD::BITREVERSE, IntVT, Act); - } - /// } Int Ops - - /// Conversion { - // VE doesn't have instructions for fp<->uint, so expand them by llvm - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - - // fp16 not supported - for (MVT FPVT : MVT::fp_valuetypes()) { - setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); - setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); - } - /// } Conversion + initRegisterClasses(); + initSPUActions(); + initVPUActions(); setStackPointerRegisterToSaveRestore(VE::SX11); + // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::TRUNCATE); + // Set function alignment to 16 bytes setMinFunctionAlignment(Align(16)); @@ -667,14 +883,24 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((VEISD::NodeType)Opcode) { case VEISD::FIRST_NUMBER: break; - TARGET_NODE_CASE(Lo) - TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(CALL) + TARGET_NODE_CASE(EH_SJLJ_LONGJMP) + TARGET_NODE_CASE(EH_SJLJ_SETJMP) + TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH) TARGET_NODE_CASE(GETFUNPLT) TARGET_NODE_CASE(GETSTACKTOP) TARGET_NODE_CASE(GETTLSADDR) - TARGET_NODE_CASE(CALL) - TARGET_NODE_CASE(RET_FLAG) TARGET_NODE_CASE(GLOBAL_BASE_REG) + TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(Lo) + TARGET_NODE_CASE(MEMBARRIER) + TARGET_NODE_CASE(RET_FLAG) + TARGET_NODE_CASE(TS1AM) + TARGET_NODE_CASE(VEC_BROADCAST) + + // Register the VVP_* SDNodes. +#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME) +#include "VVPNodes.def" } #undef TARGET_NODE_CASE return nullptr; @@ -696,10 +922,17 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), 0, TF); + if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0), + CP->getAlign(), CP->getOffset(), TF); + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), TF); + if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) + return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF); + llvm_unreachable("Unhandled address SDNode"); } @@ -722,32 +955,24 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { // Handle PIC mode first. VE needs a got load for every variable! if (isPositionIndependent()) { - // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this - // function has calls. 
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - MFI.setHasCalls(true); auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); - if (isa<ConstantPoolSDNode>(Op) || + if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) || (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { // Create following instructions for local linkage PIC code. - // lea %s35, %gotoff_lo(.LCPI0_0) - // and %s35, %s35, (32)0 - // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) - // adds.l %s35, %s15, %s35 ; %s15 is GOT - // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + // lea %reg, label@gotoff_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, label@gotoff_hi(%reg, %got) SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, VEMCExpr::VK_VE_GOTOFF_LO32, DAG); SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); } // Create following instructions for not local linkage PIC code. - // lea %s35, %got_lo(.LCPI0_0) - // and %s35, %s35, (32)0 - // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) - // adds.l %s35, %s15, %s35 ; %s15 is GOT - // ld %s35, (,%s35) - // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + // lea %reg, label@got_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, label@got_hi(%reg) + // ld %reg, (%reg, %got) SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, VEMCExpr::VK_VE_GOT_LO32, DAG); SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); @@ -770,20 +995,222 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { /// Custom Lower { -SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, +// The mappings for emitLeading/TrailingFence for VE is designed by following +// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction *VETargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Acquire: + return nullptr; // Nothing to do + case AtomicOrdering::Release: + case AtomicOrdering::AcquireRelease: + return Builder.CreateFence(AtomicOrdering::Release); + case AtomicOrdering::SequentiallyConsistent: + if (!Inst->hasAtomicStore()) + return nullptr; // Nothing to do + return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction *VETargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: + return nullptr; // Nothing to do + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + return Builder.CreateFence(AtomicOrdering::Acquire); + case AtomicOrdering::SequentiallyConsistent: + return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); + } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( + 
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + + // VE uses Release consistency, so need a fence instruction if it is a + // cross-thread fence. + if (FenceSSID == SyncScope::System) { + switch (FenceOrdering) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + // No need to generate fencem instruction here. + break; + case AtomicOrdering::Acquire: + // Generate "fencem 2" as acquire fence. + return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, + DAG.getTargetConstant(2, DL, MVT::i32), + Op.getOperand(0)), + 0); + case AtomicOrdering::Release: + // Generate "fencem 1" as release fence. + return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, + DAG.getTargetConstant(1, DL, MVT::i32), + Op.getOperand(0)), + 0); + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + // Generate "fencem 3" as acq_rel and seq_cst fence. + // FIXME: "fencem 3" doesn't wait for for PCIe deveices accesses, + // so seq_cst may require more instruction for them. + return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, + DAG.getTargetConstant(3, DL, MVT::i32), + Op.getOperand(0)), + 0); + } + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +TargetLowering::AtomicExpansionKind +VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // We have TS1AM implementation for i8/i16/i32/i64, so use it. + if (AI->getOperation() == AtomicRMWInst::Xchg) { + return AtomicExpansionKind::None; + } + // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR. + + // Otherwise, expand it using compare and exchange instruction to not call + // __sync_fetch_and_* functions. + return AtomicExpansionKind::CmpXChg; +} + +static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag, + SDValue &Bits) { + SDLoc DL(Op); + AtomicSDNode *N = cast<AtomicSDNode>(Op); + SDValue Ptr = N->getOperand(1); + SDValue Val = N->getOperand(2); + EVT PtrVT = Ptr.getValueType(); + bool Byte = N->getMemoryVT() == MVT::i8; + // Remainder = AND Ptr, 3 + // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag) + // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag) + // Bits = Remainder << 3 + // NewVal = Val << Bits + SDValue Const3 = DAG.getConstant(3, DL, PtrVT); + SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3}); + SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32) + : DAG.getConstant(3, DL, MVT::i32); + Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder}); + Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3}); + return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits}); +} + +static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data, + SDValue Bits) { + SDLoc DL(Op); + EVT VT = Data.getValueType(); + bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8; + // NewData = Data >> Bits + // Result = NewData & 0xff ; If Byte is true (1 byte) + // Result = NewData & 0xffff ; If Byte is false (2 bytes) + + SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits); + return DAG.getNode(ISD::AND, DL, VT, + {NewData, DAG.getConstant(Byte ? 
0xff : 0xffff, DL, VT)}); +} + +SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicSDNode *N = cast<AtomicSDNode>(Op); + + if (N->getMemoryVT() == MVT::i8) { + // For i8, use "ts1am" + // Input: + // ATOMIC_SWAP Ptr, Val, Order + // + // Output: + // Remainder = AND Ptr, 3 + // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst. + // Bits = Remainder << 3 + // NewVal = Val << Bits + // + // Aligned = AND Ptr, -4 + // Data = TS1AM Aligned, Flag, NewVal + // + // NewData = Data >> Bits + // Result = NewData & 0xff ; 1 byte result + SDValue Flag; + SDValue Bits; + SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits); + + SDValue Ptr = N->getOperand(1); + SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), + {Ptr, DAG.getConstant(-4, DL, MVT::i64)}); + SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(), + DAG.getVTList(Op.getNode()->getValueType(0), + Op.getNode()->getValueType(1)), + {N->getChain(), Aligned, Flag, NewVal}, + N->getMemOperand()); + + SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits); + SDValue Chain = TS1AM.getValue(1); + return DAG.getMergeValues({Result, Chain}, DL); + } + if (N->getMemoryVT() == MVT::i16) { + // For i16, use "ts1am" + SDValue Flag; + SDValue Bits; + SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits); + + SDValue Ptr = N->getOperand(1); + SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), + {Ptr, DAG.getConstant(-4, DL, MVT::i64)}); + SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(), + DAG.getVTList(Op.getNode()->getValueType(0), + Op.getNode()->getValueType(1)), + {N->getChain(), Aligned, Flag, NewVal}, + N->getMemOperand()); + + SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits); + SDValue Chain = TS1AM.getValue(1); + return DAG.getMergeValues({Result, Chain}, DL); + } + // Otherwise, let llvm legalize it. 
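The i8/i16 paths above lower an atomic swap onto a 4-byte aligned word: compute the byte offset inside the word, derive a lane flag and a bit shift from it, and let ts1am swap only the selected lanes. A host-side sketch of that lane arithmetic for the i8 case, assuming a little-endian layout as on VE; the ts1am itself is emulated with a plain read-modify-write and the names are illustrative, not part of this patch:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>

// Software stand-in for the "ts1am" word swap: store the selected byte lane
// of NewVal into the aligned word and return the word's previous contents.
static uint8_t atomicSwapByte(unsigned char *Mem, uint64_t Addr, uint8_t Val) {
  uint64_t Remainder = Addr & 3;           // Remainder = AND Ptr, 3
  uint64_t Bits = Remainder << 3;          // Bits = Remainder << 3
  uint32_t NewVal = uint32_t(Val) << Bits; // NewVal = Val << Bits
  uint64_t Aligned = Addr & ~uint64_t(3);  // Aligned = AND Ptr, -4

  uint32_t OldWord;                        // ts1am yields the old word and
  std::memcpy(&OldWord, Mem + Aligned, 4); // updates the lane picked by
  uint32_t Mask = 0xffu << Bits;           // Flag = 1 << Remainder
  uint32_t NewWord = (OldWord & ~Mask) | (NewVal & Mask);
  std::memcpy(Mem + Aligned, &NewWord, 4);

  return uint8_t((OldWord >> Bits) & 0xff); // Result = (Data >> Bits) & 0xff
}

int main() {
  unsigned char Mem[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
  uint8_t Old = atomicSwapByte(Mem, /*Addr=*/2, /*Val=*/0xAB);
  assert(Old == 0x33 && Mem[2] == 0xAB && Mem[1] == 0x22);
  std::cout << "previous byte: 0x" << std::hex << unsigned(Old) << "\n";
  return 0;
}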
+ return Op; +} + +SDValue VETargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return makeAddress(Op, DAG); } -SDValue VETargetLowering::LowerBlockAddress(SDValue Op, +SDValue VETargetLowering::lowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { return makeAddress(Op, DAG); } SDValue -VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, +VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); + SDLoc DL(Op); // Generate the following code: // t1: ch,glue = callseq_start t0, 0, 0 @@ -799,13 +1226,13 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( DAG.getMachineFunction(), CallingConv::C); - Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); + Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL); SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; - Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), - DAG.getIntPtrConstant(0, dl, true), - Chain.getValue(1), dl); - Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); + Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true), + DAG.getIntPtrConstant(0, DL, true), + Chain.getValue(1), DL); + Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1)); // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -820,17 +1247,133 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, return Chain; } -SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, +SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // The current implementation of nld (2.26) doesn't allow local exec model // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always // generate the general dynamic model code sequence. // // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf - return LowerToTLSGeneralDynamicModel(Op, DAG); + return lowerToTLSGeneralDynamicModel(Op, DAG); +} + +SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +// Lower a f128 load into two f64 loads. +static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode()); + assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type"); + unsigned Alignment = LdNode->getAlign().value(); + if (Alignment > 8) + Alignment = 8; + + SDValue Lo64 = + DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(), + LdNode->getPointerInfo(), Alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT AddrVT = LdNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(), + DAG.getConstant(8, DL, AddrVT)); + SDValue Hi64 = + DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr, + LdNode->getPointerInfo(), Alignment, + LdNode->isVolatile() ? 
MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32); + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDNode *InFP128 = + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128, + SDValue(InFP128, 0), Hi64, SubRegEven); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128, + SDValue(InFP128, 0), Lo64, SubRegOdd); + SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1), + SDValue(Hi64.getNode(), 1)}; + SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(InFP128, 0), OutChain}; + return DAG.getMergeValues(Ops, DL); } -SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { +SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { + LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode()); + + SDValue BasePtr = LdNode->getBasePtr(); + if (isa<FrameIndexSDNode>(BasePtr.getNode())) { + // Do not expand store instruction with frame index here because of + // dependency problems. We expand it later in eliminateFrameIndex(). + return Op; + } + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return lowerLoadF128(Op, DAG); + + return Op; +} + +// Lower a f128 store into two f64 stores. +static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32); + + SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64, + StNode->getValue(), SubRegEven); + SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64, + StNode->getValue(), SubRegOdd); + + unsigned Alignment = StNode->getAlign().value(); + if (Alignment > 8) + Alignment = 8; + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDValue OutChains[2]; + OutChains[0] = + DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0), + StNode->getBasePtr(), MachinePointerInfo(), Alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT AddrVT = StNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(), + DAG.getConstant(8, DL, AddrVT)); + OutChains[1] = + DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr, + MachinePointerInfo(), Alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); +} + +SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = StNode->getBasePtr(); + if (isa<FrameIndexSDNode>(BasePtr.getNode())) { + // Do not expand store instruction with frame index here because of + // dependency problems. We expand it later in eliminateFrameIndex(). + return Op; + } + + EVT MemVT = StNode->getMemoryVT(); + if (MemVT == MVT::f128) + return lowerStoreF128(Op, DAG); + + // Otherwise, ask llvm to expand it. 
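Both lowerLoadF128 and lowerStoreF128 above split an f128 access into two f64 halves, with the low 64 bits at offset 0 and the high 64 bits at offset 8 (paired into the sub_odd/sub_even halves of the f128 register). A minimal sketch of that memory layout in illustrative host code, not VE or LLVM API:

#include <cassert>
#include <cstdint>
#include <cstring>

struct F128Parts {
  uint64_t Lo; // sub_odd half, stored at addr + 0
  uint64_t Hi; // sub_even half, stored at addr + 8
};

static void storeF128(unsigned char *Addr, F128Parts V) {
  std::memcpy(Addr + 0, &V.Lo, 8); // Lo64 to 0(addr)
  std::memcpy(Addr + 8, &V.Hi, 8); // Hi64 to 8(addr)
}

static F128Parts loadF128(const unsigned char *Addr) {
  F128Parts V;
  std::memcpy(&V.Lo, Addr + 0, 8);
  std::memcpy(&V.Hi, Addr + 8, 8);
  return V;
}

int main() {
  unsigned char Buf[16];
  storeF128(Buf, {0x1122334455667788ULL, 0x99aabbccddeeff00ULL});
  F128Parts V = loadF128(Buf);
  assert(V.Lo == 0x1122334455667788ULL && V.Hi == 0x99aabbccddeeff00ULL);
  return 0;
}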
+ return SDValue(); +} + +SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -849,7 +1392,7 @@ SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } -SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { +SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); @@ -862,7 +1405,19 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = VAList.getValue(1); SDValue NextPtr; - if (VT == MVT::f32) { + if (VT == MVT::f128) { + // VE f128 values must be stored with 16 bytes alignment. We doesn't + // know the actual alignment of VAList, so we take alignment of it + // dyanmically. + int Align = 16; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-Align, DL, PtrVT)); + // Increment the pointer, VAList, by 16 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL)); + } else if (VT == MVT::f32) { // float --> need special handling like below. // 0 4 // +------+------+ @@ -955,22 +1510,1325 @@ SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, + Op.getOperand(0)); +} + +static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + SDLoc DL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = TLI.getPointerTy(MF.getDataLayout()); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + unsigned Depth = Op.getConstantOperandVal(0); + const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned FrameReg = RegInfo->getFrameRegister(MF); + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(), + FrameAddr, MachinePointerInfo()); + return FrameAddr; +} + +static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget); + + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Offset = DAG.getConstant(8, DL, VT); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, DL, VT, 
FrameAddr, Offset), + MachinePointerInfo()); +} + +SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + default: // Don't custom lower most intrinsics. + return SDValue(); + case Intrinsic::eh_sjlj_lsda: { + MachineFunction &MF = DAG.getMachineFunction(); + MVT VT = Op.getSimpleValueType(); + const VETargetMachine *TM = + static_cast<const VETargetMachine *>(&DAG.getTarget()); + + // Create GCC_except_tableXX string. The real symbol for that will be + // generated in EHStreamer::emitExceptionTable() later. So, we just + // borrow it's name here. + TM->getStrList()->push_back(std::string( + (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str())); + SDValue Addr = + DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0); + if (isPositionIndependent()) { + Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr); + } + return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } +} + +static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) { + if (!isa<BuildVectorSDNode>(N)) + return false; + const auto *BVN = cast<BuildVectorSDNode>(N); + + // Find first non-undef insertion. + unsigned Idx; + for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) { + auto ElemV = BVN->getOperand(Idx); + if (!ElemV->isUndef()) + break; + } + // Catch the (hypothetical) all-undef case. + if (Idx == BVN->getNumOperands()) + return false; + // Remember insertion. + UniqueIdx = Idx++; + // Verify that all other insertions are undef. + for (; Idx < BVN->getNumOperands(); ++Idx) { + auto ElemV = BVN->getOperand(Idx); + if (!ElemV->isUndef()) + return false; + } + return true; +} + +static SDValue getSplatValue(SDNode *N) { + if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) { + return BuildVec->getSplatValue(); + } + return SDValue(); +} + +SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned NumEls = Op.getValueType().getVectorNumElements(); + MVT ElemVT = Op.getSimpleValueType().getVectorElementType(); + + // If there is just one element, expand to INSERT_VECTOR_ELT. + unsigned UniqueIdx; + if (getUniqueInsertion(Op.getNode(), UniqueIdx)) { + SDValue AccuV = DAG.getUNDEF(Op.getValueType()); + auto ElemV = Op->getOperand(UniqueIdx); + SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV, + ElemV, IdxV); + } + + // Else emit a broadcast. 
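The getUniqueInsertion helper a few lines above accepts a BUILD_VECTOR only when exactly one operand is defined, so the node can be rewritten as a single INSERT_VECTOR_ELT into an undef vector. The same scan in standalone form, with std::nullopt standing in for undef operands (illustrative only):

#include <cassert>
#include <optional>
#include <vector>

static bool getUniqueInsertion(const std::vector<std::optional<int>> &Lanes,
                               unsigned &UniqueIdx) {
  unsigned Idx = 0;
  // Find the first defined lane.
  while (Idx < Lanes.size() && !Lanes[Idx])
    ++Idx;
  if (Idx == Lanes.size())
    return false; // all lanes undef
  UniqueIdx = Idx++;
  // Every remaining lane must be undef for the insertion to be unique.
  for (; Idx < Lanes.size(); ++Idx)
    if (Lanes[Idx])
      return false;
  return true;
}

int main() {
  unsigned Idx = 0;
  assert(getUniqueInsertion({std::nullopt, 7, std::nullopt}, Idx) && Idx == 1);
  assert(!getUniqueInsertion({1, std::nullopt, 2}, Idx));
  assert(!getUniqueInsertion({std::nullopt, std::nullopt}, Idx));
  return 0;
}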
+ if (SDValue ScalarV = getSplatValue(Op.getNode())) { + // lower to VEC_BROADCAST + MVT LegalResVT = MVT::getVectorVT(ElemVT, 256); + + auto AVL = DAG.getConstant(NumEls, DL, MVT::i32); + return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0), + AVL); + } + + // Expand + return SDValue(); +} + SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { + unsigned Opcode = Op.getOpcode(); + if (ISD::isVPOpcode(Opcode)) + return lowerToVVP(Op, DAG); + + switch (Opcode) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::ATOMIC_FENCE: + return lowerATOMIC_FENCE(Op, DAG); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_SWAP(Op, DAG); case ISD::BlockAddress: - return LowerBlockAddress(Op, DAG); + return lowerBlockAddress(Op, DAG); + case ISD::ConstantPool: + return lowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_SETJMP: + return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_SETUP_DISPATCH: + return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); + case ISD::FRAMEADDR: + return lowerFRAMEADDR(Op, DAG, *this, Subtarget); case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); + return lowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: - return LowerGlobalTLSAddress(Op, DAG); + return lowerGlobalTLSAddress(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::JumpTable: + return lowerJumpTable(Op, DAG); + case ISD::LOAD: + return lowerLOAD(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG, *this, Subtarget); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::STORE: + return lowerSTORE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG); + return lowerVASTART(Op, DAG); case ISD::VAARG: - return LowerVAARG(Op, DAG); + return lowerVAARG(Op, DAG); + + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); + +#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME: +#include "VVPNodes.def" + return lowerToVVP(Op, DAG); } } /// } Custom Lower + +void VETargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + case ISD::ATOMIC_SWAP: + // Let LLVM expand atomic swap instruction through LowerOperation. + return; + default: + LLVM_DEBUG(N->dumpr(&DAG)); + llvm_unreachable("Do not know how to custom type legalize this operation!"); + } +} + +/// JumpTable for VE. +/// +/// VE cannot generate relocatable symbol in jump table. VE cannot +/// generate expressions using symbols in both text segment and data +/// segment like below. +/// .4byte .LBB0_2-.LJTI0_0 +/// So, we generate offset from the top of function like below as +/// a custom label. +/// .4byte .LBB0_2-<function name> + +unsigned VETargetLowering::getJumpTableEncoding() const { + // Use custom label for PIC. + if (isPositionIndependent()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +const MCExpr *VETargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned Uid, MCContext &Ctx) const { + assert(isPositionIndependent()); + + // Generate custom label for PIC like below. 
+ // .4bytes .LBB0_2-<function name> + const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data()); + const auto *Base = MCSymbolRefExpr::create(Sym, Ctx); + return MCBinaryExpr::createSub(Value, Base, Ctx); +} + +SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + assert(isPositionIndependent()); + SDLoc DL(Table); + Function *Function = &DAG.getMachineFunction().getFunction(); + assert(Function != nullptr); + auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace()); + + // In the jump table, we have following values in PIC mode. + // .4bytes .LBB0_2-<function name> + // We need to add this value and the address of this function to generate + // .LBB0_2 label correctly under PIC mode. So, we want to generate following + // instructions: + // lea %reg, fun@gotoff_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, fun@gotoff_hi(%reg, %got) + // In order to do so, we need to genarate correctly marked DAG node using + // makeHiLoPair. + SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy); + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy); + return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo); +} + +Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock *TargetBB, + const DebugLoc &DL) const { + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + const TargetRegisterClass *RC = &VE::I64RegClass; + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + Register Result = MRI.createVirtualRegister(RC); + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. + // lea %Tmp1, TargetBB@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32); + } else { + // Create following instructions for non-PIC code. 
+ // lea %Tmp1, TargetBB@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, TargetBB@hi(%Tmp2) + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addMBB(TargetBB, VEMCExpr::VK_VE_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result) + .addReg(Tmp2, getKillRegState(true)) + .addImm(0) + .addMBB(TargetBB, VEMCExpr::VK_VE_HI32); + } + return Result; +} + +Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + StringRef Symbol, const DebugLoc &DL, + bool IsLocal = false, + bool IsCall = false) const { + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + const TargetRegisterClass *RC = &VE::I64RegClass; + Register Result = MRI.createVirtualRegister(RC); + + if (isPositionIndependent()) { + if (IsCall && !IsLocal) { + // Create following instructions for non-local linkage PIC code function + // calls. These instructions uses IC and magic number -24, so we expand + // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction. + // lea %Reg, Symbol@plt_lo(-24) + // and %Reg, %Reg, (32)0 + // sic %s16 + // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT + BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result) + .addExternalSymbol("abort"); + } else if (IsLocal) { + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + // Create following instructions for local linkage PIC code. + // lea %Tmp1, Symbol@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32); + } else { + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + // Create following instructions for not local linkage PIC code. + // lea %Tmp1, Symbol@got_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + // ld %Result, 0(%Tmp3) + Register Tmp3 = MRI.createVirtualRegister(RC); + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32); + BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result) + .addReg(Tmp3, getKillRegState(true)) + .addImm(0) + .addImm(0); + } + } else { + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + // Create following instructions for non-PIC code. 
+ // lea %Tmp1, Symbol@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, Symbol@hi(%Tmp2) + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result) + .addReg(Tmp2, getKillRegState(true)) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32); + } + return Result; +} + +void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, + int FI, int Offset) const { + DebugLoc DL = MI.getDebugLoc(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + Register LabelReg = + prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL); + + // Store an address of DispatchBB to a given jmpbuf[1] where has next IC + // referenced by longjmp (throw) later. + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii)); + addFrameReference(MIB, FI, Offset); // jmpbuf[1] + MIB.addReg(LabelReg, getKillRegState(true)); +} + +MachineBasicBlock * +VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = ++MBB->getIterator(); + + // Memory Reference. + SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), + MI.memoperands_end()); + Register BufReg = MI.getOperand(1).getReg(); + + Register DstReg; + + DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; + Register MainDestReg = MRI.createVirtualRegister(RC); + Register RestoreDestReg = MRI.createVirtualRegister(RC); + + // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following + // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`. + // + // ThisMBB: + // buf[3] = %s17 iff %s17 is used as BP + // buf[1] = RestoreMBB as IC after longjmp + // # SjLjSetup RestoreMBB + // + // MainMBB: + // v_main = 0 + // + // SinkMBB: + // v = phi(v_main, MainMBB, v_restore, RestoreMBB) + // ... + // + // RestoreMBB: + // %s17 = buf[3] = iff %s17 is used as BP + // v_restore = 1 + // goto SinkMBB + + MachineBasicBlock *ThisMBB = MBB; + MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, MainMBB); + MF->insert(I, SinkMBB); + MF->push_back(RestoreMBB); + RestoreMBB->setHasAddressTaken(); + + // Transfer the remainder of BB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // ThisMBB: + Register LabelReg = + prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL); + + // Store BP in buf[3] iff this function is using BP. 
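The SjLj setjmp/longjmp lowering in this file reads and writes the jump buffer at byte offsets 0, 8, 16, and 24: buf[0] holds the saved FP, buf[1] the resume IC, buf[2] the saved SP, and buf[3] the saved BP when the function uses one. A sketch of that implied layout (field names are illustrative, not from the patch):

#include <cstddef>
#include <cstdint>

struct VESjLjBuf {
  uint64_t SavedFP;  // buf[0], offset 0:  frame pointer, reloaded by longjmp
  uint64_t ResumeIC; // buf[1], offset 8:  address of the restore block
  uint64_t SavedSP;  // buf[2], offset 16: stack pointer, reloaded by longjmp
  uint64_t SavedBP;  // buf[3], offset 24: base pointer, only if used
};

static_assert(sizeof(VESjLjBuf) == 32, "four 8-byte slots");
static_assert(offsetof(VESjLjBuf, ResumeIC) == 8, "IC lives at buf[1]");
static_assert(offsetof(VESjLjBuf, SavedBP) == 24, "BP lives at buf[3]");

int main() { return 0; }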
+ const VEFrameLowering *TFI = Subtarget->getFrameLowering(); + if (TFI->hasBP(*MF)) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii)); + MIB.addReg(BufReg); + MIB.addImm(0); + MIB.addImm(24); + MIB.addReg(VE::SX17); + MIB.setMemRefs(MMOs); + } + + // Store IP in buf[1]. + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii)); + MIB.add(MI.getOperand(1)); // we can preserve the kill flags here. + MIB.addImm(0); + MIB.addImm(8); + MIB.addReg(LabelReg, getKillRegState(true)); + MIB.setMemRefs(MMOs); + + // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`. + + // Insert setup. + MIB = + BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB); + + const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + MIB.addRegMask(RegInfo->getNoPreservedMask()); + ThisMBB->addSuccessor(MainMBB); + ThisMBB->addSuccessor(RestoreMBB); + + // MainMBB: + BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg) + .addImm(0) + .addImm(0) + .addImm(0); + MainMBB->addSuccessor(SinkMBB); + + // SinkMBB: + BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg) + .addReg(MainDestReg) + .addMBB(MainMBB) + .addReg(RestoreDestReg) + .addMBB(RestoreMBB); + + // RestoreMBB: + // Restore BP from buf[3] iff this function is using BP. The address of + // buf is in SX10. + // FIXME: Better to not use SX10 here + if (TFI->hasBP(*MF)) { + MachineInstrBuilder MIB = + BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17); + MIB.addReg(VE::SX10); + MIB.addImm(0); + MIB.addImm(24); + MIB.setMemRefs(MMOs); + } + BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg) + .addImm(0) + .addImm(0) + .addImm(1); + BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB); + RestoreMBB->addSuccessor(SinkMBB); + + MI.eraseFromParent(); + return SinkMBB; +} + +MachineBasicBlock * +VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference. + SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), + MI.memoperands_end()); + Register BufReg = MI.getOperand(0).getReg(); + + Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + Register FP = VE::SX9; + Register SP = VE::SX11; + + MachineInstrBuilder MIB; + + MachineBasicBlock *ThisMBB = MBB; + + // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions. + // + // ThisMBB: + // %fp = load buf[0] + // %jmp = load buf[1] + // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB + // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp. + // jmp %jmp + + // Reload FP. + MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP); + MIB.addReg(BufReg); + MIB.addImm(0); + MIB.addImm(0); + MIB.setMemRefs(MMOs); + + // Reload IP. + MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp); + MIB.addReg(BufReg); + MIB.addImm(0); + MIB.addImm(8); + MIB.setMemRefs(MMOs); + + // Copy BufReg to SX10 for later use in setjmp. + // FIXME: Better to not use SX10 here + BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10) + .addReg(BufReg) + .addImm(0); + + // Reload SP. + MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP); + MIB.add(MI.getOperand(0)); // we can preserve the kill flags here. + MIB.addImm(0); + MIB.addImm(16); + MIB.setMemRefs(MMOs); + + // Jump. 
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t)) + .addReg(Tmp, getKillRegState(true)) + .addImm(0); + + MI.eraseFromParent(); + return ThisMBB; +} + +MachineBasicBlock * +VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = BB->getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + int FI = MFI.getFunctionContextIndex(); + + // Get a mapping of the call site numbers to all of the landing pads they're + // associated with. + DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad; + unsigned MaxCSNum = 0; + for (auto &MBB : *MF) { + if (!MBB.isEHPad()) + continue; + + MCSymbol *Sym = nullptr; + for (const auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + + assert(MI.isEHLabel() && "expected EH_LABEL"); + Sym = MI.getOperand(0).getMCSymbol(); + break; + } + + if (!MF->hasCallSiteLandingPad(Sym)) + continue; + + for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { + CallSiteNumToLPad[CSI].push_back(&MBB); + MaxCSNum = std::max(MaxCSNum, CSI); + } + } + + // Get an ordered list of the machine basic blocks for the jump table. + std::vector<MachineBasicBlock *> LPadList; + SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs; + LPadList.reserve(CallSiteNumToLPad.size()); + + for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { + for (auto &LP : CallSiteNumToLPad[CSI]) { + LPadList.push_back(LP); + InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); + } + } + + assert(!LPadList.empty() && + "No landing pad destinations for the dispatch jump table!"); + + // The %fn_context is allocated like below (from --print-after=sjljehprepare): + // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] } + // + // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72. + // First `i64` is callsite, so callsite is FI+8. + static const int OffsetIC = 72; + static const int OffsetCS = 8; + + // Create the MBBs for the dispatch code like following: + // + // ThisMBB: + // Prepare DispatchBB address and store it to buf[1]. + // ... + // + // DispatchBB: + // %s15 = GETGOT iff isPositionIndependent + // %callsite = load callsite + // brgt.l.t #size of callsites, %callsite, DispContBB + // + // TrapBB: + // Call abort. + // + // DispContBB: + // %breg = address of jump table + // %pc = load and calculate next pc from %breg and %callsite + // jmp %pc + + // Shove the dispatch's address into the return slot in the function context. + MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); + DispatchBB->setIsEHPad(true); + + // Trap BB will causes trap like `assert(0)`. + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(TrapBB); + + MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(DispContBB); + + // Insert MBBs. + MF->push_back(DispatchBB); + MF->push_back(DispContBB); + MF->push_back(TrapBB); + + // Insert code to call abort in the TrapBB. + Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL, + /* Local */ false, /* Call */ true); + BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10) + .addReg(Abort, getKillRegState(true)) + .addImm(0) + .addImm(0); + + // Insert code into the entry block that creates and registers the function + // context. 
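  // Sketching the offset arithmetic under the usual LP64 layout: the context
  // is { i8* (8) + i64 (8) + [4 x i64] (32) + i8* (8) + i8* (8) } = 64 bytes
  // followed by the [5 x i8*] jmpbuf, so jmpbuf[1] lands at 64 + 8 = 72
  // (OffsetIC) and the leading call-site i64 at offset 8 (OffsetCS), matching
  // the constants defined above.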
+ setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC); + + // Create the jump table and associated information + unsigned JTE = getJumpTableEncoding(); + MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); + unsigned MJTI = JTI->createJumpTableIndex(LPadList); + + const VERegisterInfo &RI = TII->getRegisterInfo(); + // Add a register mask with no preserved registers. This results in all + // registers being marked as clobbered. + BuildMI(DispatchBB, DL, TII->get(VE::NOP)) + .addRegMask(RI.getNoPreservedMask()); + + if (isPositionIndependent()) { + // Force to generate GETGOT, since current implementation doesn't store GOT + // register. + BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15); + } + + // IReg is used as an index in a memory operand and therefore can't be SP + const TargetRegisterClass *RC = &VE::I64RegClass; + Register IReg = MRI.createVirtualRegister(RC); + addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI, + OffsetCS); + if (LPadList.size() < 64) { + BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t)) + .addImm(VECC::CC_ILE) + .addImm(LPadList.size()) + .addReg(IReg) + .addMBB(TrapBB); + } else { + assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!"); + Register TmpReg = MRI.createVirtualRegister(RC); + BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg) + .addImm(0) + .addImm(0) + .addImm(LPadList.size()); + BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t)) + .addImm(VECC::CC_ILE) + .addReg(TmpReg, getKillRegState(true)) + .addReg(IReg) + .addMBB(TrapBB); + } + + Register BReg = MRI.createVirtualRegister(RC); + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. + // lea %Tmp1, .LJTI0_0@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32); + } else { + // Create following instructions for non-PIC code. + // lea %Tmp1, .LJTI0_0@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %BReg, .LJTI0_0@hi(%Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg) + .addReg(Tmp2, getKillRegState(true)) + .addImm(0) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32); + } + + switch (JTE) { + case MachineJumpTableInfo::EK_BlockAddress: { + // Generate simple block address code for no-PIC model. 
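    // Each EK_BlockAddress table entry is a pointer-sized (8-byte) absolute
    // address, so the call-site index is scaled by 8 (the shift-left-3 below)
    // and loaded with a 64-bit ld; the EK_Custom32 case further down stores
    // 4-byte entries instead, scaled by 2 and loaded with ldl.zx.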
+ // sll %Tmp1, %IReg, 3 + // lds %TReg, 0(%Tmp1, %BReg) + // bcfla %TReg + + Register TReg = MRI.createVirtualRegister(RC); + Register Tmp1 = MRI.createVirtualRegister(RC); + + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1) + .addReg(IReg, getKillRegState(true)) + .addImm(3); + BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg) + .addReg(BReg, getKillRegState(true)) + .addReg(Tmp1, getKillRegState(true)) + .addImm(0); + BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t)) + .addReg(TReg, getKillRegState(true)) + .addImm(0); + break; + } + case MachineJumpTableInfo::EK_Custom32: { + // Generate block address code using differences from the function pointer + // for PIC model. + // sll %Tmp1, %IReg, 2 + // ldl.zx %OReg, 0(%Tmp1, %BReg) + // Prepare function address in BReg2. + // adds.l %TReg, %BReg2, %OReg + // bcfla %TReg + + assert(isPositionIndependent()); + Register OReg = MRI.createVirtualRegister(RC); + Register TReg = MRI.createVirtualRegister(RC); + Register Tmp1 = MRI.createVirtualRegister(RC); + + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1) + .addReg(IReg, getKillRegState(true)) + .addImm(2); + BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg) + .addReg(BReg, getKillRegState(true)) + .addReg(Tmp1, getKillRegState(true)) + .addImm(0); + Register BReg2 = + prepareSymbol(*DispContBB, DispContBB->end(), + DispContBB->getParent()->getName(), DL, /* Local */ true); + BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg) + .addReg(OReg, getKillRegState(true)) + .addReg(BReg2, getKillRegState(true)); + BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t)) + .addReg(TReg, getKillRegState(true)) + .addImm(0); + break; + } + default: + llvm_unreachable("Unexpected jump table encoding"); + } + + // Add the jump table entries as successors to the MBB. + SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs; + for (auto &LP : LPadList) + if (SeenMBBs.insert(LP).second) + DispContBB->addSuccessor(LP); + + // N.B. the order the invoke BBs are processed in doesn't matter here. + SmallVector<MachineBasicBlock *, 64> MBBLPads; + const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); + for (MachineBasicBlock *MBB : InvokeBBs) { + // Remove the landing pad successor from the invoke block and replace it + // with the new dispatch block. + // Keep a copy of Successors since it's modified inside the loop. + SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(), + MBB->succ_rend()); + // FIXME: Avoid quadratic complexity. + for (auto MBBS : Successors) { + if (MBBS->isEHPad()) { + MBB->removeSuccessor(MBBS); + MBBLPads.push_back(MBBS); + } + } + + MBB->addSuccessor(DispatchBB); + + // Find the invoke call and mark all of the callee-saved registers as + // 'implicit defined' so that they're spilled. This prevents code from + // moving instructions to before the EH block, where they will never be + // executed. + for (auto &II : reverse(*MBB)) { + if (!II.isCall()) + continue; + + DenseMap<Register, bool> DefRegs; + for (auto &MOp : II.operands()) + if (MOp.isReg()) + DefRegs[MOp.getReg()] = true; + + MachineInstrBuilder MIB(*MF, &II); + for (unsigned RI = 0; SavedRegs[RI]; ++RI) { + Register Reg = SavedRegs[RI]; + if (!DefRegs[Reg]) + MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); + } + + break; + } + } + + // Mark all former landing pads as non-landing pads. The dispatch is the only + // landing pad now. + for (auto &LP : MBBLPads) + LP->setIsEHPad(false); + + // The instruction is gone now. 
+ MI.eraseFromParent(); + return BB; +} + +MachineBasicBlock * +VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unknown Custom Instruction!"); + case VE::EH_SjLj_LongJmp: + return emitEHSjLjLongJmp(MI, BB); + case VE::EH_SjLj_SetJmp: + return emitEHSjLjSetJmp(MI, BB); + case VE::EH_SjLj_Setup_Dispatch: + return emitSjLjDispatchBlock(MI, BB); + } +} + +static bool isI32Insn(const SDNode *User, const SDNode *N) { + switch (User->getOpcode()) { + default: + return false; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SETCC: + case ISD::SMIN: + case ISD::SMAX: + case ISD::SHL: + case ISD::SRA: + case ISD::BSWAP: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::BR_CC: + case ISD::BITCAST: + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + return true; + case ISD::SRL: + if (N->getOperand(0).getOpcode() != ISD::SRL) + return true; + // (srl (trunc (srl ...))) may be optimized by combining srl, so + // doesn't optimize trunc now. + return false; + case ISD::SELECT_CC: + if (User->getOperand(2).getNode() != N && + User->getOperand(3).getNode() != N) + return true; + LLVM_FALLTHROUGH; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SELECT: + case ISD::CopyToReg: + // Check all use of selections, bit operations, and copies. If all of them + // are safe, optimize truncate to extract_subreg. + for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end(); + UI != UE; ++UI) { + switch ((*UI)->getOpcode()) { + default: + // If the use is an instruction which treats the source operand as i32, + // it is safe to avoid truncate here. + if (isI32Insn(*UI, N)) + continue; + break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: { + // Special optimizations to the combination of ext and trunc. + // (ext ... (select ... (trunc ...))) is safe to avoid truncate here + // since this truncate instruction clears higher 32 bits which is filled + // by one of ext instructions later. + assert(N->getValueType(0) == MVT::i32 && + "find truncate to not i32 integer"); + if (User->getOpcode() == ISD::SELECT_CC || + User->getOpcode() == ISD::SELECT) + continue; + break; + } + } + return false; + } + return true; + } +} + +// Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is +// sometime too early. Optimizing it in DAG pattern matching in VEInstrInfo.td +// is sometime too late. So, doing it at here. +SDValue VETargetLowering::combineTRUNCATE(SDNode *N, + DAGCombinerInfo &DCI) const { + assert(N->getOpcode() == ISD::TRUNCATE && + "Should be called with a TRUNCATE node"); + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // We prefer to do this when all types are legal. + if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant. + if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC && + isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) && + isa<ConstantSDNode>(N->getOperand(0)->getOperand(1))) + return SDValue(); + + // Check all use of this TRUNCATE. + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; + ++UI) { + SDNode *User = *UI; + + // Make sure that we're not going to replace TRUNCATE for non i32 + // instructions. 
+ // + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (isI32Insn(User, N)) + continue; + + return SDValue(); + } + + SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32); + return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, + N->getOperand(0), SubI32), + 0); +} + +SDValue VETargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: + break; + case ISD::TRUNCATE: + return combineTRUNCATE(N, DCI); + } + + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// VE Inline Assembly Support +//===----------------------------------------------------------------------===// + +VETargetLowering::ConstraintType +VETargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'v': // vector registers + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair<unsigned, const TargetRegisterClass *> +VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + const TargetRegisterClass *RC = nullptr; + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + case 'r': + RC = &VE::I64RegClass; + break; + case 'v': + RC = &VE::V64RegClass; + break; + } + return std::make_pair(0U, RC); + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +//===----------------------------------------------------------------------===// +// VE Target Optimization Support +//===----------------------------------------------------------------------===// + +unsigned VETargetLowering::getMinimumJumpTableEntries() const { + // Specify 8 for PIC model to relieve the impact of PIC load instructions. + if (isJumpTableRelative()) + return 8; + + return TargetLowering::getMinimumJumpTableEntries(); +} + +bool VETargetLowering::hasAndNot(SDValue Y) const { + EVT VT = Y.getValueType(); + + // VE doesn't have vector and not instruction. + if (VT.isVector()) + return false; + + // VE allows different immediate values for X and Y where ~X & Y. + // Only simm7 works for X, and only mimm works for Y on VE. However, this + // function is used to check whether an immediate value is OK for and-not + // instruction as both X and Y. Generating additional instruction to + // retrieve an immediate value is no good since the purpose of this + // function is to convert a series of 3 instructions to another series of + // 3 instructions with better parallelism. Therefore, we return false + // for all immediate values now. + // FIXME: Change hasAndNot function to have two operands to make it work + // correctly with Aurora VE. + if (isa<ConstantSDNode>(Y)) + return false; + + // It's ok for generic registers. + return true; +} + +/// \returns the VVP_* SDNode opcode corresponsing to \p OC. 
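/// The mapping below is generated from VVPNodes.def; assuming an entry along
/// the lines of ADD_VVP_OP(VVP_ADD, ADD), the switch expands to
///   case VEISD::VVP_ADD: case ISD::ADD: return VEISD::VVP_ADD;
/// so both the generic ISD opcode and an already-lowered VVP opcode map to
/// the same VVP node.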
+static Optional<unsigned> getVVPOpcode(unsigned Opcode) { + switch (Opcode) { +#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \ + case ISD::VPOPC: \ + return VEISD::VVPNAME; +#define ADD_VVP_OP(VVPNAME, SDNAME) \ + case VEISD::VVPNAME: \ + case ISD::SDNAME: \ + return VEISD::VVPNAME; +#include "VVPNodes.def" + } + return None; +} + +SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { + // Can we represent this as a VVP node. + const unsigned Opcode = Op->getOpcode(); + auto VVPOpcodeOpt = getVVPOpcode(Opcode); + if (!VVPOpcodeOpt.hasValue()) + return SDValue(); + unsigned VVPOpcode = VVPOpcodeOpt.getValue(); + const bool FromVP = ISD::isVPOpcode(Opcode); + + // The representative and legalized vector type of this operation. + SDLoc DL(Op); + MVT MaskVT = MVT::v256i1; // TODO: packed mode. + EVT OpVecVT = Op.getValueType(); + EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT); + + SDValue AVL; + SDValue Mask; + + if (FromVP) { + // All upstream VP SDNodes always have a mask and avl. + auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue(); + auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue(); + Mask = Op->getOperand(MaskIdx); + AVL = Op->getOperand(AVLIdx); + + } else { + // Materialize the VL parameter. + AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32); + SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32); + Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT, + ConstTrue); // emit a VEISD::VEC_BROADCAST here. + } + + // Categories we are interested in. + bool IsBinaryOp = false; + + switch (VVPOpcode) { +#define ADD_BINARY_VVP_OP(VVPNAME, ...) \ + case VEISD::VVPNAME: \ + IsBinaryOp = true; \ + break; +#include "VVPNodes.def" + } + + if (IsBinaryOp) { + assert(LegalVecVT.isSimple()); + return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0), + Op->getOperand(1), Mask, AVL); + } + llvm_unreachable("lowerToVVP called for unexpected SDNode."); +} + +SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); + MVT VT = Op.getOperand(0).getSimpleValueType(); + + // Special treatment for packed V64 types. 
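  // In packed mode a v512i32/v512f32 value occupies 256 64-bit vector
  // elements holding two 32-bit lanes each; judging from the shift computed
  // below, an even element index sits in bits 63..32 and an odd index in
  // bits 31..0 of vector element (idx / 2), and LVSvr pulls that 64-bit
  // element into a scalar register so the wanted half can be shifted out.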
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32); + // Example of codes: + // %packed_v = extractelt %vr, %idx / 2 + // %v = %packed_v >> (%idx % 2 * 32) + // %res = %v & 0xffffffff + + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + SDLoc DL(Op); + SDValue Result = Op; + if (0 /* Idx->isConstant() */) { + // TODO: optimized implementation using constant values + } else { + SDValue Const1 = DAG.getConstant(1, DL, MVT::i64); + SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1}); + SDValue PackedElt = + SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0); + SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1}); + SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1}); + SDValue Const5 = DAG.getConstant(5, DL, MVT::i64); + Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5}); + PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift}); + SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64); + PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask}); + SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32); + Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::i32, PackedElt, SubI32), + 0); + + if (Op.getSimpleValueType() == MVT::f32) { + Result = DAG.getBitcast(MVT::f32, Result); + } else { + assert(Op.getSimpleValueType() == MVT::i32); + } + } + return Result; +} + +SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); + MVT VT = Op.getOperand(0).getSimpleValueType(); + + // Special treatment for packed V64 types. + assert(VT == MVT::v512i32 || VT == MVT::v512f32); + // The v512i32 and v512f32 starts from upper bits (0..31). This "upper + // bits" required `val << 32` from C implementation's point of view. 
+ // + // Example of codes: + // %packed_elt = extractelt %vr, (%idx >> 1) + // %shift = ((%idx & 1) ^ 1) << 5 + // %packed_elt &= 0xffffffff00000000 >> shift + // %packed_elt |= (zext %val) << shift + // %vr = insertelt %vr, %packed_elt, (%idx >> 1) + + SDLoc DL(Op); + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + if (Idx.getSimpleValueType() == MVT::i32) + Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx); + if (Val.getSimpleValueType() == MVT::f32) + Val = DAG.getBitcast(MVT::i32, Val); + assert(Val.getSimpleValueType() == MVT::i32); + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); + + SDValue Result = Op; + if (0 /* Idx->isConstant()*/) { + // TODO: optimized implementation using constant values + } else { + SDValue Const1 = DAG.getConstant(1, DL, MVT::i64); + SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1}); + SDValue PackedElt = + SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0); + SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1}); + SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1}); + SDValue Const5 = DAG.getConstant(5, DL, MVT::i64); + Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5}); + SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64); + Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift}); + PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask}); + Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift}); + PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val}); + Result = + SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(), + {HalfIdx, PackedElt, Vec}), + 0); + } + return Result; +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h index 4633220efaa1..a6e1bf396035 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h @@ -24,23 +24,36 @@ namespace VEISD { enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, - Hi, - Lo, // Hi/Lo operations, typically on a global address. - - GETFUNPLT, // load function address through %plt insturction - GETTLSADDR, // load address for TLS access - GETSTACKTOP, // retrieve address of stack top (first address of - // locals and temporaries) - - CALL, // A call instruction. - RET_FLAG, // Return with a flag operand. - GLOBAL_BASE_REG, // Global base reg for PIC. + CALL, // A call instruction. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. + GETFUNPLT, // Load function address through %plt insturction. + GETTLSADDR, // Load address for TLS access. + GETSTACKTOP, // Retrieve address of stack top (first address of + // locals and temporaries). + GLOBAL_BASE_REG, // Global base reg for PIC. + Hi, // Hi/Lo operations, typically on a global address. + Lo, // Hi/Lo operations, typically on a global address. + MEMBARRIER, // Compiler barrier only; generate a no-op. + RET_FLAG, // Return with a flag operand. + TS1AM, // A TS1AM instruction used for 1/2 bytes swap. + VEC_BROADCAST, // A vector broadcast instruction. + // 0: scalar value, 1: VL + +// VVP_* nodes. +#define ADD_VVP_OP(VVP_NAME, ...) 
VVP_NAME, +#include "VVPNodes.def" }; } class VETargetLowering : public TargetLowering { const VESubtarget *Subtarget; + void initRegisterClasses(); + void initSPUActions(); + void initVPUActions(); + public: VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); @@ -74,23 +87,98 @@ public: const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + /// Helper functions for atomic operations. + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + // VE uses release consistency, so need fence for each atomics. + return true; + } + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + /// Custom Lower { SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; + unsigned getJumpTableEncoding() const override; + const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned Uid, + MCContext &Ctx) const override; + SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const override; + // VE doesn't need getPICJumpTableRelocBaseExpr since it is used for only + // EK_LabelDifference32. + + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; + + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; /// } Custom Lower + /// Replace the results of node with an illegal result + /// type with new values built out of custom code. 
+ /// + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; + + /// Custom Inserter { + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const; + + void setupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI, + int Offset) const; + // Setup basic block address. + Register prepareMBB(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + MachineBasicBlock *TargetBB, const DebugLoc &DL) const; + // Prepare function/variable address. + Register prepareSymbol(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + StringRef Symbol, const DebugLoc &DL, bool IsLocal, + bool IsCall) const; + /// } Custom Inserter + + /// VVP Lowering { + SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const; + /// } VVPLowering + + /// Custom DAGCombine { + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; + /// } Custom DAGCombine + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, SelectionDAG &DAG) const; SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -99,10 +187,32 @@ public: MachineMemOperand::Flags Flags, bool *Fast) const override; - // Block s/udiv lowering for now - bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; } + /// Inline Assembly { + + ConstraintType getConstraintType(StringRef Constraint) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + /// } Inline Assembly + /// Target Optimization { + + // Return lower limit for number of blocks in a jump table. + unsigned getMinimumJumpTableEntries() const override; + + // SX-Aurora VE's s/udiv is 5-9 times slower than multiply. + bool isIntDivCheap(EVT, AttributeList) const override { return false; } + // VE doesn't have rem. + bool hasStandaloneRem(EVT) const override { return false; } + // VE LDZ instruction returns 64 if the input is zero. + bool isCheapToSpeculateCtlz() const override { return true; } + // VE LDZ instruction is fast. + bool isCtlzFast() const override { return true; } + // VE has NND instruction. bool hasAndNot(SDValue Y) const override; + + /// } Target Optimization }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h new file mode 100644 index 000000000000..1b0e07546931 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h @@ -0,0 +1,41 @@ +//===-- VEInstrBuilder.h - Aides for building VE insts ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H +#define LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... +/// +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool ThreeOp = true) { + if (ThreeOp) + return MIB.addFrameIndex(FI).addImm(0).addImm(Offset); + return MIB.addFrameIndex(FI).addImm(Offset); +} + +} // namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td index 0c02411ff916..f43c9755f1b9 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td @@ -35,6 +35,25 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern> let AsmString = asmstr; let Pattern = pattern; + bits<1> VE_Vector = 0; + bits<1> VE_VLInUse = 0; + bits<3> VE_VLIndex = 0; + bits<1> VE_VLWithMask = 0; + + /// These fields correspond to the fields in VEInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + /// + /// VLIndex is the index of VL register in MI's operands. The HW instruction + /// doesn't have that field, but we add is in MI for the ease of optimization. + /// For example, the index of VL of (VST $sy, $sz, $sx, $vl) is 3 (beginning + /// from 0), and the index of VL of (VST $sy, $sz, $sx, $vm, $vl) is 4. We + /// define vector instructions hierarchically, so use VE_VLIndex which is + /// defined by the type of instruction and VE_VLWithMask which is defined + /// whether the insturction use mask or not. + let TSFlags{0} = VE_Vector; + let TSFlags{1} = VE_VLInUse; + let TSFlags{4-2} = !add(VE_VLIndex, VE_VLWithMask); + let DecoderNamespace = "VE"; field bits<64> SoftFail = 0; } @@ -179,12 +198,82 @@ class RRFENCE<bits<8>opVal, dag outs, dag ins, string asmstr, //----------------------------------------------------------------------------- // Section 5.6 RVM Type +// +// RVM type is for vector transfer instructions. 
//----------------------------------------------------------------------------- +class RVM<bits<8>opVal, dag outs, dag ins, string asmstr, + list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> cx = 0; + bits<1> vc = 0; + bits<1> cs = 0; + bits<4> m = 0; + bits<1> cy = 1; + bits<7> sy; + bits<1> cz = 1; + bits<7> sz; + bits<8> vx; + bits<8> vy = 0; + bits<7> sw = 0; + let op = opVal; + let Inst{55} = cx; + let Inst{54} = vc; + let Inst{53} = cs; + let Inst{52} = 0; + let Inst{51-48} = m; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-24} = vx; + let Inst{23-16} = vy; + let Inst{15-8} = 0; + let Inst{7} = 0; + let Inst{6-0} = sw; + + let VE_Vector = 1; +} + //----------------------------------------------------------------------------- // Section 5.7 RV Type +// +// RV type is for vector instructions. //----------------------------------------------------------------------------- +class RV<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> cx = 0; + bits<1> cx2 = 0; + bits<1> cs = 0; + bits<1> cs2 = 0; + bits<4> m = 0; + bits<1> cy = 1; + bits<7> sy; + bits<1> cz = 0; + bits<7> sz = 0; + bits<8> vx = 0; + bits<8> vy = 0; + bits<8> vz = 0; + bits<8> vw = 0; + let op = opVal; + let Inst{55} = cx; + let Inst{54} = cx2; + let Inst{53} = cs; + let Inst{52} = cs2; + let Inst{51-48} = m; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-24} = vx; + let Inst{23-16} = vy; + let Inst{15-8} = vz; + let Inst{7-0} = vw; + + let VE_Vector = 1; +} + // Pseudo instructions. class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstVE<outs, ins, asmstr, pattern> { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp index 86b2ac2078b1..9770052ff913 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -92,38 +92,46 @@ static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) { llvm_unreachable("Invalid cond code"); } -// Treat br.l [BRCF AT] as unconditional branch +// Treat a branch relative long always instruction as unconditional branch. +// For example, br.l.t and br.l. static bool isUncondBranchOpcode(int Opc) { - return Opc == VE::BRCFLa || Opc == VE::BRCFWa || - Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt || - Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t || - Opc == VE::BRCFDa || Opc == VE::BRCFSa || - Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt || - Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t; + using namespace llvm::VE; + +#define BRKIND(NAME) (Opc == NAME##a || Opc == NAME##a_nt || Opc == NAME##a_t) + // VE has other branch relative always instructions for word/double/float, + // but we use only long branches in our lower. So, sanity check it here. + assert(!BRKIND(BRCFW) && !BRKIND(BRCFD) && !BRKIND(BRCFS) && + "Branch relative word/double/float always instructions should not be " + "used!"); + return BRKIND(BRCFL); +#undef BRKIND } +// Treat branch relative conditional as conditional branch instructions. +// For example, brgt.l.t and brle.s.nt. 
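// Reading the opcode names used here: BRCF<L|W|D|S> selects the compare type
// (long/word/double/float), the rr/ir suffixes select a register or immediate
// first operand, and the trailing _t/_nt appear to be taken/not-taken branch
// hints, with the unsuffixed form carrying no hint.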
static bool isCondBranchOpcode(int Opc) { - return Opc == VE::BRCFLrr || Opc == VE::BRCFLir || - Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt || - Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t || - Opc == VE::BRCFWrr || Opc == VE::BRCFWir || - Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt || - Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t || - Opc == VE::BRCFDrr || Opc == VE::BRCFDir || - Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt || - Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t || - Opc == VE::BRCFSrr || Opc == VE::BRCFSir || - Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt || - Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t; + using namespace llvm::VE; + +#define BRKIND(NAME) \ + (Opc == NAME##rr || Opc == NAME##rr_nt || Opc == NAME##rr_t || \ + Opc == NAME##ir || Opc == NAME##ir_nt || Opc == NAME##ir_t) + return BRKIND(BRCFL) || BRKIND(BRCFW) || BRKIND(BRCFD) || BRKIND(BRCFS); +#undef BRKIND } +// Treat branch long always instructions as indirect branch. +// For example, b.l.t and b.l. static bool isIndirectBranchOpcode(int Opc) { - return Opc == VE::BCFLari || Opc == VE::BCFLari || - Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt || - Opc == VE::BCFLari_t || Opc == VE::BCFLari_t || - Opc == VE::BCFLari || Opc == VE::BCFLari || - Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt || - Opc == VE::BCFLari_t || Opc == VE::BCFLari_t; + using namespace llvm::VE; + +#define BRKIND(NAME) \ + (Opc == NAME##ari || Opc == NAME##ari_nt || Opc == NAME##ari_t) + // VE has other branch always instructions for word/double/float, but + // we use only long branches in our lower. So, sanity check it here. + assert(!BRKIND(BCFW) && !BRKIND(BCFD) && !BRKIND(BCFS) && + "Branch word/double/float always instructions should not be used!"); + return BRKIND(BCFL); +#undef BRKIND } static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, @@ -311,11 +319,43 @@ bool VEInstrInfo::reverseBranchCondition( } static bool IsAliasOfSX(Register Reg) { - return VE::I8RegClass.contains(Reg) || VE::I16RegClass.contains(Reg) || - VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) || + return VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) || VE::F32RegClass.contains(Reg); } +static void copyPhysSubRegs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + MCRegister DestReg, MCRegister SrcReg, bool KillSrc, + const MCInstrDesc &MCID, unsigned int NumSubRegs, + const unsigned *SubRegIdx, + const TargetRegisterInfo *TRI) { + MachineInstr *MovMI = nullptr; + + for (unsigned Idx = 0; Idx != NumSubRegs; ++Idx) { + Register SubDest = TRI->getSubReg(DestReg, SubRegIdx[Idx]); + Register SubSrc = TRI->getSubReg(SrcReg, SubRegIdx[Idx]); + assert(SubDest && SubSrc && "Bad sub-register"); + + if (MCID.getOpcode() == VE::ORri) { + // generate "ORri, dest, src, 0" instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0); + MovMI = MIB.getInstr(); + } else if (MCID.getOpcode() == VE::ANDMmm) { + // generate "ANDM, dest, vm0, src" instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, I, DL, MCID, SubDest).addReg(VE::VM0).addReg(SubSrc); + MovMI = MIB.getInstr(); + } else { + llvm_unreachable("Unexpected reg-to-reg copy instruction"); + } + } + // Add implicit super-register defs and kills to the last MovMI. 
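  // Without these implicit operands only the two sub-registers would appear
  // defined or killed, and the liveness of the full F128/VM512 super-register
  // would be lost once the copy has been split in half.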
+ MovMI->addRegisterDefined(DestReg, TRI); + if (KillSrc) + MovMI->addRegisterKilled(SrcReg, TRI, true); +} + void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, @@ -325,6 +365,41 @@ void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(VE::ORri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0); + } else if (VE::V64RegClass.contains(DestReg, SrcReg)) { + // Generate following instructions + // %sw16 = LEA32zii 256 + // VORmvl %dest, (0)1, %src, %sw16 + // TODO: reuse a register if vl is already assigned to a register + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + Register TmpReg = VE::SX16; + Register SubTmp = TRI->getSubReg(TmpReg, VE::sub_i32); + BuildMI(MBB, I, DL, get(VE::LEAzii), TmpReg) + .addImm(0) + .addImm(0) + .addImm(256); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(VE::VORmvl), DestReg) + .addImm(M1(0)) // Represent (0)1. + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SubTmp, getKillRegState(true)); + MIB.getInstr()->addRegisterKilled(TmpReg, TRI, true); + } else if (VE::VMRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(VE::ANDMmm), DestReg) + .addReg(VE::VM0) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (VE::VM512RegClass.contains(DestReg, SrcReg)) { + // Use two instructions. + const unsigned SubRegIdx[] = {VE::sub_vm_even, VE::sub_vm_odd}; + unsigned int NumSubRegs = 2; + copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ANDMmm), + NumSubRegs, SubRegIdx, &getRegisterInfo()); + } else if (VE::F128RegClass.contains(DestReg, SrcReg)) { + // Use two instructions. 
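    // A quad-precision value lives in an even/odd pair of 64-bit registers,
    // so each half is copied with the or-with-zero move idiom (ORri ..., 0)
    // used for plain scalar copies above.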
+ const unsigned SubRegIdx[] = {VE::sub_even, VE::sub_odd}; + unsigned int NumSubRegs = 2; + copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri), + NumSubRegs, SubRegIdx, &getRegisterInfo()); } else { const TargetRegisterInfo *TRI = &getRegisterInfo(); dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI) @@ -342,7 +417,8 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (MI.getOpcode() == VE::LDrii || // I64 MI.getOpcode() == VE::LDLSXrii || // I32 - MI.getOpcode() == VE::LDUrii // F32 + MI.getOpcode() == VE::LDUrii || // F32 + MI.getOpcode() == VE::LDQrii // F128 (pseudo) ) { if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() && @@ -363,7 +439,8 @@ unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (MI.getOpcode() == VE::STrii || // I64 MI.getOpcode() == VE::STLrii || // I32 - MI.getOpcode() == VE::STUrii // F32 + MI.getOpcode() == VE::STUrii || // F32 + MI.getOpcode() == VE::STQrii // F128 (pseudo) ) { if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() && @@ -412,6 +489,13 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addImm(0) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO); + } else if (VE::F128RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::STQrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); } else report_fatal_error("Can't store this register to stack slot"); } @@ -449,10 +533,194 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0) .addImm(0) .addMemOperand(MMO); + } else if (VE::F128RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::LDQrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); } else report_fatal_error("Can't load this register from stack slot"); } +bool VEInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg, MachineRegisterInfo *MRI) const { + LLVM_DEBUG(dbgs() << "FoldImmediate\n"); + + LLVM_DEBUG(dbgs() << "checking DefMI\n"); + int64_t ImmVal; + switch (DefMI.getOpcode()) { + default: + return false; + case VE::ORim: + // General move small immediate instruction on VE. + LLVM_DEBUG(dbgs() << "checking ORim\n"); + LLVM_DEBUG(DefMI.dump()); + // FIXME: We may need to support FPImm too. + assert(DefMI.getOperand(1).isImm()); + assert(DefMI.getOperand(2).isImm()); + ImmVal = + DefMI.getOperand(1).getImm() + mimm2Val(DefMI.getOperand(2).getImm()); + LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n"); + break; + case VE::LEAzii: + // General move immediate instruction on VE. + LLVM_DEBUG(dbgs() << "checking LEAzii\n"); + LLVM_DEBUG(DefMI.dump()); + // FIXME: We may need to support FPImm too. 
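    // LEAzii materializes a constant over a zero base, so the value being
    // defined is simply operand 2 plus operand 3; e.g. the LEAzii %dst, 0, 0,
    // 256 emitted for the V64 register copy earlier in this file yields 256.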
+ assert(DefMI.getOperand(2).isImm()); + if (!DefMI.getOperand(3).isImm()) + // LEAzii may refer label + return false; + ImmVal = DefMI.getOperand(2).getImm() + DefMI.getOperand(3).getImm(); + LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n"); + break; + } + + // Try to fold like below: + // %1:i64 = ORim 0, 0(1) + // %2:i64 = CMPSLrr %0, %1 + // To + // %2:i64 = CMPSLrm %0, 0(1) + // + // Another example: + // %1:i64 = ORim 6, 0(1) + // %2:i64 = CMPSLrr %1, %0 + // To + // %2:i64 = CMPSLir 6, %0 + // + // Support commutable instructions like below: + // %1:i64 = ORim 6, 0(1) + // %2:i64 = ADDSLrr %1, %0 + // To + // %2:i64 = ADDSLri %0, 6 + // + // FIXME: Need to support i32. Current implementtation requires + // EXTRACT_SUBREG, so input has following COPY and it avoids folding: + // %1:i64 = ORim 6, 0(1) + // %2:i32 = COPY %1.sub_i32 + // %3:i32 = ADDSWSXrr %0, %2 + // FIXME: Need to support shift, cmov, and more instructions. + // FIXME: Need to support lvl too, but LVLGen runs after peephole-opt. + + LLVM_DEBUG(dbgs() << "checking UseMI\n"); + LLVM_DEBUG(UseMI.dump()); + unsigned NewUseOpcSImm7; + unsigned NewUseOpcMImm; + enum InstType { + rr2ri_rm, // rr -> ri or rm, commutable + rr2ir_rm, // rr -> ir or rm + } InstType; + + using namespace llvm::VE; +#define INSTRKIND(NAME) \ + case NAME##rr: \ + NewUseOpcSImm7 = NAME##ri; \ + NewUseOpcMImm = NAME##rm; \ + InstType = rr2ri_rm; \ + break +#define NCINSTRKIND(NAME) \ + case NAME##rr: \ + NewUseOpcSImm7 = NAME##ir; \ + NewUseOpcMImm = NAME##rm; \ + InstType = rr2ir_rm; \ + break + + switch (UseMI.getOpcode()) { + default: + return false; + + INSTRKIND(ADDUL); + INSTRKIND(ADDSWSX); + INSTRKIND(ADDSWZX); + INSTRKIND(ADDSL); + NCINSTRKIND(SUBUL); + NCINSTRKIND(SUBSWSX); + NCINSTRKIND(SUBSWZX); + NCINSTRKIND(SUBSL); + INSTRKIND(MULUL); + INSTRKIND(MULSWSX); + INSTRKIND(MULSWZX); + INSTRKIND(MULSL); + NCINSTRKIND(DIVUL); + NCINSTRKIND(DIVSWSX); + NCINSTRKIND(DIVSWZX); + NCINSTRKIND(DIVSL); + NCINSTRKIND(CMPUL); + NCINSTRKIND(CMPSWSX); + NCINSTRKIND(CMPSWZX); + NCINSTRKIND(CMPSL); + INSTRKIND(MAXSWSX); + INSTRKIND(MAXSWZX); + INSTRKIND(MAXSL); + INSTRKIND(MINSWSX); + INSTRKIND(MINSWZX); + INSTRKIND(MINSL); + INSTRKIND(AND); + INSTRKIND(OR); + INSTRKIND(XOR); + INSTRKIND(EQV); + NCINSTRKIND(NND); + NCINSTRKIND(MRG); + } + +#undef INSTRKIND + + unsigned NewUseOpc; + unsigned UseIdx; + bool Commute = false; + LLVM_DEBUG(dbgs() << "checking UseMI operands\n"); + switch (InstType) { + case rr2ri_rm: + UseIdx = 2; + if (UseMI.getOperand(1).getReg() == Reg) { + Commute = true; + } else { + assert(UseMI.getOperand(2).getReg() == Reg); + } + if (isInt<7>(ImmVal)) { + // This ImmVal matches to SImm7 slot, so change UseOpc to an instruction + // holds a simm7 slot. + NewUseOpc = NewUseOpcSImm7; + } else if (isMImmVal(ImmVal)) { + // Similarly, change UseOpc to an instruction holds a mimm slot. + NewUseOpc = NewUseOpcMImm; + ImmVal = val2MImm(ImmVal); + } else + return false; + break; + case rr2ir_rm: + if (UseMI.getOperand(1).getReg() == Reg) { + // Check immediate value whether it matchs to the UseMI instruction. + if (!isInt<7>(ImmVal)) + return false; + NewUseOpc = NewUseOpcSImm7; + UseIdx = 1; + } else { + assert(UseMI.getOperand(2).getReg() == Reg); + // Check immediate value whether it matchs to the UseMI instruction. 
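      // Only run-of-bits patterns fit the mimm slot: (m)0 is m leading zero
      // bits followed by ones (e.g. (32)0 == 0x00000000FFFFFFFF, the mask in
      // the lea/and/lea.sl sequences), and (m)1 is m leading one bits
      // followed by zeros; anything else has to use the simm7 form or stay
      // in a register.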
+ if (!isMImmVal(ImmVal)) + return false; + NewUseOpc = NewUseOpcMImm; + ImmVal = val2MImm(ImmVal); + UseIdx = 2; + } + break; + } + + LLVM_DEBUG(dbgs() << "modifying UseMI\n"); + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI.setDesc(get(NewUseOpc)); + if (Commute) { + UseMI.getOperand(1).setReg(UseMI.getOperand(UseIdx).getReg()); + } + UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); + if (DeleteDef) + DefMI.eraseFromParent(); + + return true; +} + Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>(); Register GlobalBaseReg = VEFI->getGlobalBaseReg(); @@ -472,6 +740,106 @@ Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { return GlobalBaseReg; } +static Register getVM512Upper(Register reg) { + return (reg - VE::VMP0) * 2 + VE::VM0; +} + +static Register getVM512Lower(Register reg) { return getVM512Upper(reg) + 1; } + +// Expand pseudo logical vector instructions for VM512 registers. +static void expandPseudoLogM(MachineInstr &MI, const MCInstrDesc &MCID) { + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + Register VMXu = getVM512Upper(MI.getOperand(0).getReg()); + Register VMXl = getVM512Lower(MI.getOperand(0).getReg()); + Register VMYu = getVM512Upper(MI.getOperand(1).getReg()); + Register VMYl = getVM512Lower(MI.getOperand(1).getReg()); + + switch (MI.getOpcode()) { + default: { + Register VMZu = getVM512Upper(MI.getOperand(2).getReg()); + Register VMZl = getVM512Lower(MI.getOperand(2).getReg()); + BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu).addUse(VMZu); + BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl).addUse(VMZl); + break; + } + case VE::NEGMy: + BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu); + BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl); + break; + } + MI.eraseFromParent(); +} + +static void addOperandsForVFMK(MachineInstrBuilder &MIB, MachineInstr &MI, + bool Upper) { + // VM512 + MIB.addReg(Upper ? getVM512Upper(MI.getOperand(0).getReg()) + : getVM512Lower(MI.getOperand(0).getReg())); + + switch (MI.getNumExplicitOperands()) { + default: + report_fatal_error("unexpected number of operands for pvfmk"); + case 2: // _Ml: VM512, VL + // VL + MIB.addReg(MI.getOperand(1).getReg()); + break; + case 4: // _Mvl: VM512, CC, VR, VL + // CC + MIB.addImm(MI.getOperand(1).getImm()); + // VR + MIB.addReg(MI.getOperand(2).getReg()); + // VL + MIB.addReg(MI.getOperand(3).getReg()); + break; + case 5: // _MvMl: VM512, CC, VR, VM512, VL + // CC + MIB.addImm(MI.getOperand(1).getImm()); + // VR + MIB.addReg(MI.getOperand(2).getReg()); + // VM512 + MIB.addReg(Upper ? 
getVM512Upper(MI.getOperand(3).getReg()) + : getVM512Lower(MI.getOperand(3).getReg())); + // VL + MIB.addReg(MI.getOperand(4).getReg()); + break; + } +} + +static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) { + // replace to pvfmk.w.up and pvfmk.w.lo + // replace to pvfmk.s.up and pvfmk.s.lo + + static std::map<unsigned, std::pair<unsigned, unsigned>> VFMKMap = { + {VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}}, + {VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}}, + {VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}}, + {VE::VFMKWyvyl, {VE::PVFMKWUPvml, VE::PVFMKWLOvml}}, + {VE::VFMKSyvl, {VE::PVFMKSUPvl, VE::PVFMKSLOvl}}, + {VE::VFMKSyvyl, {VE::PVFMKSUPvml, VE::PVFMKSLOvml}}, + }; + + unsigned Opcode = MI.getOpcode(); + + auto Found = VFMKMap.find(Opcode); + if (Found == VFMKMap.end()) + report_fatal_error("unexpected opcode for pseudo vfmk"); + + unsigned OpcodeUpper = (*Found).second.first; + unsigned OpcodeLower = (*Found).second.second; + + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + MachineInstrBuilder Bu = BuildMI(*MBB, MI, DL, TI.get(OpcodeUpper)); + addOperandsForVFMK(Bu, MI, /* Upper */ true); + MachineInstrBuilder Bl = BuildMI(*MBB, MI, DL, TI.get(OpcodeLower)); + addOperandsForVFMK(Bl, MI, /* Upper */ false); + + MI.eraseFromParent(); +} + bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { case VE::EXTEND_STACK: { @@ -484,6 +852,110 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case VE::GETSTACKTOP: { return expandGetStackTopPseudo(MI); } + + case VE::ANDMyy: + expandPseudoLogM(MI, get(VE::ANDMmm)); + return true; + case VE::ORMyy: + expandPseudoLogM(MI, get(VE::ORMmm)); + return true; + case VE::XORMyy: + expandPseudoLogM(MI, get(VE::XORMmm)); + return true; + case VE::EQVMyy: + expandPseudoLogM(MI, get(VE::EQVMmm)); + return true; + case VE::NNDMyy: + expandPseudoLogM(MI, get(VE::NNDMmm)); + return true; + case VE::NEGMy: + expandPseudoLogM(MI, get(VE::NEGMm)); + return true; + + case VE::LVMyir: + case VE::LVMyim: + case VE::LVMyir_y: + case VE::LVMyim_y: { + Register VMXu = getVM512Upper(MI.getOperand(0).getReg()); + Register VMXl = getVM512Lower(MI.getOperand(0).getReg()); + int64_t Imm = MI.getOperand(1).getImm(); + bool IsSrcReg = + MI.getOpcode() == VE::LVMyir || MI.getOpcode() == VE::LVMyir_y; + Register Src = IsSrcReg ? MI.getOperand(2).getReg() : VE::NoRegister; + int64_t MImm = IsSrcReg ? 0 : MI.getOperand(2).getImm(); + bool KillSrc = IsSrcReg ? 
MI.getOperand(2).isKill() : false; + Register VMX = VMXl; + if (Imm >= 4) { + VMX = VMXu; + Imm -= 4; + } + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + switch (MI.getOpcode()) { + case VE::LVMyir: + BuildMI(*MBB, MI, DL, get(VE::LVMir)) + .addDef(VMX) + .addImm(Imm) + .addReg(Src, getKillRegState(KillSrc)); + break; + case VE::LVMyim: + BuildMI(*MBB, MI, DL, get(VE::LVMim)) + .addDef(VMX) + .addImm(Imm) + .addImm(MImm); + break; + case VE::LVMyir_y: + assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() && + "LVMyir_y has different register in 3rd operand"); + BuildMI(*MBB, MI, DL, get(VE::LVMir_m)) + .addDef(VMX) + .addImm(Imm) + .addReg(Src, getKillRegState(KillSrc)) + .addReg(VMX); + break; + case VE::LVMyim_y: + assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() && + "LVMyim_y has different register in 3rd operand"); + BuildMI(*MBB, MI, DL, get(VE::LVMim_m)) + .addDef(VMX) + .addImm(Imm) + .addImm(MImm) + .addReg(VMX); + break; + } + MI.eraseFromParent(); + return true; + } + case VE::SVMyi: { + Register Dest = MI.getOperand(0).getReg(); + Register VMZu = getVM512Upper(MI.getOperand(1).getReg()); + Register VMZl = getVM512Lower(MI.getOperand(1).getReg()); + bool KillSrc = MI.getOperand(1).isKill(); + int64_t Imm = MI.getOperand(2).getImm(); + Register VMZ = VMZl; + if (Imm >= 4) { + VMZ = VMZu; + Imm -= 4; + } + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm); + MachineInstr *Inst = MIB.getInstr(); + MI.eraseFromParent(); + if (KillSrc) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true); + } + return true; + } + case VE::VFMKyal: + case VE::VFMKynal: + case VE::VFMKWyvl: + case VE::VFMKWyvyl: + case VE::VFMKSyvl: + case VE::VFMKSyvyl: + expandPseudoVFMK(*this, MI); } return false; } @@ -586,8 +1058,8 @@ bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const VEFrameLowering &TFL = *STI.getFrameLowering(); - // The VE ABI requires a reserved 176 bytes area at the top - // of stack as described in VESubtarget.cpp. So, we adjust it here. + // The VE ABI requires a reserved area at the top of stack as described + // in VEFrameLowering.cpp. So, we adjust it here. unsigned NumBytes = STI.getAdjustedFrameSize(0); // Also adds the size of parameter area. diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h index 7b6662df1d60..ed1f49182150 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h @@ -23,6 +23,31 @@ namespace llvm { class VESubtarget; +/// VEII - This namespace holds all of the Aurora VE target-specific +/// per-instruction flags. These must match the corresponding definitions in +/// VEInstrFormats.td. +namespace VEII { +enum { + // Aurora VE Instruction Flags. These flags describe the characteristics of + // the Aurora VE instructions for vector handling. + + /// VE_Vector - This instruction is Vector Instruction. + VE_Vector = 0x1, + + /// VE_VLInUse - This instruction has a vector register in its operands. 
+  VE_VLInUse = 0x2,
+
+  /// VE_VLMask/Shift - This is a bitmask that selects the index number where
+  /// an instruction holds vector length information (0 to 6, 7 means undef).
+  VE_VLShift = 2,
+  VE_VLMask = 0x07 << VE_VLShift,
+};
+
+#define HAS_VLINDEX(TSF) ((TSF)&VEII::VE_VLInUse)
+#define GET_VLINDEX(TSF) \
+  (HAS_VLINDEX(TSF) ? (int)(((TSF)&VEII::VE_VLMask) >> VEII::VE_VLShift) : -1)
+} // end namespace VEII
+
 class VEInstrInfo : public VEGenInstrInfo {
   const VERegisterInfo RI;
   virtual void anchor();
@@ -75,6 +100,13 @@ public:
                             const TargetRegisterInfo *TRI) const override;
   /// } Stack Spill & Reload
 
+  /// Optimization {
+
+  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
+                     MachineRegisterInfo *MRI) const override;
+
+  /// } Optimization
+
   Register getGlobalBaseReg(MachineFunction *MF) const;
 
   // Lower pseudo instructions after register allocation.
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
index 8500f8ef1292..b6862cf7b30d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -48,7 +48,7 @@ def LO7 : SDNodeXForm<imm, [{
                                    SDLoc(N), MVT::i32);
 }]>;
 def MIMM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)),
+  return CurDAG->getTargetConstant(val2MImm(getImmVal(N)),
                                    SDLoc(N), MVT::i32);
 }]>;
 def LO32 : SDNodeXForm<imm, [{
@@ -66,7 +66,7 @@ def LO7FP : SDNodeXForm<fpimm, [{
   return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32);
 }]>;
 def MIMMFP : SDNodeXForm<fpimm, [{
-  return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)),
+  return CurDAG->getTargetConstant(val2MImm(getFpImmVal(N)),
                                    SDLoc(N), MVT::i32);
 }]>;
 def LOFP32 : SDNodeXForm<fpimm, [{
@@ -157,6 +157,15 @@ def uimm3 : Operand<i32>, PatLeaf<(imm), [{
   let ParserMatchClass = UImm3AsmOperand;
 }
 
+// uimm4 - Generic immediate value.
+def UImm4AsmOperand : AsmOperandClass {
+  let Name = "UImm4";
+}
+def uimm4 : Operand<i32>, PatLeaf<(imm), [{
+    return isUInt<4>(N->getZExtValue()); }], ULO7> {
+  let ParserMatchClass = UImm4AsmOperand;
+}
+
 // uimm6 - Generic immediate value.
 def UImm6AsmOperand : AsmOperandClass {
   let Name = "UImm6";
@@ -196,6 +205,12 @@ def mimm : Operand<i32>, PatLeaf<(imm), [{
   let PrintMethod = "printMImmOperand";
 }
 
+// zerofp - Generic fp immediate zero value.
+def zerofp : Operand<i32>, PatLeaf<(fpimm), [{
+    return getFpImmVal(N) == 0; }]> {
+  let ParserMatchClass = ZeroAsmOperand;
+}
+
 // simm7fp - Generic fp immediate value.
def simm7fp : Operand<i32>, PatLeaf<(fpimm), [{ return isInt<7>(getFpImmVal(N)); @@ -230,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000) == 0; }]>; def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff) == 0; }]>; +def nonzero : PatLeaf<(imm), [{ return N->getSExtValue() !=0 ; }]>; def CCSIOp : PatLeaf<(cond), [{ switch (N->get()) { @@ -430,6 +446,17 @@ def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, def getGOT : Operand<iPTR>; +def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_setup_dispatch: SDNode<"VEISD::EH_SJLJ_SETUP_DISPATCH", + SDTypeProfile<0, 0, []>, + [SDNPHasChain, SDNPSideEffect]>; + // GETFUNPLT for PIC def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>; @@ -442,6 +469,16 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall, def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone, [SDNPHasChain, SDNPSideEffect]>; +// MEMBARRIER +def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +// TS1AM +def SDT_TS1AM : SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisPtrTy<1>, + SDTCisVT<2, i32>, SDTCisInt<3>]>; +def ts1am : SDNode<"VEISD::TS1AM", SDT_TS1AM, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; //===----------------------------------------------------------------------===// // VE Flag Conditions @@ -497,7 +534,8 @@ multiclass RRbm<string opcStr, bits<8>opc, RegisterClass RCo, ValueType Tyo, RegisterClass RCi, ValueType Tyi, SDPatternOperator OpNode = null_frag, - Operand immOp = simm7, Operand mOp = mimm> { + Operand immOp = simm7, Operand mOp = mimm, + bit MoveImm = 0> { def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz), !strconcat(opcStr, " $sx, $sy, $sz"), [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>; @@ -514,7 +552,12 @@ multiclass RRbm<string opcStr, bits<8>opc, let cy = 0, cz = 0 in def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz), !strconcat(opcStr, " $sx, $sy, $sz"), - [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>; + [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]> { + // VE uses ORim as a move immediate instruction, so declare it here. + // An instruction declared as MoveImm will be optimized in FoldImmediate + // later. + let isMoveImm = MoveImm; + } } // Multiclass for non-commutative RR type instructions @@ -546,8 +589,8 @@ multiclass RRNCbm<string opcStr, bits<8>opc, multiclass RRm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty, SDPatternOperator OpNode = null_frag, - Operand immOp = simm7, Operand mOp = mimm> : - RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>; + Operand immOp = simm7, Operand mOp = mimm, bit MoveImm = 0> : + RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp, MoveImm>; // Generic RR multiclass for non-commutative instructions with 2 arguments. // e.g. SUBUL, SUBUW, SUBSWSX, and etc. 
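The isMoveImm comment above and the isMImmVal/val2MImm calls in the FoldImmediate hunk earlier in this diff both rely on VE's "M immediate" form: a 64-bit value made of m leading ones followed by zeros, written (m)1, or m leading zeros followed by ones, written (m)0, encoded in 7 bits as m or m + 64 respectively. That convention is what spellings such as !add(56, 64) for the mask 0xff and the "(63)0 == 1" remark later in this file refer to. The following is a minimal standalone sketch of the check and the encoding under that assumption; the helper names are invented for illustration and are not the in-tree functions.

  #include <cassert>
  #include <cstdint>

  // (m)1 = m leading one bits then zeros, encoded as m (0..63).
  // (m)0 = m leading zero bits then ones, encoded as m + 64.
  static bool isMImmSketch(uint64_t Val) {
    if (Val == 0)
      return true;                          // 0 is (0)1
    if ((Val & (Val + 1)) == 0)
      return true;                          // 0...01...1 -> (m)0
    return (~Val & (~Val + 1)) == 0;        // 1...10...0 -> (m)1
  }

  static unsigned val2MImmSketch(uint64_t Val) {
    if (Val == 0)
      return 0;                             // (0)1
    unsigned M = 0;
    if (Val >> 63) {                        // (m)1: count the leading ones
      while (M < 64 && ((Val >> (63 - M)) & 1))
        ++M;
      return M;
    }
    while (M < 64 && !((Val >> (63 - M)) & 1))
      ++M;                                  // (m)0: count the leading zeros
    return M + 64;
  }

  int main() {
    assert(val2MImmSketch(1) == 63 + 64);               // (63)0 == 1
    assert(val2MImmSketch(0xffull) == 56 + 64);         // (56)0 == 0xff
    assert(val2MImmSketch(0xffffffffull) == 32 + 64);   // (32)0, as in the ANDrm patterns
    assert(val2MImmSketch(0xff00000000000000ull) == 8); // (8)1
    assert(!isMImmSketch(0x7e));                        // not a single run from either end
    return 0;
  }

Read this way, the FoldImmediate change earlier in the diff only folds a LEA/OR move-immediate into an AND/OR/XOR user through the M-operand form when the constant passes exactly this kind of check, and it keeps the register operand otherwise.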
@@ -775,10 +818,10 @@ multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond, let bpf = 0 /* NONE */ in def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)), !strconcat(opcStr, " ", cmpStr, "$addr")>; - let bpf = 2 /* NOT TaKEN */ in + let bpf = 2 /* NOT TAKEN */ in def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)), !strconcat(opcStr, ".nt ", cmpStr, "$addr")>; - let bpf = 3 /* TaKEN */ in + let bpf = 3 /* TAKEN */ in def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)), !strconcat(opcStr, ".t ", cmpStr, "$addr")>; } @@ -807,18 +850,25 @@ multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> { let bpf = 0 /* NONE */ in def "" : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)), !strconcat(opcStr, " ", cmpStr, "$imm32")>; - let bpf = 2 /* NOT TaKEN */ in + let bpf = 2 /* NOT TAKEN */ in def _nt : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)), !strconcat(opcStr, ".nt ", cmpStr, "$imm32")>; - let bpf = 3 /* TaKEN */ in + let bpf = 3 /* TAKEN */ in def _t : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)), !strconcat(opcStr, ".t ", cmpStr, "$imm32")>; } multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc, - RegisterClass RC, Operand immOp> { + RegisterClass RC, Operand immOp, Operand zeroOp> { defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>; let cy = 0 in - defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, RC:$sz)>; + defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, + RC:$sz)>; + let cz = 0 in + defm rz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, + zeroOp:$sz)>; + let cy = 0, cz = 0 in + defm iz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, + zeroOp:$sz)>; let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in defm a : BCRbpfm<opcStrAt, "", opc, (ins)>; let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in @@ -898,7 +948,7 @@ multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> { //----------------------------------------------------------------------------- // Multiclass for generic RM instructions -multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> { +multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC, bit MoveImm = 0> { def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr), !strconcat(opcStr, " $dest, $addr"), []>; let cy = 0 in @@ -909,36 +959,27 @@ multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> { !strconcat(opcStr, " $dest, $addr"), []>; let cy = 0, cz = 0 in def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr), - !strconcat(opcStr, " $dest, $addr"), []>; + !strconcat(opcStr, " $dest, $addr"), []> { + // VE uses LEAzii and LEASLzii as a move immediate instruction, so declare + // it here. An instruction declared as MoveImm will be optimized in + // FoldImmediate later. + let isMoveImm = MoveImm; + } } // Section 8.2.1 - LEA -let cx = 0, DecoderMethod = "DecodeLoadI64" in -defm LEA : RMm<"lea", 0x06, I64>; -let cx = 1, DecoderMethod = "DecodeLoadI64" in -defm LEASL : RMm<"lea.sl", 0x06, I64>; -let cx = 0, DecoderMethod = "DecodeLoadI32", isCodeGenOnly = 1 in -defm LEA32 : RMm<"lea", 0x06, I32>; +let isReMaterializable = 1, isAsCheapAsAMove = 1, + DecoderMethod = "DecodeLoadI64" in { + let cx = 0 in defm LEA : RMm<"lea", 0x06, I64, /* MoveImm */ 1>; + let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64, /* MoveImm */ 1>; +} +// LEA basic patterns. +// Need to be defined here to prioritize LEA over ADX. 
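+// (lea materializes $base + $idx + sext32($disp), while lea.sl adds the
+// 32-bit displacement shifted into the upper half; an arbitrary 64-bit
+// constant is therefore built as a LEA of the low half followed by a LEASL
+// of the high half, as in the "Arbitrary immediates" patterns later in this
+// file.)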
def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>; def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>; def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>; def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>; -def : Pat<(add I32:$base, simm32:$disp), - (LEA32rii (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $base, sub_i32), 0, - (LO32 $disp))>; - -def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp), - [(add (add node:$base, node:$idx), node:$disp), - (add (add node:$base, node:$disp), node:$idx)]>; -def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp), - (LEArii $base, (LO7 $idx), (LO32 $disp))>; -def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp), - (LEArri $base, $idx, (LO32 $disp))>; -def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp), - (LEASLrii $base, (LO7 $idx), (HI32 $disp))>; -def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp), - (LEASLrri $base, $idx, (HI32 $disp))>; // Multiclass for load instructions. let mayLoad = 1, hasSideEffects = 0 in @@ -991,6 +1032,13 @@ defm LD1BSX : LOADm<"ld1b.sx", 0x05, I32, i32, sextloadi8>; let cx = 1, DecoderMethod = "DecodeLoadI32" in defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>; +// LDQ pseudo instructions +let mayLoad = 1, hasSideEffects = 0 in { + def LDQrii : Pseudo<(outs F128:$dest), (ins MEMrii:$addr), + "# pseudo ldq $dest, $addr", + [(set f128:$dest, (load ADDRrii:$addr))]>; +} + // Multiclass for store instructions. let mayStore = 1 in multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty, @@ -1036,6 +1084,13 @@ defm ST2B : STOREm<"st2b", 0x14, I32, i32, truncstorei16>; let DecoderMethod = "DecodeStoreI32" in defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>; +// STQ pseudo instructions +let mayStore = 1, hasSideEffects = 0 in { + def STQrii : Pseudo<(outs), (ins MEMrii:$addr, F128:$sx), + "# pseudo stq $sx, $addr", + [(store f128:$sx, ADDRrii:$addr)]>; +} + // Section 8.2.12 - DLDS let DecoderMethod = "DecodeLoadI64" in defm DLD : LOADm<"dld", 0x09, I64, i64, load>; @@ -1074,9 +1129,9 @@ defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>; // Section 8.2.20 - CAS (Compare and Swap) let DecoderMethod = "DecodeCASI64" in -defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>; +defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7, atomic_cmp_swap_64>; let DecoderMethod = "DecodeCASI32", cx = 1 in -defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>; +defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7, atomic_cmp_swap_32>; //----------------------------------------------------------------------------- // Section 8.3 - Transfer Control Instructions @@ -1106,6 +1161,8 @@ def SVOB : RR<0x30, (outs), (ins), "svob">; // Section 8.4 - Fixed-point Operation Instructions //----------------------------------------------------------------------------- +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.4.1 - ADD (Add) defm ADDUL : RRm<"addu.l", 0x48, I64, i64>; let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>; @@ -1128,6 +1185,8 @@ let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>; // Section 8.4.6 - SBX (Subtract) defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>; +} // isReMaterializable, isAsCheapAsAMove + // Section 8.4.7 - MPY (Multiply) defm MULUL : RRm<"mulu.l", 0x49, I64, i64>; let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>; @@ -1153,6 +1212,8 @@ let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>; // Section 8.4.13 - DVX (Divide) defm DIVSL : RRNCm<"divs.l", 0x7F, I64, 
i64, sdiv>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.4.14 - CMP (Compare) defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>; let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>; @@ -1175,45 +1236,66 @@ let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>; defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>; let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>; +} // isReMaterializable, isAsCheapAsAMove + //----------------------------------------------------------------------------- // Section 8.5 - Logical Operation Instructions //----------------------------------------------------------------------------- +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.5.1 - AND (AND) defm AND : RRm<"and", 0x44, I64, i64, and>; -let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>; // Section 8.5.2 - OR (OR) -defm OR : RRm<"or", 0x45, I64, i64, or>; -let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>; +defm OR : RRm<"or", 0x45, I64, i64, or, simm7, mimm, /* MoveImm */ 1>; // Section 8.5.3 - XOR (Exclusive OR) defm XOR : RRm<"xor", 0x46, I64, i64, xor>; -let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>; // Section 8.5.4 - EQV (Equivalence) defm EQV : RRm<"eqv", 0x47, I64, i64>; +} // isReMaterializable, isAsCheapAsAMove + // Section 8.5.5 - NND (Negate AND) def and_not : PatFrags<(ops node:$x, node:$y), [(and (not node:$x), node:$y)]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>; // Section 8.5.6 - MRG (Merge) defm MRG : RRMRGm<"mrg", 0x56, I64, i64>; // Section 8.5.7 - LDZ (Leading Zero Count) -defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>; +def ctlz_pat : PatFrags<(ops node:$src), + [(ctlz node:$src), + (ctlz_zero_undef node:$src)]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>; // Section 8.5.8 - PCNT (Population Count) defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>; // Section 8.5.9 - BRV (Bit Reverse) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>; // Section 8.5.10 - BSWP (Byte Swap) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>; +def : Pat<(i64 (bswap i64:$src)), + (BSWPri $src, 0)>; +def : Pat<(i64 (bswap (i64 mimm:$src))), + (BSWPmi (MIMM $src), 0)>; +def : Pat<(i32 (bswap i32:$src)), + (EXTRACT_SUBREG + (BSWPri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), + sub_i32)>; +def : Pat<(i32 (bswap (i32 mimm:$src))), + (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>; + // Section 8.5.11 - CMOV (Conditional Move) let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>; let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>; @@ -1229,17 +1311,21 @@ def : MnemonicAlias<"cmov.s", "cmov.s.at">; //----------------------------------------------------------------------------- // Section 8.6.1 - SLL (Shift Left Logical) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm SLL : RRIm<"sll", 0x65, I64, i64, shl>; // Section 8.6.2 - SLD (Shift Left Double) defm SLD : RRILDm<"sld", 0x64, I64, i64>; // Section 8.6.3 - SRL (Shift Right Logical) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm SRL : RRIm<"srl", 0x75, I64, i64, srl>; // Section 8.6.4 - SRD (Shift Right Double) defm SRD : RRIRDm<"srd", 0x74, I64, i64>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.6.5 - SLA (Shift Left 
Arithmetic) defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>; let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>; @@ -1254,6 +1340,8 @@ let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>; // Section 8.6.8 - SRAX (Shift Right Arithmetic) defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>; +} // isReMaterializable, isAsCheapAsAMove + def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))), (EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>; @@ -1302,13 +1390,13 @@ let cw = 1, cx = 1 in defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>; // Section 8.7.7 - FAQ (Floating Add Quadruple) -defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>; +defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128, fadd>; // Section 8.7.8 - FSQ (Floating Subtract Quadruple) -defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>; +defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128, fsub>; // Section 8.7.9 - FMQ (Floating Subtract Quadruple) -defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>; +defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128, fmul>; // Section 8.7.10 - FCQ (Floating Compare Quadruple) defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp, @@ -1339,17 +1427,17 @@ defm CVTDL : CVTm<"cvt.d.l", 0x5F, I64, f64, I64, i64, sint_to_fp>; // Section 8.7.15 - CVS (Convert to Single-format) defm CVTSD : CVTm<"cvt.s.d", 0x1F, F32, f32, I64, f64, fpround>; let cx = 1 in -defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128>; +defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128, fpround>; // Section 8.7.16 - CVD (Convert to Double-format) defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>; let cx = 1 in -defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128>; +defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128, fpround>; // Section 8.7.17 - CVQ (Convert to Single-format) -defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64>; +defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64, fpextend>; let cx = 1 in -defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32>; +defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32, fpextend>; //----------------------------------------------------------------------------- // Section 8.8 - Branch instructions @@ -1378,13 +1466,13 @@ defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>; // Section 8.8.4 - BCR (Branch on Condition Relative) let cx = 0, cx2 = 0 in -defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>; +defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7, zero>; let cx = 1, cx2 = 0 in -defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>; +defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7, zero>; let cx = 0, cx2 = 1 in -defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>; +defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp, zerofp>; let cx = 1, cx2 = 1 in -defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>; +defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp, zerofp>; // Section 8.8.5 - BSIC (Branch and Save IC) let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in @@ -1481,11 +1569,23 @@ defm SHMB : SHMm<"shm.b", 0x31, I64>; // Pattern Matchings //===----------------------------------------------------------------------===// +// Basic cast between registers. This is often used in ISel patterns, so make +// them as OutPatFrag. 
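+// (These are just INSERT_SUBREG/EXTRACT_SUBREG shorthands: for example, the
+// atomic-load patterns below use (i2l (LDLZXrri ...)) to widen an i32 load
+// result into an i64, and l2i, f2l, and l2f give the inverse and the f32
+// equivalents.)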
+def i2l : OutPatFrag<(ops node:$exp), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>; +def l2i : OutPatFrag<(ops node:$exp), + (EXTRACT_SUBREG $exp, sub_i32)>; +def f2l : OutPatFrag<(ops node:$exp), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_f32)>; +def l2f : OutPatFrag<(ops node:$exp), + (EXTRACT_SUBREG $exp, sub_f32)>; + // Small immediates. -def : Pat<(i32 simm7:$val), (OR32im (LO7 $val), 0)>; +def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>; def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>; // Medium immediates. -def : Pat<(i32 simm32:$val), (LEA32zii 0, 0, (LO32 $val))>; +def : Pat<(i32 simm32:$val), + (EXTRACT_SUBREG (LEAzii 0, 0, (LO32 $val)), sub_i32)>; def : Pat<(i64 simm32:$val), (LEAzii 0, 0, (LO32 $val))>; def : Pat<(i64 uimm32:$val), (ANDrm (LEAzii 0, 0, (LO32 $val)), !add(32, 64))>; // Arbitrary immediates. @@ -1497,6 +1597,54 @@ def : Pat<(i64 imm:$val), (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0, (HI32 imm:$val))>; +// LEA patterns +def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp), + [(add (add node:$base, node:$idx), node:$disp), + (add (add node:$base, node:$disp), node:$idx), + (add node:$base, (add $idx, $disp))]>; +def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp), + (LEArii $base, (LO7 $idx), (LO32 $disp))>; +def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp), + (LEArri $base, $idx, (LO32 $disp))>; +def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp), + (LEASLrii $base, (LO7 $idx), (HI32 $disp))>; +def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp), + (LEASLrri $base, $idx, (HI32 $disp))>; + +// Address calculation patterns and optimizations +// +// Generate following instructions: +// 1. LEA %reg, label@LO32 +// AND %reg, %reg, (32)0 +// 2. LEASL %reg, label@HI32 +// 3. (LEA %reg, label@LO32) +// (AND %reg, %reg, (32)0) +// LEASL %reg, label@HI32(, %reg) +// 4. 
(LEA %reg, label@LO32) +// (AND %reg, %reg, (32)0) +// LEASL %reg, label@HI32(%reg, %got) +// +def velo_only : OutPatFrag<(ops node:$lo), + (ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>; +def vehi_only : OutPatFrag<(ops node:$hi), + (LEASLzii 0, 0, $hi)>; +def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo), + (LEASLrii $lo, 0, $hi)>; +def vehi_lo_imm : OutPatFrag<(ops node:$hi, node:$lo, node:$idx), + (LEASLrii $lo, $idx, $hi)>; +def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo), + (LEASLrri $base, $lo, $hi)>; +foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr", + "tglobaltlsaddr", "tjumptable" ] in { + def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>; + def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>; + def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>; + def : Pat<(add (add (VEhi !cast<SDNode>(type):$hi), I64:$lo), simm7:$val), + (vehi_lo_imm $hi, $lo, (LO7 $val))>; + def : Pat<(add I64:$base, (add (VEhi !cast<SDNode>(type):$hi), I64:$lo)), + (vehi_baselo $base, $hi, $lo)>; +} + // floating point def : Pat<(f32 fpimm:$val), (EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>; @@ -1526,8 +1674,8 @@ def : Pat<(sext_inreg I64:$src, i8), (SRALri (SLLri $src, 56), 56)>; def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8), (EXTRACT_SUBREG (SRALri (SLLri $src, 56), 56), sub_i32)>; -def : Pat<(and (trunc i64:$src), 0xff), - (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(56, 64))>; +def : Pat<(i32 (and (trunc i64:$src), 0xff)), + (EXTRACT_SUBREG (ANDrm $src, !add(56, 64)), sub_i32)>; // Cast to i16 def : Pat<(sext_inreg I32:$src, i16), @@ -1536,28 +1684,34 @@ def : Pat<(sext_inreg I64:$src, i16), (SRALri (SLLri $src, 48), 48)>; def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16), (EXTRACT_SUBREG (SRALri (SLLri $src, 48), 48), sub_i32)>; -def : Pat<(and (trunc i64:$src), 0xffff), - (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(48, 64))>; +def : Pat<(i32 (and (trunc i64:$src), 0xffff)), + (EXTRACT_SUBREG (ANDrm $src, !add(48, 64)), sub_i32)>; // Cast to i32 def : Pat<(i32 (trunc i64:$src)), - (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0)>; -def : Pat<(i32 (fp_to_sint I64:$reg)), (CVTWDSXr RD_RZ, $reg)>; -def : Pat<(i32 (fp_to_sint F32:$reg)), (CVTWSSXr RD_RZ, $reg)>; + (EXTRACT_SUBREG (ANDrm $src, !add(32, 64)), sub_i32)>; +def : Pat<(i32 (fp_to_sint f32:$src)), (CVTWSSXr RD_RZ, $src)>; +def : Pat<(i32 (fp_to_sint f64:$src)), (CVTWDSXr RD_RZ, $src)>; +def : Pat<(i32 (fp_to_sint f128:$src)), (CVTWDSXr RD_RZ, (CVTDQr $src))>; // Cast to i64 -def : Pat<(sext_inreg I64:$src, i32), +def : Pat<(sext_inreg i64:$src, i32), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>; -def : Pat<(i64 (sext i32:$sy)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $sy, 0), sub_i32)>; -def : Pat<(i64 (zext i32:$sy)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $sy, 0), sub_i32)>; -def : Pat<(i64 (fp_to_sint f32:$sy)), (CVTLDr RD_RZ, (CVTDSr $sy))>; -def : Pat<(i64 (fp_to_sint I64:$reg)), (CVTLDr RD_RZ, $reg)>; +def : Pat<(i64 (sext i32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $src, 0), sub_i32)>; +def : Pat<(i64 (zext i32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $src, 0), sub_i32)>; +def : Pat<(i64 (fp_to_sint f32:$src)), (CVTLDr RD_RZ, (CVTDSr $src))>; +def : Pat<(i64 (fp_to_sint f64:$src)), (CVTLDr RD_RZ, $src)>; +def : Pat<(i64 (fp_to_sint f128:$src)), (CVTLDr RD_RZ, (CVTDQr $src))>; // Cast to f32 -def : Pat<(f32 (sint_to_fp i64:$sy)), (CVTSDr 
(CVTDLr i64:$sy))>; +def : Pat<(f32 (sint_to_fp i64:$src)), (CVTSDr (CVTDLr i64:$src))>; + +// Cast to f128 +def : Pat<(f128 (sint_to_fp i32:$src)), (CVTQDr (CVTDWr $src))>; +def : Pat<(f128 (sint_to_fp i64:$src)), (CVTQDr (CVTDLr $src))>; def : Pat<(i64 (anyext i32:$sy)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>; @@ -1625,29 +1779,150 @@ defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>; defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>; defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, ST1Bzii>; -// Address calculation and its optimization -def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>; -def : Pat<(VElo tglobaladdr:$in), - (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>; -def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)), - (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0, - (tglobaladdr:$in1))>; - -// GlobalTLS address calculation and its optimization -def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>; -def : Pat<(VElo tglobaltlsaddr:$in), - (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>; -def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)), - (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0, - (tglobaltlsaddr:$in1))>; - -// Address calculation and its optimization -def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>; -def : Pat<(VElo texternalsym:$in), - (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>; -def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)), - (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0, - (texternalsym:$in1))>; +// Atomic loads +multiclass ATMLDm<SDPatternOperator from, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>; + def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>; + def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>; + def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>; +} +defm : ATMLDm<atomic_load_8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>; +defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>; +defm : ATMLDm<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>; +defm : ATMLDm<atomic_load_64, LDrri, LDrii, LDzri, LDzii>; + +// Optimized atomic loads with sext +multiclass SXATMLDm<SDPatternOperator from, Operand TY, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrri:$addr))), TY)), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrii:$addr))), TY)), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzri:$addr))), TY)), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzii:$addr))), TY)), + (i2l (tozii MEMzii:$addr))>; +} +multiclass SXATMLD32m<SDPatternOperator from, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (sext (from ADDRrri:$addr))), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (sext (from ADDRrii:$addr))), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (sext (from ADDRzri:$addr))), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (sext (from ADDRzii:$addr))), + (i2l (tozii MEMzii:$addr))>; +} +defm : SXATMLDm<atomic_load_8, i8, LD1BSXrri, LD1BSXrii, LD1BSXzri, LD1BSXzii>; +defm 
: SXATMLDm<atomic_load_16, i16, LD2BSXrri, LD2BSXrii, LD2BSXzri, + LD2BSXzii>; +defm : SXATMLD32m<atomic_load_32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>; + +// Optimized atomic loads with zext +multiclass ZXATMLDm<SDPatternOperator from, Operand VAL, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (and (anyext (from ADDRrri:$addr)), VAL)), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (and (anyext (from ADDRrii:$addr)), VAL)), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (and (anyext (from ADDRzri:$addr)), VAL)), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (and (anyext (from ADDRzii:$addr)), VAL)), + (i2l (tozii MEMzii:$addr))>; +} +multiclass ZXATMLD32m<SDPatternOperator from, Operand VAL, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (zext (from ADDRrri:$addr))), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (zext (from ADDRrii:$addr))), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (zext (from ADDRzri:$addr))), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (zext (from ADDRzii:$addr))), + (i2l (tozii MEMzii:$addr))>; +} +defm : ZXATMLDm<atomic_load_8, 0xFF, LD1BZXrri, LD1BZXrii, LD1BZXzri, + LD1BZXzii>; +defm : ZXATMLDm<atomic_load_16, 0xFFFF, LD2BZXrri, LD2BZXrii, LD2BZXzri, + LD2BZXzii>; +defm : ZXATMLD32m<atomic_load_32, 0xFFFFFFFF, LDLZXrri, LDLZXrii, LDLZXzri, + LDLZXzii>; + +// Atomic stores +multiclass ATMSTm<SDPatternOperator from, ValueType ty, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(from ADDRrri:$addr, ty:$src), (torri MEMrri:$addr, $src)>; + def : Pat<(from ADDRrii:$addr, ty:$src), (torii MEMrii:$addr, $src)>; + def : Pat<(from ADDRzri:$addr, ty:$src), (tozri MEMzri:$addr, $src)>; + def : Pat<(from ADDRzii:$addr, ty:$src), (tozii MEMzii:$addr, $src)>; +} +defm : ATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>; +defm : ATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>; +defm : ATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>; +defm : ATMSTm<atomic_store_64, i64, STrri, STrii, STzri, STzii>; + +// Optimized atomic stores with truncate +multiclass TRATMSTm<SDPatternOperator from, + ValueType ty, + SDPatternOperator torri, + SDPatternOperator torii, + SDPatternOperator tozri, + SDPatternOperator tozii> { + def : Pat<(from ADDRrri:$addr, (i32 (trunc i64:$src))), + (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + def : Pat<(from ADDRrii:$addr, (i32 (trunc i64:$src))), + (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + def : Pat<(from ADDRzri:$addr, (i32 (trunc i64:$src))), + (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))), + (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>; +} +defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>; +defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>; +defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>; + +// Atomic swaps +def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)), + (TS1AMWrir $src, 0, $flag, $new)>; +def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)), + (TS1AMWrii MEMriRRM:$src, 15, $new)>; +def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)), + (TS1AMLrir MEMriRRM:$src, (LEAzii 0, 0, 255), i64:$new)>; + +//===----------------------------------------------------------------------===// +// SJLJ 
Exception handling patterns +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + let isTerminator = 1 in + def EH_SjLj_LongJmp : Pseudo<(outs), (ins I64:$buf), + "# EH_SJLJ_LONGJMP", + [(VEeh_sjlj_longjmp I64:$buf)]>; + + def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf), + "# EH_SJLJ_SETJMP", + [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>; + + def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH", + [(VEeh_sjlj_setup_dispatch)]>; +} + +let isTerminator = 1, isBranch = 1, isCodeGenOnly = 1 in + def EH_SjLj_Setup : Pseudo<(outs), (ins brtarget32:$dst), + "# EH_SJlJ_SETUP $dst">; + +//===----------------------------------------------------------------------===// +// Branch related patterns +//===----------------------------------------------------------------------===// // Branches def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>; @@ -1681,6 +1956,8 @@ multiclass BRCCFm<ValueType ty, SDPatternOperator BrOpNode1, } defm : BRCCFm<f32, BRCFSrr, BRCFSir>; defm : BRCCFm<f64, BRCFDrr, BRCFDir>; +def : Pat<(brcc cond:$cond, f128:$l, f128:$r, bb:$addr), + (BRCFDir (fcond2cc $cond), 0, (FCMPQrr $r, $l), bb:$addr)>; //===----------------------------------------------------------------------===// // Pseudo Instructions @@ -1737,53 +2014,42 @@ let Uses = [SX11], hasSideEffects = 1 in def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins), "# GET STACK TOP", [(set iPTR:$dst, (GetStackTop))]>; + +// MEMBARRIER +let hasSideEffects = 1 in +def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >; + +//===----------------------------------------------------------------------===// +// Other patterns +//===----------------------------------------------------------------------===// + // SETCC pattern matches // // CMP %tmp, lhs, rhs ; compare lhs and rhs // or %res, 0, (0)1 ; initialize by 0 // CMOV %res, (63)0, %tmp ; set 1 if %tmp is true -def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrm (icond2cc $cond), - (CMPSLrr i64:$LHS, i64:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrm (icond2cc $cond), - (CMPULrr i64:$LHS, i64:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrm (icond2cc $cond), - (CMPSWSXrr i32:$LHS, i32:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrm (icond2cc $cond), - (CMPUWrr i32:$LHS, i32:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)), - (EXTRACT_SUBREG - (CMOVDrm (fcond2cc $cond), - (FCMPDrr f64:$LHS, f64:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)), - (EXTRACT_SUBREG - (CMOVSrm (fcond2cc $cond), - (FCMPSrr f32:$LHS, f32:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; +class setccrr<Instruction INSN> : + OutPatFrag<(ops node:$cond, node:$comp), + (EXTRACT_SUBREG + (INSN $cond, $comp, + !add(63, 64), // means (63)0 == 1 + (ORim 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc i32:$l, i32:$r, CCSIOp:$cond)), + (setccrr<CMOVWrm> (icond2cc $cond), (CMPSWSXrr $l, $r))>; +def : Pat<(i32 (setcc i32:$l, i32:$r, CCUIOp:$cond)), + (setccrr<CMOVWrm> (icond2cc $cond), (CMPUWrr $l, $r))>; +def : Pat<(i32 (setcc i64:$l, 
i64:$r, CCSIOp:$cond)), + (setccrr<CMOVLrm> (icond2cc $cond), (CMPSLrr $l, $r))>; +def : Pat<(i32 (setcc i64:$l, i64:$r, CCUIOp:$cond)), + (setccrr<CMOVLrm> (icond2cc $cond), (CMPULrr $l, $r))>; +def : Pat<(i32 (setcc f32:$l, f32:$r, cond:$cond)), + (setccrr<CMOVSrm> (fcond2cc $cond), (FCMPSrr $l, $r))>; +def : Pat<(i32 (setcc f64:$l, f64:$r, cond:$cond)), + (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPDrr $l, $r))>; +def : Pat<(i32 (setcc f128:$l, f128:$r, cond:$cond)), + (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPQrr $l, $r))>; // Special SELECTCC pattern matches // Use min/max for better performance. @@ -1824,152 +2090,171 @@ def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)), def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)), (MINSWSXrr $LHS, $RHS)>; +// Helper classes to construct cmov patterns for the ease. +// +// Hiding INSERT_SUBREG/EXTRACT_SUBREG patterns. + +class cmovrr<Instruction INSN> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (INSN $cond, $comp, $t, $f)>; +class cmovrm<Instruction INSN, SDNodeXForm MOP = MIMM> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (INSN $cond, $comp, (MOP $t), $f)>; +class cmov32rr<Instruction INSN, SubRegIndex sub_oty> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (EXTRACT_SUBREG + (INSN $cond, $comp, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_oty), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)), + sub_oty)>; +class cmov32rm<Instruction INSN, SubRegIndex sub_oty, SDNodeXForm MOP = MIMM> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (EXTRACT_SUBREG + (INSN $cond, $comp, + (MOP $t), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)), + sub_oty)>; +class cmov128rr<Instruction INSN> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (INSN $cond, $comp, + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (INSN $cond, $comp, + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; + // Generic SELECTCC pattern matches // // CMP %tmp, %l, %r ; compare %l and %r // or %res, %f, (0)1 ; initialize by %f // CMOV %res, %t, %tmp ; set %t if %tmp is true -// selectcc for i64 result -def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), - (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), - (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; - -// selectcc for i32 result def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr (icond2cc $cond), - (CMPSWSXrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPSWSXrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr 
(icond2cc $cond), - (CMPUWrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPUWrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPSLrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPSLrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPULrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPULrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVSrr (fcond2cc $cond), - (FCMPSrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVSrr, sub_i32> (fcond2cc $cond), (FCMPSrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVDrr (fcond2cc $cond), - (FCMPDrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPDrr $l, $r), + $t, $f)>; +def : Pat<(i32 (selectcc f128:$l, f128:$r, i32:$t, i32:$f, cond:$cond)), + (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPQrr $l, $r), + $t, $f)>; -// selectcc for f64 result -def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), - (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)), - (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), + (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f128:$l, f128:$r, i64:$t, i64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; -// selectcc for f32 result def : 
Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr (icond2cc $cond), - (CMPSWSXrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPSWSXrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr (icond2cc $cond), - (CMPUWrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPUWrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPSLrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPSLrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPULrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPULrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVSrr (fcond2cc $cond), - (FCMPSrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVSrr, sub_f32> (fcond2cc $cond), (FCMPSrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVDrr (fcond2cc $cond), - (FCMPDrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPDrr $l, $r), + $t, $f)>; +def : Pat<(f32 (selectcc f128:$l, f128:$r, f32:$t, f32:$f, cond:$cond)), + (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPQrr $l, $r), + $t, $f)>; + +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), + (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f128:$l, f128:$r, f64:$t, f64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; + +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (cmov128rr<CMOVLrr> (icond2cc 
$cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (cmov128rr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc f32:$l, f32:$r, f128:$t, f128:$f, cond:$cond)), + (cmov128rr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)), + (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)), + (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; // Generic SELECT pattern matches // Use cmov.w for all cases since %pred holds i32. // // CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true +def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), + (cmov32rr<CMOVWrr, sub_i32> CC_INE, $pred, $t, $f)>; +def : Pat<(i32 (select i32:$pred, (i32 mimm:$t), i32:$f)), + (cmov32rm<CMOVWrm, sub_i32> CC_INE, $pred, $t, $f)>; +def : Pat<(i32 (select i32:$pred, i32:$t, (i32 mimm:$f))), + (cmov32rm<CMOVWrm, sub_i32> CC_IEQ, $pred, $f, $t)>; + def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)), - (CMOVWrr CC_INE, $pred, $t, $f)>; + (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>; +def : Pat<(i64 (select i32:$pred, (i64 mimm:$t), i64:$f)), + (cmovrm<CMOVWrm, MIMM> CC_INE, $pred, $t, $f)>; +def : Pat<(i64 (select i32:$pred, i64:$t, (i64 mimm:$f))), + (cmovrm<CMOVWrm, MIMM> CC_IEQ, $pred, $f, $t)>; -def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), - (EXTRACT_SUBREG - (CMOVWrr CC_INE, $pred, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; +def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), + (cmov32rr<CMOVWrr, sub_f32> CC_INE, $pred, $t, $f)>; +def : Pat<(f32 (select i32:$pred, (f32 mimmfp:$t), f32:$f)), + (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_INE, $pred, $t, $f)>; +def : Pat<(f32 (select i32:$pred, f32:$t, (f32 mimmfp:$f))), + (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_IEQ, $pred, $f, $t)>; def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)), - (CMOVWrr CC_INE, $pred, $t, $f)>; + (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>; +def : Pat<(f64 (select i32:$pred, (f64 mimmfp:$t), f64:$f)), + (cmovrm<CMOVWrm, MIMMFP> CC_INE, $pred, $t, $f)>; +def : Pat<(f64 (select i32:$pred, f64:$t, (f64 mimmfp:$f))), + (cmovrm<CMOVWrm, MIMMFP> CC_IEQ, $pred, $f, $t)>; -def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), - (EXTRACT_SUBREG - (CMOVWrr CC_INE, $pred, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; +def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)), + (cmov128rr<CMOVWrr> CC_INE, $pred, $t, $f)>; // bitconvert def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>; @@ -1982,24 +2267,48 @@ def : Pat<(f32 (bitconvert i32:$op)), (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $op, sub_i32), 32), sub_f32)>; -// Bits operations pattern matchings. -def : Pat<(i32 (ctpop i32:$src)), - (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>; -def : Pat<(i32 (ctlz i32:$src)), - (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; -def : Pat<(i64 (bswap i64:$src)), - (BSWPri $src, 0)>; -def : Pat<(i32 (bswap i32:$src)), - (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>; +// Optimize code A generated by `(unsigned char)c << 5` to B. 
+// A) sla.w.sx %s0, %s0, 5 +// lea %s1, 224 ; 0xE0 +// and %s0, %s0, %s1 +// B) sla.w.sx %s0, %s0, 5 +// and %s0, %s0, (56)0 + +def : Pat<(i32 (and i32:$val, 0xff)), + (EXTRACT_SUBREG + (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32), + !add(56, 64)), sub_i32)>; +def : Pat<(i32 (and i32:$val, 0xffff)), + (EXTRACT_SUBREG + (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32), + !add(48, 64)), sub_i32)>; +def : Pat<(i64 (and i64:$val, 0xffffffff)), + (ANDrm $val, !add(32, 64))>; + +//===----------------------------------------------------------------------===// +// Vector Instruction Pattern Stuff +//===----------------------------------------------------------------------===// + +// Custom intermediate ISDs. +class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>; +def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, + [SDTCisVec<0>, IsVLVT<2>]>>; + +// Whether this is an all-true mask (assuming undef-bits above VL are all-true). +def true_mask : PatLeaf< + (vec_broadcast (i32 nonzero), (i32 srcvalue))>; +// Match any broadcast (ignoring VL). +def any_broadcast : PatFrag<(ops node:$sx), + (vec_broadcast node:$sx, (i32 srcvalue))>; + +// Vector instructions. +include "VEInstrVec.td" + +// The vevlintrin +include "VEInstrIntrinsicVL.td" -// Several special pattern matches to optimize code +// Patterns and intermediate SD nodes (VEC_*). +include "VEInstrPatternsVec.td" -def : Pat<(i32 (and i32:$lhs, 0xff)), - (AND32rm $lhs, !add(56, 64))>; -def : Pat<(i32 (and i32:$lhs, 0xffff)), - (AND32rm $lhs, !add(48, 64))>; -def : Pat<(i32 (and i32:$lhs, 0xffffffff)), - (AND32rm $lhs, !add(32, 64))>; +// Patterns and intermediate SD nodes (VVP_*). +include "VVPInstrPatternsVec.td" diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td new file mode 100644 index 000000000000..9ec10838db05 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td @@ -0,0 +1,1604 @@ +def : Pat<(int_ve_vl_vld_vssl i64:$sy, i64:$sz, i32:$vl), (VLDrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld_vssl simm7:$I, i64:$sz, i32:$vl), (VLDirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldunc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldunc_vssvl i64:$sy, i64:$sz, 
v256f64:$pt, i32:$vl), (VLDUNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldunc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldunc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2Drrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2Dirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2DNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2DNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2Drrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2Dirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2d_vssvl simm7:$I, 
i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2DNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2DNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, 
simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : 
Pat<(int_ve_vl_vstl_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, 
i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : 
Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pfchv_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_pfchv_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_pfchvnc_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_pfchvnc_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_lvm_mmss v256i1:$ptm, uimm6:$N, i64:$sz), (LVMir_m (ULO7 $N), i64:$sz, v256i1:$ptm)>; +def : Pat<(int_ve_vl_lvm_MMss v512i1:$ptm, uimm6:$N, i64:$sz), (LVMyir_y (ULO7 $N), i64:$sz, v512i1:$ptm)>; +def : Pat<(int_ve_vl_svm_sms v256i1:$vmz, uimm6:$N), (SVMmi v256i1:$vmz, (ULO7 $N))>; +def : Pat<(int_ve_vl_svm_sMs v512i1:$vmz, uimm6:$N), (SVMyi v512i1:$vmz, (ULO7 $N))>; +def : Pat<(int_ve_vl_vbrdd_vsl f64:$sy, i32:$vl), (VBRDrl f64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrdd_vsvl f64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v f64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdd_vsmvl f64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v f64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdl_vsl i64:$sy, i32:$vl), (VBRDrl i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrdl_vsvl i64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vbrdl_vsmvl i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdl_vsl simm7:$I, i32:$vl), (VBRDil (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vbrdl_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDil_v (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdl_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrds_vsl f32:$sy, i32:$vl), (VBRDUrl f32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrds_vsvl f32:$sy, v256f64:$pt, i32:$vl), (VBRDUrl_v f32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrds_vsmvl f32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDUrml_v f32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsl i32:$sy, i32:$vl), (VBRDLrl i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrdw_vsvl i32:$sy, v256f64:$pt, i32:$vl), (VBRDLrl_v i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsmvl i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLrml_v i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsl simm7:$I, i32:$vl), (VBRDLil (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vbrdw_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDLil_v (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrd_vsl i64:$sy, i32:$vl), (PVBRDrl i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrd_vsvl i64:$sy, v256f64:$pt, i32:$vl), (PVBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrd_vsMvl i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRDrml_v i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvl uimm7:$N, v256f64:$vz, i32:$vl), (VMVivl (ULO7 $N), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmv_vsvvl uimm7:$N, v256f64:$vz, v256f64:$pt, i32:$vl), (VMVivl_v (ULO7 $N), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvmvl uimm7:$N, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMVivml_v (ULO7 $N), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; 
+def : Pat<(int_ve_vl_vadduw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vadduw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vadduw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvaddu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDUrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvaddu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXvvl_v v256f64:$vy, 
v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvadds_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBULrvl i64:$sy, 
v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBUrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXrvl_v i32:$sy, v256f64:$vz, 
i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSLivl 
(LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmuluw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmuluw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmuluw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvl i32:$sy, v256f64:$vz, 
i32:$vl), (VMULSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, 
v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulslw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulslw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulslw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSLWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulslw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulslw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulslw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, i64:$sy, i32:$vl), (VDIVULvrl v256f64:$vy, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVULvrl_v v256f64:$vy, 
i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVULvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVULvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVUWvrl v256f64:$vy, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVUWvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVUWvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVUWvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; 
+def : Pat<(int_ve_vl_vdivswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWSXvrl v256f64:$vy, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWSXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWSXvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWSXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWZXvrl v256f64:$vy, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWZXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWZXvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWZXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, 
i64:$sy, i32:$vl), (VDIVSLvrl v256f64:$vy, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVSLvrl_v v256f64:$vy, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSLvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSLvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmpu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUvvl_v 
v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVCMPUrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmpu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vsvl 
i64:$sy, v256f64:$vz, i32:$vl), (PVCMPSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmps_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : 
Pat<(int_ve_vl_vmaxswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMAXSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmaxs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), 
(VMINSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMINSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmins_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMAXSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMINSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSLivl 
(LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VANDrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVANDrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : 
Pat<(int_ve_vl_pvor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VXORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVXORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_veqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_veqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pveqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, 
i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>; +def : Pat<(int_ve_vl_vseq_vvl v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>; +def : Pat<(int_ve_vl_pvseqlo_vvl v256f64:$pt, i32:$vl), (PVSEQLOl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsequp_vl i32:$vl), (PVSEQUPl i32:$vl)>; +def : Pat<(int_ve_vl_pvsequp_vvl v256f64:$pt, i32:$vl), (PVSEQUPl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvseq_vl i32:$vl), (PVSEQl i32:$vl)>; +def : Pat<(int_ve_vl_pvseq_vvl v256f64:$pt, i32:$vl), (PVSEQl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLLvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRLvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : 
Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vslawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsla_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLAvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsla_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vslal_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLALvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLALvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, 
v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsra_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRAvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsra_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsral_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRALvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRALvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, 
i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl), (VSFAvrrl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvrrl_v v256f64:$vz, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, simm7:$I, i64:$sz, i32:$vl), (VSFAvirl v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvirl_v v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvrrml_v v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvirml_v v256f64:$vz, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfaddd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFADDDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfaddd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFADDSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfadds_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFADDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFADDrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfadd_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vsvMvl 
i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFSUBDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFSUBSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFSUBvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfsub_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFSUBrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfsub_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuld_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMULDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuld_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vfmuls_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuls_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuls_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMULSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuls_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuls_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuls_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFDIVDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFDIVSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsqrtd_vvl v256f64:$vy, i32:$vl), (VFSQRTDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsqrtd_vvvl v256f64:$vy, v256f64:$pt, 
i32:$vl), (VFSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsqrts_vvl v256f64:$vy, i32:$vl), (VFSQRTSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmpd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFCMPDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmpd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFCMPSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmps_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFCMPvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfcmp_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFCMPrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfcmp_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMAXDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vsvmvl 
f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMAXSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMAXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmax_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMAXrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmax_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmind_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMINDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmind_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMINSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmins_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_pvfmin_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMINvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmin_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmin_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMINrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmin_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmin_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmin_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vsvvmvl f32:$sy, v256f64:$vz, 
v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbs_vvvvvl 
v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvsvl v256f64:$vy, f64:$sy, 
v256f64:$vw, i32:$vl), (VFNMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_pvfnmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFNMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbs_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBvvvl 
v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrcpd_vvl v256f64:$vy, i32:$vl), (VRCPDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrcpd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrcps_vvl v256f64:$vy, i32:$vl), (VRCPSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrcps_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvrcp_vvl v256f64:$vy, i32:$vl), (PVRCPvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvrcp_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRCPvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrtd_vvl v256f64:$vy, i32:$vl), (VRSQRTDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrts_vvl v256f64:$vy, i32:$vl), (VRSQRTSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvrsqrt_vvl v256f64:$vy, i32:$vl), (PVRSQRTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvrsqrt_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrtdnex_vvl v256f64:$vy, i32:$vl), (VRSQRTDNEXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrtdnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrtsnex_vvl v256f64:$vy, i32:$vl), (VRSQRTSNEXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrtsnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvrsqrtnex_vvl v256f64:$vy, i32:$vl), (PVRSQRTNEXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvrsqrtnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsx_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), 
(VCVTWDSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdsxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzx_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdzxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssx_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwssx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwssxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszx_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwszx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwszxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtws_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvcvtws_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtws_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_NONE, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtwsrz_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvcvtwsrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_RZ, 
v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtwsrz_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_RZ, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtld_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtld_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtld_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtldrz_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtldrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtldrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtdw_vvl v256f64:$vy, i32:$vl), (VCVTDWvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtdw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtsw_vvl v256f64:$vy, i32:$vl), (VCVTSWvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtsw_vvl v256f64:$vy, i32:$vl), (PVCVTSWvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtdl_vvl v256f64:$vy, i32:$vl), (VCVTDLvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtdl_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDLvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtds_vvl v256f64:$vy, i32:$vl), (VCVTDSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtds_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtsd_vvl v256f64:$vy, i32:$vl), (VCVTSDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtsd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrg_vvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrg_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrg_vsvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGrvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrg_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrg_vsvml simm7:$I, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGivml (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrg_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrgw_vvvMl v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl), (VMRGWvvml v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrgw_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (VMRGWvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl), (VSHFvvrl v256f64:$vy, v256f64:$vz, i64:$sy, 
i32:$vl)>; +def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSHFvvrl_v v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, uimm6:$N, i32:$vl), (VSHFvvil v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSHFvvil_v v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcp_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vex_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEXvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmklat_ml i32:$vl), (VFMKLal i32:$vl)>; +def : Pat<(int_ve_vl_vfmklaf_ml i32:$vl), (VFMKLnal i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkat_Ml i32:$vl), (VFMKyal i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkaf_Ml i32:$vl), (VFMKynal i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklne_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleq_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklge_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklle_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnum_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgtnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklltnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleqnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl 
CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwne_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweq_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwge_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwle_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnum_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgtnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwltnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweqnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GENAN, v256f64:$vz, v256i1:$vm, 
i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlone_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupne_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeq_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeq_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloge_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupge_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlole_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuple_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonum_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnum_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonan_mvl 
v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IG, v256f64:$vz, 
v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IL, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwne_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_INE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweq_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IEQ, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwge_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IGE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwle_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_ILE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnum_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwltnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweqnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdne_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NE, v256f64:$vz, i32:$vl)>; 
+def : Pat<(int_ve_vl_vfmkdne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeq_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdge_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdle_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnum_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgtnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdltnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeqnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksne_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseq_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksge_mvl v256f64:$vz, i32:$vl), 
(VFMKSvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksle_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnum_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgtnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksltnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseqnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslone_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupne_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), 
(PVFMKSUPvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeq_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeq_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloge_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupge_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslole_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuple_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonum_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnum_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NENAN, 
v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_G, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_L, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksne_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseq_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQ, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksge_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksle_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnum_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; 
+def : Pat<(int_ve_vl_pvfmksgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksltnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseqnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwsx_vvl v256f64:$vy, i32:$vl), (VSUMWSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwsx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWSXvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwzx_vvl v256f64:$vy, i32:$vl), (VSUMWZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwzx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWZXvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsuml_vvl v256f64:$vy, i32:$vl), (VSUMLvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsuml_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMLvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfsumd_vvl v256f64:$vy, i32:$vl), (VFSUMDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsumd_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMDvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfsums_vvl v256f64:$vy, i32:$vl), (VFSUMSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsums_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMSvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswfstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswfstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxswlstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxswfstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxswlstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswfstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswfstsx_vvvl v256f64:$vy, v256f64:$pt, 
i32:$vl), (VRMINSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswlstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswfstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswlstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxslfst_vvl v256f64:$vy, i32:$vl), (VRMAXSLFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxsllst_vvl v256f64:$vy, i32:$vl), (VRMAXSLLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminslfst_vvl v256f64:$vy, i32:$vl), (VRMINSLFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminsllst_vvl v256f64:$vy, i32:$vl), (VRMINSLLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxdfst_vvl v256f64:$vy, i32:$vl), (VFRMAXDFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxdfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxdlst_vvl v256f64:$vy, i32:$vl), (VFRMAXDLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxdlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxsfst_vvl v256f64:$vy, i32:$vl), (VFRMAXSFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxslst_vvl v256f64:$vy, i32:$vl), (VFRMAXSLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmindfst_vvl v256f64:$vy, i32:$vl), (VFRMINDFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmindfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmindlst_vvl v256f64:$vy, i32:$vl), (VFRMINDLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmindlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrminsfst_vvl v256f64:$vy, i32:$vl), (VFRMINSFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrminsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrminslst_vvl v256f64:$vy, i32:$vl), (VFRMINSLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrminslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrand_vvl v256f64:$vy, i32:$vl), (VRANDvl v256f64:$vy, i32:$vl)>; 
+def : Pat<(int_ve_vl_vrand_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRANDvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vror_vvl v256f64:$vy, i32:$vl), (VRORvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vror_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRORvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vrxor_vvl v256f64:$vy, i32:$vl), (VRXORvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrxor_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRXORvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; 
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : 
Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvizl_v 
v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, 
v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : 
Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, 
v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, 
v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, 
v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml 
v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_andm_mmm v256i1:$vmy, v256i1:$vmz), (ANDMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_andm_MMM v512i1:$vmy, v512i1:$vmz), (ANDMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_orm_mmm v256i1:$vmy, v256i1:$vmz), (ORMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_orm_MMM v512i1:$vmy, v512i1:$vmz), (ORMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_xorm_mmm v256i1:$vmy, v256i1:$vmz), (XORMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_xorm_MMM v512i1:$vmy, v512i1:$vmz), (XORMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_eqvm_mmm v256i1:$vmy, v256i1:$vmz), (EQVMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_eqvm_MMM v512i1:$vmy, v512i1:$vmz), (EQVMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_nndm_mmm v256i1:$vmy, v256i1:$vmz), (NNDMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_nndm_MMM v512i1:$vmy, v512i1:$vmz), (NNDMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_negm_mm v256i1:$vmy), (NEGMm v256i1:$vmy)>; +def : Pat<(int_ve_vl_negm_MM v512i1:$vmy), (NEGMy v512i1:$vmy)>; +def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>; +def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>; +def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td new file mode 100644 index 000000000000..69ea133ceed0 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td @@ -0,0 +1,64 @@ +/// Pattern Matchings for VEL intrinsic instructions. + +/// Intrinsic patterns written by hand. + +// SVOB pattern. +def : Pat<(int_ve_vl_svob), (SVOB)>; + +// Pack patterns. 
+def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)), + (ORrr (f2l (LDUrii MEMrii:$addr0)), + (i2l (LDLZXrii MEMrii:$addr1)))>; + +def : Pat<(i64 (int_ve_vl_pack_f32a ADDRrii:$addr)), + (MULULrr + (i2l (LDLZXrii MEMrii:$addr)), + (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 (i64 0x0000000100000001))), + !add(32, 64)), 0, + (HI32 (i64 0x0000000100000001))))>; + +// The extract/insert patterns. +def : Pat<(v256i1 (int_ve_vl_extract_vm512u v512i1:$vm)), + (EXTRACT_SUBREG v512i1:$vm, sub_vm_even)>; + +def : Pat<(v256i1 (int_ve_vl_extract_vm512l v512i1:$vm)), + (EXTRACT_SUBREG v512i1:$vm, sub_vm_odd)>; + +def : Pat<(v512i1 (int_ve_vl_insert_vm512u v512i1:$vmx, v256i1:$vmy)), + (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_even)>; + +def : Pat<(v512i1 (int_ve_vl_insert_vm512l v512i1:$vmx, v256i1:$vmy)), + (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_odd)>; + +// VMRG patterns. +def : Pat<(int_ve_vl_vmrgw_vsvMl i32:$sy, v256f64:$vz, v512i1:$vm, i32:$vl), + (VMRGWrvml (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrgw_vsvMvl i32:$sy, v256f64:$vz, v512i1:$vm, + v256f64:$pt, i32:$vl), + (VMRGWrvml_v (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl, + v256f64:$pt)>; + +// VMV patterns. +def : Pat<(int_ve_vl_vmv_vsvl i32:$sy, v256f64:$vz, i32:$vl), + (VMVrvl (i2l i32:$sy), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmv_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), + (VMVrvl_v (i2l i32:$sy), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, + i32:$vl), + (VMVrvml_v (i2l i32:$sy), v256f64:$vz, v256i1:$vm, i32:$vl, + v256f64:$pt)>; + +// LSV patterns. +def : Pat<(int_ve_vl_lsv_vvss v256f64:$pt, i32:$sy, i64:$sz), + (LSVrr_v (i2l i32:$sy), i64:$sz, v256f64:$pt)>; + +// LVS patterns. +def : Pat<(int_ve_vl_lvsl_svs v256f64:$vx, i32:$sy), + (LVSvr v256f64:$vx, (i2l i32:$sy))>; +def : Pat<(int_ve_vl_lvsd_svs v256f64:$vx, i32:$sy), + (LVSvr v256f64:$vx, (i2l i32:$sy))>; +def : Pat<(int_ve_vl_lvss_svs v256f64:$vx, i32:$sy), + (l2f (LVSvr v256f64:$vx, (i2l i32:$sy)))>; + +/// Intrinsic patterns automatically generated. +include "VEInstrIntrinsicVL.gen.td" diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td new file mode 100644 index 000000000000..0084876f9f1b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td @@ -0,0 +1,91 @@ +//===-- VEInstrPatternsVec.td - VEC_-type SDNodes and isel for VE Target --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the VEC_* prefixed intermediate SDNodes and their +// isel patterns. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, + SDNodeXForm ImmCast, SDNodeXForm SuperRegCast> { + // VBRDil + def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)), + (VBRDil (ImmCast $sy), i32:$vl)>; + + // VBRDrl + def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)), + (VBRDrl (SuperRegCast $sy), i32:$vl)>; +} + +multiclass vbrd_elem64<ValueType v64, ValueType s64, + SDPatternOperator ImmOp, SDNodeXForm ImmCast> { + // VBRDil + def : Pat<(v64 (vec_broadcast (s64 ImmOp:$sy), i32:$vl)), + (VBRDil (ImmCast $sy), i32:$vl)>; + + // VBRDrl + def : Pat<(v64 (vec_broadcast s64:$sy, i32:$vl)), + (VBRDrl s64:$sy, i32:$vl)>; +} + +multiclass extract_insert_elem32<ValueType v32, ValueType s32, + SDNodeXForm SubRegCast, + SDNodeXForm SuperRegCast> { + // LVSvi + def: Pat<(s32 (extractelt v32:$vec, uimm7:$idx)), + (SubRegCast (LVSvi v32:$vec, (ULO7 $idx)))>; + // LVSvr + def: Pat<(s32 (extractelt v32:$vec, i64:$idx)), + (SubRegCast (LVSvr v32:$vec, $idx))>; + + // LSVir + def: Pat<(v32 (insertelt v32:$vec, s32:$val, uimm7:$idx)), + (LSVir_v (ULO7 $idx), (SuperRegCast $val), $vec)>; + // LSVrr + def: Pat<(v32 (insertelt v32:$vec, s32:$val, i64:$idx)), + (LSVrr_v $idx, (SuperRegCast $val), $vec)>; +} + +multiclass extract_insert_elem64<ValueType v64, ValueType s64> { + // LVSvi + def: Pat<(s64 (extractelt v64:$vec, uimm7:$idx)), + (LVSvi v64:$vec, (ULO7 $idx))>; + // LVSvr + def: Pat<(s64 (extractelt v64:$vec, i64:$idx)), + (LVSvr v64:$vec, $idx)>; + + // LSVir + def: Pat<(v64 (insertelt v64:$vec, s64:$val, uimm7:$idx)), + (LSVir_v (ULO7 $idx), $val, $vec)>; + // LSVrr + def: Pat<(v64 (insertelt v64:$vec, s64:$val, i64:$idx)), + (LSVrr_v $idx, $val, $vec)>; +} + +multiclass patterns_elem32<ValueType v32, ValueType s32, + SDPatternOperator ImmOp, SDNodeXForm ImmCast, + SDNodeXForm SubRegCast, SDNodeXForm SuperRegCast> { + defm : vbrd_elem32<v32, s32, ImmOp, ImmCast, SuperRegCast>; + defm : extract_insert_elem32<v32, s32, SubRegCast, SuperRegCast>; +} + +multiclass patterns_elem64<ValueType v64, ValueType s64, + SDPatternOperator ImmOp, SDNodeXForm ImmCast> { + defm : vbrd_elem64<v64, s64, ImmOp, ImmCast>; + defm : extract_insert_elem64<v64, s64>; +} + +defm : patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>; +defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>; + +defm : patterns_elem64<v256i64, i64, simm7, LO7>; +defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td new file mode 100644 index 000000000000..4a8476f7288a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td @@ -0,0 +1,1510 @@ +//===----------------------------------------------------------------------===// +// Vector Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pseudo instructions for VM512 modifications +//===----------------------------------------------------------------------===// + +// LVM/SVM instructions using VM512 +let hasSideEffects = 0, isCodeGenOnly = 1 in { + let Constraints = "$vx = $vd", DisableEncoding = "$vd" in { + 
def LVMyir_y : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz, VM512:$vd), + "# pseudo LVM $vx, $sy, $sz, $vd">; + def LVMyim_y : Pseudo<(outs VM512:$vx), + (ins uimm3:$sy, mimm:$sz, VM512:$vd), + "# pseudo LVM $vx, $sy, $sz, $vd">; + } + def LVMyir : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz), + "# pseudo LVM $vx, $sy, $sz">; + def LVMyim : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, mimm:$sz), + "# pseudo LVM $vx, $sy, $sz">; + def SVMyi : Pseudo<(outs I64:$sx), (ins VM512:$vz, uimm3:$sy), + "# pseudo SVM $sx, $vz, $sy">; +} + +// VFMK/VFMKW/VFMKS instructions using VM512 +let hasSideEffects = 0, isCodeGenOnly = 1, DisableEncoding = "$vl" in { + def VFMKyal : Pseudo<(outs VM512:$vmx), (ins I32:$vl), + "# pseudo-vfmk.at $vmx">; + def VFMKynal : Pseudo<(outs VM512:$vmx), (ins I32:$vl), + "# pseudo-vfmk.af $vmx">; + def VFMKWyvl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, I32:$vl), + "# pseudo-vfmk.w.$cf $vmx, $vz">; + def VFMKWyvyl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl), + "# pseudo-vfmk.w.$cf $vmx, $vz, $vm">; + def VFMKSyvl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, I32:$vl), + "# pseudo-vfmk.s.$cf $vmx, $vz">; + def VFMKSyvyl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl), + "# pseudo-vfmk.s.$cf $vmx, $vz, $vm">; +} + +// ANDM/ORM/XORM/EQVM/NNDM/NEGM instructions using VM512 +let hasSideEffects = 0, isCodeGenOnly = 1 in { + def ANDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# andm $vmx, $vmy, $vmz">; + def ORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# orm $vmx, $vmy, $vmz">; + def XORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# xorm $vmx, $vmy, $vmz">; + def EQVMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# eqvm $vmx, $vmy, $vmz">; + def NNDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# nndm $vmx, $vmy, $vmz">; + def NEGMy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy), + "# negm $vmx, $vmy">; +} + +//===----------------------------------------------------------------------===// +// Instructions +// +// Define all vector instructions defined in the SX-Aurora TSUBASA Architecture +// Guide here. For mnemonics, we use those defined in the Vector Engine +// Assembly Language Reference Manual. +// +// Some instructions can update existing data through an instruction sequence +// like the following. +// +// lea %s0, 256 +// lea %s1, 128 +// lvl %s0 +// vbrd %v0, 2 # v0 = { 2, 2, 2, ..., 2, 2, 2 } +// lvl %s1 +// vbrd %v0, 3 # v0 = { 3, 3, 3, ..., 3, 2, 2, 2, ..., 2, 2, 2 } +// +// In order to represent the above with virtual registers, we define +// instructions with an additional base register and a `_v` suffix in the +// mnemonic. +// +// lea t0, 256 +// lea t1, 128 +// lvl t0 +// vbrd tv0, 2 +// lvl t1 +// vbrd_v tv1, 3, tv0 +// +// We also define instruction variants that carry a pseudo VL operand to +// represent the VL register value, marked by the following suffixes in the +// mnemonic. +// +// l: has an additional I32 register to represent the VL value. +// L: has an additional VL register to represent the VL value.
+//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------- +// Section 8.9 - Vector Load/Store and Move Instructions +//----------------------------------------------------------------------------- + +// Multiclass for VLD instructions +let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VLDbm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in, + string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RVM<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, $sy, $sz")>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, $sy, $sz")>; +} +multiclass VLDlm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in> { + defm "" : VLDbm<opcStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VLDbm<opcStr, opc, RC, !con(dag_in, (ins I32:$vl)), "$vl,">; + defm L : VLDbm<opcStr, opc, RC, !con(dag_in, (ins VLS:$vl)), "$vl,">; + } +} +let VE_VLIndex = 3 in +multiclass VLDtgm<string opcStr, bits<8>opc, RegisterClass RC> { + defm rr : VLDlm<opcStr, opc, RC, (ins I64:$sy, I64:$sz)>; + let cy = 0 in + defm ir : VLDlm<opcStr, opc, RC, (ins simm7:$sy, I64:$sz)>; + let cz = 0 in + defm rz : VLDlm<opcStr, opc, RC, (ins I64:$sy, zero:$sz)>; + let cy = 0, cz = 0 in + defm iz : VLDlm<opcStr, opc, RC, (ins simm7:$sy, zero:$sz)>; +} +multiclass VLDm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1 in defm "" : VLDtgm<opcStr, opc, RC>; + let vc = 0 in defm NC : VLDtgm<opcStr#".nc", opc, RC>; +} + +// Section 8.9.1 - VLD (Vector Load) +defm VLD : VLDm<"vld", 0x81, V64>; + +// Section 8.9.2 - VLDU (Vector Load Upper) +defm VLDU : VLDm<"vldu", 0x82, V64>; + +// Section 8.9.3 - VLDL (Vector Load Lower) +defm VLDLSX : VLDm<"vldl.sx", 0x83, V64>; +let cx = 1 in defm VLDLZX : VLDm<"vldl.zx", 0x83, V64>; + +// Section 8.9.4 - VLD2D (Vector Load 2D) +defm VLD2D : VLDm<"vld2d", 0xc1, V64>; + +// Section 8.9.5 - VLDU2D (Vector Load Upper 2D) +defm VLDU2D : VLDm<"vldu2d", 0xc2, V64>; + +// Section 8.9.6 - VLDL2D (Vector Load Lower 2D) +defm VLDL2DSX : VLDm<"vldl2d.sx", 0xc3, V64>; +let cx = 1 in defm VLDL2DZX : VLDm<"vldl2d.zx", 0xc3, V64>; + +// Multiclass for VST instructions +let mayStore = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VSTbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +multiclass VSTmm<string opcStr, bits<8>opc, dag dag_in> { + defm "" : VSTbm<opcStr, " $vx, $sy, $sz", opc, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VSTbm<opcStr, " $vx, $sy, $sz, $m", opc, !con(dag_in, (ins VM:$m))>; +} +let VE_VLIndex = 3 in +multiclass VSTtgm<string opcStr, bits<8>opc, RegisterClass RC> { + defm rrv : VSTmm<opcStr, opc, (ins I64:$sy, I64:$sz, RC:$vx)>; + let cy = 0 in + defm irv : VSTmm<opcStr, opc, (ins simm7:$sy, I64:$sz, RC:$vx)>; + let cz = 0 in + defm rzv : VSTmm<opcStr, opc, (ins I64:$sy, zero:$sz, RC:$vx)>; + let cy = 0, cz = 0 in + defm izv : VSTmm<opcStr, opc, (ins simm7:$sy, zero:$sz, RC:$vx)>; +} +multiclass VSTm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1, 
cx = 0 in defm "" : VSTtgm<opcStr, opc, RC>; + let vc = 0, cx = 0 in defm NC : VSTtgm<opcStr#".nc", opc, RC>; + let vc = 1, cx = 1 in defm OT : VSTtgm<opcStr#".ot", opc, RC>; + let vc = 0, cx = 1 in defm NCOT : VSTtgm<opcStr#".nc.ot", opc, RC>; +} + +// Section 8.9.7 - VST (Vector Store) +defm VST : VSTm<"vst", 0x91, V64>; + +// Section 8.9.8 - VST (Vector Store Upper) +defm VSTU : VSTm<"vstu", 0x92, V64>; + +// Section 8.9.9 - VSTL (Vector Store Lower) +defm VSTL : VSTm<"vstl", 0x93, V64>; + +// Section 8.9.10 - VST2D (Vector Store 2D) +defm VST2D : VSTm<"vst2d", 0xd1, V64>; + +// Section 8.9.11 - VSTU2D (Vector Store Upper 2D) +defm VSTU2D : VSTm<"vstu2d", 0xd2, V64>; + +// Section 8.9.12 - VSTL2D (Vector Store Lower 2D) +defm VSTL2D : VSTm<"vstl2d", 0xd3, V64>; + +// Multiclass for VGT instructions +let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VGTbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RVM<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; +} +multiclass VGTlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VGTbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass VGTmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VGTlm<opcStr, argStr, opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VGTlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins VM:$m))>; +} +let VE_VLIndex = 4 in +multiclass VGTlhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm rr : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins I64:$sy, I64:$sz))>; + let cy = 0 in + defm ir : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins simm7:$sy, I64:$sz))>; + let cz = 0 in + defm rz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins I64:$sy, zero:$sz))>; + let cy = 0, cz = 0 in + defm iz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins simm7:$sy, zero:$sz))>; +} +multiclass VGTtgm<string opcStr, bits<8>opc, RegisterClass RC> { + let vy = ? in defm v : VGTlhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>; + let cs = 1, sw = ? 
in defm s : VGTlhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>; +} +multiclass VGTm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1 in defm "" : VGTtgm<opcStr, opc, RC>; + let vc = 0 in defm NC : VGTtgm<opcStr#".nc", opc, RC>; +} + +// Section 8.9.13 - VGT (Vector Gather) +defm VGT : VGTm<"vgt", 0xa1, V64>; + +// Section 8.9.14 - VGTU (Vector Gather Upper) +defm VGTU : VGTm<"vgtu", 0xa2, V64>; + +// Section 8.9.15 - VGTL (Vector Gather Lower) +defm VGTLSX : VGTm<"vgtl.sx", 0xa3, V64>; +let cx = 1 in defm VGTLZX : VGTm<"vgtl.zx", 0xa3, V64>; +def : MnemonicAlias<"vgtl", "vgtl.zx">; +def : MnemonicAlias<"vgtl.nc", "vgtl.zx.nc">; + +// Multiclass for VSC instructions +let mayStore = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VSCbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +multiclass VSCmm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + defm "" : VSCbm<opcStr, argStr, opc, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VSCbm<opcStr, argStr#", $m", opc, !con(dag_in, (ins VM:$m))>; +} +let VE_VLIndex = 4 in +multiclass VSClhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm rrv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins I64:$sy, I64:$sz, RC:$vx))>; + let cy = 0 in + defm irv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins simm7:$sy, I64:$sz, RC:$vx))>; + let cz = 0 in + defm rzv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins I64:$sy, zero:$sz, RC:$vx))>; + let cy = 0, cz = 0 in + defm izv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins simm7:$sy, zero:$sz, RC:$vx))>; +} +multiclass VSCtgm<string opcStr, bits<8>opc, RegisterClass RC> { + let vy = ? in defm v : VSClhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>; + let cs = 1, sw = ? 
in defm s : VSClhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>; +} +multiclass VSCm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1, cx = 0 in defm "" : VSCtgm<opcStr, opc, RC>; + let vc = 0, cx = 0 in defm NC : VSCtgm<opcStr#".nc", opc, RC>; + let vc = 1, cx = 1 in defm OT : VSCtgm<opcStr#".ot", opc, RC>; + let vc = 0, cx = 1 in defm NCOT : VSCtgm<opcStr#".nc.ot", opc, RC>; +} + +// Section 8.9.16 - VSC (Vector Scatter) +defm VSC : VSCm<"vsc", 0xb1, V64>; + +// Section 8.9.17 - VSCU (Vector Scatter Upper) +defm VSCU : VSCm<"vscu", 0xb2, V64>; + +// Section 8.9.18 - VSCL (Vector Scatter Lower) +defm VSCL : VSCm<"vscl", 0xb3, V64>; + +// Section 8.9.19 - PFCHV (Prefetch Vector) +let Uses = [VL] in +multiclass PFCHVbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +let VE_VLIndex = 2 in +multiclass PFCHVm<string opcStr, bits<8>opc> { + defm rr : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, I64:$sz)>; + let cy = 0 in + defm ir : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, I64:$sz)>; + let cz = 0 in + defm rz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, zero:$sz)>; + let cy = 0, cz = 0 in + defm iz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, zero:$sz)>; +} +let vc = 1, vx = 0 in defm PFCHV : PFCHVm<"pfchv", 0x80>; +let vc = 0, vx = 0 in defm PFCHVNC : PFCHVm<"pfchv.nc", 0x80>; + +// Section 8.9.20 - LSV (Load S to V) +let sx = 0, vx = ?, hasSideEffects = 0 in +multiclass LSVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + def "" : RR<opc, (outs RC:$vx), dag_in, !strconcat(opcStr, " ${vx}", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = "$base", + isCodeGenOnly = 1 in + def _v : RR<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " ${vx}", argStr)>; +} +multiclass LSVm<string opcStr, bits<8>opc, RegisterClass RC> { + defm rr : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, I64:$sz)>; + let cy = 0 in + defm ir : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, I64:$sz)>; + let cz = 0 in + defm rm : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, mimm:$sz)>; + let cy = 0, cz = 0 in + defm im : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, mimm:$sz)>; +} +defm LSV : LSVm<"lsv", 0x8e, V64>; + +// Section 8.9.21 - LVS (Load V to S) +let cz = 0, sz = 0, vx = ?, hasSideEffects = 0 in +multiclass LVSm<string opcStr, bits<8>opc, RegisterClass RC> { + def vr : RR<opc, (outs I64:$sx), (ins RC:$vx, I64:$sy), + opcStr#" $sx, ${vx}(${sy})">; + let cy = 0 in + def vi : RR<opc, (outs I64:$sx), (ins RC:$vx, uimm7:$sy), + opcStr#" $sx, ${vx}(${sy})">; +} +defm LVS : LVSm<"lvs", 0x9e, V64>; + +// Section 8.9.22 - LVM (Load VM) +let sx = 0, vx = ?, hasSideEffects = 0 in +multiclass LVMbm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM, + dag dag_in> { + def "" : RR<opc, (outs RCM:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = "$base", + isCodeGenOnly = 1 in { + def _m : RR<opc, (outs RCM:$vx), !con(dag_in, (ins RCM:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; + } +} +multiclass LVMom<string opcStr, bits<8>opc, RegisterClass RCM> { + defm rr : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, 
I64:$sz)>; + let cy = 0 in + defm ir : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, I64:$sz)>; + let cz = 0 in + defm rm : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, mimm:$sz)>; + let cy = 0, cz = 0 in + defm im : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, mimm:$sz)>; +} +multiclass LVMm<string opcStr, bits<8>opc, RegisterClass RCM> { + defm "" : LVMom<opcStr, opc, RCM>; +} +defm LVM : LVMm<"lvm", 0xb7, VM>; + +// Section 8.9.23 - SVM (Save VM) +let cz = 0, sz = 0, vz = ?, hasSideEffects = 0 in +multiclass SVMm<string opcStr, bits<8>opc, RegisterClass RCM> { + def mr : RR<opc, (outs I64:$sx), (ins RCM:$vz, I64:$sy), + opcStr#" $sx, $vz, $sy">; + let cy = 0 in + def mi : RR<opc, (outs I64:$sx), (ins RCM:$vz, uimm2:$sy), + opcStr#" $sx, $vz, $sy">; +} +defm SVM : SVMm<"svm", 0xa7, VM>; + +// Section 8.9.24 - VBRD (Vector Broadcast) +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass VBRDbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; +} +multiclass VBRDlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VBRDbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass VBRDmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM, dag dag_in> { + defm "" : VBRDlm<opcStr, argStr, opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VBRDlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>; +} +let VE_VLIndex = 2 in +multiclass VBRDm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC, + RegisterClass RCM> { + defm r : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins RC:$sy)>; + let cy = 0 in + defm i : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins simm7:$sy)>; +} +let cx = 0, cx2 = 0 in +defm VBRD : VBRDm<"vbrd", 0x8c, V64, I64, VM>; +let cx = 0, cx2 = 1 in +defm VBRDL : VBRDm<"vbrdl", 0x8c, V64, I32, VM>; +let cx = 1, cx2 = 0 in +defm VBRDU : VBRDm<"vbrdu", 0x8c, V64, F32, VM>; +let cx = 1, cx2 = 1 in +defm PVBRD : VBRDm<"pvbrd", 0x8c, V64, I64, VM512>; + +// Section 8.9.25 - VMV (Vector Move) +let vx = ?, vz = ?, hasSideEffects = 0, Uses = [VL] in +multiclass VMVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; +} +multiclass VMVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VMVbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass VMVmm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM, dag dag_in> { 
+ defm "" : VMVlm<opcStr, "$sy, $vz", opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VMVlm<opcStr, "$sy, $vz, $m", opc, RC, !con(dag_in, (ins RCM:$m))>; +} +let VE_VLIndex = 3 in +multiclass VMVm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + defm rv : VMVmm<opcStr, opc, RC, RCM, (ins I64:$sy, RC:$vz)>; + let cy = 0 in + defm iv : VMVmm<opcStr, opc, RC, RCM, (ins uimm7:$sy, RC:$vz)>; +} +defm VMV : VMVm<"vmv", 0x9c, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.10 - Vector Fixed-Point Arithmetic Instructions +//----------------------------------------------------------------------------- + +// Multiclass for generic vector calculation +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx", argStr)>; +} +multiclass RVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : RVbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass RVmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM, dag dag_in> { + defm "" : RVlm<opcStr, argStr, opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : RVlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>; +} +// Generic RV multiclass with 2 arguments. +// e.g. VADD, VSUB, VMPY, and etc. +let VE_VLIndex = 3 in +multiclass RVm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC, + RegisterClass RCM, Operand SIMM = simm7> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>; + let cs = 1, vz = ? in + defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>; + let cs = 1, cy = 0, vz = ? in + defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>; +} +// Special RV multiclass with 2 arguments using cs2. +// e.g. VDIV, VDVS, and VDVX. +let VE_VLIndex = 3 in +multiclass RVDIVm<string opcStr, bits<8>opc, RegisterClass VRC, + RegisterClass RC, RegisterClass RCM, Operand SIMM = simm7> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>; + let cs2 = 1, vy = ? in + defm vr : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, RC:$sy)>; + let cs2 = 1, cy = 0, vy = ? in + defm vi : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, SIMM:$sy)>; + let cs = 1, vz = ? in + defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>; + let cs = 1, cy = 0, vz = ? in + defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>; +} +// Generic RV multiclass with 2 arguments for logical operations. +// e.g. VAND, VOR, VXOR, and etc. +let VE_VLIndex = 3 in +multiclass RVLm<string opcStr, bits<8>opc, RegisterClass ScaRC, + RegisterClass RC, RegisterClass RCM> { + let cy = 0, sy = 0, vy = ?, vz = ? 
in + defm vv : RVmm<opcStr, ", $vy, $vz", opc, RC, RCM, (ins RC:$vy, RC:$vz)>; + let cs = 1, vz = ? in + defm rv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins ScaRC:$sy, RC:$vz)>; + let cs = 1, cy = 0, vz = ? in + defm mv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins mimm:$sy, RC:$vz)>; +} +// Generic RV multiclass with 1 argument. +// e.g. VLDZ, VPCNT, and VBRV. +let VE_VLIndex = 2 in +multiclass RV1m<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0, vz = ? in + defm v : RVmm<opcStr, ", $vz", opc, RC, RCM, (ins RC:$vz)>; +} +// Generic RV multiclass with no argument. +// e.g. VSEQ. +let VE_VLIndex = 1 in +multiclass RV0m<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0 in + defm "" : RVmm<opcStr, "", opc, RC, RCM, (ins)>; +} +// Generic RV multiclass with 2 arguments for shift operations. +// e.g. VSLL, VSRL, VSLA, and etc. +let VE_VLIndex = 3 in +multiclass RVSm<string opcStr, bits<8>opc, RegisterClass ScaRC, + RegisterClass RC, RegisterClass RCM> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm vv : RVmm<opcStr, ", $vz, $vy", opc, RC, RCM, (ins RC:$vz, RC:$vy)>; + let cs = 1, vz = ? in + defm vr : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, ScaRC:$sy)>; + let cs = 1, cy = 0, vz = ? in + defm vi : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, uimm7:$sy)>; +} +// Generic RV multiclass with 3 arguments for shift operations. +// e.g. VSLD and VSRD. +let VE_VLIndex = 4 in +multiclass RVSDm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let vy = ?, vz = ? in + defm vvr : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM, + (ins RC:$vy, RC:$vz, I64:$sy)>; + let cy = 0, vy = ?, vz = ? in + defm vvi : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM, + (ins RC:$vy, RC:$vz, uimm7:$sy)>; +} +// Special RV multiclass with 3 arguments. +// e.g. VSFA +let VE_VLIndex = 4 in +multiclass RVSAm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cz = 1, sz = ?, vz = ? in + defm vrr : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, I64:$sy, I64:$sz)>; + let cz = 0, sz = ?, vz = ? in + defm vrm : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, I64:$sy, mimm:$sz)>; + let cy = 0, cz = 1, sz = ?, vz = ? in + defm vir : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, uimm3:$sy, I64:$sz)>; + let cy = 0, cz = 0, sz = ?, vz = ? in + defm vim : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, uimm3:$sy, mimm:$sz)>; +} +// Generic RV multiclass with 1 argument using vy field. +// e.g. VFSQRT, VRCP, and VRSQRT. +let VE_VLIndex = 2 in +multiclass RVF1m<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0, vy = ? in + defm v : RVmm<opcStr, ", $vy", opc, RC, RCM, (ins RC:$vy)>; +} +// Special RV multiclass with 3 arguments using cs2. +// e.g. VFMAD, VFMSB, VFNMAD, and etc. +let VE_VLIndex = 4 in +multiclass RVMm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC, + RegisterClass RCM, Operand SIMM = simm7> { + let cy = 0, sy = 0, vy = ?, vz = ?, vw = ? in + defm vvv : RVmm<opcStr, ", $vy, $vz, $vw", opc, VRC, RCM, + (ins VRC:$vy, VRC:$vz, VRC:$vw)>; + let cs2 = 1, vy = ?, vw = ? in + defm vrv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM, + (ins VRC:$vy, RC:$sy, VRC:$vw)>; + let cs2 = 1, cy = 0, vy = ?, vw = ? in + defm viv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM, + (ins VRC:$vy, SIMM:$sy, VRC:$vw)>; + let cs = 1, vz = ?, vw = ? 
in + defm rvv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM, + (ins RC:$sy, VRC:$vz, VRC:$vw)>; + let cs = 1, cy = 0, vz = ?, vw = ? in + defm ivv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM, + (ins SIMM:$sy, VRC:$vz, VRC:$vw)>; +} +// Special RV multiclass with 2 arguments for floating point conversions. +// e.g. VFIX and VFIXX +let hasSideEffects = 0, VE_VLIndex = 3 in +multiclass RVFIXm<string opcStr, bits<8> opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm v : RVmm<opcStr#"$vz", ", $vy", opc, RC, RCM, (ins RDOp:$vz, RC:$vy)>; +} +// Multiclass for generic iterative vector calculation +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVIbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx", argStr)>; + let isCodeGenOnly = 1, Constraints = "$vx = $base", DisableEncoding = disEnc#"$base" in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx", argStr)>; +} +multiclass RVIlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : RVIbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +// Generic RV multiclass for iterative operation with 2 argument. +// e.g. VFIA, VFIS, and VFIM +let VE_VLIndex = 3 in +multiclass RVI2m<string opcStr, bits<8>opc, RegisterClass VRC, + RegisterClass RC> { + let vy = ? in + defm vr : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, RC:$sy)>; + let cy = 0, vy = ? in + defm vi : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, simm7fp:$sy)>; +} +// Generic RV multiclass for iterative operation with 3 argument. +// e.g. VFIAM, VFISM, VFIMA, and etc. +let VE_VLIndex = 4 in +multiclass RVI3m<string opcStr, bits<8>opc, RegisterClass VRC, + RegisterClass RC> { + let vy = ?, vz = ? in + defm vvr : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC, + (ins VRC:$vy, VRC:$vz, RC:$sy)>; + let cy = 0, vy = ?, vz = ? in + defm vvi : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC, + (ins VRC:$vy, VRC:$vz, simm7fp:$sy)>; +} +// special RV multiclass with 3 arguments for VSHF. +// e.g. 
VSHF +let vy = ?, vz = ?, VE_VLIndex = 4 in +multiclass RVSHFm<string opcStr, bits<8>opc, RegisterClass RC, + Operand SIMM = uimm4> { + defm vvr : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC, + (ins RC:$vy, RC:$vz, I64:$sy)>; + let cy = 0 in defm vvi : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC, + (ins RC:$vy, RC:$vz, SIMM:$sy)>; +} +// Multiclass for generic mask calculation +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVMKbm<string opcStr, string argStr, bits<8>opc, dag dag_out, + dag dag_in> { + def "" : RV<opc, dag_out, dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RV<opc, dag_out, !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RV<opc, dag_out, !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +multiclass RVMKlm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM, + dag dag_in> { + defm "" : RVMKbm<opcStr, " $vx"#argStr, opc, (outs RCM:$vx), dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : RVMKbm<opcStr, " $vx"#argStr#", $m", opc, (outs RCM:$vx), + !con(dag_in, (ins RCM:$m))>; +} +// Generic RV multiclass for mask calculation with a condition. +// e.g. VFMK, VFMS, and VFMF +let cy = 0, sy = 0 in +multiclass RVMKom<string opcStr, bits<8> opc, RegisterClass RC, + RegisterClass RCM> { + let vy = ?, vz = ?, VE_VLIndex = 3 in + defm v : RVMKlm<opcStr#"$vy", ", $vz", opc, RCM, (ins CCOp:$vy, RC:$vz)>; + let vy = 15 /* AT */, VE_VLIndex = 1 in + defm a : RVMKlm<opcStr#"at", "", opc, RCM, (ins)>; + let vy = 0 /* AF */, VE_VLIndex = 1 in + defm na : RVMKlm<opcStr#"af", "", opc, RCM, (ins)>; +} +multiclass RVMKm<string opcStr, bits<8> opc, RegisterClass RC, + RegisterClass RCM> { + defm "" : RVMKom<opcStr, opc, RC, RCM>; +} +// Generic RV multiclass for mask calculation with 2 arguments. +// e.g. ANDM, ORM, XORM, and etc. +let cy = 0, sy = 0, vx = ?, vy = ?, vz = ?, hasSideEffects = 0 in +multiclass RVM2m<string opcStr, bits<8> opc, RegisterClass RCM> { + def mm : RV<opc, (outs RCM:$vx), (ins RCM:$vy, RCM:$vz), + !strconcat(opcStr, " $vx, $vy, $vz")>; +} +// Generic RV multiclass for mask calculation with 1 argument. +// e.g. NEGM +let cy = 0, sy = 0, vx = ?, vy = ?, hasSideEffects = 0 in +multiclass RVM1m<string opcStr, bits<8> opc, RegisterClass RCM> { + def m : RV<opc, (outs RCM:$vx), (ins RCM:$vy), + !strconcat(opcStr, " $vx, $vy")>; +} +// Generic RV multiclass for mask calculation with 1 argument. +// e.g. 
PCVM, LZVM, and TOVM +let cy = 0, sy = 0, vy = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVMSbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RV<opc, (outs I64:$sx), dag_in, + !strconcat(opcStr, " $sx,", argStr)> { + bits<7> sx; + let Inst{54-48} = sx; + } + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RV<opc, (outs I64:$sx), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, " $sx,", argStr)> { + bits<7> sx; + let Inst{54-48} = sx; + } + def L : RV<opc, (outs I64:$sx), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, " $sx,", argStr)> { + bits<7> sx; + let Inst{54-48} = sx; + } + } +} +let VE_VLIndex = 2 in +multiclass RVMSm<string opcStr, bits<8> opc, RegisterClass RCM> { + defm m : RVMSbm<opcStr, " $vy", opc, (ins RCM:$vy)>; +} + +// Section 8.10.1 - VADD (Vector Add) +let cx = 0, cx2 = 0 in +defm VADDUL : RVm<"vaddu.l", 0xc8, V64, I64, VM>; +let cx = 0, cx2 = 1 in { + defm PVADDULO : RVm<"pvaddu.lo", 0xc8, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VADDUW : RVm<"vaddu.w", 0xc8, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVADDUUP : RVm<"pvaddu.up", 0xc8, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVADDU : RVm<"pvaddu", 0xc8, V64, I64, VM512>; +def : MnemonicAlias<"vaddu.w", "pvaddu.lo">; + +// Section 8.10.2 - VADS (Vector Add Single) +let cx = 0, cx2 = 0 in +defm VADDSWSX : RVm<"vadds.w.sx", 0xca, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVADDSLO : RVm<"pvadds.lo", 0xca, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VADDSWZX : RVm<"vadds.w.zx", 0xca, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVADDSUP : RVm<"pvadds.up", 0xca, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVADDS : RVm<"pvadds", 0xca, V64, I64, VM512>; +def : MnemonicAlias<"pvadds.lo.sx", "vadds.w.sx">; +def : MnemonicAlias<"vadds.w.zx", "pvadds.lo">; +def : MnemonicAlias<"vadds.w", "pvadds.lo">; +def : MnemonicAlias<"pvadds.lo.zx", "pvadds.lo">; + +// Section 8.10.3 - VADX (Vector Add) +defm VADDSL : RVm<"vadds.l", 0x8b, V64, I64, VM>; + +// Section 8.10.4 - VSUB (Vector Subtract) +let cx = 0, cx2 = 0 in +defm VSUBUL : RVm<"vsubu.l", 0xd8, V64, I64, VM>; +let cx = 0, cx2 = 1 in { + defm PVSUBULO : RVm<"pvsubu.lo", 0xd8, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VSUBUW : RVm<"vsubu.w", 0xd8, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVSUBUUP : RVm<"pvsubu.up", 0xd8, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVSUBU : RVm<"pvsubu", 0xd8, V64, I64, VM512>; +def : MnemonicAlias<"vsubu.w", "pvsubu.lo">; + +// Section 8.10.5 - VSBS (Vector Subtract Single) +let cx = 0, cx2 = 0 in +defm VSUBSWSX : RVm<"vsubs.w.sx", 0xda, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVSUBSLO : RVm<"pvsubs.lo", 0xda, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VSUBSWZX : RVm<"vsubs.w.zx", 0xda, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVSUBSUP : RVm<"pvsubs.up", 0xda, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVSUBS : RVm<"pvsubs", 0xda, V64, I64, VM512>; +def : MnemonicAlias<"pvsubs.lo.sx", "vsubs.w.sx">; +def : MnemonicAlias<"vsubs.w.zx", "pvsubs.lo">; +def : MnemonicAlias<"vsubs.w", "pvsubs.lo">; +def : MnemonicAlias<"pvsubs.lo.zx", "pvsubs.lo">; + +// Section 8.10.6 - VSBX (Vector Subtract) +defm VSUBSL : RVm<"vsubs.l", 0x9b, V64, I64, VM>; + +// Section 8.10.7 - VMPY (Vector Multiply) +let cx2 = 0 in +defm VMULUL : RVm<"vmulu.l", 0xc9, V64, I64, VM>; +let cx2 = 1 in +defm VMULUW : RVm<"vmulu.w", 0xc9, V64, I32, VM>; + +// Section 8.10.8 - VMPS (Vector Multiply Single) +let cx2 = 0 in +defm VMULSWSX : RVm<"vmuls.w.sx", 
0xcb, V64, I32, VM>; +let cx2 = 1 in +defm VMULSWZX : RVm<"vmuls.w.zx", 0xcb, V64, I32, VM>; +def : MnemonicAlias<"vmuls.w", "vmuls.w.zx">; + +// Section 8.10.9 - VMPX (Vector Multiply) +defm VMULSL : RVm<"vmuls.l", 0xdb, V64, I64, VM>; + +// Section 8.10.10 - VMPD (Vector Multiply) +defm VMULSLW : RVm<"vmuls.l.w", 0xd9, V64, I32, VM>; + +// Section 8.10.11 - VDIV (Vector Divide) +let cx2 = 0 in +defm VDIVUL : RVDIVm<"vdivu.l", 0xe9, V64, I64, VM>; +let cx2 = 1 in +defm VDIVUW : RVDIVm<"vdivu.w", 0xe9, V64, I32, VM>; + +// Section 8.10.12 - VDVS (Vector Divide Single) +let cx2 = 0 in +defm VDIVSWSX : RVDIVm<"vdivs.w.sx", 0xeb, V64, I32, VM>; +let cx2 = 1 in +defm VDIVSWZX : RVDIVm<"vdivs.w.zx", 0xeb, V64, I32, VM>; +def : MnemonicAlias<"vdivs.w", "vdivs.w.zx">; + +// Section 8.10.13 - VDVX (Vector Divide) +defm VDIVSL : RVDIVm<"vdivs.l", 0xfb, V64, I64, VM>; + +// Section 8.10.14 - VCMP (Vector Compare) +let cx = 0, cx2 = 0 in +defm VCMPUL : RVm<"vcmpu.l", 0xb9, V64, I64, VM>; +let cx = 0, cx2 = 1 in { + defm PVCMPULO : RVm<"pvcmpu.lo", 0xb9, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VCMPUW : RVm<"vcmpu.w", 0xb9, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVCMPUUP : RVm<"pvcmpu.up", 0xb9, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVCMPU : RVm<"pvcmpu", 0xb9, V64, I64, VM512>; +def : MnemonicAlias<"vcmpu.w", "pvcmpu.lo">; + +// Section 8.10.15 - VCPS (Vector Compare Single) +let cx = 0, cx2 = 0 in +defm VCMPSWSX : RVm<"vcmps.w.sx", 0xfa, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVCMPSLO : RVm<"pvcmps.lo", 0xfa, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VCMPSWZX : RVm<"vcmps.w.zx", 0xfa, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVCMPSUP : RVm<"pvcmps.up", 0xfa, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVCMPS : RVm<"pvcmps", 0xfa, V64, I64, VM512>; +def : MnemonicAlias<"pvcmps.lo.sx", "vcmps.w.sx">; +def : MnemonicAlias<"vcmps.w.zx", "pvcmps.lo">; +def : MnemonicAlias<"vcmps.w", "pvcmps.lo">; +def : MnemonicAlias<"pvcmps.lo.zx", "pvcmps.lo">; + +// Section 8.10.16 - VCPX (Vector Compare) +defm VCMPSL : RVm<"vcmps.l", 0xba, V64, I64, VM>; + +// Section 8.10.17 - VCMS (Vector Compare and Select Maximum/Minimum Single) +let cx = 0, cx2 = 0 in +defm VMAXSWSX : RVm<"vmaxs.w.sx", 0x8a, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVMAXSLO : RVm<"pvmaxs.lo", 0x8a, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VMAXSWZX : RVm<"vmaxs.w.zx", 0x8a, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVMAXSUP : RVm<"pvmaxs.up", 0x8a, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVMAXS : RVm<"pvmaxs", 0x8a, V64, I64, VM512>; +let cs2 = 1 in { + let cx = 0, cx2 = 0 in + defm VMINSWSX : RVm<"vmins.w.sx", 0x8a, V64, I32, VM>; + let cx = 0, cx2 = 1 in { + defm PVMINSLO : RVm<"pvmins.lo", 0x8a, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VMINSWZX : RVm<"vmins.w.zx", 0x8a, V64, I32, VM>; + } + let cx = 1, cx2 = 0 in + defm PVMINSUP : RVm<"pvmins.up", 0x8a, V64, I64, VM>; + let cx = 1, cx2 = 1 in + defm PVMINS : RVm<"pvmins", 0x8a, V64, I64, VM512>; +} +def : MnemonicAlias<"pvmaxs.lo.sx", "vmaxs.w.sx">; +def : MnemonicAlias<"vmaxs.w.zx", "pvmaxs.lo">; +def : MnemonicAlias<"vmaxs.w", "pvmaxs.lo">; +def : MnemonicAlias<"pvmaxs.lo.zx", "pvmaxs.lo">; +def : MnemonicAlias<"pvmins.lo.sx", "vmins.w.sx">; +def : MnemonicAlias<"vmins.w.zx", "pvmins.lo">; +def : MnemonicAlias<"vmins.w", "pvmins.lo">; +def : MnemonicAlias<"pvmins.lo.zx", "pvmins.lo">; + +// Section 8.10.18 - VCMX (Vector Compare and Select Maximum/Minimum) +defm VMAXSL : RVm<"vmaxs.l", 0x9a, V64, I64, VM>; +let 
cs2 = 1 in +defm VMINSL : RVm<"vmins.l", 0x9a, V64, I64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.11 - Vector Logical Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.11.1 - VAND (Vector And) +let cx = 0, cx2 = 0 in defm VAND : RVLm<"vand", 0xc4, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVANDLO : RVLm<"pvand.lo", 0xc4, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVANDUP : RVLm<"pvand.up", 0xc4, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVAND : RVLm<"pvand", 0xc4, I64, V64, VM512>; + +// Section 8.11.2 - VOR (Vector Or) +let cx = 0, cx2 = 0 in defm VOR : RVLm<"vor", 0xc5, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVORLO : RVLm<"pvor.lo", 0xc5, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVORUP : RVLm<"pvor.up", 0xc5, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVOR : RVLm<"pvor", 0xc5, I64, V64, VM512>; + +// Section 8.11.3 - VXOR (Vector Exclusive Or) +let cx = 0, cx2 = 0 in defm VXOR : RVLm<"vxor", 0xc6, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVXORLO : RVLm<"pvxor.lo", 0xc6, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVXORUP : RVLm<"pvxor.up", 0xc6, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVXOR : RVLm<"pvxor", 0xc6, I64, V64, VM512>; + +// Section 8.11.4 - VEQV (Vector Equivalence) +let cx = 0, cx2 = 0 in defm VEQV : RVLm<"veqv", 0xc7, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVEQVLO : RVLm<"pveqv.lo", 0xc7, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVEQVUP : RVLm<"pveqv.up", 0xc7, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVEQV : RVLm<"pveqv", 0xc7, I64, V64, VM512>; + +// Section 8.11.5 - VLDZ (Vector Leading Zero Count) +let cx = 0, cx2 = 0 in defm VLDZ : RV1m<"vldz", 0xe7, V64, VM>; +let cx = 0, cx2 = 1 in defm PVLDZLO : RV1m<"pvldz.lo", 0xe7, V64, VM>; +let cx = 1, cx2 = 0 in defm PVLDZUP : RV1m<"pvldz.up", 0xe7, V64, VM>; +let cx = 1, cx2 = 1 in defm PVLDZ : RV1m<"pvldz", 0xe7, V64, VM512>; + +// Section 8.11.6 - VPCNT (Vector Population Count) +let cx = 0, cx2 = 0 in defm VPCNT : RV1m<"vpcnt", 0xac, V64, VM>; +let cx = 0, cx2 = 1 in defm PVPCNTLO : RV1m<"pvpcnt.lo", 0xac, V64, VM>; +let cx = 1, cx2 = 0 in defm PVPCNTUP : RV1m<"pvpcnt.up", 0xac, V64, VM>; +let cx = 1, cx2 = 1 in defm PVPCNT : RV1m<"pvpcnt", 0xac, V64, VM512>; + +// Section 8.11.7 - VBRV (Vector Bit Reverse) +let cx = 0, cx2 = 0 in defm VBRV : RV1m<"vbrv", 0xf7, V64, VM>; +let cx = 0, cx2 = 1 in defm PVBRVLO : RV1m<"pvbrv.lo", 0xf7, V64, VM>; +let cx = 1, cx2 = 0 in defm PVBRVUP : RV1m<"pvbrv.up", 0xf7, V64, VM>; +let cx = 1, cx2 = 1 in defm PVBRV : RV1m<"pvbrv", 0xf7, V64, VM512>; + +// Section 8.11.8 - VSEQ (Vector Sequential Number) +let cx = 0, cx2 = 0 in defm VSEQ : RV0m<"vseq", 0x99, V64, VM>; +let cx = 0, cx2 = 1 in defm PVSEQLO : RV0m<"pvseq.lo", 0x99, V64, VM>; +let cx = 1, cx2 = 0 in defm PVSEQUP : RV0m<"pvseq.up", 0x99, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSEQ : RV0m<"pvseq", 0x99, V64, VM512>; + +//----------------------------------------------------------------------------- +// Section 8.12 - Vector Shift Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.12.1 - VSLL (Vector Shift Left Logical) +let cx = 0, cx2 = 0 in defm VSLL : RVSm<"vsll", 0xe5, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVSLLLO : RVSm<"pvsll.lo", 0xe5, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVSLLUP : RVSm<"pvsll.up", 0xe5, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSLL : RVSm<"pvsll", 0xe5, I64, V64, 
VM512>; + +// Section 8.12.2 - VSLD (Vector Shift Left Double) +defm VSLD : RVSDm<"vsld", 0xe4, V64, VM>; + +// Section 8.12.3 - VSRL (Vector Shift Right Logical) +let cx = 0, cx2 = 0 in defm VSRL : RVSm<"vsrl", 0xf5, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVSRLLO : RVSm<"pvsrl.lo", 0xf5, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVSRLUP : RVSm<"pvsrl.up", 0xf5, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSRL : RVSm<"pvsrl", 0xf5, I64, V64, VM512>; + +// Section 8.12.4 - VSRD (Vector Shift Right Double) +defm VSRD : RVSDm<"vsrd", 0xf4, V64, VM>; + +// Section 8.12.5 - VSLA (Vector Shift Left Arithmetic) +let cx = 0, cx2 = 0 in defm VSLAWSX : RVSm<"vsla.w.sx", 0xe6, I32, V64, VM>; +let cx = 0, cx2 = 1 in { + defm PVSLALO : RVSm<"pvsla.lo", 0xe6, I32, V64, VM>; + let isCodeGenOnly = 1 in defm VSLAWZX : RVSm<"vsla.w.zx", 0xe6, I32, V64, VM>; +} +let cx = 1, cx2 = 0 in defm PVSLAUP : RVSm<"pvsla.up", 0xe6, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSLA : RVSm<"pvsla", 0xe6, I64, V64, VM512>; +def : MnemonicAlias<"pvsla.lo.sx", "vsla.w.sx">; +def : MnemonicAlias<"vsla.w.zx", "pvsla.lo">; +def : MnemonicAlias<"vsla.w", "pvsla.lo">; +def : MnemonicAlias<"pvsla.lo.zx", "pvsla.lo">; + +// Section 8.12.6 - VSLAX (Vector Shift Left Arithmetic) +defm VSLAL : RVSm<"vsla.l", 0xd4, I64, V64, VM>; + +// Section 8.12.7 - VSRA (Vector Shift Right Arithmetic) +let cx = 0, cx2 = 0 in defm VSRAWSX : RVSm<"vsra.w.sx", 0xf6, I32, V64, VM>; +let cx = 0, cx2 = 1 in { + defm PVSRALO : RVSm<"pvsra.lo", 0xf6, I32, V64, VM>; + let isCodeGenOnly = 1 in defm VSRAWZX : RVSm<"vsra.w.zx", 0xf6, I32, V64, VM>; +} +let cx = 1, cx2 = 0 in defm PVSRAUP : RVSm<"pvsra.up", 0xf6, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSRA : RVSm<"pvsra", 0xf6, I64, V64, VM512>; +def : MnemonicAlias<"pvsra.lo.sx", "vsra.w.sx">; +def : MnemonicAlias<"vsra.w.zx", "pvsra.lo">; +def : MnemonicAlias<"vsra.w", "pvsra.lo">; +def : MnemonicAlias<"pvsra.lo.zx", "pvsra.lo">; + +// Section 8.12.8 - VSRAX (Vector Shift Right Arithmetic) +defm VSRAL : RVSm<"vsra.l", 0xd5, I64, V64, VM>; + +// Section 8.12.9 - VSFA (Vector Shift Left and Add) +defm VSFA : RVSAm<"vsfa", 0xd7, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.13 - Vector Floating-Point Arithmetic Instructions +//----------------------------------------------------------------------------- + +// Section 8.13.1 - VFAD (Vector Floating Add) +let cx = 0, cx2 = 0 in +defm VFADDD : RVm<"vfadd.d", 0xcc, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFADDLO : RVm<"pvfadd.lo", 0xcc, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFADDUP : RVm<"pvfadd.up", 0xcc, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFADDS : RVm<"vfadd.s", 0xcc, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFADD : RVm<"pvfadd", 0xcc, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfadd.s", "pvfadd.up">; + +// Section 8.13.2 - VFSB (Vector Floating Subtract) +let cx = 0, cx2 = 0 in +defm VFSUBD : RVm<"vfsub.d", 0xdc, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFSUBLO : RVm<"pvfsub.lo", 0xdc, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFSUBUP : RVm<"pvfsub.up", 0xdc, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFSUBS : RVm<"vfsub.s", 0xdc, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFSUB : RVm<"pvfsub", 0xdc, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfsub.s", "pvfsub.up">; + +// Section 8.13.3 - VFMP (Vector Floating Multiply) +let cx = 0, cx2 = 0 in 
+defm VFMULD : RVm<"vfmul.d", 0xcd, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMULLO : RVm<"pvfmul.lo", 0xcd, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMULUP : RVm<"pvfmul.up", 0xcd, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMULS : RVm<"vfmul.s", 0xcd, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMUL : RVm<"pvfmul", 0xcd, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfmul.s", "pvfmul.up">; + +// Section 8.13.4 - VFDV (Vector Floating Divide) +defm VFDIVD : RVDIVm<"vfdiv.d", 0xdd, V64, I64, VM, simm7fp>; +let cx = 1 in +defm VFDIVS : RVDIVm<"vfdiv.s", 0xdd, V64, F32, VM, simm7fp>; + +// Section 8.13.5 - VFSQRT (Vector Floating Square Root) +defm VFSQRTD : RVF1m<"vfsqrt.d", 0xed, V64, VM>; +let cx = 1 in +defm VFSQRTS : RVF1m<"vfsqrt.s", 0xed, V64, VM>; + +// Section 8.13.6 - VFCP (Vector Floating Compare) +let cx = 0, cx2 = 0 in +defm VFCMPD : RVm<"vfcmp.d", 0xfc, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFCMPLO : RVm<"pvfcmp.lo", 0xfc, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFCMPUP : RVm<"pvfcmp.up", 0xfc, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFCMPS : RVm<"vfcmp.s", 0xfc, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFCMP : RVm<"pvfcmp", 0xfc, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfcmp.s", "pvfcmp.up">; + +// Section 8.13.7 - VFCM (Vector Floating Compare and Select Maximum/Minimum) +let cx = 0, cx2 = 0 in +defm VFMAXD : RVm<"vfmax.d", 0xbd, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMAXLO : RVm<"pvfmax.lo", 0xbd, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMAXUP : RVm<"pvfmax.up", 0xbd, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMAXS : RVm<"vfmax.s", 0xbd, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMAX : RVm<"pvfmax", 0xbd, V64, I64, VM512, simm7fp>; +let cs2 = 1 in { + let cx = 0, cx2 = 0 in + defm VFMIND : RVm<"vfmin.d", 0xbd, V64, I64, VM, simm7fp>; + let cx = 0, cx2 = 1 in + defm PVFMINLO : RVm<"pvfmin.lo", 0xbd, V64, I64, VM, simm7fp>; + let cx = 1, cx2 = 0 in { + defm PVFMINUP : RVm<"pvfmin.up", 0xbd, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMINS : RVm<"vfmin.s", 0xbd, V64, F32, VM, simm7fp>; + } + let cx = 1, cx2 = 1 in + defm PVFMIN : RVm<"pvfmin", 0xbd, V64, I64, VM512, simm7fp>; +} +def : MnemonicAlias<"vfmax.s", "pvfmax.up">; +def : MnemonicAlias<"vfmin.s", "pvfmin.up">; + +// Section 8.13.8 - VFMAD (Vector Floating Fused Multiply Add) +let cx = 0, cx2 = 0 in +defm VFMADD : RVMm<"vfmad.d", 0xe2, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMADLO : RVMm<"pvfmad.lo", 0xe2, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMADUP : RVMm<"pvfmad.up", 0xe2, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMADS : RVMm<"vfmad.s", 0xe2, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMAD : RVMm<"pvfmad", 0xe2, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfmad.s", "pvfmad.up">; + +// Section 8.13.9 - VFMSB (Vector Floating Fused Multiply Subtract) +let cx = 0, cx2 = 0 in +defm VFMSBD : RVMm<"vfmsb.d", 0xf2, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMSBLO : RVMm<"pvfmsb.lo", 0xf2, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMSBUP : RVMm<"pvfmsb.up", 0xf2, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMSBS : RVMm<"vfmsb.s", 0xf2, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMSB : RVMm<"pvfmsb", 0xf2, V64, I64, VM512, simm7fp>; +def : 
MnemonicAlias<"vfmsb.s", "pvfmsb.up">; + +// Section 8.13.10 - VFNMAD (Vector Floating Fused Negative Multiply Add) +let cx = 0, cx2 = 0 in +defm VFNMADD : RVMm<"vfnmad.d", 0xe3, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFNMADLO : RVMm<"pvfnmad.lo", 0xe3, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFNMADUP : RVMm<"pvfnmad.up", 0xe3, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFNMADS : RVMm<"vfnmad.s", 0xe3, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFNMAD : RVMm<"pvfnmad", 0xe3, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfnmad.s", "pvfnmad.up">; + +// Section 8.13.11 - VFNMSB (Vector Floating Fused Negative Multiply Subtract) +let cx = 0, cx2 = 0 in +defm VFNMSBD : RVMm<"vfnmsb.d", 0xf3, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFNMSBLO : RVMm<"pvfnmsb.lo", 0xf3, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFNMSBUP : RVMm<"pvfnmsb.up", 0xf3, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFNMSBS : RVMm<"vfnmsb.s", 0xf3, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFNMSB : RVMm<"pvfnmsb", 0xf3, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfnmsb.s", "pvfnmsb.up">; + +// Section 8.13.12 - VRCP (Vector Floating Reciprocal) +let cx = 0, cx2 = 0 in defm VRCPD : RVF1m<"vrcp.d", 0xe1, V64, VM>; +let cx = 0, cx2 = 1 in defm PVRCPLO : RVF1m<"pvrcp.lo", 0xe1, V64, VM>; +let cx = 1, cx2 = 0 in { + defm PVRCPUP : RVF1m<"pvrcp.up", 0xe1, V64, VM>; + let isCodeGenOnly = 1 in defm VRCPS : RVF1m<"vrcp.s", 0xe1, V64, VM>; +} +let cx = 1, cx2 = 1 in defm PVRCP : RVF1m<"pvrcp", 0xe1, V64, VM512>; +def : MnemonicAlias<"vrcp.s", "pvrcp.up">; + +// Section 8.13.13 - VRSQRT (Vector Floating Reciprocal Square Root) +let cx = 0, cx2 = 0 in defm VRSQRTD : RVF1m<"vrsqrt.d", 0xf1, V64, VM>; +let cx = 0, cx2 = 1 in defm PVRSQRTLO : RVF1m<"pvrsqrt.lo", 0xf1, V64, VM>; +let cx = 1, cx2 = 0 in { + defm PVRSQRTUP : RVF1m<"pvrsqrt.up", 0xf1, V64, VM>; + let isCodeGenOnly = 1 in + defm VRSQRTS : RVF1m<"vrsqrt.s", 0xf1, V64, VM>; +} +let cx = 1, cx2 = 1 in +defm PVRSQRT : RVF1m<"pvrsqrt", 0xf1, V64, VM512>; +let cs2 = 1 in { + let cx = 0, cx2 = 0 in + defm VRSQRTDNEX : RVF1m<"vrsqrt.d.nex", 0xf1, V64, VM>; + let cx = 0, cx2 = 1 in + defm PVRSQRTLONEX : RVF1m<"pvrsqrt.lo.nex", 0xf1, V64, VM>; + let cx = 1, cx2 = 0 in { + defm PVRSQRTUPNEX : RVF1m<"pvrsqrt.up.nex", 0xf1, V64, VM>; + let isCodeGenOnly = 1 in + defm VRSQRTSNEX : RVF1m<"vrsqrt.s.nex", 0xf1, V64, VM>; + } + let cx = 1, cx2 = 1 in + defm PVRSQRTNEX : RVF1m<"pvrsqrt.nex", 0xf1, V64, VM512>; +} +def : MnemonicAlias<"vrsqrt.s", "pvrsqrt.up">; +def : MnemonicAlias<"vrsqrt.s.nex", "pvrsqrt.up.nex">; + +// Section 8.13.14 - VFIX (Vector Convert to Fixed Pointer) +let cx = 0, cx2 = 0, cs2 = 0 in +defm VCVTWDSX : RVFIXm<"vcvt.w.d.sx", 0xe8, V64, VM>; +let cx = 0, cx2 = 1, cs2 = 0 in +defm VCVTWDZX : RVFIXm<"vcvt.w.d.zx", 0xe8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 0 in +defm VCVTWSSX : RVFIXm<"vcvt.w.s.sx", 0xe8, V64, VM>; +let cx = 1, cx2 = 1, cs2 = 0 in +defm VCVTWSZX : RVFIXm<"vcvt.w.s.zx", 0xe8, V64, VM>; +let cx = 0, cx2 = 1, cs2 = 1 in +defm PVCVTWSLO : RVFIXm<"pvcvt.w.s.lo", 0xe8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 1 in +defm PVCVTWSUP : RVFIXm<"pvcvt.w.s.up", 0xe8, V64, VM>; +let cx = 1, cx2 = 1, cs2 = 1 in +defm PVCVTWS : RVFIXm<"pvcvt.w.s", 0xe8, V64, VM512>; + +// Section 8.13.15 - VFIXX (Vector Convert to Fixed Pointer) +defm VCVTLD : RVFIXm<"vcvt.l.d", 0xa8, V64, VM>; + +// Section 8.13.16 - VFLT (Vector Convert to Floating Pointer) 
+let cx = 0, cx2 = 0, cs2 = 0 in +defm VCVTDW : RVF1m<"vcvt.d.w", 0xf8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 0 in +defm VCVTSW : RVF1m<"vcvt.s.w", 0xf8, V64, VM>; +let cx = 0, cx2 = 1, cs2 = 1 in +defm PVCVTSWLO : RVF1m<"pvcvt.s.w.lo", 0xf8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 1 in +defm PVCVTSWUP : RVF1m<"pvcvt.s.w.up", 0xf8, V64, VM>; +let cx = 1, cx2 = 1, cs2 = 1 in +defm PVCVTSW : RVF1m<"pvcvt.s.w", 0xf8, V64, VM512>; + +// Section 8.13.17 - VFLTX (Vector Convert to Floating Pointer) +defm VCVTDL : RVF1m<"vcvt.d.l", 0xb8, V64, VM>; + +// Section 8.13.18 - VCVS (Vector Convert to Single-format) +defm VCVTSD : RVF1m<"vcvt.s.d", 0x9f, V64, VM>; + +// Section 8.13.19 - VCVD (Vector Convert to Double-format) +defm VCVTDS : RVF1m<"vcvt.d.s", 0x8f, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.14 - Vector Reduction Instructions +//----------------------------------------------------------------------------- + +// Section 8.14.1 - VSUMS (Vector Sum Single) +defm VSUMWSX : RVF1m<"vsum.w.sx", 0xea, V64, VM>; +let cx2 = 1 in defm VSUMWZX : RVF1m<"vsum.w.zx", 0xea, V64, VM>; + +// Section 8.14.2 - VSUMX (Vector Sum) +defm VSUML : RVF1m<"vsum.l", 0xaa, V64, VM>; + +// Section 8.14.3 - VFSUM (Vector Floating Sum) +defm VFSUMD : RVF1m<"vfsum.d", 0xec, V64, VM>; +let cx = 1 in defm VFSUMS : RVF1m<"vfsum.s", 0xec, V64, VM>; + +// Section 8.14.4 - VMAXS (Vector Maximum/Minimum Single) +let cx2 = 0 in defm VRMAXSWFSTSX : RVF1m<"vrmaxs.w.fst.sx", 0xbb, V64, VM>; +let cx2 = 1 in defm VRMAXSWFSTZX : RVF1m<"vrmaxs.w.fst.zx", 0xbb, V64, VM>; +let cs = 1 in { + let cx2 = 0 in + defm VRMAXSWLSTSX : RVF1m<"vrmaxs.w.lst.sx", 0xbb, V64, VM>; + let cx2 = 1 in + defm VRMAXSWLSTZX : RVF1m<"vrmaxs.w.lst.zx", 0xbb, V64, VM>; +} +let cs2 = 1 in { + let cx2 = 0 in + defm VRMINSWFSTSX : RVF1m<"vrmins.w.fst.sx", 0xbb, V64, VM>; + let cx2 = 1 in + defm VRMINSWFSTZX : RVF1m<"vrmins.w.fst.zx", 0xbb, V64, VM>; + let cs = 1 in { + let cx2 = 0 in + defm VRMINSWLSTSX : RVF1m<"vrmins.w.lst.sx", 0xbb, V64, VM>; + let cx2 = 1 in + defm VRMINSWLSTZX : RVF1m<"vrmins.w.lst.zx", 0xbb, V64, VM>; + } +} + +// Section 8.14.5 - VMAXX (Vector Maximum/Minimum) +let cs = 0 in defm VRMAXSLFST : RVF1m<"vrmaxs.l.fst", 0xab, V64, VM>; +let cs = 1 in defm VRMAXSLLST : RVF1m<"vrmaxs.l.lst", 0xab, V64, VM>; +let cs2 = 1 in { + let cs = 0 in defm VRMINSLFST : RVF1m<"vrmins.l.fst", 0xab, V64, VM>; + let cs = 1 in defm VRMINSLLST : RVF1m<"vrmins.l.lst", 0xab, V64, VM>; +} + +// Section 8.14.6 - VFMAX (Vector Floating Maximum/Minimum) +let cs = 0 in defm VFRMAXDFST : RVF1m<"vfrmax.d.fst", 0xad, V64, VM>; +let cs = 1 in defm VFRMAXDLST : RVF1m<"vfrmax.d.lst", 0xad, V64, VM>; +let cs2 = 1 in { + let cs = 0 in defm VFRMINDFST : RVF1m<"vfrmin.d.fst", 0xad, V64, VM>; + let cs = 1 in defm VFRMINDLST : RVF1m<"vfrmin.d.lst", 0xad, V64, VM>; +} +let cx = 1 in { + let cs = 0 in defm VFRMAXSFST : RVF1m<"vfrmax.s.fst", 0xad, V64, VM>; + let cs = 1 in defm VFRMAXSLST : RVF1m<"vfrmax.s.lst", 0xad, V64, VM>; + let cs2 = 1 in { + let cs = 0 in defm VFRMINSFST : RVF1m<"vfrmin.s.fst", 0xad, V64, VM>; + let cs = 1 in defm VFRMINSLST : RVF1m<"vfrmin.s.lst", 0xad, V64, VM>; + } +} + +// Section 8.14.7 - VRAND (Vector Reduction And) +defm VRAND : RVF1m<"vrand", 0x88, V64, VM>; + +// Section 8.14.8 - VROR (Vector Reduction Or) +defm VROR : RVF1m<"vror", 0x98, V64, VM>; + +// Section 8.14.9 - VRXOR (Vector Reduction Exclusive Or) +defm VRXOR : RVF1m<"vrxor", 0x89, V64, VM>; + 
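The reduction maximum/minimum definitions above (Section 8.14.4) lean on three encoding bits: cs2 switches between maximum and minimum, cs between the first and last occurrence, and cx2 between sign- and zero-extension of the 32-bit result. A minimal standalone C++ sketch of that mapping follows (illustrative only; the helper name vrMaxMinMnemonic is made up here and is not part of the backend):

#include <iostream>
#include <string>

// Rebuilds the eight Section 8.14.4 mnemonics from their encoding bits.
static std::string vrMaxMinMnemonic(bool cs2, bool cs, bool cx2) {
  std::string M = cs2 ? "vrmins.w" : "vrmaxs.w"; // cs2 = 1 selects minimum
  M += cs ? ".lst" : ".fst";                     // cs  = 1 selects last occurrence
  M += cx2 ? ".zx" : ".sx";                      // cx2 = 1 selects zero-extension
  return M;
}

int main() {
  for (int cs2 = 0; cs2 < 2; ++cs2)
    for (int cs = 0; cs < 2; ++cs)
      for (int cx2 = 0; cx2 < 2; ++cx2)
        std::cout << vrMaxMinMnemonic(cs2, cs, cx2) << '\n';
}

The same cs/cs2 pattern carries over to the VMAXX (8.14.5) and VFMAX (8.14.6) groups above, with cx selecting the .s forms in the floating-point case.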
+//----------------------------------------------------------------------------- +// Section 8.15 - Vector Iterative Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.15.1 - VFIA (Vector Floating Iteration Add) +let cx = 0 in defm VFIAD : RVI2m<"vfia.d", 0xce, V64, I64>; +let cx = 1 in defm VFIAS : RVI2m<"vfia.s", 0xce, V64, F32>; + +// Section 8.15.2 - VFIS (Vector Floating Iteration Subtract) +let cx = 0 in defm VFISD : RVI2m<"vfis.d", 0xde, V64, I64>; +let cx = 1 in defm VFISS : RVI2m<"vfis.s", 0xde, V64, F32>; + +// Section 8.15.3 - VFIM (Vector Floating Iteration Multiply) +let cx = 0 in defm VFIMD : RVI2m<"vfim.d", 0xcf, V64, I64>; +let cx = 1 in defm VFIMS : RVI2m<"vfim.s", 0xcf, V64, F32>; + +// Section 8.15.4 - VFIAM (Vector Floating Iteration Add and Multiply) +let cx = 0 in defm VFIAMD : RVI3m<"vfiam.d", 0xee, V64, I64>; +let cx = 1 in defm VFIAMS : RVI3m<"vfiam.s", 0xee, V64, F32>; + +// Section 8.15.5 - VFISM (Vector Floating Iteration Subtract and Multiply) +let cx = 0 in defm VFISMD : RVI3m<"vfism.d", 0xfe, V64, I64>; +let cx = 1 in defm VFISMS : RVI3m<"vfism.s", 0xfe, V64, F32>; + +// Section 8.15.6 - VFIMA (Vector Floating Iteration Multiply and Add) +let cx = 0 in defm VFIMAD : RVI3m<"vfima.d", 0xef, V64, I64>; +let cx = 1 in defm VFIMAS : RVI3m<"vfima.s", 0xef, V64, F32>; + +// Section 8.15.7 - VFIMS (Vector Floating Iteration Multiply and Subtract) +let cx = 0 in defm VFIMSD : RVI3m<"vfims.d", 0xff, V64, I64>; +let cx = 1 in defm VFIMSS : RVI3m<"vfims.s", 0xff, V64, F32>; + +//----------------------------------------------------------------------------- +// Section 8.16 - Vector Merger Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.16.1 - VMRG (Vector Merge) +let cx = 0 in defm VMRG : RVm<"vmrg", 0xd6, V64, I64, VM>; +// FIXME: vmrg.w should be called as pvmrg, but following assembly manual. 
+let cx = 1 in defm VMRGW : RVm<"vmrg.w", 0xd6, V64, I64, VM512>; +def : MnemonicAlias<"vmrg.l", "vmrg">; + +// Section 8.16.2 - VSHF (Vector Shuffle) +defm VSHF : RVSHFm<"vshf", 0xbc, V64>; + +// Section 8.16.3 - VCP (Vector Compress) +defm VCP : RV1m<"vcp", 0x8d, V64, VM>; + +// Section 8.16.4 - VEX (Vector Expand) +defm VEX : RV1m<"vex", 0x9d, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.17 - Vector Mask Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.17.1 - VFMK (Vector Form Mask) +defm VFMKL : RVMKm<"vfmk.l.", 0xb4, V64, VM>; +def : MnemonicAlias<"vfmk.l", "vfmk.l.at">; + +// Section 8.17.2 - VFMS (Vector Form Mask Single) +defm VFMKW : RVMKm<"vfmk.w.", 0xb5, V64, VM>; +let isCodeGenOnly = 1 in defm PVFMKWLO : RVMKm<"vfmk.w.", 0xb5, V64, VM>; +let cx = 1 in defm PVFMKWUP : RVMKm<"pvfmk.w.up.", 0xb5, V64, VM>; +def : MnemonicAlias<"vfmk.w", "vfmk.w.at">; +def : MnemonicAlias<"pvfmk.w.up", "pvfmk.w.up.at">; +def : MnemonicAlias<"pvfmk.w.lo", "vfmk.w.at">; +foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at" ] in { + def : MnemonicAlias<"pvfmk.w.lo."#CC, "vfmk.w."#CC>; +} + +// Section 8.17.3 - VFMF (Vector Form Mask Floating Point) +defm VFMKD : RVMKm<"vfmk.d.", 0xb6, V64, VM>; +let cx2 = 1 in defm PVFMKSLO : RVMKm<"pvfmk.s.lo.", 0xb6, V64, VM>; +let cx = 1 in { + defm PVFMKSUP : RVMKm<"pvfmk.s.up.", 0xb6, V64, VM>; + let isCodeGenOnly = 1 in defm VFMKS : RVMKm<"vfmk.s.", 0xb6, V64, VM>; +} +def : MnemonicAlias<"vfmk.d", "vfmk.d.at">; +def : MnemonicAlias<"pvfmk.s.lo", "pvfmk.s.lo.at">; +def : MnemonicAlias<"pvfmk.s.up", "pvfmk.s.up.at">; +def : MnemonicAlias<"vfmk.s", "pvfmk.s.up.at">; +foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at", "num", "nan", + "gtnan", "ltnan", "nenan", "eqnan", "genan", "lenan" ] in { + def : MnemonicAlias<"vfmk.s."#CC, "pvfmk.s.up."#CC>; +} + +// Section 8.17.4 - ANDM (And VM) +defm ANDM : RVM2m<"andm", 0x84, VM>; + +// Section 8.17.5 - ORM (Or VM) +defm ORM : RVM2m<"orm", 0x85, VM>; + +// Section 8.17.6 - XORM (Exclusive Or VM) +defm XORM : RVM2m<"xorm", 0x86, VM>; + +// Section 8.17.7 - EQVM (Equivalence VM) +defm EQVM : RVM2m<"eqvm", 0x87, VM>; + +// Section 8.17.8 - NNDM (Negate And VM) +defm NNDM : RVM2m<"nndm", 0x94, VM>; + +// Section 8.17.9 - NEGM (Negate VM) +defm NEGM : RVM1m<"negm", 0x95, VM>; + +// Section 8.17.10 - PCVM (Population Count of VM) +defm PCVM : RVMSm<"pcvm", 0xa4, VM>; + +// Section 8.17.11 - LZVM (Leading Zero of VM) +defm LZVM : RVMSm<"lzvm", 0xa5, VM>; + +// Section 8.17.12 - TOVM (Trailing One of VM) +defm TOVM : RVMSm<"tovm", 0xa6, VM>; + +//----------------------------------------------------------------------------- +// Section 8.18 - Vector Control Instructions +//----------------------------------------------------------------------------- + +// Section 8.18.1 - LVL (Load VL) +let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VL] in { + def LVLr : RR<0xbf, (outs), (ins I64:$sy), "lvl $sy">; + let cy = 0 in def LVLi : RR<0xbf, (outs), (ins simm7:$sy), "lvl $sy">; +} + +// Section 8.18.2 - SVL (Save VL) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0, Uses = [VL] in +def SVL : RR<0x2f, (outs I64:$sx), (ins), "svl $sx">; + +// Section 8.18.3 - SMVL (Save Maximum Vector Length) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in +def SMVL : RR<0x2e, (outs I64:$sx), (ins), "smvl $sx">; + +// Section 8.18.4 - LVIX (Load Vector Data Index) +let sx = 0, cz = 
0, sz = 0, hasSideEffects = 0, Defs = [VIX] in { + def LVIXr : RR<0xaf, (outs), (ins I64:$sy), "lvix $sy">; + let cy = 0 in def LVIXi : RR<0xaf, (outs), (ins uimm6:$sy), "lvix $sy">; +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp index 9815610510e1..bc5577ce4f97 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -51,6 +51,11 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, break; return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand( + MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP); + case MachineOperand::MO_ConstantPoolIndex: + return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP); case MachineOperand::MO_ExternalSymbol: return LowerSymbolOperand( MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); @@ -58,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP); case MachineOperand::MO_Immediate: return MCOperand::createImm(MO.getImm()); - + case MachineOperand::MO_JumpTableIndex: + return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP); case MachineOperand::MO_MachineBasicBlock: return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP); diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp index 5783a8df69d2..d175ad26c742 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -35,6 +36,8 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {} const MCPhysReg * VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { switch (MF->getFunction().getCallingConv()) { + case CallingConv::Fast: + // Being explicit (same as standard CC). default: return CSR_SaveList; case CallingConv::PreserveAll: @@ -45,6 +48,8 @@ VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { switch (CC) { + case CallingConv::Fast: + // Being explicit (same as standard CC). default: return CSR_RegMask; case CallingConv::PreserveAll: @@ -82,10 +87,22 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const { ++ItAlias) Reserved.set(*ItAlias); + // Reserve constant registers. + Reserved.set(VE::VM0); + Reserved.set(VE::VMP0); + return Reserved; } -bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; } +bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { + switch (PhysReg) { + case VE::VM0: + case VE::VMP0: + return true; + default: + return false; + } +} const TargetRegisterClass * VERegisterInfo::getPointerRegClass(const MachineFunction &MF, @@ -93,6 +110,29 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF, return &VE::I64RegClass; } +static unsigned offsetToDisp(MachineInstr &MI) { + // Default offset in instruction's operands (reg+reg+imm). 
+ unsigned OffDisp = 2; + +#define RRCAS_multi_cases(NAME) NAME##rir : case NAME##rii + + { + using namespace llvm::VE; + switch (MI.getOpcode()) { + case RRCAS_multi_cases(TS1AML): + case RRCAS_multi_cases(TS1AMW): + case RRCAS_multi_cases(CASL): + case RRCAS_multi_cases(CASW): + // These instructions use AS format (reg+imm). + OffDisp = 1; + break; + } + } +#undef RRCAS_multi_cases + + return OffDisp; +} + static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, MachineInstr &MI, const DebugLoc &dl, unsigned FIOperandNum, int Offset, Register FrameReg) { @@ -100,7 +140,7 @@ static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, // VE has 32 bit offset field, so no need to expand a target instruction. // Directly encode it. MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); - MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset); } void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, @@ -116,9 +156,41 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register FrameReg; int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); - - Offset += MI.getOperand(FIOperandNum + 2).getImm(); + Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed(); + + Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm(); + + if (MI.getOpcode() == VE::STQrii) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + Register SrcReg = MI.getOperand(3).getReg(); + Register SrcHiReg = getSubReg(SrcReg, VE::sub_even); + Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd); + // VE stores HiReg to 8(addr) and LoReg to 0(addr) + MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii)) + .addReg(FrameReg) + .addImm(0) + .addImm(0) + .addReg(SrcLoReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + MI.setDesc(TII.get(VE::STrii)); + MI.getOperand(3).setReg(SrcHiReg); + Offset += 8; + } else if (MI.getOpcode() == VE::LDQrii) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + Register DestReg = MI.getOperand(0).getReg(); + Register DestHiReg = getSubReg(DestReg, VE::sub_even); + Register DestLoReg = getSubReg(DestReg, VE::sub_odd); + // VE loads HiReg from 8(addr) and LoReg from 0(addr) + MachineInstr *StMI = + BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg) + .addReg(FrameReg) + .addImm(0) + .addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + MI.setDesc(TII.get(VE::LDrii)); + MI.getOperand(0).setReg(DestHiReg); + Offset += 8; + } replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } @@ -126,26 +198,3 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const { return VE::SX9; } - -// VE has no architectural need for stack realignment support, -// except that LLVM unfortunately currently implements overaligned -// stack objects by depending upon stack realignment support. -// If that ever changes, this can probably be deleted. -bool VERegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (!TargetRegisterInfo::canRealignStack(MF)) - return false; - - // VE always has a fixed frame pointer register, so don't need to - // worry about needing to reserve it. [even if we don't have a frame - // pointer for our frame, it still cannot be used for other things, - // or register window traps will be SADNESS.] 
- - // If there's a reserved call frame, we can use VE to access locals. - if (getFrameLowering(MF)->hasReservedCallFrame(MF)) - return true; - - // Otherwise, we'd need a base pointer, but those aren't implemented - // for VE at the moment. - - return false; -} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h index 9a32da16bea6..334fb965a986 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h @@ -40,8 +40,6 @@ public: RegScavenger *RS = nullptr) const override; Register getFrameRegister(const MachineFunction &MF) const override; - - bool canRealignStack(const MachineFunction &MF) const override; }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td index 29708d35c730..70ff104b65b7 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td @@ -26,13 +26,33 @@ class VEMiscReg<bits<6> enc, string n>: Register<n> { let Namespace = "VE"; } +class VEVecReg<bits<8> enc, string n, list<Register> subregs = [], + list<string> altNames = [], list<Register> aliases = []> + : Register<n, altNames> { + let HWEncoding{15-8} = 0; + let HWEncoding{7-0} = enc; + let Namespace = "VE"; + let SubRegs = subregs; + let Aliases = aliases; +} + +class VEMaskReg<bits<4> enc, string n, list<Register> subregs = [], + list<string> altNames = [], list<Register> aliases = []> + : Register<n, altNames> { + let HWEncoding{15-4} = 0; + let HWEncoding{3-0} = enc; + let Namespace = "VE"; + let SubRegs = subregs; + let Aliases = aliases; +} + let Namespace = "VE" in { - def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63) - def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63) def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63) def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31) def sub_even : SubRegIndex<64>; // High 64 bit (0..63) def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127) + def sub_vm_even : SubRegIndex<256>; // High 256 bit (0..255) + def sub_vm_odd : SubRegIndex<256, 256>; // Low 256 bit (256..511) def AsmName : RegAltNameIndex; } @@ -66,26 +86,23 @@ def MISC : RegisterClass<"VE", [i64], 64, def IC : VEMiscReg<62, "ic">; //----------------------------------------------------------------------------- -// Gneric Registers +// Vector Length Register //----------------------------------------------------------------------------- -let RegAltNameIndices = [AsmName] in { +def VL : VEMiscReg<63, "vl">; -// Generic integer registers - 8 bits wide -foreach I = 0-63 in - def SB#I : VEReg<I, "sb"#I, [], ["s"#I]>, DwarfRegNum<[I]>; +// Register classes. 
+def VLS : RegisterClass<"VE", [i32], 64, (add VL)>; -// Generic integer registers - 16 bits wide -let SubRegIndices = [sub_i8] in -foreach I = 0-63 in - def SH#I : VEReg<I, "sh"#I, [!cast<VEReg>("SB"#I)], ["s"#I]>, - DwarfRegNum<[I]>; +//----------------------------------------------------------------------------- +// Generic Registers +//----------------------------------------------------------------------------- + +let RegAltNameIndices = [AsmName] in { // Generic integer registers - 32 bits wide -let SubRegIndices = [sub_i16] in foreach I = 0-63 in - def SW#I : VEReg<I, "sw"#I, [!cast<VEReg>("SH"#I)], ["s"#I]>, - DwarfRegNum<[I]>; + def SW#I : VEReg<I, "sw"#I, [], ["s"#I]>, DwarfRegNum<[I]>; // Generic floating point registers - 32 bits wide // NOTE: Mark SF#I as alias of SW#I temporary to avoid register allocation @@ -95,10 +112,21 @@ foreach I = 0-63 in DwarfRegNum<[I]>; // Generic integer registers - 64 bits wide -let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in -foreach I = 0-63 in - def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)], - ["s"#I]>, DwarfRegNum<[I]>; +let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in { + // Several registers have specific names, so add them to one of aliases. + def SX8 : VEReg<8, "s8", [SW8, SF8], ["s8", "sl"]>, DwarfRegNum<[8]>; + def SX9 : VEReg<9, "s9", [SW9, SF9], ["s9", "fp"]>, DwarfRegNum<[9]>; + def SX10 : VEReg<10, "s10", [SW10, SF10], ["s10", "lr"]>, DwarfRegNum<[10]>; + def SX11 : VEReg<11, "s11", [SW11, SF11], ["s11", "sp"]>, DwarfRegNum<[11]>; + def SX14 : VEReg<14, "s14", [SW14, SF14], ["s14", "tp"]>, DwarfRegNum<[14]>; + def SX15 : VEReg<15, "s15", [SW15, SF15], ["s15", "got"]>, DwarfRegNum<[15]>; + def SX16 : VEReg<16, "s16", [SW16, SF16], ["s16", "plt"]>, DwarfRegNum<[16]>; + + // Other generic registers. + foreach I = { 0-7, 12-13, 17-63 } in + def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)], + ["s"#I]>, DwarfRegNum<[I]>; +} // Aliases of the S* registers used to hold 128-bit for values (long doubles). // Following foreach represents something like: @@ -112,20 +140,31 @@ foreach I = 0-31 in !cast<VEReg>("SX"#!add(!shl(I,1),1))], ["s"#!shl(I,1)]>; +// Vector registers - 64 bits wide 256 elements +foreach I = 0-63 in + def V#I : VEVecReg<I, "v"#I, [], ["v"#I]>, DwarfRegNum<[!add(64,I)]>; + +// Vector Index Register +def VIX : VEVecReg<255, "vix", [], ["vix"]>; + +// Vector mask registers - 256 bits wide +foreach I = 0-15 in + def VM#I : VEMaskReg<I, "vm"#I, [], ["vm"#I]>, DwarfRegNum<[!add(128,I)]>; + +// Aliases of VMs to use as a pair of two VM for packed instructions +let SubRegIndices = [sub_vm_even, sub_vm_odd], CoveredBySubRegs = 1 in +foreach I = 0-7 in + def VMP#I : VEMaskReg<!shl(I,1), "vmp"#I, + [!cast<VEMaskReg>("VM"#!shl(I,1)), + !cast<VEMaskReg>("VM"#!add(!shl(I,1),1))], + ["vm"#!shl(I,1)]>; + } // RegAltNameIndices = [AsmName] // Register classes. // // The register order is defined in terms of the preferred // allocation order. 
-def I8 : RegisterClass<"VE", [i8], 8, - (add (sequence "SB%u", 0, 7), - (sequence "SB%u", 34, 63), - (sequence "SB%u", 8, 33))>; -def I16 : RegisterClass<"VE", [i16], 16, - (add (sequence "SH%u", 0, 7), - (sequence "SH%u", 34, 63), - (sequence "SH%u", 8, 33))>; def I32 : RegisterClass<"VE", [i32], 32, (add (sequence "SW%u", 0, 7), (sequence "SW%u", 34, 63), @@ -142,3 +181,14 @@ def F128 : RegisterClass<"VE", [f128], 128, (add (sequence "Q%u", 0, 3), (sequence "Q%u", 17, 31), (sequence "Q%u", 4, 16))>; + +def V64 : RegisterClass<"VE", + [v256f64, // default type for vector registers + v512i32, v512f32, + v256i64, v256i32, v256f32, /* v256f64, */], 64, + (add (sequence "V%u", 0, 63), + VIX)>; + +// vm0 is reserved for always true +def VM : RegisterClass<"VE", [v256i1], 64, (sequence "VM%u", 0, 15)>; +def VM512 : RegisterClass<"VE", [v512i1], 64, (sequence "VMP%u", 0, 7)>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp index a0b78d95e3cf..daa6cfb8aa84 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp @@ -27,73 +27,35 @@ void VESubtarget::anchor() {} VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + // Default feature settings + EnableVPU = false; + // Determine default and user specified characteristics std::string CPUName = std::string(CPU); if (CPUName.empty()) - CPUName = "ve"; + CPUName = "generic"; // Parse features string. - ParseSubtargetFeatures(CPUName, FS); + ParseSubtargetFeatures(CPUName, /*TuneCPU=*/CPU, FS); return *this; } VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : VEGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering(*this) {} -int VESubtarget::getAdjustedFrameSize(int frameSize) const { - - // VE stack frame: - // - // +----------------------------------------+ - // | Locals and temporaries | - // +----------------------------------------+ - // | Parameter area for callee | - // 176(fp) | | - // +----------------------------------------+ - // | Register save area (RSA) for callee | - // | | - // 16(fp) | 20 * 8 bytes | - // +----------------------------------------+ - // 8(fp) | Return address | - // +----------------------------------------+ - // 0(fp) | Frame pointer of caller | - // --------+----------------------------------------+-------- - // | Locals and temporaries for callee | - // +----------------------------------------+ - // | Parameter area for callee of callee | - // +----------------------------------------+ - // 16(sp) | RSA for callee of callee | - // +----------------------------------------+ - // 8(sp) | Return address | - // +----------------------------------------+ - // 0(sp) | Frame pointer of callee | - // +----------------------------------------+ - - // RSA frame: - // +----------------------------------------------+ - // 168(fp) | %s33 | - // +----------------------------------------------+ - // | %s19...%s32 | - // +----------------------------------------------+ - // 48(fp) | %s18 | - // +----------------------------------------------+ - // 40(fp) | Linkage area register (%s17) | - // +----------------------------------------------+ - // 32(fp) | Procedure linkage table register (%plt=%s16) | - // 
+----------------------------------------------+ - // 24(fp) | Global offset table register (%got=%s15) | - // +----------------------------------------------+ - // 16(fp) | Thread pointer register (%tp=%s14) | - // +----------------------------------------------+ +uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const { + // Calculate adjusted frame size by adding the size of RSA frame, + // return address, and frame poitner as described in VEFrameLowering.cpp. + const VEFrameLowering *TFL = getFrameLowering(); - frameSize += 176; // for RSA, RA, and FP - frameSize = alignTo(frameSize, 16); // requires 16 bytes alignment + FrameSize += getRsaSize(); + FrameSize = alignTo(FrameSize, TFL->getStackAlign()); - return frameSize; + return FrameSize; } bool VESubtarget::enableMachineScheduler() const { return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h index f3a2c206162e..213aca2ea3f9 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h @@ -32,6 +32,13 @@ class VESubtarget : public VEGenSubtargetInfo { Triple TargetTriple; virtual void anchor(); + /// Features { + + // Emit VPU instructions + bool EnableVPU; + + /// } Features + VEInstrInfo InstrInfo; VETargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; @@ -55,15 +62,21 @@ public: bool enableMachineScheduler() const override; + bool enableVPU() const { return EnableVPU; } + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); VESubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); /// Given a actual stack size as determined by FrameInfo, this function - /// returns adjusted framesize which includes space for register window - /// spills and arguments. - int getAdjustedFrameSize(int stackSize) const; + /// returns adjusted framesize which includes space for RSA, return + /// address, and frame poitner. + uint64_t getAdjustedFrameSize(uint64_t FrameSize) const; + + /// Get the size of RSA, return address, and frame pointer as described + /// in VEFrameLowering.cpp. + unsigned getRsaSize(void) const { return 176; }; bool isTargetLinux() const { return TargetTriple.isOSLinux(); } }; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp index 08b55eebbc98..414ae09431c0 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp @@ -23,7 +23,7 @@ using namespace llvm; #define DEBUG_TYPE "ve" -extern "C" void LLVMInitializeVETarget() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETarget() { // Register the target. RegisterTargetMachine<VETargetMachine> X(getTheVETarget()); } @@ -44,13 +44,24 @@ static std::string computeDataLayout(const Triple &T) { // Stack alignment is 128 bits Ret += "-S128"; + // Vector alignments are 64 bits + // Need to define all of them. Otherwise, each alignment becomes + // the size of each data by default. 
+ Ret += "-v64:64:64"; // for v2f32 + Ret += "-v128:64:64"; + Ret += "-v256:64:64"; + Ret += "-v512:64:64"; + Ret += "-v1024:64:64"; + Ret += "-v2048:64:64"; + Ret += "-v4096:64:64"; + Ret += "-v8192:64:64"; + Ret += "-v16384:64:64"; // for v256f64 + return Ret; } static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { - if (!RM.hasValue()) - return Reloc::Static; - return *RM; + return RM.getValueOr(Reloc::Static); } class VEELFTargetObjectFile : public TargetLoweringObjectFileELF { @@ -96,7 +107,9 @@ public: return getTM<VETargetMachine>(); } + void addIRPasses() override; bool addInstSelector() override; + void addPreEmitPass() override; }; } // namespace @@ -104,7 +117,18 @@ TargetPassConfig *VETargetMachine::createPassConfig(PassManagerBase &PM) { return new VEPassConfig(*this, PM); } +void VEPassConfig::addIRPasses() { + // VE requires atomic expand pass. + addPass(createAtomicExpandPass()); + TargetPassConfig::addIRPasses(); +} + bool VEPassConfig::addInstSelector() { addPass(createVEISelDag(getVETargetMachine())); return false; } + +void VEPassConfig::addPreEmitPass() { + // LVLGen should be called after scheduling and register allocation + addPass(createLVLGenPass()); +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h index c267c4d9a578..68af66597485 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h @@ -33,16 +33,35 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> { const VESubtarget *getST() const { return ST; } const VETargetLowering *getTLI() const { return TLI; } + bool enableVPU() const { return getST()->enableVPU(); } + public: explicit VETTIImpl(const VETargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; } + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool VectorRegs = (ClassID == 1); + if (VectorRegs) { + // TODO report vregs once vector isel is stable. + return 0; + } + + return 64; + } - unsigned getRegisterBitWidth(bool Vector) const { return 64; } + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + // TODO report vregs once vector isel is stable. + return 0; + } + return 64; + } - unsigned getMinVectorRegisterBitWidth() const { return 64; } + unsigned getMinVectorRegisterBitWidth() const { + // TODO report vregs once vector isel is stable. + return 0; + } }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td new file mode 100644 index 000000000000..2c88d5099a7b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td @@ -0,0 +1,46 @@ +//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP +// SDNodes are an intermediate isel layer between the vector SDNodes emitted by +// LLVM and the actual VE vector instructions. 
For example: +// +// ADD(x,y) --> VVP_ADD(x,y,mask,evl) --> VADDSWSXrvml(x,y,mask,evl) +// ^ ^ ^ +// The standard The VVP layer SDNode. The VE vector instruction. +// SDNode. +// +// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is uptream. +//===----------------------------------------------------------------------===// + +// Binary Operators { + +// BinaryOp(x,y,mask,vl) +def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc. + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>, + SDTCisSameNumEltsAs<0, 3>, + IsVLVT<4> +]>; + +// Binary operator commutative pattern. +class vvp_commutative<SDNode RootOp> : + PatFrags< + (ops node:$lhs, node:$rhs, node:$mask, node:$vlen), + [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen), + (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>; + +// VVP node definitions. +def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>; +def c_vvp_add : vvp_commutative<vvp_add>; + +def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>; +def c_vvp_and : vvp_commutative<vvp_and>; + +// } Binary Operators diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td new file mode 100644 index 000000000000..7003fb387670 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td @@ -0,0 +1,71 @@ +//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes how VVP_* SDNodes are lowered to machine instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// VVP SDNode definitions. +// +//===----------------------------------------------------------------------===// +include "VVPInstrInfo.td" + +multiclass VectorBinaryArith< + SDPatternOperator OpNode, + ValueType ScalarVT, ValueType DataVT, ValueType MaskVT, + string OpBaseName, + SDPatternOperator ImmOp, SDNodeXForm ImmCast> { + // No mask. + def : Pat<(OpNode + (any_broadcast ScalarVT:$sx), + DataVT:$vy, (MaskVT true_mask), i32:$avl), + (!cast<Instruction>(OpBaseName#"rvl") + ScalarVT:$sx, $vy, $avl)>; + def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl), + (!cast<Instruction>(OpBaseName#"vvl") + $vx, $vy, $avl)>; + + // Mask. + def : Pat<(OpNode + (any_broadcast ScalarVT:$sx), + DataVT:$vy, MaskVT:$mask, i32:$avl), + (!cast<Instruction>(OpBaseName#"rvml") + ScalarVT:$sx, $vy, $mask, $avl)>; + def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl), + (!cast<Instruction>(OpBaseName#"vvml") + $vx, $vy, $mask, $avl)>; + + // TODO We do not specify patterns for the immediate variants here. There + // will be an immediate folding pass that takes care of switching to the + // immediate variant where applicable. + + // TODO Fold vvp_select into passthru. 
+} + +// Expand both 64bit and 32 bit variant (256 elements) +multiclass VectorBinaryArith_ShortLong< + SDPatternOperator OpNode, + ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName, + ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> { + defm : VectorBinaryArith<OpNode, + LongScalarVT, LongDataVT, v256i1, + LongOpBaseName, simm7, LO7>; + defm : VectorBinaryArith<OpNode, + ShortScalarVT, ShortDataVT, v256i1, + ShortOpBaseName, simm7, LO7>; +} + + +defm : VectorBinaryArith_ShortLong<c_vvp_add, + i64, v256i64, "VADDSL", + i32, v256i32, "VADDSWSX">; +defm : VectorBinaryArith_ShortLong<c_vvp_and, + i64, v256i64, "VAND", + i32, v256i32, "PVANDLO">; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def new file mode 100644 index 000000000000..a68402e9ea10 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def @@ -0,0 +1,41 @@ +//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all VVP_* SDNodes and their properties +// +//===----------------------------------------------------------------------===// + +/// HANDLE_VP_TO_VVP(VPOPC, VVPOPC) +/// \p VPOPC is the VP_* SDNode opcode. +/// \p VVPOPC is the VVP_* SDNode opcode. +#ifndef HANDLE_VP_TO_VVP +#define HANDLE_VP_TO_VVP(VPOPC, VVPOPC) +#endif + +/// ADD_VVP_OP(VVPNAME,SDNAME) +/// \p VVPName is a VVP SDNode operator. +/// \p SDNAME is the generic SD opcode corresponding to \p VVPName. +#ifndef ADD_VVP_OP +#define ADD_VVP_OP(X, Y) +#endif + +/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME) +/// \p VVPName is a VVP Binary operator. +/// \p SDNAME is the generic SD opcode corresponding to \p VVPName. +#ifndef ADD_BINARY_VVP_OP +#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X) +#endif + +// Integer arithmetic. +ADD_BINARY_VVP_OP(VVP_ADD,ADD) + +ADD_BINARY_VVP_OP(VVP_AND,AND) + +#undef HANDLE_VP_TO_VVP +#undef ADD_BINARY_VVP_OP +#undef ADD_VVP_OP |
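The VVPNodes.def table added above follows LLVM's usual X-macro convention: a consumer defines whichever of HANDLE_VP_TO_VVP / ADD_VVP_OP / ADD_BINARY_VVP_OP it cares about and then includes the file; the defaults expand the binary entries through ADD_VVP_OP, and the trailing #undef lines reset the macros afterwards. The fragment below is a minimal sketch of that consumption pattern, not a quote of the VE sources; the VEISD enum layout and the getVVPOpcode helper name are assumptions made only for illustration.

    // Sketch only: conventional consumption of an X-macro table such as
    // VVPNodes.def. Names below are illustrative, not the actual VE backend.
    #include "llvm/ADT/Optional.h"
    #include "llvm/CodeGen/ISDOpcodes.h"

    namespace VEISD {
    enum NodeType : unsigned {
      FIRST_NUMBER = llvm::ISD::BUILTIN_OP_END,
      // Emit one enumerator per VVP operator in the table.
    #define ADD_VVP_OP(VVP_NAME, SD_NAME) VVP_NAME,
    #include "VVPNodes.def" // expands to: VVP_ADD, VVP_AND,
    };
    } // namespace VEISD

    // Map a generic SDNode opcode to its VVP counterpart, if one exists.
    static llvm::Optional<unsigned> getVVPOpcode(unsigned Opcode) {
      switch (Opcode) {
        // Emit one case per (VVP opcode, generic SD opcode) pair.
    #define ADD_VVP_OP(VVP_NAME, SD_NAME)                                      \
      case llvm::ISD::SD_NAME:                                                 \
        return VEISD::VVP_NAME;
    #include "VVPNodes.def"
      }
      return llvm::None; // No VVP counterpart for this opcode.
    }

Under these assumptions, lowering code can call getVVPOpcode(ISD::ADD) and, when it yields a value, rebuild the node as the corresponding VVP_* operator with an added mask and vector-length operand, matching the ADD(x,y) -> VVP_ADD(x,y,mask,evl) flow described in the VVPInstrInfo.td header.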