Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/VE')
35 files changed, 7853 insertions, 833 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp index 7a899b4b38e2..a3309a68c76d 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -125,6 +125,9 @@ static const MCPhysReg F128Regs[32] = { VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23, VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31}; +static const MCPhysReg VM512Regs[8] = {VE::VMP0, VE::VMP1, VE::VMP2, VE::VMP3, + VE::VMP4, VE::VMP5, VE::VMP6, VE::VMP7}; + static const MCPhysReg MISCRegs[31] = { VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR, @@ -277,6 +280,17 @@ public: } return false; } + bool isUImm4() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<4>(Value); + } + return false; + } bool isUImm6() { if (!isImm()) return false; @@ -476,6 +490,10 @@ public: addImmOperands(Inst, N); } + void addUImm4Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addUImm6Operands(MCInst &Inst, unsigned N) const { addImmOperands(Inst, N); } @@ -648,6 +666,15 @@ public: return true; } + static bool MorphToVM512Reg(VEOperand &Op) { + unsigned Reg = Op.getReg(); + unsigned regIdx = Reg - VE::VM0; + if (regIdx % 2 || regIdx > 15) + return false; + Op.Reg.RegNum = VM512Regs[regIdx / 2]; + return true; + } + static bool MorphToMISCReg(VEOperand &Op) { const auto *ConstExpr = dyn_cast<MCConstantExpr>(Op.getImm()); if (!ConstExpr) @@ -902,6 +929,24 @@ StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc, Mnemonic = parseRD(Name, 10, NameLoc, Operands); } else if (Name.startswith("cvt.l.d")) { Mnemonic = parseRD(Name, 7, NameLoc, Operands); + } else if (Name.startswith("vcvt.w.d.sx") || Name.startswith("vcvt.w.d.zx") || + Name.startswith("vcvt.w.s.sx") || Name.startswith("vcvt.w.s.zx")) { + Mnemonic = parseRD(Name, 11, NameLoc, Operands); + } else if (Name.startswith("vcvt.l.d")) { + Mnemonic = parseRD(Name, 8, NameLoc, Operands); + } else if (Name.startswith("pvcvt.w.s.lo") || + Name.startswith("pvcvt.w.s.up")) { + Mnemonic = parseRD(Name, 12, NameLoc, Operands); + } else if (Name.startswith("pvcvt.w.s")) { + Mnemonic = parseRD(Name, 9, NameLoc, Operands); + } else if (Name.startswith("vfmk.l.") || Name.startswith("vfmk.w.") || + Name.startswith("vfmk.d.") || Name.startswith("vfmk.s.")) { + bool ICC = Name[5] == 'l' || Name[5] == 'w' ? true : false; + Mnemonic = parseCC(Name, 7, Name.size(), ICC, true, NameLoc, Operands); + } else if (Name.startswith("pvfmk.w.lo.") || Name.startswith("pvfmk.w.up.") || + Name.startswith("pvfmk.s.lo.") || Name.startswith("pvfmk.s.up.")) { + bool ICC = Name[6] == 'l' || Name[6] == 'w' ? true : false; + Mnemonic = parseCC(Name, 11, Name.size(), ICC, true, NameLoc, Operands); } else { Operands->push_back(VEOperand::CreateToken(Mnemonic, NameLoc)); } @@ -1362,9 +1407,38 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands, return ResTy; switch (getLexer().getKind()) { - case AsmToken::LParen: - // FIXME: Parsing "(" + %vreg + ", " + %vreg + ")" - // FALLTHROUGH + case AsmToken::LParen: { + // Parsing "(" + %vreg + ", " + %vreg + ")" + const AsmToken Tok1 = Parser.getTok(); + Parser.Lex(); // Eat the '('. 
+ + unsigned RegNo1; + SMLoc S1, E1; + if (tryParseRegister(RegNo1, S1, E1) != MatchOperand_Success) { + getLexer().UnLex(Tok1); + return MatchOperand_NoMatch; + } + + if (!Parser.getTok().is(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); // Eat the ','. + + unsigned RegNo2; + SMLoc S2, E2; + if (tryParseRegister(RegNo2, S2, E2) != MatchOperand_Success) + return MatchOperand_ParseFail; + + if (!Parser.getTok().is(AsmToken::RParen)) + return MatchOperand_ParseFail; + + Operands.push_back(VEOperand::CreateToken(Tok1.getString(), Tok1.getLoc())); + Operands.push_back(VEOperand::CreateReg(RegNo1, S1, E1)); + Operands.push_back(VEOperand::CreateReg(RegNo2, S2, E2)); + Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(), + Parser.getTok().getLoc())); + Parser.Lex(); // Eat the ')'. + break; + } default: { std::unique_ptr<VEOperand> Op; ResTy = parseVEAsmOperand(Op); @@ -1377,7 +1451,24 @@ OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands, if (!Parser.getTok().is(AsmToken::LParen)) break; - // FIXME: Parsing %vec-reg + "(" + %sclar-reg/number + ")" + // Parsing %vec-reg + "(" + %sclar-reg/number + ")" + std::unique_ptr<VEOperand> Op1 = VEOperand::CreateToken( + Parser.getTok().getString(), Parser.getTok().getLoc()); + Parser.Lex(); // Eat the '('. + + std::unique_ptr<VEOperand> Op2; + ResTy = parseVEAsmOperand(Op2); + if (ResTy != MatchOperand_Success || !Op2) + return MatchOperand_ParseFail; + + if (!Parser.getTok().is(AsmToken::RParen)) + return MatchOperand_ParseFail; + + Operands.push_back(std::move(Op1)); + Operands.push_back(std::move(Op2)); + Operands.push_back(VEOperand::CreateToken(Parser.getTok().getString(), + Parser.getTok().getLoc())); + Parser.Lex(); // Eat the ')'. break; } } @@ -1445,6 +1536,10 @@ unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp, if (Op.isReg() && VEOperand::MorphToF128Reg(Op)) return MCTargetAsmParser::Match_Success; break; + case MCK_VM512: + if (Op.isReg() && VEOperand::MorphToVM512Reg(Op)) + return MCTargetAsmParser::Match_Success; + break; case MCK_MISC: if (Op.isImm() && VEOperand::MorphToMISCReg(Op)) return MCTargetAsmParser::Match_Success; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp index 35885a4e3cae..20d609bc6b32 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp @@ -47,7 +47,7 @@ static MCDisassembler *createVEDisassembler(const Target &T, return new VEDisassembler(STI, Ctx); } -extern "C" void LLVMInitializeVEDisassembler() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEDisassembler() { // Register the disassembler. 
TargetRegistry::RegisterMCDisassembler(getTheVETarget(), createVEDisassembler); @@ -95,6 +95,25 @@ static const unsigned F128RegDecoderTable[] = { VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23, VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31}; +static const unsigned V64RegDecoderTable[] = { + VE::V0, VE::V1, VE::V2, VE::V3, VE::V4, VE::V5, VE::V6, VE::V7, + VE::V8, VE::V9, VE::V10, VE::V11, VE::V12, VE::V13, VE::V14, VE::V15, + VE::V16, VE::V17, VE::V18, VE::V19, VE::V20, VE::V21, VE::V22, VE::V23, + VE::V24, VE::V25, VE::V26, VE::V27, VE::V28, VE::V29, VE::V30, VE::V31, + VE::V32, VE::V33, VE::V34, VE::V35, VE::V36, VE::V37, VE::V38, VE::V39, + VE::V40, VE::V41, VE::V42, VE::V43, VE::V44, VE::V45, VE::V46, VE::V47, + VE::V48, VE::V49, VE::V50, VE::V51, VE::V52, VE::V53, VE::V54, VE::V55, + VE::V56, VE::V57, VE::V58, VE::V59, VE::V60, VE::V61, VE::V62, VE::V63}; + +static const unsigned VMRegDecoderTable[] = { + VE::VM0, VE::VM1, VE::VM2, VE::VM3, VE::VM4, VE::VM5, + VE::VM6, VE::VM7, VE::VM8, VE::VM9, VE::VM10, VE::VM11, + VE::VM12, VE::VM13, VE::VM14, VE::VM15}; + +static const unsigned VM512RegDecoderTable[] = {VE::VMP0, VE::VMP1, VE::VMP2, + VE::VMP3, VE::VMP4, VE::VMP5, + VE::VMP6, VE::VMP7}; + static const unsigned MiscRegDecoderTable[] = { VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR, @@ -145,6 +164,40 @@ static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + unsigned Reg = VE::NoRegister; + if (RegNo == 255) + Reg = VE::VIX; + else if (RegNo > 63) + return MCDisassembler::Fail; + else + Reg = V64RegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + unsigned Reg = VMRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo % 2 || RegNo > 15) + return MCDisassembler::Fail; + unsigned Reg = VM512RegDecoderTable[RegNo / 2]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp new file mode 100644 index 000000000000..c4588926af9e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/LVLGen.cpp @@ -0,0 +1,137 @@ +//===-- LVLGen.cpp - LVL instruction generator ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VE.h" +#include "VESubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "lvl-gen" + +namespace { +struct LVLGen : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + static char ID; + LVLGen() : MachineFunctionPass(ID) {} + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) override; + + unsigned getVL(const MachineInstr &MI); + int getVLIndex(unsigned Opcode); +}; +char LVLGen::ID = 0; + +} // end of anonymous namespace + +FunctionPass *llvm::createLVLGenPass() { return new LVLGen; } + +int LVLGen::getVLIndex(unsigned Opcode) { + const MCInstrDesc &MCID = TII->get(Opcode); + + // If an instruction has VLIndex information, return it. + if (HAS_VLINDEX(MCID.TSFlags)) + return GET_VLINDEX(MCID.TSFlags); + + return -1; +} + +// returns a register holding a vector length. NoRegister is returned when +// this MI does not have a vector length. +unsigned LVLGen::getVL(const MachineInstr &MI) { + int Index = getVLIndex(MI.getOpcode()); + if (Index >= 0) + return MI.getOperand(Index).getReg(); + + return VE::NoRegister; +} + +bool LVLGen::runOnMachineBasicBlock(MachineBasicBlock &MBB) { +#define RegName(no) \ + (MBB.getParent()->getSubtarget<VESubtarget>().getRegisterInfo()->getName(no)) + + bool Changed = false; + bool HasRegForVL = false; + unsigned RegForVL; + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + MachineBasicBlock::iterator MI = I; + + // Check whether MI uses a vector length operand. If so, we prepare for VL + // register. We would like to reuse VL register as much as possible. We + // also would like to keep the number of LEA instructions as fewer as + // possible. Therefore, we use a regular scalar register to hold immediate + // values to load VL register. And try to reuse identical scalar registers + // to avoid new LVLr instructions as much as possible. + unsigned Reg = getVL(*MI); + if (Reg != VE::NoRegister) { + LLVM_DEBUG(dbgs() << "Vector instruction found: "); + LLVM_DEBUG(MI->dump()); + LLVM_DEBUG(dbgs() << "Vector length is " << RegName(Reg) << ". "); + LLVM_DEBUG(dbgs() << "Current VL is " + << (HasRegForVL ? RegName(RegForVL) : "unknown") + << ". "); + + if (!HasRegForVL || RegForVL != Reg) { + // Use VL, but a different value in a different scalar register. + // So, generate new LVL instruction just before the current instruction. + LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load " + << RegName(Reg) << ".\n"); + BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg); + HasRegForVL = true; + RegForVL = Reg; + Changed = true; + } else { + LLVM_DEBUG(dbgs() << "Reuse current VL.\n"); + } + } + // Check the update of a given scalar register holding an immediate value + // for VL register. Also, a call doesn't preserve VL register. + if (HasRegForVL) { + if (MI->definesRegister(RegForVL, TRI) || + MI->modifiesRegister(RegForVL, TRI) || + MI->killsRegister(RegForVL, TRI) || MI->isCall()) { + // The latest VL is needed to be updated, so disable HasRegForVL. 
+ LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is needed to be updated: "); + LLVM_DEBUG(MI->dump()); + HasRegForVL = false; + } + } + + ++I; + } + return Changed; +} + +bool LVLGen::runOnMachineFunction(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "********** Begin LVLGen **********\n"); + LLVM_DEBUG(dbgs() << "********** Function: " << F.getName() << '\n'); + LLVM_DEBUG(F.dump()); + + bool Changed = false; + + const VESubtarget &Subtarget = F.getSubtarget<VESubtarget>(); + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + + if (Changed) { + LLVM_DEBUG(dbgs() << "\n"); + LLVM_DEBUG(F.dump()); + } + LLVM_DEBUG(dbgs() << "********** End LVLGen **********\n"); + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h index 657cc513b3c5..6995007c6dc6 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h @@ -29,6 +29,7 @@ public: const MCSubtargetInfo &STI, raw_ostream &OS) override; // Autogenerated by tblgen. + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; bool printAliasInstr(const MCInst *, uint64_t Address, const MCSubtargetInfo &, raw_ostream &); void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &, diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp index a39cffc8f4a6..4c480c050274 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -56,8 +56,8 @@ static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) { static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty()) - CPU = "ve"; - return createVEMCSubtargetInfoImpl(TT, CPU, FS); + CPU = "generic"; + return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS); } static MCTargetStreamer * @@ -80,7 +80,7 @@ static MCInstPrinter *createVEMCInstPrinter(const Triple &T, return new VEInstPrinter(MAI, MII, MRI); } -extern "C" void LLVMInitializeVETargetMC() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetMC() { // Register the MC asm info. 
RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo); diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp index 65bd142fe0db..a95a299def88 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp @@ -16,7 +16,7 @@ Target &llvm::getTheVETarget() { return TheVETarget; } -extern "C" void LLVMInitializeVETargetInfo() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetInfo() { RegisterTarget<Triple::ve, /*HasJIT=*/false> X(getTheVETarget(), "ve", "VE", "VE"); } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h index 7ed7797cbb83..8c1fa840f19c 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h @@ -29,6 +29,7 @@ class MachineInstr; FunctionPass *createVEISelDag(VETargetMachine &TM); FunctionPass *createVEPromoteToI1Pass(); +FunctionPass *createLVLGenPass(); void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); @@ -333,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) { return true; } // (m)1 patterns - return (Val & (1UL << 63)) && isShiftedMask_64(Val); + return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val); } inline static bool isMImm32Val(uint32_t Val) { @@ -346,7 +347,25 @@ inline static bool isMImm32Val(uint32_t Val) { return true; } // (m)1 patterns - return (Val & (1 << 31)) && isShiftedMask_32(Val); + return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val); +} + +/// val2MImm - Convert an integer immediate value to target MImm immediate. +inline static uint64_t val2MImm(uint64_t Val) { + if (Val == 0) + return 0; // (0)1 + if (Val & (UINT64_C(1) << 63)) + return countLeadingOnes(Val); // (m)1 + return countLeadingZeros(Val) | 0x40; // (m)0 +} + +/// mimm2Val - Convert a target MImm immediate to an integer immediate value. +inline static uint64_t mimm2Val(uint64_t Val) { + if (Val == 0) + return 0; // (0)1 + if ((Val & 0x40) == 0) + return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1 + return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0 } inline unsigned M0(unsigned Val) { return Val + 64; } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td index 617a6ea458b6..9e8adcd42077 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VE.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td @@ -18,6 +18,9 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // VE Subtarget features. 
// +def FeatureEnableVPU + : SubtargetFeature<"vpu", "EnableVPU", "true", + "Enable the VPU">; //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions @@ -43,7 +46,7 @@ def VEAsmParser : AsmParser { class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"ve", []>; +def : Proc<"generic", []>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp index 86e3aa3d3fa1..08a75b6b8c55 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp @@ -60,6 +60,9 @@ public: static const char *getRegisterName(unsigned RegNo) { return VEInstPrinter::getRegisterName(RegNo); } + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &OS); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; }; } // end of anonymous namespace @@ -203,7 +206,7 @@ void VEAsmPrinter::lowerGETGOTAndEmitMCInsts(const MachineInstr *MI, // lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24) // and %got, %got, (32)0 // sic %plt - // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt) + // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%plt, %got) MCOperand cim24 = MCOperand::createImm(-24); MCOperand loImm = createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext); @@ -248,10 +251,10 @@ void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI, MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT - // lea %dst, %plt_lo(func)(-24) + // lea %dst, func@plt_lo(-24) // and %dst, %dst, (32)0 // sic %plt ; FIXME: is it safe to use %plt here? - // lea.sl %dst, %plt_hi(func)(%dst, %plt) + // lea.sl %dst, func@plt_hi(%plt, %dst) MCOperand cim24 = MCOperand::createImm(-24); MCOperand loImm = createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext); @@ -295,7 +298,7 @@ void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI, // lea %s0, sym@tls_gd_lo(-24) // and %s0, %s0, (32)0 // sic %lr - // lea.sl %s0, sym@tls_gd_hi(%s0, %lr) + // lea.sl %s0, sym@tls_gd_hi(%lr, %s0) // lea %s12, __tls_get_addr@plt_lo(8) // and %s12, %s12, (32)0 // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr) @@ -349,7 +352,42 @@ void VEAsmPrinter::emitInstruction(const MachineInstr *MI) { } while ((++I != E) && I->isInsideBundle()); // Delay slot check. } +void VEAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); + break; + default: + llvm_unreachable("<unknown operand type>"); + } +} + +// PrintAsmOperand - Print out an operand for an inline asm expression. +bool VEAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'r': + case 'v': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + // Force static initialization. 
-extern "C" void LLVMInitializeVEAsmPrinter() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmPrinter() { RegisterAsmPrinter<VEAsmPrinter> X(getTheVETarget()); } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td index 4f04dae884ab..93899c2cae3d 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td @@ -14,71 +14,133 @@ // Aurora VE //===----------------------------------------------------------------------===// def CC_VE_C_Stack: CallingConv<[ - // float --> need special handling like below. - // 0 4 - // +------+------+ - // | empty| float| - // +------+------+ - CCIfType<[f32], CCCustom<"allocateFloat">>, + // F128 are assigned to the stack in 16-byte aligned units + CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>, // All of the rest are assigned to the stack in 8-byte aligned units. CCAssignToStack<0, 8> ]>; -def CC_VE : CallingConv<[ +///// C Calling Convention (VE ABI v2.1) ///// +// +// Reference: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-ABI_v2.1.pdf +// +def CC_VE_C : CallingConv<[ // All arguments get passed in generic registers if there is space. - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, - - // bool, char, int, enum, long --> generic integer 32 bit registers - CCIfType<[i32], CCAssignToRegWithShadow< - [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Promote i1/i8/i16/i32 arguments to i64. + CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, - // float --> generic floating point 32 bit registers - CCIfType<[f32], CCAssignToRegWithShadow< - [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Convert float arguments to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + CCIfType<[f32], CCBitConvertToType<i64>>, - // long long/double --> generic 64 bit registers + // bool, char, int, enum, long, long long, float, double + // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // long double --> pair of generic 64 bit registers + // + // NOTE: If Q1 is allocated while SX1 is free, llvm tries to allocate SX1 for + // following operands, this masks SX1 to avoid such behavior. + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, + // Alternatively, they are assigned to the stack in 8-byte aligned units. CCDelegateTo<CC_VE_C_Stack> ]>; +///// Standard vararg C Calling Convention (VE ABI v2.1) ///// // All arguments get passed in stack for varargs function or non-prototyped // function. def CC_VE2 : CallingConv<[ - // float --> need special handling like below. - // 0 4 + // Promote i1/i8/i16/i32 arguments to i64. + CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, + + // Convert float arguments to i64 with padding. + // 63 31 0 // +------+------+ - // | empty| float| + // | float| 0 | // +------+------+ - CCIfType<[f32], CCCustom<"allocateFloat">>, + CCIfType<[f32], CCBitConvertToType<i64>>, + + // F128 are assigned to the stack in 16-byte aligned units + CCIfType<[f128], CCAssignToStack<16, 16>>, CCAssignToStack<0, 8> ]>; -def RetCC_VE : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, +def RetCC_VE_C : CallingConv<[ + // Promote i1/i8/i16/i32 return values to i64. 
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, - // bool, char, int, enum, long --> generic integer 32 bit registers - CCIfType<[i32], CCAssignToRegWithShadow< - [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, - - // float --> generic floating point 32 bit registers - CCIfType<[f32], CCAssignToRegWithShadow< - [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Convert float return values to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + CCIfType<[f32], CCBitConvertToType<i64>>, - // long long/double --> generic 64 bit registers + // bool, char, int, enum, long, long long, float, double + // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long double --> pair of generic 64 bit registers + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, +]>; + +///// Custom fastcc ///// +// +// This passes vector params and return values in registers. Scalar values are +// handled conforming to the standard cc. +def CC_VE_Fast : CallingConv<[ + // vector --> generic vector registers + CCIfType<[v256i32, v256f32, v256i64, v256f64], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + // TODO: make this conditional on packed mode + CCIfType<[v512i32, v512f32], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + + // vector mask --> generic vector mask registers + CCIfType<[v256i1], + CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>, + + // pair of vector mask --> generic vector mask registers + CCIfType<[v512i1], + CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], + [VM1, VM3, VM5]>>, + + // Follow the standard C CC for scalars. + CCDelegateTo<CC_VE_C> +]>; + +def RetCC_VE_Fast : CallingConv<[ + // vector --> generic vector registers + CCIfType<[v256i32, v256f32, v256i64, v256f64], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + // TODO: make this conditional on packed mode + CCIfType<[v512i32, v512f32], + CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>, + + // vector mask --> generic vector mask registers + CCIfType<[v256i1], + CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>, + + // pair of vector mask --> generic vector mask registers + CCIfType<[v512i1], + CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], + [VM1, VM3, VM5]>>, + + // Follow the standard C CC for scalars. + CCDelegateTo<RetCC_VE_C> ]>; // Callee-saved registers @@ -86,4 +148,6 @@ def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>; def CSR_NoRegs : CalleeSavedRegs<(add)>; // PreserveAll (clobbers s62,s63) - used for ve_grow_stack -def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>; +def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61), + (sequence "V%u", 0, 63), + (sequence "VM%u", 1, 15))>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp index 8b10e6466123..9e97d0eca833 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -8,6 +8,105 @@ // // This file contains the VE implementation of TargetFrameLowering class. // +// On VE, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. 
+// +// At function entry, the "frame" looks as follows: +// +// | | Higher address +// |----------------------------------------------| +// | Parameter area for this function | +// |----------------------------------------------| +// | Register save area (RSA) for this function | +// |----------------------------------------------| +// | Return address for this function | +// |----------------------------------------------| +// | Frame pointer for this function | +// |----------------------------------------------| <- sp +// | | Lower address +// +// VE doesn't use on demand stack allocation, so user code generated by LLVM +// needs to call VEOS to allocate stack frame. VE's ABI want to reduce the +// number of VEOS calls, so ABI requires to allocate not only RSA (in general +// CSR, callee saved register) area but also call frame at the prologue of +// caller function. +// +// After the prologue has run, the frame has the following general structure. +// Note that technically the last frame area (VLAs) doesn't get created until +// in the main function body, after the prologue is run. However, it's depicted +// here for completeness. +// +// | | Higher address +// |----------------------------------------------| +// | Parameter area for this function | +// |----------------------------------------------| +// | Register save area (RSA) for this function | +// |----------------------------------------------| +// | Return address for this function | +// |----------------------------------------------| +// | Frame pointer for this function | +// |----------------------------------------------| <- fp(=old sp) +// |.empty.space.to.make.part.below.aligned.in....| +// |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is +// |.alignment....................................| unknown at compile time) +// |----------------------------------------------| +// | Local variables of fixed size including spill| +// | slots | +// |----------------------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17) +// |..............................................| (size of this area is +// |..............................................| unknown at compile time) +// |----------------------------------------------| <- stack top (returned by +// | Parameter area for callee | alloca) +// |----------------------------------------------| +// | Register save area (RSA) for callee | +// |----------------------------------------------| +// | Return address for callee | +// |----------------------------------------------| +// | Frame pointer for callee | +// |----------------------------------------------| <- sp +// | | Lower address +// +// To access the data in a frame, at-compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitely needed when there are both VLAs and local +// variables with more-than-default alignment requirements. 
+// * A frame pointer is definitely needed when there are local variables with +// more-than-default alignment requirements. +// +// In addition, VE ABI defines RSA frame, return address, and frame pointer +// as follows: +// +// |----------------------------------------------| <- sp+176 +// | %s18...%s33 | +// |----------------------------------------------| <- sp+48 +// | Linkage area register (%s17) | +// |----------------------------------------------| <- sp+40 +// | Procedure linkage table register (%plt=%s16) | +// |----------------------------------------------| <- sp+32 +// | Global offset table register (%got=%s15) | +// |----------------------------------------------| <- sp+24 +// | Thread pointer register (%tp=%s14) | +// |----------------------------------------------| <- sp+16 +// | Return address | +// |----------------------------------------------| <- sp+8 +// | Frame pointer | +// |----------------------------------------------| <- sp+0 +// +// NOTE: This description is based on VE ABI and description in +// AArch64FrameLowering.cpp. Thanks a lot. //===----------------------------------------------------------------------===// #include "VEFrameLowering.h" @@ -38,48 +137,47 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF, MachineBasicBlock::iterator MBBI, uint64_t NumBytes, bool RequireFPUpdate) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + DebugLoc DL; + const VEInstrInfo &TII = *STI.getInstrInfo(); - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); // Insert following codes here as prologue // - // st %fp, 0(,%sp) - // st %lr, 8(,%sp) - // st %got, 24(,%sp) - // st %plt, 32(,%sp) - // st %s17, 40(,%sp) iff this function is using s17 as BP - // or %fp, 0, %sp - - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(0) - .addReg(VE::SX9); - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(8) - .addReg(VE::SX10); - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(24) - .addReg(VE::SX15); - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) - .addReg(VE::SX11) - .addImm(0) - .addImm(32) - .addReg(VE::SX16); + // st %fp, 0(, %sp) iff !isLeafProc + // st %lr, 8(, %sp) iff !isLeafProc + // st %got, 24(, %sp) iff hasGOT + // st %plt, 32(, %sp) iff hasGOT + // st %s17, 40(, %sp) iff hasBP + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(0) + .addReg(VE::SX9); + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(8) + .addReg(VE::SX10); + } + if (hasGOT(MF)) { + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(24) + .addReg(VE::SX15); + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(32) + .addReg(VE::SX16); + } if (hasBP(MF)) - BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) + BuildMI(MBB, MBBI, DL, TII.get(VE::STrii)) .addReg(VE::SX11) .addImm(0) .addImm(40) .addReg(VE::SX17); - BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9) - .addReg(VE::SX11) - .addImm(0); } void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, @@ -87,43 +185,42 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock::iterator MBBI, uint64_t NumBytes, bool RequireFPUpdate) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + DebugLoc DL; + const 
VEInstrInfo &TII = *STI.getInstrInfo(); - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); // Insert following codes here as epilogue // - // or %sp, 0, %fp - // ld %s17, 40(,%sp) iff this function is using s17 as BP - // ld %got, 32(,%sp) - // ld %plt, 24(,%sp) - // ld %lr, 8(,%sp) - // ld %fp, 0(,%sp) - - BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11) - .addReg(VE::SX9) - .addImm(0); + // ld %s17, 40(, %sp) iff hasBP + // ld %plt, 32(, %sp) iff hasGOT + // ld %got, 24(, %sp) iff hasGOT + // ld %lr, 8(, %sp) iff !isLeafProc + // ld %fp, 0(, %sp) iff !isLeafProc if (hasBP(MF)) - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17) + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX17) .addReg(VE::SX11) .addImm(0) .addImm(40); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16) - .addReg(VE::SX11) - .addImm(0) - .addImm(32); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX15) - .addReg(VE::SX11) - .addImm(0) - .addImm(24); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX10) - .addReg(VE::SX11) - .addImm(0) - .addImm(8); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX9) - .addReg(VE::SX11) - .addImm(0) - .addImm(0); + if (hasGOT(MF)) { + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX16) + .addReg(VE::SX11) + .addImm(0) + .addImm(32); + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX15) + .addReg(VE::SX11) + .addImm(0) + .addImm(24); + } + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX10) + .addReg(VE::SX11) + .addImm(0) + .addImm(8); + BuildMI(MBB, MBBI, DL, TII.get(VE::LDrii), VE::SX9) + .addReg(VE::SX11) + .addImm(0) + .addImm(0); + } } void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, @@ -131,37 +228,44 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock::iterator MBBI, int64_t NumBytes, MaybeAlign MaybeAlign) const { - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); + DebugLoc DL; + const VEInstrInfo &TII = *STI.getInstrInfo(); - if (NumBytes >= -64 && NumBytes < 63) { - BuildMI(MBB, MBBI, dl, TII.get(VE::ADDSLri), VE::SX11) + if (NumBytes == 0) { + // Nothing to do here. + } else if (isInt<7>(NumBytes)) { + // adds.l %s11, NumBytes@lo, %s11 + BuildMI(MBB, MBBI, DL, TII.get(VE::ADDSLri), VE::SX11) .addReg(VE::SX11) .addImm(NumBytes); - return; + } else if (isInt<32>(NumBytes)) { + // lea %s11, NumBytes@lo(, %s11) + BuildMI(MBB, MBBI, DL, TII.get(VE::LEArii), VE::SX11) + .addReg(VE::SX11) + .addImm(0) + .addImm(Lo_32(NumBytes)); + } else { + // Emit following codes. This clobbers SX13 which we always know is + // available here. + // lea %s13, NumBytes@lo + // and %s13, %s13, (32)0 + // lea.sl %sp, NumBytes@hi(%s13, %sp) + BuildMI(MBB, MBBI, DL, TII.get(VE::LEAzii), VE::SX13) + .addImm(0) + .addImm(0) + .addImm(Lo_32(NumBytes)); + BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX13) + .addReg(VE::SX13) + .addImm(M0(32)); + BuildMI(MBB, MBBI, DL, TII.get(VE::LEASLrri), VE::SX11) + .addReg(VE::SX11) + .addReg(VE::SX13) + .addImm(Hi_32(NumBytes)); } - // Emit following codes. This clobbers SX13 which we always know is - // available here. 
- // lea %s13,%lo(NumBytes) - // and %s13,%s13,(32)0 - // lea.sl %sp,%hi(NumBytes)(%sp, %s13) - BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzii), VE::SX13) - .addImm(0) - .addImm(0) - .addImm(Lo_32(NumBytes)); - BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX13) - .addReg(VE::SX13) - .addImm(M0(32)); - BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11) - .addReg(VE::SX11) - .addReg(VE::SX13) - .addImm(Hi_32(NumBytes)); - if (MaybeAlign) { // and %sp, %sp, Align-1 - BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11) + BuildMI(MBB, MBBI, DL, TII.get(VE::ANDrm), VE::SX11) .addReg(VE::SX11) .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value()))); } @@ -169,9 +273,8 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { - DebugLoc dl; - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); + DebugLoc DL; + const VEInstrInfo &TII = *STI.getInstrInfo(); // Emit following codes. It is not possible to insert multiple // BasicBlocks in PEI pass, so we emit two pseudo instructions here. @@ -198,22 +301,23 @@ void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, // EXTEND_STACK_GUARD pseudo will be simply eliminated by ExpandPostRA // pass. This pseudo is required to be at the next of EXTEND_STACK // pseudo in order to protect iteration loop in ExpandPostRA. - - BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK)); - BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK_GUARD)); + BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK)); + BuildMI(MBB, MBBI, DL, TII.get(VE::EXTEND_STACK_GUARD)); } void VEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo &MFI = MF.getFrameInfo(); const VEInstrInfo &TII = *STI.getInstrInfo(); const VERegisterInfo &RegInfo = *STI.getRegisterInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. - DebugLoc dl; - bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + DebugLoc DL; // FIXME: unfortunately, returning false from canRealignStack // actually just causes needsStackRealignment to return false, @@ -226,12 +330,17 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF, "stack re-alignment, but LLVM couldn't handle it " "(probably because it has a dynamic alloca)."); - // Get the number of bytes to allocate from the FrameInfo + // Get the number of bytes to allocate from the FrameInfo. + // This number of bytes is already aligned to ABI stack alignment. uint64_t NumBytes = MFI.getStackSize(); - // The VE ABI requires a reserved 176 bytes area at the top - // of stack as described in VESubtarget.cpp. So, we adjust it here. - NumBytes = STI.getAdjustedFrameSize(NumBytes); + // Adjust stack size if this function is not a leaf function since the + // VE ABI requires a reserved area at the top of stack as described in + // VEFrameLowering.cpp. + if (!FuncInfo->isLeafProc()) { + // NOTE: The number is aligned to ABI stack alignment after adjustment. + NumBytes = STI.getAdjustedFrameSize(NumBytes); + } // Finally, ensure that the size is sufficiently aligned for the // data on the stack. 
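As an aside (this sketch is not part of the diff above): the large-offset path of emitSPAdjustment builds the 64-bit stack adjustment from two 32-bit halves, "lea %s13, NumBytes@lo", then "and %s13, %s13, (32)0", then "lea.sl %sp, NumBytes@hi(%s13, %sp)". The minimal standalone C++ model below shows only the value-level arithmetic this relies on; it assumes lea sign-extends its 32-bit displacement (which the and with (32)0, i.e. 32 leading zeros followed by 32 ones, appears to compensate for) and that lea.sl adds both register operands plus the immediate shifted left by 32. The adjustSP helper name is hypothetical.

#include <cassert>
#include <cstdint>

// Model of: lea %s13, lo ; and %s13, %s13, (32)0 ; lea.sl %sp, hi(%s13, %sp)
static uint64_t adjustSP(uint64_t SP, int64_t NumBytes) {
  // lea %s13, NumBytes@lo : the 32-bit displacement is sign-extended.
  uint64_t S13 = static_cast<uint32_t>(NumBytes);
  if (S13 & UINT64_C(0x80000000))
    S13 |= UINT64_C(0xFFFFFFFF00000000);
  // and %s13, %s13, (32)0 : keep only the zero-extended low 32 bits again.
  S13 &= UINT64_C(0x00000000FFFFFFFF);
  // lea.sl %sp, NumBytes@hi(%s13, %sp) : add %s13, %sp and (high half << 32).
  uint64_t Hi = static_cast<uint64_t>(NumBytes) >> 32;
  return SP + S13 + (Hi << 32);
}

int main() {
  const int64_t Tests[] = {INT64_C(0x123456789AB0), -INT64_C(0x22345678900),
                           INT64_C(-176)};
  for (int64_t N : Tests)
    assert(adjustSP(UINT64_C(0x1000), N) ==
           UINT64_C(0x1000) + static_cast<uint64_t>(N));
  return 0;
}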
@@ -240,36 +349,34 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF, // Update stack size with corrected value. MFI.setStackSize(NumBytes); - // Emit Prologue instructions to save %lr + // Emit Prologue instructions to save multiple registers. emitPrologueInsns(MF, MBB, MBBI, NumBytes, true); + // Emit instructions to save SP in FP as follows if this is not a leaf + // function: + // or %fp, 0, %sp + if (!FuncInfo->isLeafProc()) + BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX9) + .addReg(VE::SX11) + .addImm(0); + // Emit stack adjust instructions MaybeAlign RuntimeAlign = NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None; + assert((RuntimeAlign == None || !FuncInfo->isLeafProc()) && + "SP has to be saved in order to align variable sized stack object!"); emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign); if (hasBP(MF)) { // Copy SP to BP. - BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17) + BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX17) .addReg(VE::SX11) .addImm(0); } // Emit stack extend instructions - emitSPExtend(MF, MBB, MBBI); - - Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true); - - // Emit ".cfi_def_cfa_register 30". - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Emit ".cfi_window_save". - CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + if (NumBytes != 0) + emitSPExtend(MF, MBB, MBBI); } MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr( @@ -289,21 +396,33 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr( void VEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + DebugLoc DL; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo &MFI = MF.getFrameInfo(); + const VEInstrInfo &TII = *STI.getInstrInfo(); uint64_t NumBytes = MFI.getStackSize(); - // Emit Epilogue instructions to restore %lr + // Emit instructions to retrieve original SP. + if (!FuncInfo->isLeafProc()) { + // If SP is saved in FP, retrieve it as follows: + // or %sp, 0, %fp iff !isLeafProc + BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11) + .addReg(VE::SX9) + .addImm(0); + } else { + // Emit stack adjust instructions. + emitSPAdjustment(MF, MBB, MBBI, NumBytes, None); + } + + // Emit Epilogue instructions to restore multiple registers. emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true); } // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas -// or if frame pointer elimination is disabled. For the case of VE, we don't -// implement FP eliminator yet, but we returns false from this function to -// not refer fp from generated code. +// or if frame pointer elimination is disabled. 
bool VEFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -320,34 +439,41 @@ bool VEFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); } -int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const { +bool VEFrameLowering::hasGOT(const MachineFunction &MF) const { + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + + // If a global base register is assigned (!= 0), GOT is used. + return FuncInfo->getGlobalBaseReg() != 0; +} + +StackOffset VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const VERegisterInfo *RegInfo = STI.getRegisterInfo(); - const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); bool isFixed = MFI.isFixedObjectIndex(FI); int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI); - if (FuncInfo->isLeafProc()) { - // If there's a leaf proc, all offsets need to be %sp-based, - // because we haven't caused %fp to actually point to our frame. + if (!hasFP(MF)) { + // If FP is not used, frame indexies are based on a %sp regiter. FrameReg = VE::SX11; // %sp - return FrameOffset + MF.getFrameInfo().getStackSize(); + return StackOffset::getFixed(FrameOffset + + MF.getFrameInfo().getStackSize()); } if (RegInfo->needsStackRealignment(MF) && !isFixed) { - // If there is dynamic stack realignment, all local object - // references need to be via %sp or %s17 (bp), to take account - // of the re-alignment. + // If data on stack require realignemnt, frame indexies are based on a %sp + // or %s17 (bp) register. If there is a variable sized object, bp is used. if (hasBP(MF)) FrameReg = VE::SX17; // %bp else FrameReg = VE::SX11; // %sp - return FrameOffset + MF.getFrameInfo().getStackSize(); + return StackOffset::getFixed(FrameOffset + + MF.getFrameInfo().getStackSize()); } - // Finally, default to using %fp. + // Use %fp by default. FrameReg = RegInfo->getFrameRegister(MF); - return FrameOffset; + return StackOffset::getFixed(FrameOffset); } bool VEFrameLowering::isLeafProc(MachineFunction &MF) const { @@ -367,8 +493,10 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - if (isLeafProc(MF)) { - VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>(); - MFI->setLeafProc(true); + // Functions having BP need to emit prologue and epilogue to allocate local + // buffer on the stack even if the function is a leaf function. 
+ if (isLeafProc(MF) && !hasBP(MF)) { + VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + FuncInfo->setLeafProc(true); } } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h index b548d663c504..99eb41189b25 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h @@ -15,6 +15,7 @@ #include "VE.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/TypeSize.h" namespace llvm { @@ -38,8 +39,10 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; - bool hasBP(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const override; + bool hasBP(const MachineFunction &MF) const; + bool hasGOT(const MachineFunction &MF) const; + // VE reserves argument space always for call sites in the function // immediately on entry of the current function. bool hasReservedCallFrame(const MachineFunction &MF) const override { @@ -48,8 +51,8 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; - int getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index f3d067d55fdb..761baa79b4ab 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -113,15 +113,6 @@ inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) { return Val; } -/// convMImmVal - Convert a mimm integer immediate value to target immediate. -inline static uint64_t convMImmVal(uint64_t Val) { - if (Val == 0) - return 0; // (0)1 - if (Val & (1UL << 63)) - return countLeadingOnes(Val); // (m)1 - return countLeadingZeros(Val) | 0x40; // (m)0 -} - //===--------------------------------------------------------------------===// /// VEDAGToDAGISel - VE specific code to select VE machine /// instructions for SelectionDAG operations. @@ -148,6 +139,7 @@ public: bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset); + bool selectADDRzi(SDValue N, SDValue &Base, SDValue &Offset); StringRef getPassName() const override { return "VE DAG->DAG Pattern Instruction Selection"; @@ -183,6 +175,14 @@ bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index, return false; } if (matchADDRrr(Addr, LHS, RHS)) { + // If the input is a pair of a frame-index and a register, move a + // frame-index to LHS. This generates MI with following operands. + // %dest, #FI, %reg, offset + // In the eliminateFrameIndex, above MI is converted to the following. 
+ // %dest, %fp, %reg, fi_offset + offset + if (dyn_cast<FrameIndexSDNode>(RHS)) + std::swap(LHS, RHS); + if (matchADDRri(RHS, Index, Offset)) { Base = LHS; return true; @@ -228,7 +228,7 @@ bool VEDAGToDAGISel::selectADDRzii(SDValue Addr, SDValue &Base, SDValue &Index, Addr.getOpcode() == ISD::TargetGlobalTLSAddress) return false; // direct calls. - if (ConstantSDNode *CN = cast<ConstantSDNode>(Addr)) { + if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) { if (isInt<32>(CN->getSExtValue())) { Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); @@ -250,6 +250,26 @@ bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base, return true; } +bool VEDAGToDAGISel::selectADDRzi(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (dyn_cast<FrameIndexSDNode>(Addr)) + return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (auto *CN = dyn_cast<ConstantSDNode>(Addr)) { + if (isInt<32>(CN->getSExtValue())) { + Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32); + return true; + } + } + return false; +} + bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) { if (dyn_cast<FrameIndexSDNode>(Addr)) return false; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp index ab720545dd83..d377f8e27cfd 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp @@ -13,6 +13,7 @@ #include "VEISelLowering.h" #include "MCTargetDesc/VEMCExpr.h" +#include "VEInstrBuilder.h" #include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" #include "VETargetMachine.h" @@ -21,6 +22,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -38,39 +40,280 @@ using namespace llvm; // Calling Convention Implementation //===----------------------------------------------------------------------===// -static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::f32: { - // Allocate stack like below - // 0 4 - // +------+------+ - // | empty| float| - // +------+------+ - // Use align=8 for dummy area to align the beginning of these 2 area. - State.AllocateStack(4, Align(8)); // for empty area - // Use align=4 for value to place it at just after the dummy area. 
- unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return true; - } +#include "VEGenCallingConv.inc" + +CCAssignFn *getReturnCC(CallingConv::ID CallConv) { + switch (CallConv) { default: - return false; + return RetCC_VE_C; + case CallingConv::Fast: + return RetCC_VE_Fast; } } -#include "VEGenCallingConv.inc" +CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) { + if (IsVarArg) + return CC_VE2; + switch (CallConv) { + default: + return CC_VE_C; + case CallingConv::Fast: + return CC_VE_Fast; + } +} bool VETargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = RetCC_VE; + CCAssignFn *RetCC = getReturnCC(CallConv); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC); } +static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64, + MVT::v256f32, MVT::v512f32, MVT::v256f64}; + +static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32}; + +void VETargetLowering::initRegisterClasses() { + // Set up the register classes. + addRegisterClass(MVT::i32, &VE::I32RegClass); + addRegisterClass(MVT::i64, &VE::I64RegClass); + addRegisterClass(MVT::f32, &VE::F32RegClass); + addRegisterClass(MVT::f64, &VE::I64RegClass); + addRegisterClass(MVT::f128, &VE::F128RegClass); + + if (Subtarget->enableVPU()) { + for (MVT VecVT : AllVectorVTs) + addRegisterClass(VecVT, &VE::V64RegClass); + addRegisterClass(MVT::v256i1, &VE::VMRegClass); + addRegisterClass(MVT::v512i1, &VE::VM512RegClass); + } +} + +void VETargetLowering::initSPUActions() { + const auto &TM = getTargetMachine(); + /// Load & Store { + + // VE doesn't have i1 sign extending load. + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } + + // VE doesn't have floating point extload/truncstore, so expand them. + for (MVT FPVT : MVT::fp_valuetypes()) { + for (MVT OtherFPVT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); + setTruncStoreAction(FPVT, OtherFPVT, Expand); + } + } + + // VE doesn't have fp128 load/store, so expand them in custom lower. + setOperationAction(ISD::LOAD, MVT::f128, Custom); + setOperationAction(ISD::STORE, MVT::f128, Custom); + + /// } Load & Store + + // Custom legalize address nodes into LO/HI parts. + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); + + /// VAARG handling { + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // VAARG needs to be lowered to access with 8 bytes alignment. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + // Use the default implementation. 
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + /// } VAARG handling + + /// Stack { + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + + // Use the default implementation. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + /// } Stack + + /// Branch { + + // VE doesn't have BRCOND + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // BR_JT is not implemented yet. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + /// } Branch + + /// Int Ops { + for (MVT IntVT : {MVT::i32, MVT::i64}) { + // VE has no REM or DIVREM operations. + setOperationAction(ISD::UREM, IntVT, Expand); + setOperationAction(ISD::SREM, IntVT, Expand); + setOperationAction(ISD::SDIVREM, IntVT, Expand); + setOperationAction(ISD::UDIVREM, IntVT, Expand); + + // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations. + setOperationAction(ISD::SHL_PARTS, IntVT, Expand); + setOperationAction(ISD::SRA_PARTS, IntVT, Expand); + setOperationAction(ISD::SRL_PARTS, IntVT, Expand); + + // VE has no MULHU/S or U/SMUL_LOHI operations. + // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type. + setOperationAction(ISD::MULHU, IntVT, Expand); + setOperationAction(ISD::MULHS, IntVT, Expand); + setOperationAction(ISD::UMUL_LOHI, IntVT, Expand); + setOperationAction(ISD::SMUL_LOHI, IntVT, Expand); + + // VE has no CTTZ, ROTL, ROTR operations. + setOperationAction(ISD::CTTZ, IntVT, Expand); + setOperationAction(ISD::ROTL, IntVT, Expand); + setOperationAction(ISD::ROTR, IntVT, Expand); + + // VE has 64 bits instruction which works as i64 BSWAP operation. This + // instruction works fine as i32 BSWAP operation with an additional + // parameter. Use isel patterns to lower BSWAP. + setOperationAction(ISD::BSWAP, IntVT, Legal); + + // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP + // operations. Use isel patterns for i64, promote for i32. + LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; + setOperationAction(ISD::BITREVERSE, IntVT, Act); + setOperationAction(ISD::CTLZ, IntVT, Act); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act); + setOperationAction(ISD::CTPOP, IntVT, Act); + + // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations. + // Use isel patterns for i64, promote for i32. + setOperationAction(ISD::AND, IntVT, Act); + setOperationAction(ISD::OR, IntVT, Act); + setOperationAction(ISD::XOR, IntVT, Act); + } + /// } Int Ops + + /// Conversion { + // VE doesn't have instructions for fp<->uint, so expand them by llvm + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // fp16 not supported + for (MVT FPVT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); + setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); + } + /// } Conversion + + /// Floating-point Ops { + /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem, + /// and fcmp. + + // VE doesn't have following floating point operations. + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + } + + // VE doesn't have fdiv of f128. 
+ setOperationAction(ISD::FDIV, MVT::f128, Expand); + + for (MVT FPVT : {MVT::f32, MVT::f64}) { + // f32 and f64 uses ConstantFP. f128 uses ConstantPool. + setOperationAction(ISD::ConstantFP, FPVT, Legal); + } + /// } Floating-point Ops + + /// Floating-point math functions { + + // VE doesn't have following floating point math functions. + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSQRT, VT, Expand); + } + + /// } Floating-point math functions + + /// Atomic instructions { + + setMaxAtomicSizeInBitsSupported(64); + setMinCmpXchgSizeInBits(32); + setSupportsUnalignedAtomics(false); + + // Use custom inserter for ATOMIC_FENCE. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + // Other atomic instructions. + for (MVT VT : MVT::integer_valuetypes()) { + // Support i8/i16 atomic swap. + setOperationAction(ISD::ATOMIC_SWAP, VT, Custom); + + // FIXME: Support "atmam" instructions. + setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand); + + // VE doesn't have follwing instructions. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand); + } + + /// } Atomic instructions + + /// SJLJ instructions { + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); + if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) + setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); + /// } SJLJ instructions + + // Intrinsic instructions + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); +} + +void VETargetLowering::initVPUActions() { + for (MVT LegalVecVT : AllVectorVTs) { + setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal); + // Translate all vector instructions with legal element types to VVP_* + // nodes. + // TODO We will custom-widen into VVP_* nodes in the future. While we are + // buildling the infrastructure for this, we only do this for legal vector + // VTs. +#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \ + setOperationAction(ISD::VP_OPC, LegalVecVT, Custom); +#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \ + setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom); +#include "VVPNodes.def" + } + + for (MVT LegalPackedVT : AllPackedVTs) { + setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom); + } +} + SDValue VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -85,7 +328,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, *DAG.getContext()); // Analyze return values. 
- CCInfo.AnalyzeReturn(Outs, RetCC_VE); + CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv)); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -94,6 +337,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); + assert(!VA.needsCustom() && "Unexpected custom lowering"); SDValue OutVal = OutVals[i]; // Integer return values must be sign or zero extended by the callee. @@ -109,12 +353,26 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, case CCValAssign::AExt: OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); break; + case CCValAssign::BCvt: { + // Convert a float return value to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Undef = SDValue( + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, Undef, OutVal, Sub_f32), + 0); + break; + } default: llvm_unreachable("Unknown loc info!"); } - assert(!VA.needsCustom() && "Unexpected custom lowering"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); // Guarantee that all emitted copies are stuck together with flags. @@ -138,7 +396,7 @@ SDValue VETargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); // Get the base offset of the incoming arguments stack space. - unsigned ArgsBaseOffset = 176; + unsigned ArgsBaseOffset = Subtarget->getRsaSize(); // Get the size of the preserved arguments area unsigned ArgsPreserved = 64; @@ -150,10 +408,11 @@ SDValue VETargetLowering::LowerFormalArguments( CCInfo.AllocateStack(ArgsPreserved, Align(8)); // We already allocated the preserved area, so the stack offset computed // by CC_VE would be correct now. - CCInfo.AnalyzeFormalArguments(Ins, CC_VE); + CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false)); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + assert(!VA.needsCustom() && "Unexpected custom lowering"); if (VA.isRegLoc()) { // This argument is passed in a register. // All integer register arguments are promoted by the caller to i64. @@ -163,11 +422,6 @@ SDValue VETargetLowering::LowerFormalArguments( MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); - // Get the high bits for i32 struct elements. - if (VA.getValVT() == MVT::i32 && VA.needsCustom()) - Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, - DAG.getConstant(32, DL, MVT::i32)); - // The caller promoted the argument, so insert an Assert?ext SDNode so we // won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -179,6 +433,20 @@ SDValue VETargetLowering::LowerFormalArguments( Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, DAG.getValueType(VA.getValVT())); break; + case CCValAssign::BCvt: { + // Extract a float argument from i64 with padding. 
+ // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, Arg, Sub_f32), + 0); + break; + } default: break; } @@ -194,9 +462,23 @@ SDValue VETargetLowering::LowerFormalArguments( // The registers are exhausted. This argument was passed on the stack. assert(VA.isMemLoc()); // The CC_VE_Full/Half functions compute stack offsets relative to the - // beginning of the arguments area at %fp+176. + // beginning of the arguments area at %fp + the size of reserved area. unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + + // Adjust offset for a float argument by adding 4 since the argument is + // stored in 8 bytes buffer with offset like below. LLVM generates + // 4 bytes load instruction, so need to adjust offset here. This + // adjustment is required in only LowerFormalArguments. In LowerCall, + // a float argument is converted to i64 first, and stored as 8 bytes + // data, which is required by ABI, so no need for adjustment. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + if (VA.getValVT() == MVT::f32) + Offset += 4; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); InVals.push_back( DAG.getLoad(VA.getValVT(), DL, Chain, @@ -215,7 +497,7 @@ SDValue VETargetLowering::LowerFormalArguments( // TODO: need to calculate offset correctly once we support f128. unsigned ArgOffset = ArgLocs.size() * 8; VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); - // Skip the 176 bytes of register save area. + // Skip the reserved area at the top of stack. FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset); return Chain; @@ -258,7 +540,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CLI.IsTailCall = false; // Get the base offset of the outgoing arguments stack space. - unsigned ArgsBaseOffset = 176; + unsigned ArgsBaseOffset = Subtarget->getRsaSize(); // Get the size of the preserved arguments area unsigned ArgsPreserved = 8 * 8u; @@ -270,7 +552,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCInfo.AllocateStack(ArgsPreserved, Align(8)); // We already allocated the preserved area, so the stack offset computed // by CC_VE would be correct now. - CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); + CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false)); // VE requires to use both register and stack for varargs or no-prototyped // functions. @@ -281,7 +563,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs2, *DAG.getContext()); if (UseBoth) - CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2); + CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true)); // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); @@ -371,6 +653,22 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::BCvt: { + // Convert a float argument to i64 with padding. 
+ // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Undef = SDValue( + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, Undef, Arg, Sub_f32), + 0); + break; + } } if (VA.isRegLoc()) { @@ -384,8 +682,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Create a store off the stack pointer for this argument. SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); - // The argument area starts at %fp+176 in the callee frame, - // %sp+176 in ours. + // The argument area starts at %fp/%sp + the size of reserved area. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); @@ -450,11 +747,12 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB) CLI.Ins[0].Flags.setInReg(); - RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); + RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv)); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; + assert(!VA.needsCustom() && "Unexpected custom lowering"); unsigned Reg = VA.getLocReg(); // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can @@ -472,11 +770,6 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InGlue = Chain.getValue(2); } - // Get the high bits for i32 struct elements. - if (VA.getValVT() == MVT::i32 && VA.needsCustom()) - RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, - DAG.getConstant(32, DL, MVT::i32)); - // The callee promoted the return value, so insert an Assert?ext SDNode so // we won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -488,6 +781,20 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, DAG.getValueType(VA.getValVT())); break; + case CCValAssign::BCvt: { + // Extract a float return value from i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, RV, Sub_f32), + 0); + break; + } default: break; } @@ -502,6 +809,15 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, return Chain; } +bool VETargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // VE uses 64 bit addressing, so we need multiple instructions to generate + // an address. Folding address with offset increases the number of + // instructions, so that we disable it here. Offsets will be folded in + // the DAG combine later if it worth to do so. + return false; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
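The BCvt cases above move an f32 across the call boundary in the upper half of an i64 register, leaving the low 32 bits as padding; the INSERT_SUBREG/EXTRACT_SUBREG pairs with VE::sub_f32 implement exactly that placement. A standalone sketch of the same bit layout in plain host code (helper names are illustrative, not part of this patch):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>

// Pack an f32 into bits 63..32 of a 64-bit container, padding the low half,
// mirroring the "| float | 0 |" diagrams in the calling-convention code above.
static uint64_t packF32HighHalf(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));     // bit-cast float -> u32
  return static_cast<uint64_t>(Bits) << 32; // place in bits 63..32
}

static float unpackF32HighHalf(uint64_t Reg) {
  uint32_t Bits = static_cast<uint32_t>(Reg >> 32); // take bits 63..32
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  uint64_t Reg = packF32HighHalf(1.5f);
  assert(unpackF32HighHalf(Reg) == 1.5f);
  std::cout << std::hex << Reg << "\n"; // 0x3fc0000000000000 for 1.5f
  return 0;
}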
@@ -531,30 +847,6 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } -bool VETargetLowering::hasAndNot(SDValue Y) const { - EVT VT = Y.getValueType(); - - // VE doesn't have vector and not instruction. - if (VT.isVector()) - return false; - - // VE allows different immediate values for X and Y where ~X & Y. - // Only simm7 works for X, and only mimm works for Y on VE. However, this - // function is used to check whether an immediate value is OK for and-not - // instruction as both X and Y. Generating additional instruction to - // retrieve an immediate value is no good since the purpose of this - // function is to convert a series of 3 instructions to another series of - // 3 instructions with better parallelism. Therefore, we return false - // for all immediate values now. - // FIXME: Change hasAndNot function to have two operands to make it work - // correctly with Aurora VE. - if (isa<ConstantSDNode>(Y)) - return false; - - // It's ok for generic registers. - return true; -} - VETargetLowering::VETargetLowering(const TargetMachine &TM, const VESubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -566,91 +858,15 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); - // Set up the register classes. - addRegisterClass(MVT::i32, &VE::I32RegClass); - addRegisterClass(MVT::i64, &VE::I64RegClass); - addRegisterClass(MVT::f32, &VE::F32RegClass); - addRegisterClass(MVT::f64, &VE::I64RegClass); - - /// Load & Store { - for (MVT FPVT : MVT::fp_valuetypes()) { - for (MVT OtherFPVT : MVT::fp_valuetypes()) { - // Turn FP extload into load/fpextend - setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); - - // Turn FP truncstore into trunc + store. - setTruncStoreAction(FPVT, OtherFPVT, Expand); - } - } - - // VE doesn't have i1 sign extending load - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setTruncStoreAction(VT, MVT::i1, Expand); - } - /// } Load & Store - - // Custom legalize address nodes into LO/HI parts. - MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); - setOperationAction(ISD::BlockAddress, PtrVT, Custom); - setOperationAction(ISD::GlobalAddress, PtrVT, Custom); - setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); - - /// VAARG handling { - setOperationAction(ISD::VASTART, MVT::Other, Custom); - // VAARG needs to be lowered to access with 8 bytes alignment. - setOperationAction(ISD::VAARG, MVT::Other, Custom); - // Use the default implementation. - setOperationAction(ISD::VACOPY, MVT::Other, Expand); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - /// } VAARG handling - - /// Stack { - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); - /// } Stack - - /// Int Ops { - for (MVT IntVT : {MVT::i32, MVT::i64}) { - // VE has no REM or DIVREM operations. 
- setOperationAction(ISD::UREM, IntVT, Expand); - setOperationAction(ISD::SREM, IntVT, Expand); - setOperationAction(ISD::SDIVREM, IntVT, Expand); - setOperationAction(ISD::UDIVREM, IntVT, Expand); - - setOperationAction(ISD::CTTZ, IntVT, Expand); - setOperationAction(ISD::ROTL, IntVT, Expand); - setOperationAction(ISD::ROTR, IntVT, Expand); - - // Use isel patterns for i32 and i64 - setOperationAction(ISD::BSWAP, IntVT, Legal); - setOperationAction(ISD::CTLZ, IntVT, Legal); - setOperationAction(ISD::CTPOP, IntVT, Legal); - - // Use isel patterns for i64, Promote i32 - LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; - setOperationAction(ISD::BITREVERSE, IntVT, Act); - } - /// } Int Ops - - /// Conversion { - // VE doesn't have instructions for fp<->uint, so expand them by llvm - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - - // fp16 not supported - for (MVT FPVT : MVT::fp_valuetypes()) { - setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); - setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); - } - /// } Conversion + initRegisterClasses(); + initSPUActions(); + initVPUActions(); setStackPointerRegisterToSaveRestore(VE::SX11); + // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::TRUNCATE); + // Set function alignment to 16 bytes setMinFunctionAlignment(Align(16)); @@ -667,14 +883,24 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((VEISD::NodeType)Opcode) { case VEISD::FIRST_NUMBER: break; - TARGET_NODE_CASE(Lo) - TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(CALL) + TARGET_NODE_CASE(EH_SJLJ_LONGJMP) + TARGET_NODE_CASE(EH_SJLJ_SETJMP) + TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH) TARGET_NODE_CASE(GETFUNPLT) TARGET_NODE_CASE(GETSTACKTOP) TARGET_NODE_CASE(GETTLSADDR) - TARGET_NODE_CASE(CALL) - TARGET_NODE_CASE(RET_FLAG) TARGET_NODE_CASE(GLOBAL_BASE_REG) + TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(Lo) + TARGET_NODE_CASE(MEMBARRIER) + TARGET_NODE_CASE(RET_FLAG) + TARGET_NODE_CASE(TS1AM) + TARGET_NODE_CASE(VEC_BROADCAST) + + // Register the VVP_* SDNodes. +#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME) +#include "VVPNodes.def" } #undef TARGET_NODE_CASE return nullptr; @@ -696,10 +922,17 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), 0, TF); + if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0), + CP->getAlign(), CP->getOffset(), TF); + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), TF); + if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) + return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF); + llvm_unreachable("Unhandled address SDNode"); } @@ -722,32 +955,24 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { // Handle PIC mode first. VE needs a got load for every variable! if (isPositionIndependent()) { - // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this - // function has calls. 
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - MFI.setHasCalls(true); auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); - if (isa<ConstantPoolSDNode>(Op) || + if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) || (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { // Create following instructions for local linkage PIC code. - // lea %s35, %gotoff_lo(.LCPI0_0) - // and %s35, %s35, (32)0 - // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) - // adds.l %s35, %s15, %s35 ; %s15 is GOT - // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + // lea %reg, label@gotoff_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, label@gotoff_hi(%reg, %got) SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, VEMCExpr::VK_VE_GOTOFF_LO32, DAG); SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); } // Create following instructions for not local linkage PIC code. - // lea %s35, %got_lo(.LCPI0_0) - // and %s35, %s35, (32)0 - // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) - // adds.l %s35, %s15, %s35 ; %s15 is GOT - // ld %s35, (,%s35) - // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + // lea %reg, label@got_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, label@got_hi(%reg) + // ld %reg, (%reg, %got) SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, VEMCExpr::VK_VE_GOT_LO32, DAG); SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); @@ -770,20 +995,222 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { /// Custom Lower { -SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, +// The mappings for emitLeading/TrailingFence for VE is designed by following +// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction *VETargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Acquire: + return nullptr; // Nothing to do + case AtomicOrdering::Release: + case AtomicOrdering::AcquireRelease: + return Builder.CreateFence(AtomicOrdering::Release); + case AtomicOrdering::SequentiallyConsistent: + if (!Inst->hasAtomicStore()) + return nullptr; // Nothing to do + return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction *VETargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: + return nullptr; // Nothing to do + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + return Builder.CreateFence(AtomicOrdering::Acquire); + case AtomicOrdering::SequentiallyConsistent: + return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); + } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( + 
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + + // VE uses Release consistency, so need a fence instruction if it is a + // cross-thread fence. + if (FenceSSID == SyncScope::System) { + switch (FenceOrdering) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + // No need to generate fencem instruction here. + break; + case AtomicOrdering::Acquire: + // Generate "fencem 2" as acquire fence. + return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, + DAG.getTargetConstant(2, DL, MVT::i32), + Op.getOperand(0)), + 0); + case AtomicOrdering::Release: + // Generate "fencem 1" as release fence. + return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, + DAG.getTargetConstant(1, DL, MVT::i32), + Op.getOperand(0)), + 0); + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + // Generate "fencem 3" as acq_rel and seq_cst fence. + // FIXME: "fencem 3" doesn't wait for for PCIe deveices accesses, + // so seq_cst may require more instruction for them. + return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, + DAG.getTargetConstant(3, DL, MVT::i32), + Op.getOperand(0)), + 0); + } + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +TargetLowering::AtomicExpansionKind +VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // We have TS1AM implementation for i8/i16/i32/i64, so use it. + if (AI->getOperation() == AtomicRMWInst::Xchg) { + return AtomicExpansionKind::None; + } + // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR. + + // Otherwise, expand it using compare and exchange instruction to not call + // __sync_fetch_and_* functions. + return AtomicExpansionKind::CmpXChg; +} + +static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag, + SDValue &Bits) { + SDLoc DL(Op); + AtomicSDNode *N = cast<AtomicSDNode>(Op); + SDValue Ptr = N->getOperand(1); + SDValue Val = N->getOperand(2); + EVT PtrVT = Ptr.getValueType(); + bool Byte = N->getMemoryVT() == MVT::i8; + // Remainder = AND Ptr, 3 + // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag) + // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag) + // Bits = Remainder << 3 + // NewVal = Val << Bits + SDValue Const3 = DAG.getConstant(3, DL, PtrVT); + SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3}); + SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32) + : DAG.getConstant(3, DL, MVT::i32); + Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder}); + Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3}); + return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits}); +} + +static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data, + SDValue Bits) { + SDLoc DL(Op); + EVT VT = Data.getValueType(); + bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8; + // NewData = Data >> Bits + // Result = NewData & 0xff ; If Byte is true (1 byte) + // Result = NewData & 0xffff ; If Byte is false (2 bytes) + + SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits); + return DAG.getNode(ISD::AND, DL, VT, + {NewData, DAG.getConstant(Byte ? 
0xff : 0xffff, DL, VT)}); +} + +SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicSDNode *N = cast<AtomicSDNode>(Op); + + if (N->getMemoryVT() == MVT::i8) { + // For i8, use "ts1am" + // Input: + // ATOMIC_SWAP Ptr, Val, Order + // + // Output: + // Remainder = AND Ptr, 3 + // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst. + // Bits = Remainder << 3 + // NewVal = Val << Bits + // + // Aligned = AND Ptr, -4 + // Data = TS1AM Aligned, Flag, NewVal + // + // NewData = Data >> Bits + // Result = NewData & 0xff ; 1 byte result + SDValue Flag; + SDValue Bits; + SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits); + + SDValue Ptr = N->getOperand(1); + SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), + {Ptr, DAG.getConstant(-4, DL, MVT::i64)}); + SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(), + DAG.getVTList(Op.getNode()->getValueType(0), + Op.getNode()->getValueType(1)), + {N->getChain(), Aligned, Flag, NewVal}, + N->getMemOperand()); + + SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits); + SDValue Chain = TS1AM.getValue(1); + return DAG.getMergeValues({Result, Chain}, DL); + } + if (N->getMemoryVT() == MVT::i16) { + // For i16, use "ts1am" + SDValue Flag; + SDValue Bits; + SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits); + + SDValue Ptr = N->getOperand(1); + SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), + {Ptr, DAG.getConstant(-4, DL, MVT::i64)}); + SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(), + DAG.getVTList(Op.getNode()->getValueType(0), + Op.getNode()->getValueType(1)), + {N->getChain(), Aligned, Flag, NewVal}, + N->getMemOperand()); + + SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits); + SDValue Chain = TS1AM.getValue(1); + return DAG.getMergeValues({Result, Chain}, DL); + } + // Otherwise, let llvm legalize it. 
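The i8/i16 paths above lower an atomic swap onto a 4-byte aligned word: compute the byte offset inside the word, derive a lane flag and a bit shift from it, and let ts1am swap only the selected lanes. A host-side sketch of that lane arithmetic for the i8 case, assuming a little-endian layout as on VE; the ts1am itself is emulated with a plain read-modify-write and the names are illustrative, not part of this patch:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>

// Software stand-in for the "ts1am" word swap: store the selected byte lane
// of NewVal into the aligned word and return the word's previous contents.
static uint8_t atomicSwapByte(unsigned char *Mem, uint64_t Addr, uint8_t Val) {
  uint64_t Remainder = Addr & 3;           // Remainder = AND Ptr, 3
  uint64_t Bits = Remainder << 3;          // Bits = Remainder << 3
  uint32_t NewVal = uint32_t(Val) << Bits; // NewVal = Val << Bits
  uint64_t Aligned = Addr & ~uint64_t(3);  // Aligned = AND Ptr, -4

  uint32_t OldWord;                        // ts1am yields the old word and
  std::memcpy(&OldWord, Mem + Aligned, 4); // updates the lane picked by
  uint32_t Mask = 0xffu << Bits;           // Flag = 1 << Remainder
  uint32_t NewWord = (OldWord & ~Mask) | (NewVal & Mask);
  std::memcpy(Mem + Aligned, &NewWord, 4);

  return uint8_t((OldWord >> Bits) & 0xff); // Result = (Data >> Bits) & 0xff
}

int main() {
  unsigned char Mem[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
  uint8_t Old = atomicSwapByte(Mem, /*Addr=*/2, /*Val=*/0xAB);
  assert(Old == 0x33 && Mem[2] == 0xAB && Mem[1] == 0x22);
  std::cout << "previous byte: 0x" << std::hex << unsigned(Old) << "\n";
  return 0;
}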
+ return Op; +} + +SDValue VETargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return makeAddress(Op, DAG); } -SDValue VETargetLowering::LowerBlockAddress(SDValue Op, +SDValue VETargetLowering::lowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { return makeAddress(Op, DAG); } SDValue -VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, +VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); + SDLoc DL(Op); // Generate the following code: // t1: ch,glue = callseq_start t0, 0, 0 @@ -799,13 +1226,13 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( DAG.getMachineFunction(), CallingConv::C); - Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); + Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL); SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; - Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), - DAG.getIntPtrConstant(0, dl, true), - Chain.getValue(1), dl); - Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); + Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true), + DAG.getIntPtrConstant(0, DL, true), + Chain.getValue(1), DL); + Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1)); // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -820,17 +1247,133 @@ VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, return Chain; } -SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, +SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // The current implementation of nld (2.26) doesn't allow local exec model // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always // generate the general dynamic model code sequence. // // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf - return LowerToTLSGeneralDynamicModel(Op, DAG); + return lowerToTLSGeneralDynamicModel(Op, DAG); +} + +SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +// Lower a f128 load into two f64 loads. +static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode()); + assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type"); + unsigned Alignment = LdNode->getAlign().value(); + if (Alignment > 8) + Alignment = 8; + + SDValue Lo64 = + DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(), + LdNode->getPointerInfo(), Alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT AddrVT = LdNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(), + DAG.getConstant(8, DL, AddrVT)); + SDValue Hi64 = + DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr, + LdNode->getPointerInfo(), Alignment, + LdNode->isVolatile() ? 
MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32); + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDNode *InFP128 = + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128, + SDValue(InFP128, 0), Hi64, SubRegEven); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128, + SDValue(InFP128, 0), Lo64, SubRegOdd); + SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1), + SDValue(Hi64.getNode(), 1)}; + SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(InFP128, 0), OutChain}; + return DAG.getMergeValues(Ops, DL); } -SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { +SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { + LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode()); + + SDValue BasePtr = LdNode->getBasePtr(); + if (isa<FrameIndexSDNode>(BasePtr.getNode())) { + // Do not expand store instruction with frame index here because of + // dependency problems. We expand it later in eliminateFrameIndex(). + return Op; + } + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return lowerLoadF128(Op, DAG); + + return Op; +} + +// Lower a f128 store into two f64 stores. +static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32); + + SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64, + StNode->getValue(), SubRegEven); + SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64, + StNode->getValue(), SubRegOdd); + + unsigned Alignment = StNode->getAlign().value(); + if (Alignment > 8) + Alignment = 8; + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDValue OutChains[2]; + OutChains[0] = + DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0), + StNode->getBasePtr(), MachinePointerInfo(), Alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT AddrVT = StNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(), + DAG.getConstant(8, DL, AddrVT)); + OutChains[1] = + DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr, + MachinePointerInfo(), Alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); +} + +SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = StNode->getBasePtr(); + if (isa<FrameIndexSDNode>(BasePtr.getNode())) { + // Do not expand store instruction with frame index here because of + // dependency problems. We expand it later in eliminateFrameIndex(). + return Op; + } + + EVT MemVT = StNode->getMemoryVT(); + if (MemVT == MVT::f128) + return lowerStoreF128(Op, DAG); + + // Otherwise, ask llvm to expand it. 
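Both lowerLoadF128 and lowerStoreF128 above split an f128 access into two f64 halves, with the low 64 bits at offset 0 and the high 64 bits at offset 8 (paired into the sub_odd/sub_even halves of the f128 register). A minimal sketch of that memory layout in illustrative host code, not VE or LLVM API:

#include <cassert>
#include <cstdint>
#include <cstring>

struct F128Parts {
  uint64_t Lo; // sub_odd half, stored at addr + 0
  uint64_t Hi; // sub_even half, stored at addr + 8
};

static void storeF128(unsigned char *Addr, F128Parts V) {
  std::memcpy(Addr + 0, &V.Lo, 8); // Lo64 to 0(addr)
  std::memcpy(Addr + 8, &V.Hi, 8); // Hi64 to 8(addr)
}

static F128Parts loadF128(const unsigned char *Addr) {
  F128Parts V;
  std::memcpy(&V.Lo, Addr + 0, 8);
  std::memcpy(&V.Hi, Addr + 8, 8);
  return V;
}

int main() {
  unsigned char Buf[16];
  storeF128(Buf, {0x1122334455667788ULL, 0x99aabbccddeeff00ULL});
  F128Parts V = loadF128(Buf);
  assert(V.Lo == 0x1122334455667788ULL && V.Hi == 0x99aabbccddeeff00ULL);
  return 0;
}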
+ return SDValue(); +} + +SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -849,7 +1392,7 @@ SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } -SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { +SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); @@ -862,7 +1405,19 @@ SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = VAList.getValue(1); SDValue NextPtr; - if (VT == MVT::f32) { + if (VT == MVT::f128) { + // VE f128 values must be stored with 16 bytes alignment. We doesn't + // know the actual alignment of VAList, so we take alignment of it + // dyanmically. + int Align = 16; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-Align, DL, PtrVT)); + // Increment the pointer, VAList, by 16 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL)); + } else if (VT == MVT::f32) { // float --> need special handling like below. // 0 4 // +------+------+ @@ -955,22 +1510,1325 @@ SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, + Op.getOperand(0)); +} + +static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + SDLoc DL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = TLI.getPointerTy(MF.getDataLayout()); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + unsigned Depth = Op.getConstantOperandVal(0); + const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned FrameReg = RegInfo->getFrameRegister(MF); + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(), + FrameAddr, MachinePointerInfo()); + return FrameAddr; +} + +static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget); + + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Offset = DAG.getConstant(8, DL, VT); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, DL, VT, 
FrameAddr, Offset), + MachinePointerInfo()); +} + +SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + default: // Don't custom lower most intrinsics. + return SDValue(); + case Intrinsic::eh_sjlj_lsda: { + MachineFunction &MF = DAG.getMachineFunction(); + MVT VT = Op.getSimpleValueType(); + const VETargetMachine *TM = + static_cast<const VETargetMachine *>(&DAG.getTarget()); + + // Create GCC_except_tableXX string. The real symbol for that will be + // generated in EHStreamer::emitExceptionTable() later. So, we just + // borrow it's name here. + TM->getStrList()->push_back(std::string( + (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str())); + SDValue Addr = + DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0); + if (isPositionIndependent()) { + Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr); + } + return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } +} + +static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) { + if (!isa<BuildVectorSDNode>(N)) + return false; + const auto *BVN = cast<BuildVectorSDNode>(N); + + // Find first non-undef insertion. + unsigned Idx; + for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) { + auto ElemV = BVN->getOperand(Idx); + if (!ElemV->isUndef()) + break; + } + // Catch the (hypothetical) all-undef case. + if (Idx == BVN->getNumOperands()) + return false; + // Remember insertion. + UniqueIdx = Idx++; + // Verify that all other insertions are undef. + for (; Idx < BVN->getNumOperands(); ++Idx) { + auto ElemV = BVN->getOperand(Idx); + if (!ElemV->isUndef()) + return false; + } + return true; +} + +static SDValue getSplatValue(SDNode *N) { + if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) { + return BuildVec->getSplatValue(); + } + return SDValue(); +} + +SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned NumEls = Op.getValueType().getVectorNumElements(); + MVT ElemVT = Op.getSimpleValueType().getVectorElementType(); + + // If there is just one element, expand to INSERT_VECTOR_ELT. + unsigned UniqueIdx; + if (getUniqueInsertion(Op.getNode(), UniqueIdx)) { + SDValue AccuV = DAG.getUNDEF(Op.getValueType()); + auto ElemV = Op->getOperand(UniqueIdx); + SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV, + ElemV, IdxV); + } + + // Else emit a broadcast. 
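The getUniqueInsertion helper a few lines above accepts a BUILD_VECTOR only when exactly one operand is defined, so the node can be rewritten as a single INSERT_VECTOR_ELT into an undef vector. The same scan in standalone form, with std::nullopt standing in for undef operands (illustrative only):

#include <cassert>
#include <optional>
#include <vector>

static bool getUniqueInsertion(const std::vector<std::optional<int>> &Lanes,
                               unsigned &UniqueIdx) {
  unsigned Idx = 0;
  // Find the first defined lane.
  while (Idx < Lanes.size() && !Lanes[Idx])
    ++Idx;
  if (Idx == Lanes.size())
    return false; // all lanes undef
  UniqueIdx = Idx++;
  // Every remaining lane must be undef for the insertion to be unique.
  for (; Idx < Lanes.size(); ++Idx)
    if (Lanes[Idx])
      return false;
  return true;
}

int main() {
  unsigned Idx = 0;
  assert(getUniqueInsertion({std::nullopt, 7, std::nullopt}, Idx) && Idx == 1);
  assert(!getUniqueInsertion({1, std::nullopt, 2}, Idx));
  assert(!getUniqueInsertion({std::nullopt, std::nullopt}, Idx));
  return 0;
}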
+ if (SDValue ScalarV = getSplatValue(Op.getNode())) { + // lower to VEC_BROADCAST + MVT LegalResVT = MVT::getVectorVT(ElemVT, 256); + + auto AVL = DAG.getConstant(NumEls, DL, MVT::i32); + return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0), + AVL); + } + + // Expand + return SDValue(); +} + SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { + unsigned Opcode = Op.getOpcode(); + if (ISD::isVPOpcode(Opcode)) + return lowerToVVP(Op, DAG); + + switch (Opcode) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::ATOMIC_FENCE: + return lowerATOMIC_FENCE(Op, DAG); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_SWAP(Op, DAG); case ISD::BlockAddress: - return LowerBlockAddress(Op, DAG); + return lowerBlockAddress(Op, DAG); + case ISD::ConstantPool: + return lowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_SETJMP: + return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_SETUP_DISPATCH: + return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); + case ISD::FRAMEADDR: + return lowerFRAMEADDR(Op, DAG, *this, Subtarget); case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); + return lowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: - return LowerGlobalTLSAddress(Op, DAG); + return lowerGlobalTLSAddress(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::JumpTable: + return lowerJumpTable(Op, DAG); + case ISD::LOAD: + return lowerLOAD(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG, *this, Subtarget); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::STORE: + return lowerSTORE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG); + return lowerVASTART(Op, DAG); case ISD::VAARG: - return LowerVAARG(Op, DAG); + return lowerVAARG(Op, DAG); + + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); + +#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME: +#include "VVPNodes.def" + return lowerToVVP(Op, DAG); } } /// } Custom Lower + +void VETargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + case ISD::ATOMIC_SWAP: + // Let LLVM expand atomic swap instruction through LowerOperation. + return; + default: + LLVM_DEBUG(N->dumpr(&DAG)); + llvm_unreachable("Do not know how to custom type legalize this operation!"); + } +} + +/// JumpTable for VE. +/// +/// VE cannot generate relocatable symbol in jump table. VE cannot +/// generate expressions using symbols in both text segment and data +/// segment like below. +/// .4byte .LBB0_2-.LJTI0_0 +/// So, we generate offset from the top of function like below as +/// a custom label. +/// .4byte .LBB0_2-<function name> + +unsigned VETargetLowering::getJumpTableEncoding() const { + // Use custom label for PIC. + if (isPositionIndependent()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +const MCExpr *VETargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned Uid, MCContext &Ctx) const { + assert(isPositionIndependent()); + + // Generate custom label for PIC like below. 
+ // .4bytes .LBB0_2-<function name> + const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data()); + const auto *Base = MCSymbolRefExpr::create(Sym, Ctx); + return MCBinaryExpr::createSub(Value, Base, Ctx); +} + +SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + assert(isPositionIndependent()); + SDLoc DL(Table); + Function *Function = &DAG.getMachineFunction().getFunction(); + assert(Function != nullptr); + auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace()); + + // In the jump table, we have following values in PIC mode. + // .4bytes .LBB0_2-<function name> + // We need to add this value and the address of this function to generate + // .LBB0_2 label correctly under PIC mode. So, we want to generate following + // instructions: + // lea %reg, fun@gotoff_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, fun@gotoff_hi(%reg, %got) + // In order to do so, we need to genarate correctly marked DAG node using + // makeHiLoPair. + SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy); + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy); + return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo); +} + +Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock *TargetBB, + const DebugLoc &DL) const { + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + const TargetRegisterClass *RC = &VE::I64RegClass; + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + Register Result = MRI.createVirtualRegister(RC); + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. + // lea %Tmp1, TargetBB@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32); + } else { + // Create following instructions for non-PIC code. 
+ // lea %Tmp1, TargetBB@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, TargetBB@hi(%Tmp2) + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addMBB(TargetBB, VEMCExpr::VK_VE_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result) + .addReg(Tmp2, getKillRegState(true)) + .addImm(0) + .addMBB(TargetBB, VEMCExpr::VK_VE_HI32); + } + return Result; +} + +Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + StringRef Symbol, const DebugLoc &DL, + bool IsLocal = false, + bool IsCall = false) const { + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + const TargetRegisterClass *RC = &VE::I64RegClass; + Register Result = MRI.createVirtualRegister(RC); + + if (isPositionIndependent()) { + if (IsCall && !IsLocal) { + // Create following instructions for non-local linkage PIC code function + // calls. These instructions uses IC and magic number -24, so we expand + // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction. + // lea %Reg, Symbol@plt_lo(-24) + // and %Reg, %Reg, (32)0 + // sic %s16 + // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT + BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result) + .addExternalSymbol("abort"); + } else if (IsLocal) { + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + // Create following instructions for local linkage PIC code. + // lea %Tmp1, Symbol@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32); + } else { + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + // Create following instructions for not local linkage PIC code. + // lea %Tmp1, Symbol@got_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + // ld %Result, 0(%Tmp3) + Register Tmp3 = MRI.createVirtualRegister(RC); + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32); + BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result) + .addReg(Tmp3, getKillRegState(true)) + .addImm(0) + .addImm(0); + } + } else { + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + // Create following instructions for non-PIC code. 
+ // lea %Tmp1, Symbol@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Result, Symbol@hi(%Tmp2) + BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32); + BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result) + .addReg(Tmp2, getKillRegState(true)) + .addImm(0) + .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32); + } + return Result; +} + +void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, + int FI, int Offset) const { + DebugLoc DL = MI.getDebugLoc(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + Register LabelReg = + prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL); + + // Store an address of DispatchBB to a given jmpbuf[1] where has next IC + // referenced by longjmp (throw) later. + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii)); + addFrameReference(MIB, FI, Offset); // jmpbuf[1] + MIB.addReg(LabelReg, getKillRegState(true)); +} + +MachineBasicBlock * +VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = ++MBB->getIterator(); + + // Memory Reference. + SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), + MI.memoperands_end()); + Register BufReg = MI.getOperand(1).getReg(); + + Register DstReg; + + DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; + Register MainDestReg = MRI.createVirtualRegister(RC); + Register RestoreDestReg = MRI.createVirtualRegister(RC); + + // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following + // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`. + // + // ThisMBB: + // buf[3] = %s17 iff %s17 is used as BP + // buf[1] = RestoreMBB as IC after longjmp + // # SjLjSetup RestoreMBB + // + // MainMBB: + // v_main = 0 + // + // SinkMBB: + // v = phi(v_main, MainMBB, v_restore, RestoreMBB) + // ... + // + // RestoreMBB: + // %s17 = buf[3] = iff %s17 is used as BP + // v_restore = 1 + // goto SinkMBB + + MachineBasicBlock *ThisMBB = MBB; + MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, MainMBB); + MF->insert(I, SinkMBB); + MF->push_back(RestoreMBB); + RestoreMBB->setHasAddressTaken(); + + // Transfer the remainder of BB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // ThisMBB: + Register LabelReg = + prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL); + + // Store BP in buf[3] iff this function is using BP. 
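The SjLj setjmp/longjmp lowering in this file reads and writes the jump buffer at byte offsets 0, 8, 16, and 24: buf[0] holds the saved FP, buf[1] the resume IC, buf[2] the saved SP, and buf[3] the saved BP when the function uses one. A sketch of that implied layout (field names are illustrative, not from the patch):

#include <cstddef>
#include <cstdint>

struct VESjLjBuf {
  uint64_t SavedFP;  // buf[0], offset 0:  frame pointer, reloaded by longjmp
  uint64_t ResumeIC; // buf[1], offset 8:  address of the restore block
  uint64_t SavedSP;  // buf[2], offset 16: stack pointer, reloaded by longjmp
  uint64_t SavedBP;  // buf[3], offset 24: base pointer, only if used
};

static_assert(sizeof(VESjLjBuf) == 32, "four 8-byte slots");
static_assert(offsetof(VESjLjBuf, ResumeIC) == 8, "IC lives at buf[1]");
static_assert(offsetof(VESjLjBuf, SavedBP) == 24, "BP lives at buf[3]");

int main() { return 0; }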
+ const VEFrameLowering *TFI = Subtarget->getFrameLowering(); + if (TFI->hasBP(*MF)) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii)); + MIB.addReg(BufReg); + MIB.addImm(0); + MIB.addImm(24); + MIB.addReg(VE::SX17); + MIB.setMemRefs(MMOs); + } + + // Store IP in buf[1]. + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii)); + MIB.add(MI.getOperand(1)); // we can preserve the kill flags here. + MIB.addImm(0); + MIB.addImm(8); + MIB.addReg(LabelReg, getKillRegState(true)); + MIB.setMemRefs(MMOs); + + // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`. + + // Insert setup. + MIB = + BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB); + + const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + MIB.addRegMask(RegInfo->getNoPreservedMask()); + ThisMBB->addSuccessor(MainMBB); + ThisMBB->addSuccessor(RestoreMBB); + + // MainMBB: + BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg) + .addImm(0) + .addImm(0) + .addImm(0); + MainMBB->addSuccessor(SinkMBB); + + // SinkMBB: + BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg) + .addReg(MainDestReg) + .addMBB(MainMBB) + .addReg(RestoreDestReg) + .addMBB(RestoreMBB); + + // RestoreMBB: + // Restore BP from buf[3] iff this function is using BP. The address of + // buf is in SX10. + // FIXME: Better to not use SX10 here + if (TFI->hasBP(*MF)) { + MachineInstrBuilder MIB = + BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17); + MIB.addReg(VE::SX10); + MIB.addImm(0); + MIB.addImm(24); + MIB.setMemRefs(MMOs); + } + BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg) + .addImm(0) + .addImm(0) + .addImm(1); + BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB); + RestoreMBB->addSuccessor(SinkMBB); + + MI.eraseFromParent(); + return SinkMBB; +} + +MachineBasicBlock * +VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference. + SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), + MI.memoperands_end()); + Register BufReg = MI.getOperand(0).getReg(); + + Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + Register FP = VE::SX9; + Register SP = VE::SX11; + + MachineInstrBuilder MIB; + + MachineBasicBlock *ThisMBB = MBB; + + // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions. + // + // ThisMBB: + // %fp = load buf[0] + // %jmp = load buf[1] + // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB + // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp. + // jmp %jmp + + // Reload FP. + MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP); + MIB.addReg(BufReg); + MIB.addImm(0); + MIB.addImm(0); + MIB.setMemRefs(MMOs); + + // Reload IP. + MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp); + MIB.addReg(BufReg); + MIB.addImm(0); + MIB.addImm(8); + MIB.setMemRefs(MMOs); + + // Copy BufReg to SX10 for later use in setjmp. + // FIXME: Better to not use SX10 here + BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10) + .addReg(BufReg) + .addImm(0); + + // Reload SP. + MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP); + MIB.add(MI.getOperand(0)); // we can preserve the kill flags here. + MIB.addImm(0); + MIB.addImm(16); + MIB.setMemRefs(MMOs); + + // Jump. 
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t)) + .addReg(Tmp, getKillRegState(true)) + .addImm(0); + + MI.eraseFromParent(); + return ThisMBB; +} + +MachineBasicBlock * +VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = BB->getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + int FI = MFI.getFunctionContextIndex(); + + // Get a mapping of the call site numbers to all of the landing pads they're + // associated with. + DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad; + unsigned MaxCSNum = 0; + for (auto &MBB : *MF) { + if (!MBB.isEHPad()) + continue; + + MCSymbol *Sym = nullptr; + for (const auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + + assert(MI.isEHLabel() && "expected EH_LABEL"); + Sym = MI.getOperand(0).getMCSymbol(); + break; + } + + if (!MF->hasCallSiteLandingPad(Sym)) + continue; + + for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { + CallSiteNumToLPad[CSI].push_back(&MBB); + MaxCSNum = std::max(MaxCSNum, CSI); + } + } + + // Get an ordered list of the machine basic blocks for the jump table. + std::vector<MachineBasicBlock *> LPadList; + SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs; + LPadList.reserve(CallSiteNumToLPad.size()); + + for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { + for (auto &LP : CallSiteNumToLPad[CSI]) { + LPadList.push_back(LP); + InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); + } + } + + assert(!LPadList.empty() && + "No landing pad destinations for the dispatch jump table!"); + + // The %fn_context is allocated like below (from --print-after=sjljehprepare): + // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] } + // + // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72. + // First `i64` is callsite, so callsite is FI+8. + static const int OffsetIC = 72; + static const int OffsetCS = 8; + + // Create the MBBs for the dispatch code like following: + // + // ThisMBB: + // Prepare DispatchBB address and store it to buf[1]. + // ... + // + // DispatchBB: + // %s15 = GETGOT iff isPositionIndependent + // %callsite = load callsite + // brgt.l.t #size of callsites, %callsite, DispContBB + // + // TrapBB: + // Call abort. + // + // DispContBB: + // %breg = address of jump table + // %pc = load and calculate next pc from %breg and %callsite + // jmp %pc + + // Shove the dispatch's address into the return slot in the function context. + MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); + DispatchBB->setIsEHPad(true); + + // Trap BB will causes trap like `assert(0)`. + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(TrapBB); + + MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(DispContBB); + + // Insert MBBs. + MF->push_back(DispatchBB); + MF->push_back(DispContBB); + MF->push_back(TrapBB); + + // Insert code to call abort in the TrapBB. + Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL, + /* Local */ false, /* Call */ true); + BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10) + .addReg(Abort, getKillRegState(true)) + .addImm(0) + .addImm(0); + + // Insert code into the entry block that creates and registers the function + // context. 
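  // Sketching the offset arithmetic under the usual LP64 layout: the context
  // is { i8* (8) + i64 (8) + [4 x i64] (32) + i8* (8) + i8* (8) } = 64 bytes
  // followed by the [5 x i8*] jmpbuf, so jmpbuf[1] lands at 64 + 8 = 72
  // (OffsetIC) and the leading call-site i64 at offset 8 (OffsetCS), matching
  // the constants defined above.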
+ setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC); + + // Create the jump table and associated information + unsigned JTE = getJumpTableEncoding(); + MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); + unsigned MJTI = JTI->createJumpTableIndex(LPadList); + + const VERegisterInfo &RI = TII->getRegisterInfo(); + // Add a register mask with no preserved registers. This results in all + // registers being marked as clobbered. + BuildMI(DispatchBB, DL, TII->get(VE::NOP)) + .addRegMask(RI.getNoPreservedMask()); + + if (isPositionIndependent()) { + // Force to generate GETGOT, since current implementation doesn't store GOT + // register. + BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15); + } + + // IReg is used as an index in a memory operand and therefore can't be SP + const TargetRegisterClass *RC = &VE::I64RegClass; + Register IReg = MRI.createVirtualRegister(RC); + addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI, + OffsetCS); + if (LPadList.size() < 64) { + BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t)) + .addImm(VECC::CC_ILE) + .addImm(LPadList.size()) + .addReg(IReg) + .addMBB(TrapBB); + } else { + assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!"); + Register TmpReg = MRI.createVirtualRegister(RC); + BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg) + .addImm(0) + .addImm(0) + .addImm(LPadList.size()); + BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t)) + .addImm(VECC::CC_ILE) + .addReg(TmpReg, getKillRegState(true)) + .addReg(IReg) + .addMBB(TrapBB); + } + + Register BReg = MRI.createVirtualRegister(RC); + Register Tmp1 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. + // lea %Tmp1, .LJTI0_0@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT + BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg) + .addReg(VE::SX15) + .addReg(Tmp2, getKillRegState(true)) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32); + } else { + // Create following instructions for non-PIC code. + // lea %Tmp1, .LJTI0_0@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %BReg, .LJTI0_0@hi(%Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1) + .addImm(0) + .addImm(0) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2) + .addReg(Tmp1, getKillRegState(true)) + .addImm(M0(32)); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg) + .addReg(Tmp2, getKillRegState(true)) + .addImm(0) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32); + } + + switch (JTE) { + case MachineJumpTableInfo::EK_BlockAddress: { + // Generate simple block address code for no-PIC model. 
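    // Each EK_BlockAddress table entry is a pointer-sized (8-byte) absolute
    // address, so the call-site index is scaled by 8 (the shift-left-3 below)
    // and loaded with a 64-bit ld; the EK_Custom32 case further down stores
    // 4-byte entries instead, scaled by 2 and loaded with ldl.zx.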
+ // sll %Tmp1, %IReg, 3 + // lds %TReg, 0(%Tmp1, %BReg) + // bcfla %TReg + + Register TReg = MRI.createVirtualRegister(RC); + Register Tmp1 = MRI.createVirtualRegister(RC); + + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1) + .addReg(IReg, getKillRegState(true)) + .addImm(3); + BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg) + .addReg(BReg, getKillRegState(true)) + .addReg(Tmp1, getKillRegState(true)) + .addImm(0); + BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t)) + .addReg(TReg, getKillRegState(true)) + .addImm(0); + break; + } + case MachineJumpTableInfo::EK_Custom32: { + // Generate block address code using differences from the function pointer + // for PIC model. + // sll %Tmp1, %IReg, 2 + // ldl.zx %OReg, 0(%Tmp1, %BReg) + // Prepare function address in BReg2. + // adds.l %TReg, %BReg2, %OReg + // bcfla %TReg + + assert(isPositionIndependent()); + Register OReg = MRI.createVirtualRegister(RC); + Register TReg = MRI.createVirtualRegister(RC); + Register Tmp1 = MRI.createVirtualRegister(RC); + + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1) + .addReg(IReg, getKillRegState(true)) + .addImm(2); + BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg) + .addReg(BReg, getKillRegState(true)) + .addReg(Tmp1, getKillRegState(true)) + .addImm(0); + Register BReg2 = + prepareSymbol(*DispContBB, DispContBB->end(), + DispContBB->getParent()->getName(), DL, /* Local */ true); + BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg) + .addReg(OReg, getKillRegState(true)) + .addReg(BReg2, getKillRegState(true)); + BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t)) + .addReg(TReg, getKillRegState(true)) + .addImm(0); + break; + } + default: + llvm_unreachable("Unexpected jump table encoding"); + } + + // Add the jump table entries as successors to the MBB. + SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs; + for (auto &LP : LPadList) + if (SeenMBBs.insert(LP).second) + DispContBB->addSuccessor(LP); + + // N.B. the order the invoke BBs are processed in doesn't matter here. + SmallVector<MachineBasicBlock *, 64> MBBLPads; + const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); + for (MachineBasicBlock *MBB : InvokeBBs) { + // Remove the landing pad successor from the invoke block and replace it + // with the new dispatch block. + // Keep a copy of Successors since it's modified inside the loop. + SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(), + MBB->succ_rend()); + // FIXME: Avoid quadratic complexity. + for (auto MBBS : Successors) { + if (MBBS->isEHPad()) { + MBB->removeSuccessor(MBBS); + MBBLPads.push_back(MBBS); + } + } + + MBB->addSuccessor(DispatchBB); + + // Find the invoke call and mark all of the callee-saved registers as + // 'implicit defined' so that they're spilled. This prevents code from + // moving instructions to before the EH block, where they will never be + // executed. + for (auto &II : reverse(*MBB)) { + if (!II.isCall()) + continue; + + DenseMap<Register, bool> DefRegs; + for (auto &MOp : II.operands()) + if (MOp.isReg()) + DefRegs[MOp.getReg()] = true; + + MachineInstrBuilder MIB(*MF, &II); + for (unsigned RI = 0; SavedRegs[RI]; ++RI) { + Register Reg = SavedRegs[RI]; + if (!DefRegs[Reg]) + MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); + } + + break; + } + } + + // Mark all former landing pads as non-landing pads. The dispatch is the only + // landing pad now. + for (auto &LP : MBBLPads) + LP->setIsEHPad(false); + + // The instruction is gone now. 
+ MI.eraseFromParent(); + return BB; +} + +MachineBasicBlock * +VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unknown Custom Instruction!"); + case VE::EH_SjLj_LongJmp: + return emitEHSjLjLongJmp(MI, BB); + case VE::EH_SjLj_SetJmp: + return emitEHSjLjSetJmp(MI, BB); + case VE::EH_SjLj_Setup_Dispatch: + return emitSjLjDispatchBlock(MI, BB); + } +} + +static bool isI32Insn(const SDNode *User, const SDNode *N) { + switch (User->getOpcode()) { + default: + return false; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SETCC: + case ISD::SMIN: + case ISD::SMAX: + case ISD::SHL: + case ISD::SRA: + case ISD::BSWAP: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::BR_CC: + case ISD::BITCAST: + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + return true; + case ISD::SRL: + if (N->getOperand(0).getOpcode() != ISD::SRL) + return true; + // (srl (trunc (srl ...))) may be optimized by combining srl, so + // doesn't optimize trunc now. + return false; + case ISD::SELECT_CC: + if (User->getOperand(2).getNode() != N && + User->getOperand(3).getNode() != N) + return true; + LLVM_FALLTHROUGH; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SELECT: + case ISD::CopyToReg: + // Check all use of selections, bit operations, and copies. If all of them + // are safe, optimize truncate to extract_subreg. + for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end(); + UI != UE; ++UI) { + switch ((*UI)->getOpcode()) { + default: + // If the use is an instruction which treats the source operand as i32, + // it is safe to avoid truncate here. + if (isI32Insn(*UI, N)) + continue; + break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: { + // Special optimizations to the combination of ext and trunc. + // (ext ... (select ... (trunc ...))) is safe to avoid truncate here + // since this truncate instruction clears higher 32 bits which is filled + // by one of ext instructions later. + assert(N->getValueType(0) == MVT::i32 && + "find truncate to not i32 integer"); + if (User->getOpcode() == ISD::SELECT_CC || + User->getOpcode() == ISD::SELECT) + continue; + break; + } + } + return false; + } + return true; + } +} + +// Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is +// sometime too early. Optimizing it in DAG pattern matching in VEInstrInfo.td +// is sometime too late. So, doing it at here. +SDValue VETargetLowering::combineTRUNCATE(SDNode *N, + DAGCombinerInfo &DCI) const { + assert(N->getOpcode() == ISD::TRUNCATE && + "Should be called with a TRUNCATE node"); + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // We prefer to do this when all types are legal. + if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant. + if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC && + isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) && + isa<ConstantSDNode>(N->getOperand(0)->getOperand(1))) + return SDValue(); + + // Check all use of this TRUNCATE. + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; + ++UI) { + SDNode *User = *UI; + + // Make sure that we're not going to replace TRUNCATE for non i32 + // instructions. 
+ // + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (isI32Insn(User, N)) + continue; + + return SDValue(); + } + + SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32); + return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, + N->getOperand(0), SubI32), + 0); +} + +SDValue VETargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: + break; + case ISD::TRUNCATE: + return combineTRUNCATE(N, DCI); + } + + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// VE Inline Assembly Support +//===----------------------------------------------------------------------===// + +VETargetLowering::ConstraintType +VETargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'v': // vector registers + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair<unsigned, const TargetRegisterClass *> +VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + const TargetRegisterClass *RC = nullptr; + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); + case 'r': + RC = &VE::I64RegClass; + break; + case 'v': + RC = &VE::V64RegClass; + break; + } + return std::make_pair(0U, RC); + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +//===----------------------------------------------------------------------===// +// VE Target Optimization Support +//===----------------------------------------------------------------------===// + +unsigned VETargetLowering::getMinimumJumpTableEntries() const { + // Specify 8 for PIC model to relieve the impact of PIC load instructions. + if (isJumpTableRelative()) + return 8; + + return TargetLowering::getMinimumJumpTableEntries(); +} + +bool VETargetLowering::hasAndNot(SDValue Y) const { + EVT VT = Y.getValueType(); + + // VE doesn't have vector and not instruction. + if (VT.isVector()) + return false; + + // VE allows different immediate values for X and Y where ~X & Y. + // Only simm7 works for X, and only mimm works for Y on VE. However, this + // function is used to check whether an immediate value is OK for and-not + // instruction as both X and Y. Generating additional instruction to + // retrieve an immediate value is no good since the purpose of this + // function is to convert a series of 3 instructions to another series of + // 3 instructions with better parallelism. Therefore, we return false + // for all immediate values now. + // FIXME: Change hasAndNot function to have two operands to make it work + // correctly with Aurora VE. + if (isa<ConstantSDNode>(Y)) + return false; + + // It's ok for generic registers. + return true; +} + +/// \returns the VVP_* SDNode opcode corresponsing to \p OC. 
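/// The mapping below is generated from VVPNodes.def; assuming an entry along
/// the lines of ADD_VVP_OP(VVP_ADD, ADD), the switch expands to
///   case VEISD::VVP_ADD: case ISD::ADD: return VEISD::VVP_ADD;
/// so both the generic ISD opcode and an already-lowered VVP opcode map to
/// the same VVP node.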
+static Optional<unsigned> getVVPOpcode(unsigned Opcode) { + switch (Opcode) { +#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \ + case ISD::VPOPC: \ + return VEISD::VVPNAME; +#define ADD_VVP_OP(VVPNAME, SDNAME) \ + case VEISD::VVPNAME: \ + case ISD::SDNAME: \ + return VEISD::VVPNAME; +#include "VVPNodes.def" + } + return None; +} + +SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { + // Can we represent this as a VVP node. + const unsigned Opcode = Op->getOpcode(); + auto VVPOpcodeOpt = getVVPOpcode(Opcode); + if (!VVPOpcodeOpt.hasValue()) + return SDValue(); + unsigned VVPOpcode = VVPOpcodeOpt.getValue(); + const bool FromVP = ISD::isVPOpcode(Opcode); + + // The representative and legalized vector type of this operation. + SDLoc DL(Op); + MVT MaskVT = MVT::v256i1; // TODO: packed mode. + EVT OpVecVT = Op.getValueType(); + EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT); + + SDValue AVL; + SDValue Mask; + + if (FromVP) { + // All upstream VP SDNodes always have a mask and avl. + auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue(); + auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue(); + Mask = Op->getOperand(MaskIdx); + AVL = Op->getOperand(AVLIdx); + + } else { + // Materialize the VL parameter. + AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32); + SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32); + Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT, + ConstTrue); // emit a VEISD::VEC_BROADCAST here. + } + + // Categories we are interested in. + bool IsBinaryOp = false; + + switch (VVPOpcode) { +#define ADD_BINARY_VVP_OP(VVPNAME, ...) \ + case VEISD::VVPNAME: \ + IsBinaryOp = true; \ + break; +#include "VVPNodes.def" + } + + if (IsBinaryOp) { + assert(LegalVecVT.isSimple()); + return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0), + Op->getOperand(1), Mask, AVL); + } + llvm_unreachable("lowerToVVP called for unexpected SDNode."); +} + +SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); + MVT VT = Op.getOperand(0).getSimpleValueType(); + + // Special treatment for packed V64 types. 
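  // In packed mode a v512i32/v512f32 value occupies 256 64-bit vector
  // elements holding two 32-bit lanes each; judging from the shift computed
  // below, an even element index sits in bits 63..32 and an odd index in
  // bits 31..0 of vector element (idx / 2), and LVSvr pulls that 64-bit
  // element into a scalar register so the wanted half can be shifted out.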
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32); + // Example of codes: + // %packed_v = extractelt %vr, %idx / 2 + // %v = %packed_v >> (%idx % 2 * 32) + // %res = %v & 0xffffffff + + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + SDLoc DL(Op); + SDValue Result = Op; + if (0 /* Idx->isConstant() */) { + // TODO: optimized implementation using constant values + } else { + SDValue Const1 = DAG.getConstant(1, DL, MVT::i64); + SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1}); + SDValue PackedElt = + SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0); + SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1}); + SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1}); + SDValue Const5 = DAG.getConstant(5, DL, MVT::i64); + Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5}); + PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift}); + SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64); + PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask}); + SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32); + Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::i32, PackedElt, SubI32), + 0); + + if (Op.getSimpleValueType() == MVT::f32) { + Result = DAG.getBitcast(MVT::f32, Result); + } else { + assert(Op.getSimpleValueType() == MVT::i32); + } + } + return Result; +} + +SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); + MVT VT = Op.getOperand(0).getSimpleValueType(); + + // Special treatment for packed V64 types. + assert(VT == MVT::v512i32 || VT == MVT::v512f32); + // The v512i32 and v512f32 starts from upper bits (0..31). This "upper + // bits" required `val << 32` from C implementation's point of view. 
+ // + // Example of codes: + // %packed_elt = extractelt %vr, (%idx >> 1) + // %shift = ((%idx & 1) ^ 1) << 5 + // %packed_elt &= 0xffffffff00000000 >> shift + // %packed_elt |= (zext %val) << shift + // %vr = insertelt %vr, %packed_elt, (%idx >> 1) + + SDLoc DL(Op); + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + if (Idx.getSimpleValueType() == MVT::i32) + Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx); + if (Val.getSimpleValueType() == MVT::f32) + Val = DAG.getBitcast(MVT::i32, Val); + assert(Val.getSimpleValueType() == MVT::i32); + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); + + SDValue Result = Op; + if (0 /* Idx->isConstant()*/) { + // TODO: optimized implementation using constant values + } else { + SDValue Const1 = DAG.getConstant(1, DL, MVT::i64); + SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1}); + SDValue PackedElt = + SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0); + SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1}); + SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1}); + SDValue Const5 = DAG.getConstant(5, DL, MVT::i64); + Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5}); + SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64); + Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift}); + PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask}); + Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift}); + PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val}); + Result = + SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(), + {HalfIdx, PackedElt, Vec}), + 0); + } + return Result; +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h index 4633220efaa1..a6e1bf396035 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h @@ -24,23 +24,36 @@ namespace VEISD { enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, - Hi, - Lo, // Hi/Lo operations, typically on a global address. - - GETFUNPLT, // load function address through %plt insturction - GETTLSADDR, // load address for TLS access - GETSTACKTOP, // retrieve address of stack top (first address of - // locals and temporaries) - - CALL, // A call instruction. - RET_FLAG, // Return with a flag operand. - GLOBAL_BASE_REG, // Global base reg for PIC. + CALL, // A call instruction. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. + GETFUNPLT, // Load function address through %plt insturction. + GETTLSADDR, // Load address for TLS access. + GETSTACKTOP, // Retrieve address of stack top (first address of + // locals and temporaries). + GLOBAL_BASE_REG, // Global base reg for PIC. + Hi, // Hi/Lo operations, typically on a global address. + Lo, // Hi/Lo operations, typically on a global address. + MEMBARRIER, // Compiler barrier only; generate a no-op. + RET_FLAG, // Return with a flag operand. + TS1AM, // A TS1AM instruction used for 1/2 bytes swap. + VEC_BROADCAST, // A vector broadcast instruction. + // 0: scalar value, 1: VL + +// VVP_* nodes. +#define ADD_VVP_OP(VVP_NAME, ...) 
VVP_NAME, +#include "VVPNodes.def" }; } class VETargetLowering : public TargetLowering { const VESubtarget *Subtarget; + void initRegisterClasses(); + void initSPUActions(); + void initVPUActions(); + public: VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); @@ -74,23 +87,98 @@ public: const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + /// Helper functions for atomic operations. + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + // VE uses release consistency, so need fence for each atomics. + return true; + } + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + /// Custom Lower { SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; + unsigned getJumpTableEncoding() const override; + const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned Uid, + MCContext &Ctx) const override; + SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const override; + // VE doesn't need getPICJumpTableRelocBaseExpr since it is used for only + // EK_LabelDifference32. + + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; + + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; /// } Custom Lower + /// Replace the results of node with an illegal result + /// type with new values built out of custom code. 
+ /// + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; + + /// Custom Inserter { + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const; + + void setupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI, + int Offset) const; + // Setup basic block address. + Register prepareMBB(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + MachineBasicBlock *TargetBB, const DebugLoc &DL) const; + // Prepare function/variable address. + Register prepareSymbol(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + StringRef Symbol, const DebugLoc &DL, bool IsLocal, + bool IsCall) const; + /// } Custom Inserter + + /// VVP Lowering { + SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const; + /// } VVPLowering + + /// Custom DAGCombine { + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; + /// } Custom DAGCombine + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, SelectionDAG &DAG) const; SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -99,10 +187,32 @@ public: MachineMemOperand::Flags Flags, bool *Fast) const override; - // Block s/udiv lowering for now - bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; } + /// Inline Assembly { + + ConstraintType getConstraintType(StringRef Constraint) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + /// } Inline Assembly + /// Target Optimization { + + // Return lower limit for number of blocks in a jump table. + unsigned getMinimumJumpTableEntries() const override; + + // SX-Aurora VE's s/udiv is 5-9 times slower than multiply. + bool isIntDivCheap(EVT, AttributeList) const override { return false; } + // VE doesn't have rem. + bool hasStandaloneRem(EVT) const override { return false; } + // VE LDZ instruction returns 64 if the input is zero. + bool isCheapToSpeculateCtlz() const override { return true; } + // VE LDZ instruction is fast. + bool isCtlzFast() const override { return true; } + // VE has NND instruction. bool hasAndNot(SDValue Y) const override; + + /// } Target Optimization }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h new file mode 100644 index 000000000000..1b0e07546931 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrBuilder.h @@ -0,0 +1,41 @@ +//===-- VEInstrBuilder.h - Aides for building VE insts ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H +#define LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... +/// +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool ThreeOp = true) { + if (ThreeOp) + return MIB.addFrameIndex(FI).addImm(0).addImm(Offset); + return MIB.addFrameIndex(FI).addImm(Offset); +} + +} // namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td index 0c02411ff916..f43c9755f1b9 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td @@ -35,6 +35,25 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern> let AsmString = asmstr; let Pattern = pattern; + bits<1> VE_Vector = 0; + bits<1> VE_VLInUse = 0; + bits<3> VE_VLIndex = 0; + bits<1> VE_VLWithMask = 0; + + /// These fields correspond to the fields in VEInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + /// + /// VLIndex is the index of VL register in MI's operands. The HW instruction + /// doesn't have that field, but we add is in MI for the ease of optimization. + /// For example, the index of VL of (VST $sy, $sz, $sx, $vl) is 3 (beginning + /// from 0), and the index of VL of (VST $sy, $sz, $sx, $vm, $vl) is 4. We + /// define vector instructions hierarchically, so use VE_VLIndex which is + /// defined by the type of instruction and VE_VLWithMask which is defined + /// whether the insturction use mask or not. + let TSFlags{0} = VE_Vector; + let TSFlags{1} = VE_VLInUse; + let TSFlags{4-2} = !add(VE_VLIndex, VE_VLWithMask); + let DecoderNamespace = "VE"; field bits<64> SoftFail = 0; } @@ -179,12 +198,82 @@ class RRFENCE<bits<8>opVal, dag outs, dag ins, string asmstr, //----------------------------------------------------------------------------- // Section 5.6 RVM Type +// +// RVM type is for vector transfer instructions. 
//----------------------------------------------------------------------------- +class RVM<bits<8>opVal, dag outs, dag ins, string asmstr, + list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> cx = 0; + bits<1> vc = 0; + bits<1> cs = 0; + bits<4> m = 0; + bits<1> cy = 1; + bits<7> sy; + bits<1> cz = 1; + bits<7> sz; + bits<8> vx; + bits<8> vy = 0; + bits<7> sw = 0; + let op = opVal; + let Inst{55} = cx; + let Inst{54} = vc; + let Inst{53} = cs; + let Inst{52} = 0; + let Inst{51-48} = m; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-24} = vx; + let Inst{23-16} = vy; + let Inst{15-8} = 0; + let Inst{7} = 0; + let Inst{6-0} = sw; + + let VE_Vector = 1; +} + //----------------------------------------------------------------------------- // Section 5.7 RV Type +// +// RV type is for vector instructions. //----------------------------------------------------------------------------- +class RV<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> cx = 0; + bits<1> cx2 = 0; + bits<1> cs = 0; + bits<1> cs2 = 0; + bits<4> m = 0; + bits<1> cy = 1; + bits<7> sy; + bits<1> cz = 0; + bits<7> sz = 0; + bits<8> vx = 0; + bits<8> vy = 0; + bits<8> vz = 0; + bits<8> vw = 0; + let op = opVal; + let Inst{55} = cx; + let Inst{54} = cx2; + let Inst{53} = cs; + let Inst{52} = cs2; + let Inst{51-48} = m; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-24} = vx; + let Inst{23-16} = vy; + let Inst{15-8} = vz; + let Inst{7-0} = vw; + + let VE_Vector = 1; +} + // Pseudo instructions. class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstVE<outs, ins, asmstr, pattern> { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp index 86b2ac2078b1..9770052ff913 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -92,38 +92,46 @@ static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) { llvm_unreachable("Invalid cond code"); } -// Treat br.l [BRCF AT] as unconditional branch +// Treat a branch relative long always instruction as unconditional branch. +// For example, br.l.t and br.l. static bool isUncondBranchOpcode(int Opc) { - return Opc == VE::BRCFLa || Opc == VE::BRCFWa || - Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt || - Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t || - Opc == VE::BRCFDa || Opc == VE::BRCFSa || - Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt || - Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t; + using namespace llvm::VE; + +#define BRKIND(NAME) (Opc == NAME##a || Opc == NAME##a_nt || Opc == NAME##a_t) + // VE has other branch relative always instructions for word/double/float, + // but we use only long branches in our lower. So, sanity check it here. + assert(!BRKIND(BRCFW) && !BRKIND(BRCFD) && !BRKIND(BRCFS) && + "Branch relative word/double/float always instructions should not be " + "used!"); + return BRKIND(BRCFL); +#undef BRKIND } +// Treat branch relative conditional as conditional branch instructions. +// For example, brgt.l.t and brle.s.nt. 
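// Reading the opcode names used here: BRCF<L|W|D|S> selects the compare type
// (long/word/double/float), the rr/ir suffixes select a register or immediate
// first operand, and the trailing _t/_nt appear to be taken/not-taken branch
// hints, with the unsuffixed form carrying no hint.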
static bool isCondBranchOpcode(int Opc) { - return Opc == VE::BRCFLrr || Opc == VE::BRCFLir || - Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt || - Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t || - Opc == VE::BRCFWrr || Opc == VE::BRCFWir || - Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt || - Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t || - Opc == VE::BRCFDrr || Opc == VE::BRCFDir || - Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt || - Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t || - Opc == VE::BRCFSrr || Opc == VE::BRCFSir || - Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt || - Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t; + using namespace llvm::VE; + +#define BRKIND(NAME) \ + (Opc == NAME##rr || Opc == NAME##rr_nt || Opc == NAME##rr_t || \ + Opc == NAME##ir || Opc == NAME##ir_nt || Opc == NAME##ir_t) + return BRKIND(BRCFL) || BRKIND(BRCFW) || BRKIND(BRCFD) || BRKIND(BRCFS); +#undef BRKIND } +// Treat branch long always instructions as indirect branch. +// For example, b.l.t and b.l. static bool isIndirectBranchOpcode(int Opc) { - return Opc == VE::BCFLari || Opc == VE::BCFLari || - Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt || - Opc == VE::BCFLari_t || Opc == VE::BCFLari_t || - Opc == VE::BCFLari || Opc == VE::BCFLari || - Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt || - Opc == VE::BCFLari_t || Opc == VE::BCFLari_t; + using namespace llvm::VE; + +#define BRKIND(NAME) \ + (Opc == NAME##ari || Opc == NAME##ari_nt || Opc == NAME##ari_t) + // VE has other branch always instructions for word/double/float, but + // we use only long branches in our lower. So, sanity check it here. + assert(!BRKIND(BCFW) && !BRKIND(BCFD) && !BRKIND(BCFS) && + "Branch word/double/float always instructions should not be used!"); + return BRKIND(BCFL); +#undef BRKIND } static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, @@ -311,11 +319,43 @@ bool VEInstrInfo::reverseBranchCondition( } static bool IsAliasOfSX(Register Reg) { - return VE::I8RegClass.contains(Reg) || VE::I16RegClass.contains(Reg) || - VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) || + return VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) || VE::F32RegClass.contains(Reg); } +static void copyPhysSubRegs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + MCRegister DestReg, MCRegister SrcReg, bool KillSrc, + const MCInstrDesc &MCID, unsigned int NumSubRegs, + const unsigned *SubRegIdx, + const TargetRegisterInfo *TRI) { + MachineInstr *MovMI = nullptr; + + for (unsigned Idx = 0; Idx != NumSubRegs; ++Idx) { + Register SubDest = TRI->getSubReg(DestReg, SubRegIdx[Idx]); + Register SubSrc = TRI->getSubReg(SrcReg, SubRegIdx[Idx]); + assert(SubDest && SubSrc && "Bad sub-register"); + + if (MCID.getOpcode() == VE::ORri) { + // generate "ORri, dest, src, 0" instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0); + MovMI = MIB.getInstr(); + } else if (MCID.getOpcode() == VE::ANDMmm) { + // generate "ANDM, dest, vm0, src" instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, I, DL, MCID, SubDest).addReg(VE::VM0).addReg(SubSrc); + MovMI = MIB.getInstr(); + } else { + llvm_unreachable("Unexpected reg-to-reg copy instruction"); + } + } + // Add implicit super-register defs and kills to the last MovMI. 
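  // Without these implicit operands only the two sub-registers would appear
  // defined or killed, and the liveness of the full F128/VM512 super-register
  // would be lost once the copy has been split in half.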
+ MovMI->addRegisterDefined(DestReg, TRI); + if (KillSrc) + MovMI->addRegisterKilled(SrcReg, TRI, true); +} + void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, @@ -325,6 +365,41 @@ void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(VE::ORri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0); + } else if (VE::V64RegClass.contains(DestReg, SrcReg)) { + // Generate following instructions + // %sw16 = LEA32zii 256 + // VORmvl %dest, (0)1, %src, %sw16 + // TODO: reuse a register if vl is already assigned to a register + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + Register TmpReg = VE::SX16; + Register SubTmp = TRI->getSubReg(TmpReg, VE::sub_i32); + BuildMI(MBB, I, DL, get(VE::LEAzii), TmpReg) + .addImm(0) + .addImm(0) + .addImm(256); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(VE::VORmvl), DestReg) + .addImm(M1(0)) // Represent (0)1. + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SubTmp, getKillRegState(true)); + MIB.getInstr()->addRegisterKilled(TmpReg, TRI, true); + } else if (VE::VMRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(VE::ANDMmm), DestReg) + .addReg(VE::VM0) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (VE::VM512RegClass.contains(DestReg, SrcReg)) { + // Use two instructions. + const unsigned SubRegIdx[] = {VE::sub_vm_even, VE::sub_vm_odd}; + unsigned int NumSubRegs = 2; + copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ANDMmm), + NumSubRegs, SubRegIdx, &getRegisterInfo()); + } else if (VE::F128RegClass.contains(DestReg, SrcReg)) { + // Use two instructions. 
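    // A quad-precision value lives in an even/odd pair of 64-bit registers,
    // so each half is copied with the or-with-zero move idiom (ORri ..., 0)
    // used for plain scalar copies above.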
+ const unsigned SubRegIdx[] = {VE::sub_even, VE::sub_odd}; + unsigned int NumSubRegs = 2; + copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri), + NumSubRegs, SubRegIdx, &getRegisterInfo()); } else { const TargetRegisterInfo *TRI = &getRegisterInfo(); dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI) @@ -342,7 +417,8 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (MI.getOpcode() == VE::LDrii || // I64 MI.getOpcode() == VE::LDLSXrii || // I32 - MI.getOpcode() == VE::LDUrii // F32 + MI.getOpcode() == VE::LDUrii || // F32 + MI.getOpcode() == VE::LDQrii // F128 (pseudo) ) { if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() && @@ -363,7 +439,8 @@ unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { if (MI.getOpcode() == VE::STrii || // I64 MI.getOpcode() == VE::STLrii || // I32 - MI.getOpcode() == VE::STUrii // F32 + MI.getOpcode() == VE::STUrii || // F32 + MI.getOpcode() == VE::STQrii // F128 (pseudo) ) { if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() && @@ -412,6 +489,13 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addImm(0) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO); + } else if (VE::F128RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::STQrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); } else report_fatal_error("Can't store this register to stack slot"); } @@ -449,10 +533,194 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0) .addImm(0) .addMemOperand(MMO); + } else if (VE::F128RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::LDQrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); } else report_fatal_error("Can't load this register from stack slot"); } +bool VEInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg, MachineRegisterInfo *MRI) const { + LLVM_DEBUG(dbgs() << "FoldImmediate\n"); + + LLVM_DEBUG(dbgs() << "checking DefMI\n"); + int64_t ImmVal; + switch (DefMI.getOpcode()) { + default: + return false; + case VE::ORim: + // General move small immediate instruction on VE. + LLVM_DEBUG(dbgs() << "checking ORim\n"); + LLVM_DEBUG(DefMI.dump()); + // FIXME: We may need to support FPImm too. + assert(DefMI.getOperand(1).isImm()); + assert(DefMI.getOperand(2).isImm()); + ImmVal = + DefMI.getOperand(1).getImm() + mimm2Val(DefMI.getOperand(2).getImm()); + LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n"); + break; + case VE::LEAzii: + // General move immediate instruction on VE. + LLVM_DEBUG(dbgs() << "checking LEAzii\n"); + LLVM_DEBUG(DefMI.dump()); + // FIXME: We may need to support FPImm too. 
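    // LEAzii materializes a constant over a zero base, so the value being
    // defined is simply operand 2 plus operand 3; e.g. the LEAzii %dst, 0, 0,
    // 256 emitted for the V64 register copy earlier in this file yields 256.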
+ assert(DefMI.getOperand(2).isImm()); + if (!DefMI.getOperand(3).isImm()) + // LEAzii may refer label + return false; + ImmVal = DefMI.getOperand(2).getImm() + DefMI.getOperand(3).getImm(); + LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n"); + break; + } + + // Try to fold like below: + // %1:i64 = ORim 0, 0(1) + // %2:i64 = CMPSLrr %0, %1 + // To + // %2:i64 = CMPSLrm %0, 0(1) + // + // Another example: + // %1:i64 = ORim 6, 0(1) + // %2:i64 = CMPSLrr %1, %0 + // To + // %2:i64 = CMPSLir 6, %0 + // + // Support commutable instructions like below: + // %1:i64 = ORim 6, 0(1) + // %2:i64 = ADDSLrr %1, %0 + // To + // %2:i64 = ADDSLri %0, 6 + // + // FIXME: Need to support i32. Current implementtation requires + // EXTRACT_SUBREG, so input has following COPY and it avoids folding: + // %1:i64 = ORim 6, 0(1) + // %2:i32 = COPY %1.sub_i32 + // %3:i32 = ADDSWSXrr %0, %2 + // FIXME: Need to support shift, cmov, and more instructions. + // FIXME: Need to support lvl too, but LVLGen runs after peephole-opt. + + LLVM_DEBUG(dbgs() << "checking UseMI\n"); + LLVM_DEBUG(UseMI.dump()); + unsigned NewUseOpcSImm7; + unsigned NewUseOpcMImm; + enum InstType { + rr2ri_rm, // rr -> ri or rm, commutable + rr2ir_rm, // rr -> ir or rm + } InstType; + + using namespace llvm::VE; +#define INSTRKIND(NAME) \ + case NAME##rr: \ + NewUseOpcSImm7 = NAME##ri; \ + NewUseOpcMImm = NAME##rm; \ + InstType = rr2ri_rm; \ + break +#define NCINSTRKIND(NAME) \ + case NAME##rr: \ + NewUseOpcSImm7 = NAME##ir; \ + NewUseOpcMImm = NAME##rm; \ + InstType = rr2ir_rm; \ + break + + switch (UseMI.getOpcode()) { + default: + return false; + + INSTRKIND(ADDUL); + INSTRKIND(ADDSWSX); + INSTRKIND(ADDSWZX); + INSTRKIND(ADDSL); + NCINSTRKIND(SUBUL); + NCINSTRKIND(SUBSWSX); + NCINSTRKIND(SUBSWZX); + NCINSTRKIND(SUBSL); + INSTRKIND(MULUL); + INSTRKIND(MULSWSX); + INSTRKIND(MULSWZX); + INSTRKIND(MULSL); + NCINSTRKIND(DIVUL); + NCINSTRKIND(DIVSWSX); + NCINSTRKIND(DIVSWZX); + NCINSTRKIND(DIVSL); + NCINSTRKIND(CMPUL); + NCINSTRKIND(CMPSWSX); + NCINSTRKIND(CMPSWZX); + NCINSTRKIND(CMPSL); + INSTRKIND(MAXSWSX); + INSTRKIND(MAXSWZX); + INSTRKIND(MAXSL); + INSTRKIND(MINSWSX); + INSTRKIND(MINSWZX); + INSTRKIND(MINSL); + INSTRKIND(AND); + INSTRKIND(OR); + INSTRKIND(XOR); + INSTRKIND(EQV); + NCINSTRKIND(NND); + NCINSTRKIND(MRG); + } + +#undef INSTRKIND + + unsigned NewUseOpc; + unsigned UseIdx; + bool Commute = false; + LLVM_DEBUG(dbgs() << "checking UseMI operands\n"); + switch (InstType) { + case rr2ri_rm: + UseIdx = 2; + if (UseMI.getOperand(1).getReg() == Reg) { + Commute = true; + } else { + assert(UseMI.getOperand(2).getReg() == Reg); + } + if (isInt<7>(ImmVal)) { + // This ImmVal matches to SImm7 slot, so change UseOpc to an instruction + // holds a simm7 slot. + NewUseOpc = NewUseOpcSImm7; + } else if (isMImmVal(ImmVal)) { + // Similarly, change UseOpc to an instruction holds a mimm slot. + NewUseOpc = NewUseOpcMImm; + ImmVal = val2MImm(ImmVal); + } else + return false; + break; + case rr2ir_rm: + if (UseMI.getOperand(1).getReg() == Reg) { + // Check immediate value whether it matchs to the UseMI instruction. + if (!isInt<7>(ImmVal)) + return false; + NewUseOpc = NewUseOpcSImm7; + UseIdx = 1; + } else { + assert(UseMI.getOperand(2).getReg() == Reg); + // Check immediate value whether it matchs to the UseMI instruction. 
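      // Only run-of-bits patterns fit the mimm slot: (m)0 is m leading zero
      // bits followed by ones (e.g. (32)0 == 0x00000000FFFFFFFF, the mask in
      // the lea/and/lea.sl sequences), and (m)1 is m leading one bits
      // followed by zeros; anything else has to use the simm7 form or stay
      // in a register.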
+ if (!isMImmVal(ImmVal)) + return false; + NewUseOpc = NewUseOpcMImm; + ImmVal = val2MImm(ImmVal); + UseIdx = 2; + } + break; + } + + LLVM_DEBUG(dbgs() << "modifying UseMI\n"); + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI.setDesc(get(NewUseOpc)); + if (Commute) { + UseMI.getOperand(1).setReg(UseMI.getOperand(UseIdx).getReg()); + } + UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); + if (DeleteDef) + DefMI.eraseFromParent(); + + return true; +} + Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>(); Register GlobalBaseReg = VEFI->getGlobalBaseReg(); @@ -472,6 +740,106 @@ Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { return GlobalBaseReg; } +static Register getVM512Upper(Register reg) { + return (reg - VE::VMP0) * 2 + VE::VM0; +} + +static Register getVM512Lower(Register reg) { return getVM512Upper(reg) + 1; } + +// Expand pseudo logical vector instructions for VM512 registers. +static void expandPseudoLogM(MachineInstr &MI, const MCInstrDesc &MCID) { + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + Register VMXu = getVM512Upper(MI.getOperand(0).getReg()); + Register VMXl = getVM512Lower(MI.getOperand(0).getReg()); + Register VMYu = getVM512Upper(MI.getOperand(1).getReg()); + Register VMYl = getVM512Lower(MI.getOperand(1).getReg()); + + switch (MI.getOpcode()) { + default: { + Register VMZu = getVM512Upper(MI.getOperand(2).getReg()); + Register VMZl = getVM512Lower(MI.getOperand(2).getReg()); + BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu).addUse(VMZu); + BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl).addUse(VMZl); + break; + } + case VE::NEGMy: + BuildMI(*MBB, MI, DL, MCID).addDef(VMXu).addUse(VMYu); + BuildMI(*MBB, MI, DL, MCID).addDef(VMXl).addUse(VMYl); + break; + } + MI.eraseFromParent(); +} + +static void addOperandsForVFMK(MachineInstrBuilder &MIB, MachineInstr &MI, + bool Upper) { + // VM512 + MIB.addReg(Upper ? getVM512Upper(MI.getOperand(0).getReg()) + : getVM512Lower(MI.getOperand(0).getReg())); + + switch (MI.getNumExplicitOperands()) { + default: + report_fatal_error("unexpected number of operands for pvfmk"); + case 2: // _Ml: VM512, VL + // VL + MIB.addReg(MI.getOperand(1).getReg()); + break; + case 4: // _Mvl: VM512, CC, VR, VL + // CC + MIB.addImm(MI.getOperand(1).getImm()); + // VR + MIB.addReg(MI.getOperand(2).getReg()); + // VL + MIB.addReg(MI.getOperand(3).getReg()); + break; + case 5: // _MvMl: VM512, CC, VR, VM512, VL + // CC + MIB.addImm(MI.getOperand(1).getImm()); + // VR + MIB.addReg(MI.getOperand(2).getReg()); + // VM512 + MIB.addReg(Upper ? 
getVM512Upper(MI.getOperand(3).getReg()) + : getVM512Lower(MI.getOperand(3).getReg())); + // VL + MIB.addReg(MI.getOperand(4).getReg()); + break; + } +} + +static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) { + // replace to pvfmk.w.up and pvfmk.w.lo + // replace to pvfmk.s.up and pvfmk.s.lo + + static std::map<unsigned, std::pair<unsigned, unsigned>> VFMKMap = { + {VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}}, + {VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}}, + {VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}}, + {VE::VFMKWyvyl, {VE::PVFMKWUPvml, VE::PVFMKWLOvml}}, + {VE::VFMKSyvl, {VE::PVFMKSUPvl, VE::PVFMKSLOvl}}, + {VE::VFMKSyvyl, {VE::PVFMKSUPvml, VE::PVFMKSLOvml}}, + }; + + unsigned Opcode = MI.getOpcode(); + + auto Found = VFMKMap.find(Opcode); + if (Found == VFMKMap.end()) + report_fatal_error("unexpected opcode for pseudo vfmk"); + + unsigned OpcodeUpper = (*Found).second.first; + unsigned OpcodeLower = (*Found).second.second; + + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + MachineInstrBuilder Bu = BuildMI(*MBB, MI, DL, TI.get(OpcodeUpper)); + addOperandsForVFMK(Bu, MI, /* Upper */ true); + MachineInstrBuilder Bl = BuildMI(*MBB, MI, DL, TI.get(OpcodeLower)); + addOperandsForVFMK(Bl, MI, /* Upper */ false); + + MI.eraseFromParent(); +} + bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { case VE::EXTEND_STACK: { @@ -484,6 +852,110 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case VE::GETSTACKTOP: { return expandGetStackTopPseudo(MI); } + + case VE::ANDMyy: + expandPseudoLogM(MI, get(VE::ANDMmm)); + return true; + case VE::ORMyy: + expandPseudoLogM(MI, get(VE::ORMmm)); + return true; + case VE::XORMyy: + expandPseudoLogM(MI, get(VE::XORMmm)); + return true; + case VE::EQVMyy: + expandPseudoLogM(MI, get(VE::EQVMmm)); + return true; + case VE::NNDMyy: + expandPseudoLogM(MI, get(VE::NNDMmm)); + return true; + case VE::NEGMy: + expandPseudoLogM(MI, get(VE::NEGMm)); + return true; + + case VE::LVMyir: + case VE::LVMyim: + case VE::LVMyir_y: + case VE::LVMyim_y: { + Register VMXu = getVM512Upper(MI.getOperand(0).getReg()); + Register VMXl = getVM512Lower(MI.getOperand(0).getReg()); + int64_t Imm = MI.getOperand(1).getImm(); + bool IsSrcReg = + MI.getOpcode() == VE::LVMyir || MI.getOpcode() == VE::LVMyir_y; + Register Src = IsSrcReg ? MI.getOperand(2).getReg() : VE::NoRegister; + int64_t MImm = IsSrcReg ? 0 : MI.getOperand(2).getImm(); + bool KillSrc = IsSrcReg ? 
MI.getOperand(2).isKill() : false; + Register VMX = VMXl; + if (Imm >= 4) { + VMX = VMXu; + Imm -= 4; + } + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + switch (MI.getOpcode()) { + case VE::LVMyir: + BuildMI(*MBB, MI, DL, get(VE::LVMir)) + .addDef(VMX) + .addImm(Imm) + .addReg(Src, getKillRegState(KillSrc)); + break; + case VE::LVMyim: + BuildMI(*MBB, MI, DL, get(VE::LVMim)) + .addDef(VMX) + .addImm(Imm) + .addImm(MImm); + break; + case VE::LVMyir_y: + assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() && + "LVMyir_y has different register in 3rd operand"); + BuildMI(*MBB, MI, DL, get(VE::LVMir_m)) + .addDef(VMX) + .addImm(Imm) + .addReg(Src, getKillRegState(KillSrc)) + .addReg(VMX); + break; + case VE::LVMyim_y: + assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() && + "LVMyim_y has different register in 3rd operand"); + BuildMI(*MBB, MI, DL, get(VE::LVMim_m)) + .addDef(VMX) + .addImm(Imm) + .addImm(MImm) + .addReg(VMX); + break; + } + MI.eraseFromParent(); + return true; + } + case VE::SVMyi: { + Register Dest = MI.getOperand(0).getReg(); + Register VMZu = getVM512Upper(MI.getOperand(1).getReg()); + Register VMZl = getVM512Lower(MI.getOperand(1).getReg()); + bool KillSrc = MI.getOperand(1).isKill(); + int64_t Imm = MI.getOperand(2).getImm(); + Register VMZ = VMZl; + if (Imm >= 4) { + VMZ = VMZu; + Imm -= 4; + } + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm); + MachineInstr *Inst = MIB.getInstr(); + MI.eraseFromParent(); + if (KillSrc) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true); + } + return true; + } + case VE::VFMKyal: + case VE::VFMKynal: + case VE::VFMKWyvl: + case VE::VFMKWyvyl: + case VE::VFMKSyvl: + case VE::VFMKSyvyl: + expandPseudoVFMK(*this, MI); } return false; } @@ -586,8 +1058,8 @@ bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const VEFrameLowering &TFL = *STI.getFrameLowering(); - // The VE ABI requires a reserved 176 bytes area at the top - // of stack as described in VESubtarget.cpp. So, we adjust it here. + // The VE ABI requires a reserved area at the top of stack as described + // in VEFrameLowering.cpp. So, we adjust it here. unsigned NumBytes = STI.getAdjustedFrameSize(0); // Also adds the size of parameter area. diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h index 7b6662df1d60..ed1f49182150 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h @@ -23,6 +23,31 @@ namespace llvm { class VESubtarget; +/// VEII - This namespace holds all of the Aurora VE target-specific +/// per-instruction flags. These must match the corresponding definitions in +/// VEInstrFormats.td. +namespace VEII { +enum { + // Aurora VE Instruction Flags. These flags describe the characteristics of + // the Aurora VE instructions for vector handling. + + /// VE_Vector - This instruction is Vector Instruction. + VE_Vector = 0x1, + + /// VE_VLInUse - This instruction has a vector register in its operands. 
+  VE_VLInUse = 0x2,
+
+  /// VE_VLMask/Shift - This is a bitmask that selects the index number where
+  /// an instruction holds vector length information (0 to 6, 7 means undef).
+  VE_VLShift = 2,
+  VE_VLMask = 0x07 << VE_VLShift,
+};
+
+#define HAS_VLINDEX(TSF) ((TSF)&VEII::VE_VLInUse)
+#define GET_VLINDEX(TSF) \
+  (HAS_VLINDEX(TSF) ? (int)(((TSF)&VEII::VE_VLMask) >> VEII::VE_VLShift) : -1)
+} // end namespace VEII
+
 class VEInstrInfo : public VEGenInstrInfo {
   const VERegisterInfo RI;
   virtual void anchor();
@@ -75,6 +100,13 @@ public:
                             const TargetRegisterInfo *TRI) const override;
   /// } Stack Spill & Reload
 
+  /// Optimization {
+
+  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
+                     MachineRegisterInfo *MRI) const override;
+
+  /// } Optimization
+
   Register getGlobalBaseReg(MachineFunction *MF) const;
 
   // Lower pseudo instructions after register allocation.
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
index 8500f8ef1292..b6862cf7b30d 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -48,7 +48,7 @@ def LO7 : SDNodeXForm<imm, [{
                                    SDLoc(N), MVT::i32);
 }]>;
 def MIMM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)),
+  return CurDAG->getTargetConstant(val2MImm(getImmVal(N)),
                                    SDLoc(N), MVT::i32);
 }]>;
 def LO32 : SDNodeXForm<imm, [{
@@ -66,7 +66,7 @@ def LO7FP : SDNodeXForm<fpimm, [{
   return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32);
 }]>;
 def MIMMFP : SDNodeXForm<fpimm, [{
-  return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)),
+  return CurDAG->getTargetConstant(val2MImm(getFpImmVal(N)),
                                    SDLoc(N), MVT::i32);
 }]>;
 def LOFP32 : SDNodeXForm<fpimm, [{
@@ -157,6 +157,15 @@ def uimm3 : Operand<i32>, PatLeaf<(imm), [{
   let ParserMatchClass = UImm3AsmOperand;
 }
 
+// uimm4 - Generic immediate value.
+def UImm4AsmOperand : AsmOperandClass {
+  let Name = "UImm4";
+}
+def uimm4 : Operand<i32>, PatLeaf<(imm), [{
+    return isUInt<4>(N->getZExtValue()); }], ULO7> {
+  let ParserMatchClass = UImm4AsmOperand;
+}
+
 // uimm6 - Generic immediate value.
 def UImm6AsmOperand : AsmOperandClass {
   let Name = "UImm6";
@@ -196,6 +205,12 @@ def mimm : Operand<i32>, PatLeaf<(imm), [{
   let PrintMethod = "printMImmOperand";
 }
 
+// zerofp - Generic fp immediate zero value.
+def zerofp : Operand<i32>, PatLeaf<(fpimm), [{
+    return getFpImmVal(N) == 0; }]> {
+  let ParserMatchClass = ZeroAsmOperand;
+}
+
 // simm7fp - Generic fp immediate value.
def simm7fp : Operand<i32>, PatLeaf<(fpimm), [{ return isInt<7>(getFpImmVal(N)); @@ -230,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000) == 0; }]>; def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff) == 0; }]>; +def nonzero : PatLeaf<(imm), [{ return N->getSExtValue() !=0 ; }]>; def CCSIOp : PatLeaf<(cond), [{ switch (N->get()) { @@ -430,6 +446,17 @@ def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, def getGOT : Operand<iPTR>; +def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_setup_dispatch: SDNode<"VEISD::EH_SJLJ_SETUP_DISPATCH", + SDTypeProfile<0, 0, []>, + [SDNPHasChain, SDNPSideEffect]>; + // GETFUNPLT for PIC def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>; @@ -442,6 +469,16 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall, def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone, [SDNPHasChain, SDNPSideEffect]>; +// MEMBARRIER +def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +// TS1AM +def SDT_TS1AM : SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisPtrTy<1>, + SDTCisVT<2, i32>, SDTCisInt<3>]>; +def ts1am : SDNode<"VEISD::TS1AM", SDT_TS1AM, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; //===----------------------------------------------------------------------===// // VE Flag Conditions @@ -497,7 +534,8 @@ multiclass RRbm<string opcStr, bits<8>opc, RegisterClass RCo, ValueType Tyo, RegisterClass RCi, ValueType Tyi, SDPatternOperator OpNode = null_frag, - Operand immOp = simm7, Operand mOp = mimm> { + Operand immOp = simm7, Operand mOp = mimm, + bit MoveImm = 0> { def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz), !strconcat(opcStr, " $sx, $sy, $sz"), [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>; @@ -514,7 +552,12 @@ multiclass RRbm<string opcStr, bits<8>opc, let cy = 0, cz = 0 in def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz), !strconcat(opcStr, " $sx, $sy, $sz"), - [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>; + [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]> { + // VE uses ORim as a move immediate instruction, so declare it here. + // An instruction declared as MoveImm will be optimized in FoldImmediate + // later. + let isMoveImm = MoveImm; + } } // Multiclass for non-commutative RR type instructions @@ -546,8 +589,8 @@ multiclass RRNCbm<string opcStr, bits<8>opc, multiclass RRm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty, SDPatternOperator OpNode = null_frag, - Operand immOp = simm7, Operand mOp = mimm> : - RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>; + Operand immOp = simm7, Operand mOp = mimm, bit MoveImm = 0> : + RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp, MoveImm>; // Generic RR multiclass for non-commutative instructions with 2 arguments. // e.g. SUBUL, SUBUW, SUBSWSX, and etc. 
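The isMoveImm comment above and the isMImmVal/val2MImm calls in the FoldImmediate hunk earlier in this diff both rely on VE's "M immediate" form: a 64-bit value made of m leading ones followed by zeros, written (m)1, or m leading zeros followed by ones, written (m)0, encoded in 7 bits as m or m + 64 respectively. That convention is what spellings such as !add(56, 64) for the mask 0xff and the "(63)0 == 1" remark later in this file refer to. The following is a minimal standalone sketch of the check and the encoding under that assumption; the helper names are invented for illustration and are not the in-tree functions.

  #include <cassert>
  #include <cstdint>

  // (m)1 = m leading one bits then zeros, encoded as m (0..63).
  // (m)0 = m leading zero bits then ones, encoded as m + 64.
  static bool isMImmSketch(uint64_t Val) {
    if (Val == 0)
      return true;                          // 0 is (0)1
    if ((Val & (Val + 1)) == 0)
      return true;                          // 0...01...1 -> (m)0
    return (~Val & (~Val + 1)) == 0;        // 1...10...0 -> (m)1
  }

  static unsigned val2MImmSketch(uint64_t Val) {
    if (Val == 0)
      return 0;                             // (0)1
    unsigned M = 0;
    if (Val >> 63) {                        // (m)1: count the leading ones
      while (M < 64 && ((Val >> (63 - M)) & 1))
        ++M;
      return M;
    }
    while (M < 64 && !((Val >> (63 - M)) & 1))
      ++M;                                  // (m)0: count the leading zeros
    return M + 64;
  }

  int main() {
    assert(val2MImmSketch(1) == 63 + 64);               // (63)0 == 1
    assert(val2MImmSketch(0xffull) == 56 + 64);         // (56)0 == 0xff
    assert(val2MImmSketch(0xffffffffull) == 32 + 64);   // (32)0, as in the ANDrm patterns
    assert(val2MImmSketch(0xff00000000000000ull) == 8); // (8)1
    assert(!isMImmSketch(0x7e));                        // not a single run from either end
    return 0;
  }

Read this way, the FoldImmediate change earlier in the diff only folds a LEA/OR move-immediate into an AND/OR/XOR user through the M-operand form when the constant passes exactly this kind of check, and it keeps the register operand otherwise.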
@@ -775,10 +818,10 @@ multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond, let bpf = 0 /* NONE */ in def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)), !strconcat(opcStr, " ", cmpStr, "$addr")>; - let bpf = 2 /* NOT TaKEN */ in + let bpf = 2 /* NOT TAKEN */ in def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)), !strconcat(opcStr, ".nt ", cmpStr, "$addr")>; - let bpf = 3 /* TaKEN */ in + let bpf = 3 /* TAKEN */ in def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)), !strconcat(opcStr, ".t ", cmpStr, "$addr")>; } @@ -807,18 +850,25 @@ multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> { let bpf = 0 /* NONE */ in def "" : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)), !strconcat(opcStr, " ", cmpStr, "$imm32")>; - let bpf = 2 /* NOT TaKEN */ in + let bpf = 2 /* NOT TAKEN */ in def _nt : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)), !strconcat(opcStr, ".nt ", cmpStr, "$imm32")>; - let bpf = 3 /* TaKEN */ in + let bpf = 3 /* TAKEN */ in def _t : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)), !strconcat(opcStr, ".t ", cmpStr, "$imm32")>; } multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc, - RegisterClass RC, Operand immOp> { + RegisterClass RC, Operand immOp, Operand zeroOp> { defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>; let cy = 0 in - defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, RC:$sz)>; + defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, + RC:$sz)>; + let cz = 0 in + defm rz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, + zeroOp:$sz)>; + let cy = 0, cz = 0 in + defm iz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, + zeroOp:$sz)>; let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in defm a : BCRbpfm<opcStrAt, "", opc, (ins)>; let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in @@ -898,7 +948,7 @@ multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> { //----------------------------------------------------------------------------- // Multiclass for generic RM instructions -multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> { +multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC, bit MoveImm = 0> { def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr), !strconcat(opcStr, " $dest, $addr"), []>; let cy = 0 in @@ -909,36 +959,27 @@ multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> { !strconcat(opcStr, " $dest, $addr"), []>; let cy = 0, cz = 0 in def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr), - !strconcat(opcStr, " $dest, $addr"), []>; + !strconcat(opcStr, " $dest, $addr"), []> { + // VE uses LEAzii and LEASLzii as a move immediate instruction, so declare + // it here. An instruction declared as MoveImm will be optimized in + // FoldImmediate later. + let isMoveImm = MoveImm; + } } // Section 8.2.1 - LEA -let cx = 0, DecoderMethod = "DecodeLoadI64" in -defm LEA : RMm<"lea", 0x06, I64>; -let cx = 1, DecoderMethod = "DecodeLoadI64" in -defm LEASL : RMm<"lea.sl", 0x06, I64>; -let cx = 0, DecoderMethod = "DecodeLoadI32", isCodeGenOnly = 1 in -defm LEA32 : RMm<"lea", 0x06, I32>; +let isReMaterializable = 1, isAsCheapAsAMove = 1, + DecoderMethod = "DecodeLoadI64" in { + let cx = 0 in defm LEA : RMm<"lea", 0x06, I64, /* MoveImm */ 1>; + let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64, /* MoveImm */ 1>; +} +// LEA basic patterns. +// Need to be defined here to prioritize LEA over ADX. 
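+// (lea materializes $base + $idx + sext32($disp), while lea.sl adds the
+// 32-bit displacement shifted into the upper half; an arbitrary 64-bit
+// constant is therefore built as a LEA of the low half followed by a LEASL
+// of the high half, as in the "Arbitrary immediates" patterns later in this
+// file.)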
def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>; def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>; def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>; def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>; -def : Pat<(add I32:$base, simm32:$disp), - (LEA32rii (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $base, sub_i32), 0, - (LO32 $disp))>; - -def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp), - [(add (add node:$base, node:$idx), node:$disp), - (add (add node:$base, node:$disp), node:$idx)]>; -def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp), - (LEArii $base, (LO7 $idx), (LO32 $disp))>; -def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp), - (LEArri $base, $idx, (LO32 $disp))>; -def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp), - (LEASLrii $base, (LO7 $idx), (HI32 $disp))>; -def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp), - (LEASLrri $base, $idx, (HI32 $disp))>; // Multiclass for load instructions. let mayLoad = 1, hasSideEffects = 0 in @@ -991,6 +1032,13 @@ defm LD1BSX : LOADm<"ld1b.sx", 0x05, I32, i32, sextloadi8>; let cx = 1, DecoderMethod = "DecodeLoadI32" in defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>; +// LDQ pseudo instructions +let mayLoad = 1, hasSideEffects = 0 in { + def LDQrii : Pseudo<(outs F128:$dest), (ins MEMrii:$addr), + "# pseudo ldq $dest, $addr", + [(set f128:$dest, (load ADDRrii:$addr))]>; +} + // Multiclass for store instructions. let mayStore = 1 in multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty, @@ -1036,6 +1084,13 @@ defm ST2B : STOREm<"st2b", 0x14, I32, i32, truncstorei16>; let DecoderMethod = "DecodeStoreI32" in defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>; +// STQ pseudo instructions +let mayStore = 1, hasSideEffects = 0 in { + def STQrii : Pseudo<(outs), (ins MEMrii:$addr, F128:$sx), + "# pseudo stq $sx, $addr", + [(store f128:$sx, ADDRrii:$addr)]>; +} + // Section 8.2.12 - DLDS let DecoderMethod = "DecodeLoadI64" in defm DLD : LOADm<"dld", 0x09, I64, i64, load>; @@ -1074,9 +1129,9 @@ defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>; // Section 8.2.20 - CAS (Compare and Swap) let DecoderMethod = "DecodeCASI64" in -defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>; +defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7, atomic_cmp_swap_64>; let DecoderMethod = "DecodeCASI32", cx = 1 in -defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>; +defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7, atomic_cmp_swap_32>; //----------------------------------------------------------------------------- // Section 8.3 - Transfer Control Instructions @@ -1106,6 +1161,8 @@ def SVOB : RR<0x30, (outs), (ins), "svob">; // Section 8.4 - Fixed-point Operation Instructions //----------------------------------------------------------------------------- +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.4.1 - ADD (Add) defm ADDUL : RRm<"addu.l", 0x48, I64, i64>; let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>; @@ -1128,6 +1185,8 @@ let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>; // Section 8.4.6 - SBX (Subtract) defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>; +} // isReMaterializable, isAsCheapAsAMove + // Section 8.4.7 - MPY (Multiply) defm MULUL : RRm<"mulu.l", 0x49, I64, i64>; let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>; @@ -1153,6 +1212,8 @@ let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>; // Section 8.4.13 - DVX (Divide) defm DIVSL : RRNCm<"divs.l", 0x7F, I64, 
i64, sdiv>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.4.14 - CMP (Compare) defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>; let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>; @@ -1175,45 +1236,66 @@ let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>; defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>; let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>; +} // isReMaterializable, isAsCheapAsAMove + //----------------------------------------------------------------------------- // Section 8.5 - Logical Operation Instructions //----------------------------------------------------------------------------- +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.5.1 - AND (AND) defm AND : RRm<"and", 0x44, I64, i64, and>; -let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>; // Section 8.5.2 - OR (OR) -defm OR : RRm<"or", 0x45, I64, i64, or>; -let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>; +defm OR : RRm<"or", 0x45, I64, i64, or, simm7, mimm, /* MoveImm */ 1>; // Section 8.5.3 - XOR (Exclusive OR) defm XOR : RRm<"xor", 0x46, I64, i64, xor>; -let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>; // Section 8.5.4 - EQV (Equivalence) defm EQV : RRm<"eqv", 0x47, I64, i64>; +} // isReMaterializable, isAsCheapAsAMove + // Section 8.5.5 - NND (Negate AND) def and_not : PatFrags<(ops node:$x, node:$y), [(and (not node:$x), node:$y)]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>; // Section 8.5.6 - MRG (Merge) defm MRG : RRMRGm<"mrg", 0x56, I64, i64>; // Section 8.5.7 - LDZ (Leading Zero Count) -defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>; +def ctlz_pat : PatFrags<(ops node:$src), + [(ctlz node:$src), + (ctlz_zero_undef node:$src)]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>; // Section 8.5.8 - PCNT (Population Count) defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>; // Section 8.5.9 - BRV (Bit Reverse) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>; // Section 8.5.10 - BSWP (Byte Swap) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>; +def : Pat<(i64 (bswap i64:$src)), + (BSWPri $src, 0)>; +def : Pat<(i64 (bswap (i64 mimm:$src))), + (BSWPmi (MIMM $src), 0)>; +def : Pat<(i32 (bswap i32:$src)), + (EXTRACT_SUBREG + (BSWPri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), + sub_i32)>; +def : Pat<(i32 (bswap (i32 mimm:$src))), + (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>; + // Section 8.5.11 - CMOV (Conditional Move) let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>; let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>; @@ -1229,17 +1311,21 @@ def : MnemonicAlias<"cmov.s", "cmov.s.at">; //----------------------------------------------------------------------------- // Section 8.6.1 - SLL (Shift Left Logical) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm SLL : RRIm<"sll", 0x65, I64, i64, shl>; // Section 8.6.2 - SLD (Shift Left Double) defm SLD : RRILDm<"sld", 0x64, I64, i64>; // Section 8.6.3 - SRL (Shift Right Logical) +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm SRL : RRIm<"srl", 0x75, I64, i64, srl>; // Section 8.6.4 - SRD (Shift Right Double) defm SRD : RRIRDm<"srd", 0x74, I64, i64>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Section 8.6.5 - SLA (Shift Left 
Arithmetic) defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>; let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>; @@ -1254,6 +1340,8 @@ let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>; // Section 8.6.8 - SRAX (Shift Right Arithmetic) defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>; +} // isReMaterializable, isAsCheapAsAMove + def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))), (EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>; @@ -1302,13 +1390,13 @@ let cw = 1, cx = 1 in defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>; // Section 8.7.7 - FAQ (Floating Add Quadruple) -defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>; +defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128, fadd>; // Section 8.7.8 - FSQ (Floating Subtract Quadruple) -defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>; +defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128, fsub>; // Section 8.7.9 - FMQ (Floating Subtract Quadruple) -defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>; +defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128, fmul>; // Section 8.7.10 - FCQ (Floating Compare Quadruple) defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp, @@ -1339,17 +1427,17 @@ defm CVTDL : CVTm<"cvt.d.l", 0x5F, I64, f64, I64, i64, sint_to_fp>; // Section 8.7.15 - CVS (Convert to Single-format) defm CVTSD : CVTm<"cvt.s.d", 0x1F, F32, f32, I64, f64, fpround>; let cx = 1 in -defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128>; +defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128, fpround>; // Section 8.7.16 - CVD (Convert to Double-format) defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>; let cx = 1 in -defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128>; +defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128, fpround>; // Section 8.7.17 - CVQ (Convert to Single-format) -defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64>; +defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64, fpextend>; let cx = 1 in -defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32>; +defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32, fpextend>; //----------------------------------------------------------------------------- // Section 8.8 - Branch instructions @@ -1378,13 +1466,13 @@ defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>; // Section 8.8.4 - BCR (Branch on Condition Relative) let cx = 0, cx2 = 0 in -defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>; +defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7, zero>; let cx = 1, cx2 = 0 in -defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>; +defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7, zero>; let cx = 0, cx2 = 1 in -defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>; +defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp, zerofp>; let cx = 1, cx2 = 1 in -defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>; +defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp, zerofp>; // Section 8.8.5 - BSIC (Branch and Save IC) let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in @@ -1481,11 +1569,23 @@ defm SHMB : SHMm<"shm.b", 0x31, I64>; // Pattern Matchings //===----------------------------------------------------------------------===// +// Basic cast between registers. This is often used in ISel patterns, so make +// them as OutPatFrag. 
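+// (These are just INSERT_SUBREG/EXTRACT_SUBREG shorthands: for example, the
+// atomic-load patterns below use (i2l (LDLZXrri ...)) to widen an i32 load
+// result into an i64, and l2i, f2l, and l2f give the inverse and the f32
+// equivalents.)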
+def i2l : OutPatFrag<(ops node:$exp), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>; +def l2i : OutPatFrag<(ops node:$exp), + (EXTRACT_SUBREG $exp, sub_i32)>; +def f2l : OutPatFrag<(ops node:$exp), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_f32)>; +def l2f : OutPatFrag<(ops node:$exp), + (EXTRACT_SUBREG $exp, sub_f32)>; + // Small immediates. -def : Pat<(i32 simm7:$val), (OR32im (LO7 $val), 0)>; +def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>; def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>; // Medium immediates. -def : Pat<(i32 simm32:$val), (LEA32zii 0, 0, (LO32 $val))>; +def : Pat<(i32 simm32:$val), + (EXTRACT_SUBREG (LEAzii 0, 0, (LO32 $val)), sub_i32)>; def : Pat<(i64 simm32:$val), (LEAzii 0, 0, (LO32 $val))>; def : Pat<(i64 uimm32:$val), (ANDrm (LEAzii 0, 0, (LO32 $val)), !add(32, 64))>; // Arbitrary immediates. @@ -1497,6 +1597,54 @@ def : Pat<(i64 imm:$val), (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0, (HI32 imm:$val))>; +// LEA patterns +def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp), + [(add (add node:$base, node:$idx), node:$disp), + (add (add node:$base, node:$disp), node:$idx), + (add node:$base, (add $idx, $disp))]>; +def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp), + (LEArii $base, (LO7 $idx), (LO32 $disp))>; +def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp), + (LEArri $base, $idx, (LO32 $disp))>; +def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp), + (LEASLrii $base, (LO7 $idx), (HI32 $disp))>; +def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp), + (LEASLrri $base, $idx, (HI32 $disp))>; + +// Address calculation patterns and optimizations +// +// Generate following instructions: +// 1. LEA %reg, label@LO32 +// AND %reg, %reg, (32)0 +// 2. LEASL %reg, label@HI32 +// 3. (LEA %reg, label@LO32) +// (AND %reg, %reg, (32)0) +// LEASL %reg, label@HI32(, %reg) +// 4. 
(LEA %reg, label@LO32) +// (AND %reg, %reg, (32)0) +// LEASL %reg, label@HI32(%reg, %got) +// +def velo_only : OutPatFrag<(ops node:$lo), + (ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>; +def vehi_only : OutPatFrag<(ops node:$hi), + (LEASLzii 0, 0, $hi)>; +def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo), + (LEASLrii $lo, 0, $hi)>; +def vehi_lo_imm : OutPatFrag<(ops node:$hi, node:$lo, node:$idx), + (LEASLrii $lo, $idx, $hi)>; +def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo), + (LEASLrri $base, $lo, $hi)>; +foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr", + "tglobaltlsaddr", "tjumptable" ] in { + def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>; + def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>; + def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>; + def : Pat<(add (add (VEhi !cast<SDNode>(type):$hi), I64:$lo), simm7:$val), + (vehi_lo_imm $hi, $lo, (LO7 $val))>; + def : Pat<(add I64:$base, (add (VEhi !cast<SDNode>(type):$hi), I64:$lo)), + (vehi_baselo $base, $hi, $lo)>; +} + // floating point def : Pat<(f32 fpimm:$val), (EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>; @@ -1526,8 +1674,8 @@ def : Pat<(sext_inreg I64:$src, i8), (SRALri (SLLri $src, 56), 56)>; def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8), (EXTRACT_SUBREG (SRALri (SLLri $src, 56), 56), sub_i32)>; -def : Pat<(and (trunc i64:$src), 0xff), - (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(56, 64))>; +def : Pat<(i32 (and (trunc i64:$src), 0xff)), + (EXTRACT_SUBREG (ANDrm $src, !add(56, 64)), sub_i32)>; // Cast to i16 def : Pat<(sext_inreg I32:$src, i16), @@ -1536,28 +1684,34 @@ def : Pat<(sext_inreg I64:$src, i16), (SRALri (SLLri $src, 48), 48)>; def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16), (EXTRACT_SUBREG (SRALri (SLLri $src, 48), 48), sub_i32)>; -def : Pat<(and (trunc i64:$src), 0xffff), - (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(48, 64))>; +def : Pat<(i32 (and (trunc i64:$src), 0xffff)), + (EXTRACT_SUBREG (ANDrm $src, !add(48, 64)), sub_i32)>; // Cast to i32 def : Pat<(i32 (trunc i64:$src)), - (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0)>; -def : Pat<(i32 (fp_to_sint I64:$reg)), (CVTWDSXr RD_RZ, $reg)>; -def : Pat<(i32 (fp_to_sint F32:$reg)), (CVTWSSXr RD_RZ, $reg)>; + (EXTRACT_SUBREG (ANDrm $src, !add(32, 64)), sub_i32)>; +def : Pat<(i32 (fp_to_sint f32:$src)), (CVTWSSXr RD_RZ, $src)>; +def : Pat<(i32 (fp_to_sint f64:$src)), (CVTWDSXr RD_RZ, $src)>; +def : Pat<(i32 (fp_to_sint f128:$src)), (CVTWDSXr RD_RZ, (CVTDQr $src))>; // Cast to i64 -def : Pat<(sext_inreg I64:$src, i32), +def : Pat<(sext_inreg i64:$src, i32), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>; -def : Pat<(i64 (sext i32:$sy)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $sy, 0), sub_i32)>; -def : Pat<(i64 (zext i32:$sy)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $sy, 0), sub_i32)>; -def : Pat<(i64 (fp_to_sint f32:$sy)), (CVTLDr RD_RZ, (CVTDSr $sy))>; -def : Pat<(i64 (fp_to_sint I64:$reg)), (CVTLDr RD_RZ, $reg)>; +def : Pat<(i64 (sext i32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $src, 0), sub_i32)>; +def : Pat<(i64 (zext i32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $src, 0), sub_i32)>; +def : Pat<(i64 (fp_to_sint f32:$src)), (CVTLDr RD_RZ, (CVTDSr $src))>; +def : Pat<(i64 (fp_to_sint f64:$src)), (CVTLDr RD_RZ, $src)>; +def : Pat<(i64 (fp_to_sint f128:$src)), (CVTLDr RD_RZ, (CVTDQr $src))>; // Cast to f32 -def : Pat<(f32 (sint_to_fp i64:$sy)), (CVTSDr 
(CVTDLr i64:$sy))>; +def : Pat<(f32 (sint_to_fp i64:$src)), (CVTSDr (CVTDLr i64:$src))>; + +// Cast to f128 +def : Pat<(f128 (sint_to_fp i32:$src)), (CVTQDr (CVTDWr $src))>; +def : Pat<(f128 (sint_to_fp i64:$src)), (CVTQDr (CVTDLr $src))>; def : Pat<(i64 (anyext i32:$sy)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>; @@ -1625,29 +1779,150 @@ defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>; defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>; defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, ST1Bzii>; -// Address calculation and its optimization -def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>; -def : Pat<(VElo tglobaladdr:$in), - (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>; -def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)), - (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0, - (tglobaladdr:$in1))>; - -// GlobalTLS address calculation and its optimization -def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>; -def : Pat<(VElo tglobaltlsaddr:$in), - (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>; -def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)), - (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0, - (tglobaltlsaddr:$in1))>; - -// Address calculation and its optimization -def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>; -def : Pat<(VElo texternalsym:$in), - (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>; -def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)), - (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0, - (texternalsym:$in1))>; +// Atomic loads +multiclass ATMLDm<SDPatternOperator from, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>; + def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>; + def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>; + def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>; +} +defm : ATMLDm<atomic_load_8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>; +defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>; +defm : ATMLDm<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>; +defm : ATMLDm<atomic_load_64, LDrri, LDrii, LDzri, LDzii>; + +// Optimized atomic loads with sext +multiclass SXATMLDm<SDPatternOperator from, Operand TY, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrri:$addr))), TY)), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRrii:$addr))), TY)), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzri:$addr))), TY)), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (sext_inreg (i64 (anyext (from ADDRzii:$addr))), TY)), + (i2l (tozii MEMzii:$addr))>; +} +multiclass SXATMLD32m<SDPatternOperator from, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (sext (from ADDRrri:$addr))), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (sext (from ADDRrii:$addr))), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (sext (from ADDRzri:$addr))), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (sext (from ADDRzii:$addr))), + (i2l (tozii MEMzii:$addr))>; +} +defm : SXATMLDm<atomic_load_8, i8, LD1BSXrri, LD1BSXrii, LD1BSXzri, LD1BSXzii>; +defm 
: SXATMLDm<atomic_load_16, i16, LD2BSXrri, LD2BSXrii, LD2BSXzri, + LD2BSXzii>; +defm : SXATMLD32m<atomic_load_32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>; + +// Optimized atomic loads with zext +multiclass ZXATMLDm<SDPatternOperator from, Operand VAL, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (and (anyext (from ADDRrri:$addr)), VAL)), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (and (anyext (from ADDRrii:$addr)), VAL)), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (and (anyext (from ADDRzri:$addr)), VAL)), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (and (anyext (from ADDRzii:$addr)), VAL)), + (i2l (tozii MEMzii:$addr))>; +} +multiclass ZXATMLD32m<SDPatternOperator from, Operand VAL, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(i64 (zext (from ADDRrri:$addr))), + (i2l (torri MEMrri:$addr))>; + def : Pat<(i64 (zext (from ADDRrii:$addr))), + (i2l (torii MEMrii:$addr))>; + def : Pat<(i64 (zext (from ADDRzri:$addr))), + (i2l (tozri MEMzri:$addr))>; + def : Pat<(i64 (zext (from ADDRzii:$addr))), + (i2l (tozii MEMzii:$addr))>; +} +defm : ZXATMLDm<atomic_load_8, 0xFF, LD1BZXrri, LD1BZXrii, LD1BZXzri, + LD1BZXzii>; +defm : ZXATMLDm<atomic_load_16, 0xFFFF, LD2BZXrri, LD2BZXrii, LD2BZXzri, + LD2BZXzii>; +defm : ZXATMLD32m<atomic_load_32, 0xFFFFFFFF, LDLZXrri, LDLZXrii, LDLZXzri, + LDLZXzii>; + +// Atomic stores +multiclass ATMSTm<SDPatternOperator from, ValueType ty, + SDPatternOperator torri, SDPatternOperator torii, + SDPatternOperator tozri, SDPatternOperator tozii> { + def : Pat<(from ADDRrri:$addr, ty:$src), (torri MEMrri:$addr, $src)>; + def : Pat<(from ADDRrii:$addr, ty:$src), (torii MEMrii:$addr, $src)>; + def : Pat<(from ADDRzri:$addr, ty:$src), (tozri MEMzri:$addr, $src)>; + def : Pat<(from ADDRzii:$addr, ty:$src), (tozii MEMzii:$addr, $src)>; +} +defm : ATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>; +defm : ATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>; +defm : ATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>; +defm : ATMSTm<atomic_store_64, i64, STrri, STrii, STzri, STzii>; + +// Optimized atomic stores with truncate +multiclass TRATMSTm<SDPatternOperator from, + ValueType ty, + SDPatternOperator torri, + SDPatternOperator torii, + SDPatternOperator tozri, + SDPatternOperator tozii> { + def : Pat<(from ADDRrri:$addr, (i32 (trunc i64:$src))), + (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + def : Pat<(from ADDRrii:$addr, (i32 (trunc i64:$src))), + (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + def : Pat<(from ADDRzri:$addr, (i32 (trunc i64:$src))), + (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))), + (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>; +} +defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>; +defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>; +defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>; + +// Atomic swaps +def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)), + (TS1AMWrir $src, 0, $flag, $new)>; +def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)), + (TS1AMWrii MEMriRRM:$src, 15, $new)>; +def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)), + (TS1AMLrir MEMriRRM:$src, (LEAzii 0, 0, 255), i64:$new)>; + +//===----------------------------------------------------------------------===// +// SJLJ 
Exception handling patterns +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + let isTerminator = 1 in + def EH_SjLj_LongJmp : Pseudo<(outs), (ins I64:$buf), + "# EH_SJLJ_LONGJMP", + [(VEeh_sjlj_longjmp I64:$buf)]>; + + def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf), + "# EH_SJLJ_SETJMP", + [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>; + + def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH", + [(VEeh_sjlj_setup_dispatch)]>; +} + +let isTerminator = 1, isBranch = 1, isCodeGenOnly = 1 in + def EH_SjLj_Setup : Pseudo<(outs), (ins brtarget32:$dst), + "# EH_SJlJ_SETUP $dst">; + +//===----------------------------------------------------------------------===// +// Branch related patterns +//===----------------------------------------------------------------------===// // Branches def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>; @@ -1681,6 +1956,8 @@ multiclass BRCCFm<ValueType ty, SDPatternOperator BrOpNode1, } defm : BRCCFm<f32, BRCFSrr, BRCFSir>; defm : BRCCFm<f64, BRCFDrr, BRCFDir>; +def : Pat<(brcc cond:$cond, f128:$l, f128:$r, bb:$addr), + (BRCFDir (fcond2cc $cond), 0, (FCMPQrr $r, $l), bb:$addr)>; //===----------------------------------------------------------------------===// // Pseudo Instructions @@ -1737,53 +2014,42 @@ let Uses = [SX11], hasSideEffects = 1 in def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins), "# GET STACK TOP", [(set iPTR:$dst, (GetStackTop))]>; + +// MEMBARRIER +let hasSideEffects = 1 in +def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >; + +//===----------------------------------------------------------------------===// +// Other patterns +//===----------------------------------------------------------------------===// + // SETCC pattern matches // // CMP %tmp, lhs, rhs ; compare lhs and rhs // or %res, 0, (0)1 ; initialize by 0 // CMOV %res, (63)0, %tmp ; set 1 if %tmp is true -def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrm (icond2cc $cond), - (CMPSLrr i64:$LHS, i64:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrm (icond2cc $cond), - (CMPULrr i64:$LHS, i64:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrm (icond2cc $cond), - (CMPSWSXrr i32:$LHS, i32:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrm (icond2cc $cond), - (CMPUWrr i32:$LHS, i32:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)), - (EXTRACT_SUBREG - (CMOVDrm (fcond2cc $cond), - (FCMPDrr f64:$LHS, f64:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; - -def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)), - (EXTRACT_SUBREG - (CMOVSrm (fcond2cc $cond), - (FCMPSrr f32:$LHS, f32:$RHS), - !add(63, 64), - (ORim 0, 0)), sub_i32)>; +class setccrr<Instruction INSN> : + OutPatFrag<(ops node:$cond, node:$comp), + (EXTRACT_SUBREG + (INSN $cond, $comp, + !add(63, 64), // means (63)0 == 1 + (ORim 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc i32:$l, i32:$r, CCSIOp:$cond)), + (setccrr<CMOVWrm> (icond2cc $cond), (CMPSWSXrr $l, $r))>; +def : Pat<(i32 (setcc i32:$l, i32:$r, CCUIOp:$cond)), + (setccrr<CMOVWrm> (icond2cc $cond), (CMPUWrr $l, $r))>; +def : Pat<(i32 (setcc i64:$l, 
i64:$r, CCSIOp:$cond)), + (setccrr<CMOVLrm> (icond2cc $cond), (CMPSLrr $l, $r))>; +def : Pat<(i32 (setcc i64:$l, i64:$r, CCUIOp:$cond)), + (setccrr<CMOVLrm> (icond2cc $cond), (CMPULrr $l, $r))>; +def : Pat<(i32 (setcc f32:$l, f32:$r, cond:$cond)), + (setccrr<CMOVSrm> (fcond2cc $cond), (FCMPSrr $l, $r))>; +def : Pat<(i32 (setcc f64:$l, f64:$r, cond:$cond)), + (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPDrr $l, $r))>; +def : Pat<(i32 (setcc f128:$l, f128:$r, cond:$cond)), + (setccrr<CMOVDrm> (fcond2cc $cond), (FCMPQrr $l, $r))>; // Special SELECTCC pattern matches // Use min/max for better performance. @@ -1824,152 +2090,171 @@ def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)), def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)), (MINSWSXrr $LHS, $RHS)>; +// Helper classes to construct cmov patterns for the ease. +// +// Hiding INSERT_SUBREG/EXTRACT_SUBREG patterns. + +class cmovrr<Instruction INSN> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (INSN $cond, $comp, $t, $f)>; +class cmovrm<Instruction INSN, SDNodeXForm MOP = MIMM> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (INSN $cond, $comp, (MOP $t), $f)>; +class cmov32rr<Instruction INSN, SubRegIndex sub_oty> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (EXTRACT_SUBREG + (INSN $cond, $comp, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_oty), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)), + sub_oty)>; +class cmov32rm<Instruction INSN, SubRegIndex sub_oty, SDNodeXForm MOP = MIMM> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (EXTRACT_SUBREG + (INSN $cond, $comp, + (MOP $t), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_oty)), + sub_oty)>; +class cmov128rr<Instruction INSN> : + OutPatFrag<(ops node:$cond, node:$comp, node:$t, node:$f), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (INSN $cond, $comp, + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (INSN $cond, $comp, + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; + // Generic SELECTCC pattern matches // // CMP %tmp, %l, %r ; compare %l and %r // or %res, %f, (0)1 ; initialize by %f // CMOV %res, %t, %tmp ; set %t if %tmp is true -// selectcc for i64 result -def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), - (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; -def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), - (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; - -// selectcc for i32 result def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr (icond2cc $cond), - (CMPSWSXrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPSWSXrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr 
(icond2cc $cond), - (CMPUWrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVWrr, sub_i32> (icond2cc $cond), (CMPUWrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPSLrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPSLrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPULrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVLrr, sub_i32> (icond2cc $cond), (CMPULrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVSrr (fcond2cc $cond), - (FCMPSrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVSrr, sub_i32> (fcond2cc $cond), (FCMPSrr $l, $r), + $t, $f)>; def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVDrr (fcond2cc $cond), - (FCMPDrr $l, $r), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; + (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPDrr $l, $r), + $t, $f)>; +def : Pat<(i32 (selectcc f128:$l, f128:$r, i32:$t, i32:$f, cond:$cond)), + (cmov32rr<CMOVDrr, sub_i32> (fcond2cc $cond), (FCMPQrr $l, $r), + $t, $f)>; -// selectcc for f64 result -def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), - (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), - (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), - (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; -def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)), - (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), + (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f128:$l, f128:$r, i64:$t, i64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; -// selectcc for f32 result def : 
Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr (icond2cc $cond), - (CMPSWSXrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPSWSXrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVWrr (icond2cc $cond), - (CMPUWrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVWrr, sub_f32> (icond2cc $cond), (CMPUWrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPSLrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPSLrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)), - (EXTRACT_SUBREG - (CMOVLrr (icond2cc $cond), - (CMPULrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVLrr, sub_f32> (icond2cc $cond), (CMPULrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVSrr (fcond2cc $cond), - (FCMPSrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVSrr, sub_f32> (fcond2cc $cond), (FCMPSrr $l, $r), + $t, $f)>; def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)), - (EXTRACT_SUBREG - (CMOVDrr (fcond2cc $cond), - (FCMPDrr $l, $r), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; + (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPDrr $l, $r), + $t, $f)>; +def : Pat<(f32 (selectcc f128:$l, f128:$r, f32:$t, f32:$f, cond:$cond)), + (cmov32rr<CMOVDrr, sub_f32> (fcond2cc $cond), (FCMPQrr $l, $r), + $t, $f)>; + +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (cmovrr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (cmovrr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), + (cmovrr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f128:$l, f128:$r, f64:$t, f64:$f, cond:$cond)), + (cmovrr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; + +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (cmov128rr<CMOVWrr> (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (cmov128rr<CMOVLrr> (icond2cc 
$cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (cmov128rr<CMOVLrr> (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc f32:$l, f32:$r, f128:$t, f128:$f, cond:$cond)), + (cmov128rr<CMOVSrr> (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)), + (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; +def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)), + (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>; // Generic SELECT pattern matches // Use cmov.w for all cases since %pred holds i32. // // CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true +def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), + (cmov32rr<CMOVWrr, sub_i32> CC_INE, $pred, $t, $f)>; +def : Pat<(i32 (select i32:$pred, (i32 mimm:$t), i32:$f)), + (cmov32rm<CMOVWrm, sub_i32> CC_INE, $pred, $t, $f)>; +def : Pat<(i32 (select i32:$pred, i32:$t, (i32 mimm:$f))), + (cmov32rm<CMOVWrm, sub_i32> CC_IEQ, $pred, $f, $t)>; + def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)), - (CMOVWrr CC_INE, $pred, $t, $f)>; + (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>; +def : Pat<(i64 (select i32:$pred, (i64 mimm:$t), i64:$f)), + (cmovrm<CMOVWrm, MIMM> CC_INE, $pred, $t, $f)>; +def : Pat<(i64 (select i32:$pred, i64:$t, (i64 mimm:$f))), + (cmovrm<CMOVWrm, MIMM> CC_IEQ, $pred, $f, $t)>; -def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), - (EXTRACT_SUBREG - (CMOVWrr CC_INE, $pred, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), - sub_i32)>; +def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), + (cmov32rr<CMOVWrr, sub_f32> CC_INE, $pred, $t, $f)>; +def : Pat<(f32 (select i32:$pred, (f32 mimmfp:$t), f32:$f)), + (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_INE, $pred, $t, $f)>; +def : Pat<(f32 (select i32:$pred, f32:$t, (f32 mimmfp:$f))), + (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_IEQ, $pred, $f, $t)>; def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)), - (CMOVWrr CC_INE, $pred, $t, $f)>; + (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>; +def : Pat<(f64 (select i32:$pred, (f64 mimmfp:$t), f64:$f)), + (cmovrm<CMOVWrm, MIMMFP> CC_INE, $pred, $t, $f)>; +def : Pat<(f64 (select i32:$pred, f64:$t, (f64 mimmfp:$f))), + (cmovrm<CMOVWrm, MIMMFP> CC_IEQ, $pred, $f, $t)>; -def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), - (EXTRACT_SUBREG - (CMOVWrr CC_INE, $pred, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)), - sub_f32)>; +def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)), + (cmov128rr<CMOVWrr> CC_INE, $pred, $t, $f)>; // bitconvert def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>; @@ -1982,24 +2267,48 @@ def : Pat<(f32 (bitconvert i32:$op)), (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $op, sub_i32), 32), sub_f32)>; -// Bits operations pattern matchings. -def : Pat<(i32 (ctpop i32:$src)), - (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>; -def : Pat<(i32 (ctlz i32:$src)), - (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; -def : Pat<(i64 (bswap i64:$src)), - (BSWPri $src, 0)>; -def : Pat<(i32 (bswap i32:$src)), - (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>; +// Optimize code A generated by `(unsigned char)c << 5` to B. 
+// A) sla.w.sx %s0, %s0, 5 +// lea %s1, 224 ; 0xE0 +// and %s0, %s0, %s1 +// B) sla.w.sx %s0, %s0, 5 +// and %s0, %s0, (56)0 + +def : Pat<(i32 (and i32:$val, 0xff)), + (EXTRACT_SUBREG + (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32), + !add(56, 64)), sub_i32)>; +def : Pat<(i32 (and i32:$val, 0xffff)), + (EXTRACT_SUBREG + (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $val, sub_i32), + !add(48, 64)), sub_i32)>; +def : Pat<(i64 (and i64:$val, 0xffffffff)), + (ANDrm $val, !add(32, 64))>; + +//===----------------------------------------------------------------------===// +// Vector Instruction Pattern Stuff +//===----------------------------------------------------------------------===// + +// Custom intermediate ISDs. +class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>; +def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, + [SDTCisVec<0>, IsVLVT<2>]>>; + +// Whether this is an all-true mask (assuming undef-bits above VL are all-true). +def true_mask : PatLeaf< + (vec_broadcast (i32 nonzero), (i32 srcvalue))>; +// Match any broadcast (ignoring VL). +def any_broadcast : PatFrag<(ops node:$sx), + (vec_broadcast node:$sx, (i32 srcvalue))>; + +// Vector instructions. +include "VEInstrVec.td" + +// The vevlintrin +include "VEInstrIntrinsicVL.td" -// Several special pattern matches to optimize code +// Patterns and intermediate SD nodes (VEC_*). +include "VEInstrPatternsVec.td" -def : Pat<(i32 (and i32:$lhs, 0xff)), - (AND32rm $lhs, !add(56, 64))>; -def : Pat<(i32 (and i32:$lhs, 0xffff)), - (AND32rm $lhs, !add(48, 64))>; -def : Pat<(i32 (and i32:$lhs, 0xffffffff)), - (AND32rm $lhs, !add(32, 64))>; +// Patterns and intermediate SD nodes (VVP_*). +include "VVPInstrPatternsVec.td" diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td new file mode 100644 index 000000000000..9ec10838db05 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td @@ -0,0 +1,1604 @@ +def : Pat<(int_ve_vl_vld_vssl i64:$sy, i64:$sz, i32:$vl), (VLDrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld_vssl simm7:$I, i64:$sz, i32:$vl), (VLDirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDUrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldunc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDUNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldunc_vssvl i64:$sy, i64:$sz, 
v256f64:$pt, i32:$vl), (VLDUNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldunc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDUNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldunc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDUNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLSXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLSXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDLZXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldlzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDLZXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldlzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDLZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2Drrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2Dirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2d_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLD2DNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vld2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLD2DNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vld2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLD2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2d_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2Drrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2d_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Drrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2d_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2Dirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2d_vssvl simm7:$I, 
i64:$sz, v256f64:$pt, i32:$vl), (VLDU2Dirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2dnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDU2DNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2dnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldu2dnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDU2DNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldu2dnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDU2DNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DSXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DSXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dsxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DSXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzx_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzx_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzx_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzx_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssl i64:$sy, i64:$sz, i32:$vl), (VLDL2DZXNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vldl2dzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, 
simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : 
Pat<(int_ve_vl_vstl_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, 
i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : 
Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pfchv_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_pfchv_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_pfchvnc_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVNCrrl i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_pfchvnc_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVNCirl (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_lvm_mmss v256i1:$ptm, uimm6:$N, i64:$sz), (LVMir_m (ULO7 $N), i64:$sz, v256i1:$ptm)>; +def : Pat<(int_ve_vl_lvm_MMss v512i1:$ptm, uimm6:$N, i64:$sz), (LVMyir_y (ULO7 $N), i64:$sz, v512i1:$ptm)>; +def : Pat<(int_ve_vl_svm_sms v256i1:$vmz, uimm6:$N), (SVMmi v256i1:$vmz, (ULO7 $N))>; +def : Pat<(int_ve_vl_svm_sMs v512i1:$vmz, uimm6:$N), (SVMyi v512i1:$vmz, (ULO7 $N))>; +def : Pat<(int_ve_vl_vbrdd_vsl f64:$sy, i32:$vl), (VBRDrl f64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrdd_vsvl f64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v f64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdd_vsmvl f64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v f64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdl_vsl i64:$sy, i32:$vl), (VBRDrl i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrdl_vsvl i64:$sy, v256f64:$pt, i32:$vl), (VBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vbrdl_vsmvl i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDrml_v i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdl_vsl simm7:$I, i32:$vl), (VBRDil (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vbrdl_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDil_v (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdl_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrds_vsl f32:$sy, i32:$vl), (VBRDUrl f32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrds_vsvl f32:$sy, v256f64:$pt, i32:$vl), (VBRDUrl_v f32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrds_vsmvl f32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDUrml_v f32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsl i32:$sy, i32:$vl), (VBRDLrl i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vbrdw_vsvl i32:$sy, v256f64:$pt, i32:$vl), (VBRDLrl_v i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsmvl i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLrml_v i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsl simm7:$I, i32:$vl), (VBRDLil (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vbrdw_vsvl simm7:$I, v256f64:$pt, i32:$vl), (VBRDLil_v (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrdw_vsmvl simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRDLiml_v (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrd_vsl i64:$sy, i32:$vl), (PVBRDrl i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrd_vsvl i64:$sy, v256f64:$pt, i32:$vl), (PVBRDrl_v i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrd_vsMvl i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRDrml_v i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvl uimm7:$N, v256f64:$vz, i32:$vl), (VMVivl (ULO7 $N), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmv_vsvvl uimm7:$N, v256f64:$vz, v256f64:$pt, i32:$vl), (VMVivl_v (ULO7 $N), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvmvl uimm7:$N, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMVivml_v (ULO7 $N), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; 
+def : Pat<(int_ve_vl_vadduw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vadduw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vadduw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vadduw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvaddu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDUrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvaddu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvaddu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXvvl_v v256f64:$vy, 
v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VADDSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVADDSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvadds_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVADDSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvadds_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVADDSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VADDSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VADDSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VADDSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vaddsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VADDSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vaddsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VADDSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBULrvl i64:$sy, 
v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBUrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXrvl_v i32:$sy, v256f64:$vz, 
i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VSUBSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVSUBSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvsubs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVSUBSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsubs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSUBSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VSUBSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VSUBSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VSUBSLivl 
(LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vsubsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VSUBSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsubsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSUBSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmuluw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmuluw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmuluw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmuluw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvl i32:$sy, v256f64:$vz, 
i32:$vl), (VMULSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMULSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMULSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, 
v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulslw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMULSLWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulslw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulslw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMULSLWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulslw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmulslw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMULSLWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmulslw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMULSLWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, i64:$sy, i32:$vl), (VDIVULvrl v256f64:$vy, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVULvrl_v v256f64:$vy, 
i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVULvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivul_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVULvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivul_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVULviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVUWvrl v256f64:$vy, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVUWvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVUWvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivuw_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVUWvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivuw_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVUWviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VDIVSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; 
+def : Pat<(int_ve_vl_vdivswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWSXvrl v256f64:$vy, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWSXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWSXvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivswsx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWSXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswsx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWSXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, i32:$sy, i32:$vl), (VDIVSWZXvrl v256f64:$vy, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, i32:$sy, v256f64:$pt, i32:$vl), (VDIVSWZXvrl_v v256f64:$vy, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSWZXvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivswzx_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSWZXvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXvrml_v v256f64:$vy, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivswzx_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSWZXviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VDIVSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VDIVSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VDIVSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VDIVSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, 
i64:$sy, i32:$vl), (VDIVSLvrl v256f64:$vy, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, i64:$sy, v256f64:$pt, i32:$vl), (VDIVSLvrl_v v256f64:$vy, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsl v256f64:$vy, simm7:$I, i32:$vl), (VDIVSLvil v256f64:$vy, (LO7 $I), i32:$vl)>; +def : Pat<(int_ve_vl_vdivsl_vvsvl v256f64:$vy, simm7:$I, v256f64:$pt, i32:$vl), (VDIVSLvil_v v256f64:$vy, (LO7 $I), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLvrml_v v256f64:$vy, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vdivsl_vvsmvl v256f64:$vy, simm7:$I, v256i1:$vm, v256f64:$pt, i32:$vl), (VDIVSLviml_v v256f64:$vy, (LO7 $I), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPULivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpul_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPULivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpul_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPULivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPUWvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpuw_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPUWrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpuw_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPUWivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpuw_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPUWivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpuw_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPUWivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPUvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmpu_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUvvl_v 
v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVCMPUrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmpu_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPUrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmpu_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPUrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VCMPSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vsvl 
i64:$sy, v256f64:$vz, i32:$vl), (PVCMPSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvcmps_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVCMPSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcmps_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCMPSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VCMPSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VCMPSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VCMPSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vcmpsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VCMPSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcmpsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCMPSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMAXSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : 
Pat<(int_ve_vl_vmaxswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMAXSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmaxs_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMAXSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmaxs_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMAXSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWSXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswsx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWSXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswsx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWSXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswsx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWSXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswsx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWSXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSWZXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswzx_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvl i32:$sy, v256f64:$vz, i32:$vl), (VMINSWZXrvl i32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswzx_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), 
(VMINSWZXrvl_v i32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSWZXivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminswzx_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSWZXivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXrvml_v i32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminswzx_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSWZXivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVMINSrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvmins_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVMINSrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvmins_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVMINSrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMAXSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMAXSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMAXSLivl (LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmaxsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMAXSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmaxsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMAXSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VMINSLvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminsl_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VMINSLrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminsl_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvl simm7:$I, v256f64:$vz, i32:$vl), (VMINSLivl 
(LO7 $I), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vminsl_vsvvl simm7:$I, v256f64:$vz, v256f64:$pt, i32:$vl), (VMINSLivl_v (LO7 $I), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vminsl_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMINSLivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VANDrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vand_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VANDrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVANDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvand_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVANDrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvand_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVANDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvand_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVANDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : 
Pat<(int_ve_vl_pvor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VXORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vxor_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VXORrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVXORvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvxor_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVXORrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvxor_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVXORrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvxor_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVXORrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_veqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (VEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_veqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_veqv_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEQVrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVEQVvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pveqv_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVEQVrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, 
i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>; +def : Pat<(int_ve_vl_vseq_vvl v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>; +def : Pat<(int_ve_vl_pvseqlo_vvl v256f64:$pt, i32:$vl), (PVSEQLOl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsequp_vl i32:$vl), (PVSEQUPl i32:$vl)>; +def : Pat<(int_ve_vl_pvsequp_vvl v256f64:$pt, i32:$vl), (PVSEQUPl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvseq_vl i32:$vl), (PVSEQl i32:$vl)>; +def : Pat<(int_ve_vl_pvseq_vvl v256f64:$pt, i32:$vl), (PVSEQl_v i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLLvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsll_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsll_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsll_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsll_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsll_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRLvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : 
Pat<(int_ve_vl_vsrl_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRLvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrl_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRLviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRLvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsrl_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRLvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRLvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsrl_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRLvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsrl_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRLvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vslawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSLAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSLAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vslawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vslawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSLAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsla_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSLAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSLAvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsla_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSLAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsla_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSLAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSLALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vslal_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSLALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSLALvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSLALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSLALvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vslal_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSLALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vslal_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSLALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWSXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawsx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWSXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWSXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWSXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWSXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsrawsx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWSXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, 
v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawsx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWSXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRAWZXvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawzx_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRAWZXvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, i32:$sy, i32:$vl), (VSRAWZXvrl v256f64:$vz, i32:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, i32:$sy, v256f64:$pt, i32:$vl), (VSRAWZXvrl_v v256f64:$vz, i32:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRAWZXvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsrawzx_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRAWZXvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, i32:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXvrml_v v256f64:$vz, i32:$sy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsrawzx_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRAWZXviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (PVSRAvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsra_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (PVSRAvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvsl v256f64:$vz, i64:$sy, i32:$vl), (PVSRAvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_pvsra_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (PVSRAvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvvMvl v256f64:$vz, v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvvml_v v256f64:$vz, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvsra_vvsMvl v256f64:$vz, i64:$sy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVSRAvrml_v v256f64:$vz, i64:$sy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvvl v256f64:$vz, v256f64:$vy, i32:$vl), (VSRALvvl v256f64:$vz, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsral_vvvvl v256f64:$vz, v256f64:$vy, v256f64:$pt, i32:$vl), (VSRALvvl_v v256f64:$vz, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, i64:$sy, i32:$vl), (VSRALvrl v256f64:$vz, i64:$sy, i32:$vl)>; +def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSRALvrl_v v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsl v256f64:$vz, uimm6:$N, i32:$vl), (VSRALvil v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vsral_vvsvl v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSRALvil_v v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvvmvl v256f64:$vz, v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvvml_v v256f64:$vz, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, i64:$sy, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALvrml_v v256f64:$vz, i64:$sy, v256i1:$vm, 
i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsral_vvsmvl v256f64:$vz, uimm6:$N, v256i1:$vm, v256f64:$pt, i32:$vl), (VSRALviml_v v256f64:$vz, (ULO7 $N), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl), (VSFAvrrl v256f64:$vz, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvrrl_v v256f64:$vz, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, simm7:$I, i64:$sz, i32:$vl), (VSFAvirl v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvirl_v v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvrrml_v v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvirml_v v256f64:$vz, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfaddd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFADDDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfaddd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfaddd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFADDSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfadds_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfadds_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFADDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFADDrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfadd_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfadd_vsvMvl 
i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFSUBDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFSUBSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfsubs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsubs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFSUBvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfsub_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFSUBrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfsub_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfsub_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuld_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMULDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuld_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuld_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vfmuls_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuls_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuls_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMULSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmuls_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuls_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmuls_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMULrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmul_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFDIVDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFDIVSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfdivs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfdivs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsqrtd_vvl v256f64:$vy, i32:$vl), (VFSQRTDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsqrtd_vvvl v256f64:$vy, v256f64:$pt, 
i32:$vl), (VFSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfsqrts_vvl v256f64:$vy, i32:$vl), (VFSQRTSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmpd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFCMPDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmpd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmpd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFCMPSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfcmps_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfcmps_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFCMPvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfcmp_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFCMPrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfcmp_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfcmp_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMAXDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxd_vsvmvl 
f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMAXSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmaxs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmaxs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMAXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmax_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMAXrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmax_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmax_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmind_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMINDrvl f64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmind_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmind_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMINSrvl f32:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmins_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmins_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_pvfmin_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMINvvl v256f64:$vy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmin_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmin_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMINrvl i64:$sy, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmin_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmin_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmin_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vsvvmvl f32:$sy, v256f64:$vz, 
v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbs_vvvvvl 
v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmadd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmadd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvsvl v256f64:$vy, f64:$sy, 
v256f64:$vw, i32:$vl), (VFNMADDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmadd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmadd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmads_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMADSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmads_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMADSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmads_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMADSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmads_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMADSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmad_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMADrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmad_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMADvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmad_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMADvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_pvfnmad_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmad_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMADvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbd_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vsvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBDrvvl f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbd_vsvvvl f64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDrvvl_v f64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvsvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl), (VFNMSBDvrvl v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbd_vvsvvl v256f64:$vy, f64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBDvrvl_v v256f64:$vy, f64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vsvvmvl f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDrvvml_v f64:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbd_vvsvmvl v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBDvrvml_v v256f64:$vy, f64:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbs_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vsvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (VFNMSBSrvvl f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbs_vsvvvl f32:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSrvvl_v f32:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvsvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl), (VFNMSBSvrvl v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_vfnmsbs_vvsvvl v256f64:$vy, f32:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (VFNMSBSvrvl_v v256f64:$vy, f32:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvvvmvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vsvvmvl f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSrvvml_v f32:$sy, v256f64:$vz, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfnmsbs_vvsvmvl v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, v256f64:$pt, i32:$vl), (VFNMSBSvrvml_v v256f64:$vy, f32:$sy, v256f64:$vw, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBvvvl 
v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmsb_vvvvvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvvvl_v v256f64:$vy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vsvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl), (PVFNMSBrvvl i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmsb_vsvvvl i64:$sy, v256f64:$vz, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBrvvl_v i64:$sy, v256f64:$vz, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvsvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl), (PVFNMSBvrvl v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl)>; +def : Pat<(int_ve_vl_pvfnmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$pt, i32:$vl), (PVFNMSBvrvl_v v256f64:$vy, i64:$sy, v256f64:$vw, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvfnmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrcpd_vvl v256f64:$vy, i32:$vl), (VRCPDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrcpd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrcps_vvl v256f64:$vy, i32:$vl), (VRCPSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrcps_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvrcp_vvl v256f64:$vy, i32:$vl), (PVRCPvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvrcp_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRCPvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrtd_vvl v256f64:$vy, i32:$vl), (VRSQRTDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrts_vvl v256f64:$vy, i32:$vl), (VRSQRTSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvrsqrt_vvl v256f64:$vy, i32:$vl), (PVRSQRTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvrsqrt_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrtdnex_vvl v256f64:$vy, i32:$vl), (VRSQRTDNEXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrtdnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrsqrtsnex_vvl v256f64:$vy, i32:$vl), (VRSQRTSNEXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrsqrtsnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvrsqrtnex_vvl v256f64:$vy, i32:$vl), (PVRSQRTNEXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvrsqrtnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsx_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), 
(VCVTWDSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdsxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdsxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzx_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwdzxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwdzxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssx_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwssx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwssxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwssxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszx_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwszx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtwszxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtwszxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtws_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvcvtws_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtws_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_NONE, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtwsrz_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvcvtwsrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_RZ, 
v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtwsrz_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_RZ, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtld_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_NONE, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtld_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtld_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtldrz_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_RZ, v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtldrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtldrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtdw_vvl v256f64:$vy, i32:$vl), (VCVTDWvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtdw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtsw_vvl v256f64:$vy, i32:$vl), (VCVTSWvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvcvtsw_vvl v256f64:$vy, i32:$vl), (PVCVTSWvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_pvcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtdl_vvl v256f64:$vy, i32:$vl), (VCVTDLvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtdl_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDLvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtds_vvl v256f64:$vy, i32:$vl), (VCVTDSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtds_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcvtsd_vvl v256f64:$vy, i32:$vl), (VCVTSDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vcvtsd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrg_vvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrg_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrg_vsvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGrvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrg_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrg_vsvml simm7:$I, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGivml (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrg_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmrgw_vvvMl v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl), (VMRGWvvml v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrgw_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (VMRGWvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl), (VSHFvvrl v256f64:$vy, v256f64:$vz, i64:$sy, 
i32:$vl)>; +def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSHFvvrl_v v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, uimm6:$N, i32:$vl), (VSHFvvil v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl)>; +def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSHFvvil_v v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vcp_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VCPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vex_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VEXvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfmklat_ml i32:$vl), (VFMKLal i32:$vl)>; +def : Pat<(int_ve_vl_vfmklaf_ml i32:$vl), (VFMKLnal i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkat_Ml i32:$vl), (VFMKyal i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkaf_Ml i32:$vl), (VFMKynal i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllt_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklne_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleq_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklge_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklle_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnum_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgtnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklltnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleqnan_mvl v256f64:$vz, i32:$vl), (VFMKLvl 
CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkleqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmklgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllenan_mvl v256f64:$vz, i32:$vl), (VFMKLvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkllenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKLvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlt_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwne_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweq_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwge_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwle_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnum_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgtnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwltnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweqnan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkweqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_GENAN, v256f64:$vz, v256i1:$vm, 
i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlenan_mvl v256f64:$vz, i32:$vl), (VFMKWvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkwlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKWvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IG, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolt_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplt_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IL, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlone_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupne_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_INE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeq_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeq_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IEQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloge_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupge_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_IGE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlole_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuple_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_ILE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonum_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnum_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonan_mvl 
v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolenan_mvl v256f64:$vz, i32:$vl), (PVFMKWLOvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKWUPvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKWUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IG, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IG, v256f64:$vz, 
v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlt_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IL, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IL, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwne_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_INE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_INE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweq_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IEQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IEQ, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwge_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_IGE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_IGE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwle_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_ILE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_ILE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnum_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwltnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweqnan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkweqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlenan_Mvl v256f64:$vz, i32:$vl), (VFMKWyvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkwlenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKWyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlt_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdne_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NE, v256f64:$vz, i32:$vl)>; 
+def : Pat<(int_ve_vl_vfmkdne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeq_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdge_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdle_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnum_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgtnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdltnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeqnan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlenan_mvl v256f64:$vz, i32:$vl), (VFMKDvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkdlenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKDvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslt_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksne_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseq_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksge_mvl v256f64:$vz, i32:$vl), 
(VFMKSvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksle_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksle_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnum_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgtnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksltnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseqnan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkseqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmksgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslenan_mvl v256f64:$vz, i32:$vl), (VFMKSvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vfmkslenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (VFMKSvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_G, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolt_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplt_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplt_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_L, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslone_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupne_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslone_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupne_mvml v256f64:$vz, v256i1:$vm, i32:$vl), 
(PVFMKSUPvml CC_NE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeq_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeq_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeq_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQ, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloge_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupge_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupge_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslole_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuple_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslole_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuple_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LE, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonum_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnum_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnum_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NUM, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgtnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgtnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupltnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupltnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_NENAN, 
v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslonenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupnenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_NENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeqnan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksloeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupeqnan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_EQNAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslogenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksupgenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_GENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolenan_mvl v256f64:$vz, i32:$vl), (PVFMKSLOvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplenan_mvl v256f64:$vz, i32:$vl), (PVFMKSUPvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslolenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSLOvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksuplenan_mvml v256f64:$vz, v256i1:$vm, i32:$vl), (PVFMKSUPvml CC_LENAN, v256f64:$vz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_G, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_G, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslt_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_L, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslt_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_L, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksne_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksne_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseq_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQ, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseq_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQ, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksge_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksge_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksle_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LE, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksle_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LE, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnum_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NUM, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnum_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NUM, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; 
+def : Pat<(int_ve_vl_pvfmksgtnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgtnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksltnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksltnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_NENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksnenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_NENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseqnan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_EQNAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkseqnan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_EQNAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_GENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmksgenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_GENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslenan_Mvl v256f64:$vz, i32:$vl), (VFMKSyvl CC_LENAN, v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvfmkslenan_MvMl v256f64:$vz, v512i1:$vm, i32:$vl), (VFMKSyvyl CC_LENAN, v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwsx_vvl v256f64:$vy, i32:$vl), (VSUMWSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwsx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWSXvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwzx_vvl v256f64:$vy, i32:$vl), (VSUMWZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsumwzx_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMWZXvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsuml_vvl v256f64:$vy, i32:$vl), (VSUMLvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vsuml_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VSUMLvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfsumd_vvl v256f64:$vy, i32:$vl), (VFSUMDvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsumd_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMDvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vfsums_vvl v256f64:$vy, i32:$vl), (VFSUMSvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfsums_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VFSUMSvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswfstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswfstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxswlstsx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxswfstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWFSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxswlstzx_vvl v256f64:$vy, i32:$vl), (VRMAXSWLSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswfstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswfstsx_vvvl v256f64:$vy, v256f64:$pt, 
i32:$vl), (VRMINSWFSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswlstsx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTSXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswlstsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTSXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswfstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWFSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswfstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWFSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminswlstzx_vvl v256f64:$vy, i32:$vl), (VRMINSWLSTZXvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminswlstzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSWLSTZXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxslfst_vvl v256f64:$vy, i32:$vl), (VRMAXSLFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrmaxsllst_vvl v256f64:$vy, i32:$vl), (VRMAXSLLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrmaxsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMAXSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminslfst_vvl v256f64:$vy, i32:$vl), (VRMINSLFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminslfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrminsllst_vvl v256f64:$vy, i32:$vl), (VRMINSLLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrminsllst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRMINSLLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxdfst_vvl v256f64:$vy, i32:$vl), (VFRMAXDFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxdfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxdlst_vvl v256f64:$vy, i32:$vl), (VFRMAXDLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxdlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxsfst_vvl v256f64:$vy, i32:$vl), (VFRMAXSFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmaxslst_vvl v256f64:$vy, i32:$vl), (VFRMAXSLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmaxslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMAXSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmindfst_vvl v256f64:$vy, i32:$vl), (VFRMINDFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmindfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrmindlst_vvl v256f64:$vy, i32:$vl), (VFRMINDLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrmindlst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINDLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrminsfst_vvl v256f64:$vy, i32:$vl), (VFRMINSFSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrminsfst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSFSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vfrminslst_vvl v256f64:$vy, i32:$vl), (VFRMINSLSTvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vfrminslst_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFRMINSLSTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vrand_vvl v256f64:$vy, i32:$vl), (VRANDvl v256f64:$vy, i32:$vl)>; 
+def : Pat<(int_ve_vl_vrand_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRANDvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vror_vvl v256f64:$vy, i32:$vl), (VRORvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vror_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRORvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vrxor_vvl v256f64:$vy, i32:$vl), (VRXORvl v256f64:$vy, i32:$vl)>; +def : Pat<(int_ve_vl_vrxor_vvml v256f64:$vy, v256i1:$vm, i32:$vl), (VRXORvml v256f64:$vy, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgt_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgt_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; 
+def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtu_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtu_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTUNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTUNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTUNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTUNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTUNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTUNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTUNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtunc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTUNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtunc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTUNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : 
Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLSXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLSXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLSXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLSXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLSXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLSXNCvizl_v 
v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLSXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLSXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlsxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLSXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, 
v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzx_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzx_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VGTLZXNCvrrl v256f64:$vy, i64:$sy, i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvrrl_v v256f64:$vy, i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VGTLZXNCvrzl v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, i64:$sy, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvrzl_v v256f64:$vy, i64:$sy, (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VGTLZXNCvirl v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VGTLZXNCvirl_v v256f64:$vy, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssl v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VGTLZXNCvizl v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssvl v256f64:$vy, simm7:$I, zero:$Z, v256f64:$pt, i32:$vl), (VGTLZXNCvizl_v v256f64:$vy, (LO7 $I), (LO7 $Z), i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvrrml v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrrml_v v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvrzml v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvrzml_v v256f64:$vy, i64:$sy, (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VGTLZXNCvirml v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvirml_v v256f64:$vy, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssml v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VGTLZXNCvizml v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vgtlzxnc_vvssmvl v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, v256f64:$pt, i32:$vl), (VGTLZXNCvizml_v v256f64:$vy, (LO7 $I), (LO7 $Z), v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : 
Pat<(int_ve_vl_vsc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, 
v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscu_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, 
v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscunc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCUNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCUNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCUNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCUNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCUNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscuncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCUNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, 
v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vscl_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclnc_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml 
v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, i32:$vl), (VSCLNCOTvrrvl v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, i32:$vl), (VSCLNCOTvrzvl v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, i32:$vl), (VSCLNCOTvirvl v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssl v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, i32:$vl), (VSCLNCOTvizvl v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvrrvml v256f64:$vy, i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, i64:$sy, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvrzvml v256f64:$vy, i64:$sy, (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSCLNCOTvirvml v256f64:$vy, (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vsclncot_vvssml v256f64:$vx, v256f64:$vy, simm7:$I, zero:$Z, v256i1:$vm, i32:$vl), (VSCLNCOTvizvml v256f64:$vy, (LO7 $I), (LO7 $Z), v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_andm_mmm v256i1:$vmy, v256i1:$vmz), (ANDMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_andm_MMM v512i1:$vmy, v512i1:$vmz), (ANDMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_orm_mmm v256i1:$vmy, v256i1:$vmz), (ORMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_orm_MMM v512i1:$vmy, v512i1:$vmz), (ORMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_xorm_mmm v256i1:$vmy, v256i1:$vmz), (XORMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_xorm_MMM v512i1:$vmy, v512i1:$vmz), (XORMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_eqvm_mmm v256i1:$vmy, v256i1:$vmz), (EQVMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_eqvm_MMM v512i1:$vmy, v512i1:$vmz), (EQVMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_nndm_mmm v256i1:$vmy, v256i1:$vmz), (NNDMmm v256i1:$vmy, v256i1:$vmz)>; +def : Pat<(int_ve_vl_nndm_MMM v512i1:$vmy, v512i1:$vmz), (NNDMyy v512i1:$vmy, v512i1:$vmz)>; +def : Pat<(int_ve_vl_negm_mm v256i1:$vmy), (NEGMm v256i1:$vmy)>; +def : Pat<(int_ve_vl_negm_MM v512i1:$vmy), (NEGMy v512i1:$vmy)>; +def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>; +def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>; +def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td new file mode 100644 index 000000000000..69ea133ceed0 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrIntrinsicVL.td @@ -0,0 +1,64 @@ +/// Pattern Matchings for VEL intrinsic instructions. + +/// Intrinsic patterns written by hand. + +// SVOB pattern. +def : Pat<(int_ve_vl_svob), (SVOB)>; + +// Pack patterns. 
+def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)), + (ORrr (f2l (LDUrii MEMrii:$addr0)), + (i2l (LDLZXrii MEMrii:$addr1)))>; + +def : Pat<(i64 (int_ve_vl_pack_f32a ADDRrii:$addr)), + (MULULrr + (i2l (LDLZXrii MEMrii:$addr)), + (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 (i64 0x0000000100000001))), + !add(32, 64)), 0, + (HI32 (i64 0x0000000100000001))))>; + +// The extract/insert patterns. +def : Pat<(v256i1 (int_ve_vl_extract_vm512u v512i1:$vm)), + (EXTRACT_SUBREG v512i1:$vm, sub_vm_even)>; + +def : Pat<(v256i1 (int_ve_vl_extract_vm512l v512i1:$vm)), + (EXTRACT_SUBREG v512i1:$vm, sub_vm_odd)>; + +def : Pat<(v512i1 (int_ve_vl_insert_vm512u v512i1:$vmx, v256i1:$vmy)), + (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_even)>; + +def : Pat<(v512i1 (int_ve_vl_insert_vm512l v512i1:$vmx, v256i1:$vmy)), + (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_odd)>; + +// VMRG patterns. +def : Pat<(int_ve_vl_vmrgw_vsvMl i32:$sy, v256f64:$vz, v512i1:$vm, i32:$vl), + (VMRGWrvml (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrgw_vsvMvl i32:$sy, v256f64:$vz, v512i1:$vm, + v256f64:$pt, i32:$vl), + (VMRGWrvml_v (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl, + v256f64:$pt)>; + +// VMV patterns. +def : Pat<(int_ve_vl_vmv_vsvl i32:$sy, v256f64:$vz, i32:$vl), + (VMVrvl (i2l i32:$sy), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmv_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), + (VMVrvl_v (i2l i32:$sy), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, + i32:$vl), + (VMVrvml_v (i2l i32:$sy), v256f64:$vz, v256i1:$vm, i32:$vl, + v256f64:$pt)>; + +// LSV patterns. +def : Pat<(int_ve_vl_lsv_vvss v256f64:$pt, i32:$sy, i64:$sz), + (LSVrr_v (i2l i32:$sy), i64:$sz, v256f64:$pt)>; + +// LVS patterns. +def : Pat<(int_ve_vl_lvsl_svs v256f64:$vx, i32:$sy), + (LVSvr v256f64:$vx, (i2l i32:$sy))>; +def : Pat<(int_ve_vl_lvsd_svs v256f64:$vx, i32:$sy), + (LVSvr v256f64:$vx, (i2l i32:$sy))>; +def : Pat<(int_ve_vl_lvss_svs v256f64:$vx, i32:$sy), + (l2f (LVSvr v256f64:$vx, (i2l i32:$sy)))>; + +/// Intrinsic patterns automatically generated. +include "VEInstrIntrinsicVL.gen.td" diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td new file mode 100644 index 000000000000..0084876f9f1b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td @@ -0,0 +1,91 @@ +//===-- VEInstrPatternsVec.td - VEC_-type SDNodes and isel for VE Target --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the VEC_* prefixed intermediate SDNodes and their +// isel patterns. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, + SDNodeXForm ImmCast, SDNodeXForm SuperRegCast> { + // VBRDil + def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)), + (VBRDil (ImmCast $sy), i32:$vl)>; + + // VBRDrl + def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)), + (VBRDrl (SuperRegCast $sy), i32:$vl)>; +} + +multiclass vbrd_elem64<ValueType v64, ValueType s64, + SDPatternOperator ImmOp, SDNodeXForm ImmCast> { + // VBRDil + def : Pat<(v64 (vec_broadcast (s64 ImmOp:$sy), i32:$vl)), + (VBRDil (ImmCast $sy), i32:$vl)>; + + // VBRDrl + def : Pat<(v64 (vec_broadcast s64:$sy, i32:$vl)), + (VBRDrl s64:$sy, i32:$vl)>; +} + +multiclass extract_insert_elem32<ValueType v32, ValueType s32, + SDNodeXForm SubRegCast, + SDNodeXForm SuperRegCast> { + // LVSvi + def: Pat<(s32 (extractelt v32:$vec, uimm7:$idx)), + (SubRegCast (LVSvi v32:$vec, (ULO7 $idx)))>; + // LVSvr + def: Pat<(s32 (extractelt v32:$vec, i64:$idx)), + (SubRegCast (LVSvr v32:$vec, $idx))>; + + // LSVir + def: Pat<(v32 (insertelt v32:$vec, s32:$val, uimm7:$idx)), + (LSVir_v (ULO7 $idx), (SuperRegCast $val), $vec)>; + // LSVrr + def: Pat<(v32 (insertelt v32:$vec, s32:$val, i64:$idx)), + (LSVrr_v $idx, (SuperRegCast $val), $vec)>; +} + +multiclass extract_insert_elem64<ValueType v64, ValueType s64> { + // LVSvi + def: Pat<(s64 (extractelt v64:$vec, uimm7:$idx)), + (LVSvi v64:$vec, (ULO7 $idx))>; + // LVSvr + def: Pat<(s64 (extractelt v64:$vec, i64:$idx)), + (LVSvr v64:$vec, $idx)>; + + // LSVir + def: Pat<(v64 (insertelt v64:$vec, s64:$val, uimm7:$idx)), + (LSVir_v (ULO7 $idx), $val, $vec)>; + // LSVrr + def: Pat<(v64 (insertelt v64:$vec, s64:$val, i64:$idx)), + (LSVrr_v $idx, $val, $vec)>; +} + +multiclass patterns_elem32<ValueType v32, ValueType s32, + SDPatternOperator ImmOp, SDNodeXForm ImmCast, + SDNodeXForm SubRegCast, SDNodeXForm SuperRegCast> { + defm : vbrd_elem32<v32, s32, ImmOp, ImmCast, SuperRegCast>; + defm : extract_insert_elem32<v32, s32, SubRegCast, SuperRegCast>; +} + +multiclass patterns_elem64<ValueType v64, ValueType s64, + SDPatternOperator ImmOp, SDNodeXForm ImmCast> { + defm : vbrd_elem64<v64, s64, ImmOp, ImmCast>; + defm : extract_insert_elem64<v64, s64>; +} + +defm : patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>; +defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>; + +defm : patterns_elem64<v256i64, i64, simm7, LO7>; +defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td new file mode 100644 index 000000000000..4a8476f7288a --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td @@ -0,0 +1,1510 @@ +//===----------------------------------------------------------------------===// +// Vector Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pseudo instructions for VM512 modifications +//===----------------------------------------------------------------------===// + +// LVM/SVM instructions using VM512 +let hasSideEffects = 0, isCodeGenOnly = 1 in { + let Constraints = "$vx = $vd", DisableEncoding = "$vd" in { + 
def LVMyir_y : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz, VM512:$vd), + "# pseudo LVM $vx, $sy, $sz, $vd">; + def LVMyim_y : Pseudo<(outs VM512:$vx), + (ins uimm3:$sy, mimm:$sz, VM512:$vd), + "# pseudo LVM $vx, $sy, $sz, $vd">; + } + def LVMyir : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz), + "# pseudo LVM $vx, $sy, $sz">; + def LVMyim : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, mimm:$sz), + "# pseudo LVM $vx, $sy, $sz">; + def SVMyi : Pseudo<(outs I64:$sx), (ins VM512:$vz, uimm3:$sy), + "# pseudo SVM $sx, $vz, $sy">; +} + +// VFMK/VFMKW/VFMKS instructions using VM512 +let hasSideEffects = 0, isCodeGenOnly = 1, DisableEncoding = "$vl" in { + def VFMKyal : Pseudo<(outs VM512:$vmx), (ins I32:$vl), + "# pseudo-vfmk.at $vmx">; + def VFMKynal : Pseudo<(outs VM512:$vmx), (ins I32:$vl), + "# pseudo-vfmk.af $vmx">; + def VFMKWyvl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, I32:$vl), + "# pseudo-vfmk.w.$cf $vmx, $vz">; + def VFMKWyvyl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl), + "# pseudo-vfmk.w.$cf $vmx, $vz, $vm">; + def VFMKSyvl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, I32:$vl), + "# pseudo-vfmk.s.$cf $vmx, $vz">; + def VFMKSyvyl : Pseudo<(outs VM512:$vmx), + (ins CCOp:$cf, V64:$vz, VM512:$vm, I32:$vl), + "# pseudo-vfmk.s.$cf $vmx, $vz, $vm">; +} + +// ANDM/ORM/XORM/EQVM/NNDM/NEGM instructions using VM512 +let hasSideEffects = 0, isCodeGenOnly = 1 in { + def ANDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# andm $vmx, $vmy, $vmz">; + def ORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# orm $vmx, $vmy, $vmz">; + def XORMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# xorm $vmx, $vmy, $vmz">; + def EQVMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# eqvm $vmx, $vmy, $vmz">; + def NNDMyy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy, VM512:$vmz), + "# nndm $vmx, $vmy, $vmz">; + def NEGMy : Pseudo<(outs VM512:$vmx), (ins VM512:$vmy), + "# negm $vmx, $vmy">; +} + +//===----------------------------------------------------------------------===// +// Instructions +// +// Define all vector instructions defined in the SX-Aurora TSUBASA Architecture +// Guide here. For mnemonics, we use those defined in the Vector Engine +// Assembly Language Reference Manual. +// +// Some instructions can update existing data through an instruction sequence +// like the following. +// +// lea %s0, 256 +// lea %s1, 128 +// lvl %s0 +// vbrd %v0, 2 # v0 = { 2, 2, 2, ..., 2, 2, 2 } +// lvl %s1 +// vbrd %v0, 3 # v0 = { 3, 3, 3, ..., 3, 2, 2, 2, ..., 2, 2, 2 } +// +// In order to represent the above with virtual registers, we define +// instructions with an additional base register and a `_v` suffix in the +// mnemonic. +// +// lea t0, 256 +// lea t1, 128 +// lvl t0 +// vbrd tv0, 2 +// lvl t1 +// vbrd_v tv1, 3, tv0 +// +// We also define instruction variants that carry a pseudo VL operand to +// represent the VL register value, marked by the following suffixes in the +// mnemonic. +// +// l: has an additional I32 register to represent the VL value. +// L: has an additional VL register to represent the VL value.
+//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------- +// Section 8.9 - Vector Load/Store and Move Instructions +//----------------------------------------------------------------------------- + +// Multiclass for VLD instructions +let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VLDbm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in, + string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RVM<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, $sy, $sz")>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, $sy, $sz")>; +} +multiclass VLDlm<string opcStr, bits<8>opc, RegisterClass RC, dag dag_in> { + defm "" : VLDbm<opcStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VLDbm<opcStr, opc, RC, !con(dag_in, (ins I32:$vl)), "$vl,">; + defm L : VLDbm<opcStr, opc, RC, !con(dag_in, (ins VLS:$vl)), "$vl,">; + } +} +let VE_VLIndex = 3 in +multiclass VLDtgm<string opcStr, bits<8>opc, RegisterClass RC> { + defm rr : VLDlm<opcStr, opc, RC, (ins I64:$sy, I64:$sz)>; + let cy = 0 in + defm ir : VLDlm<opcStr, opc, RC, (ins simm7:$sy, I64:$sz)>; + let cz = 0 in + defm rz : VLDlm<opcStr, opc, RC, (ins I64:$sy, zero:$sz)>; + let cy = 0, cz = 0 in + defm iz : VLDlm<opcStr, opc, RC, (ins simm7:$sy, zero:$sz)>; +} +multiclass VLDm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1 in defm "" : VLDtgm<opcStr, opc, RC>; + let vc = 0 in defm NC : VLDtgm<opcStr#".nc", opc, RC>; +} + +// Section 8.9.1 - VLD (Vector Load) +defm VLD : VLDm<"vld", 0x81, V64>; + +// Section 8.9.2 - VLDU (Vector Load Upper) +defm VLDU : VLDm<"vldu", 0x82, V64>; + +// Section 8.9.3 - VLDL (Vector Load Lower) +defm VLDLSX : VLDm<"vldl.sx", 0x83, V64>; +let cx = 1 in defm VLDLZX : VLDm<"vldl.zx", 0x83, V64>; + +// Section 8.9.4 - VLD2D (Vector Load 2D) +defm VLD2D : VLDm<"vld2d", 0xc1, V64>; + +// Section 8.9.5 - VLDU2D (Vector Load Upper 2D) +defm VLDU2D : VLDm<"vldu2d", 0xc2, V64>; + +// Section 8.9.6 - VLDL2D (Vector Load Lower 2D) +defm VLDL2DSX : VLDm<"vldl2d.sx", 0xc3, V64>; +let cx = 1 in defm VLDL2DZX : VLDm<"vldl2d.zx", 0xc3, V64>; + +// Multiclass for VST instructions +let mayStore = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VSTbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +multiclass VSTmm<string opcStr, bits<8>opc, dag dag_in> { + defm "" : VSTbm<opcStr, " $vx, $sy, $sz", opc, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VSTbm<opcStr, " $vx, $sy, $sz, $m", opc, !con(dag_in, (ins VM:$m))>; +} +let VE_VLIndex = 3 in +multiclass VSTtgm<string opcStr, bits<8>opc, RegisterClass RC> { + defm rrv : VSTmm<opcStr, opc, (ins I64:$sy, I64:$sz, RC:$vx)>; + let cy = 0 in + defm irv : VSTmm<opcStr, opc, (ins simm7:$sy, I64:$sz, RC:$vx)>; + let cz = 0 in + defm rzv : VSTmm<opcStr, opc, (ins I64:$sy, zero:$sz, RC:$vx)>; + let cy = 0, cz = 0 in + defm izv : VSTmm<opcStr, opc, (ins simm7:$sy, zero:$sz, RC:$vx)>; +} +multiclass VSTm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1, 
cx = 0 in defm "" : VSTtgm<opcStr, opc, RC>; + let vc = 0, cx = 0 in defm NC : VSTtgm<opcStr#".nc", opc, RC>; + let vc = 1, cx = 1 in defm OT : VSTtgm<opcStr#".ot", opc, RC>; + let vc = 0, cx = 1 in defm NCOT : VSTtgm<opcStr#".nc.ot", opc, RC>; +} + +// Section 8.9.7 - VST (Vector Store) +defm VST : VSTm<"vst", 0x91, V64>; + +// Section 8.9.8 - VST (Vector Store Upper) +defm VSTU : VSTm<"vstu", 0x92, V64>; + +// Section 8.9.9 - VSTL (Vector Store Lower) +defm VSTL : VSTm<"vstl", 0x93, V64>; + +// Section 8.9.10 - VST2D (Vector Store 2D) +defm VST2D : VSTm<"vst2d", 0xd1, V64>; + +// Section 8.9.11 - VSTU2D (Vector Store Upper 2D) +defm VSTU2D : VSTm<"vstu2d", 0xd2, V64>; + +// Section 8.9.12 - VSTL2D (Vector Store Lower 2D) +defm VSTL2D : VSTm<"vstl2d", 0xd3, V64>; + +// Multiclass for VGT instructions +let mayLoad = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VGTbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RVM<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RVM<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; +} +multiclass VGTlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VGTbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : VGTbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass VGTmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VGTlm<opcStr, argStr, opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VGTlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins VM:$m))>; +} +let VE_VLIndex = 4 in +multiclass VGTlhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm rr : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins I64:$sy, I64:$sz))>; + let cy = 0 in + defm ir : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins simm7:$sy, I64:$sz))>; + let cz = 0 in + defm rz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins I64:$sy, zero:$sz))>; + let cy = 0, cz = 0 in + defm iz : VGTmm<opcStr, argStr#", $sy, $sz", opc, RC, + !con(dag_in, (ins simm7:$sy, zero:$sz))>; +} +multiclass VGTtgm<string opcStr, bits<8>opc, RegisterClass RC> { + let vy = ? in defm v : VGTlhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>; + let cs = 1, sw = ? 
in defm s : VGTlhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>; +} +multiclass VGTm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1 in defm "" : VGTtgm<opcStr, opc, RC>; + let vc = 0 in defm NC : VGTtgm<opcStr#".nc", opc, RC>; +} + +// Section 8.9.13 - VGT (Vector Gather) +defm VGT : VGTm<"vgt", 0xa1, V64>; + +// Section 8.9.14 - VGTU (Vector Gather Upper) +defm VGTU : VGTm<"vgtu", 0xa2, V64>; + +// Section 8.9.15 - VGTL (Vector Gather Lower) +defm VGTLSX : VGTm<"vgtl.sx", 0xa3, V64>; +let cx = 1 in defm VGTLZX : VGTm<"vgtl.zx", 0xa3, V64>; +def : MnemonicAlias<"vgtl", "vgtl.zx">; +def : MnemonicAlias<"vgtl.nc", "vgtl.zx.nc">; + +// Multiclass for VSC instructions +let mayStore = 1, hasSideEffects = 0, Uses = [VL] in +multiclass VSCbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +multiclass VSCmm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + defm "" : VSCbm<opcStr, argStr, opc, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VSCbm<opcStr, argStr#", $m", opc, !con(dag_in, (ins VM:$m))>; +} +let VE_VLIndex = 4 in +multiclass VSClhm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm rrv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins I64:$sy, I64:$sz, RC:$vx))>; + let cy = 0 in + defm irv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins simm7:$sy, I64:$sz, RC:$vx))>; + let cz = 0 in + defm rzv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins I64:$sy, zero:$sz, RC:$vx))>; + let cy = 0, cz = 0 in + defm izv : VSCmm<opcStr, " $vx, "#argStr#", $sy, $sz", opc, + !con(dag_in, (ins simm7:$sy, zero:$sz, RC:$vx))>; +} +multiclass VSCtgm<string opcStr, bits<8>opc, RegisterClass RC> { + let vy = ? in defm v : VSClhm<opcStr, "$vy", opc, RC, (ins V64:$vy)>; + let cs = 1, sw = ? 
in defm s : VSClhm<opcStr, "$sw", opc, RC, (ins I64:$sw)>; +} +multiclass VSCm<string opcStr, bits<8>opc, RegisterClass RC> { + let vc = 1, cx = 0 in defm "" : VSCtgm<opcStr, opc, RC>; + let vc = 0, cx = 0 in defm NC : VSCtgm<opcStr#".nc", opc, RC>; + let vc = 1, cx = 1 in defm OT : VSCtgm<opcStr#".ot", opc, RC>; + let vc = 0, cx = 1 in defm NCOT : VSCtgm<opcStr#".nc.ot", opc, RC>; +} + +// Section 8.9.16 - VSC (Vector Scatter) +defm VSC : VSCm<"vsc", 0xb1, V64>; + +// Section 8.9.17 - VSCU (Vector Scatter Upper) +defm VSCU : VSCm<"vscu", 0xb2, V64>; + +// Section 8.9.18 - VSCL (Vector Scatter Lower) +defm VSCL : VSCm<"vscl", 0xb3, V64>; + +// Section 8.9.19 - PFCHV (Prefetch Vector) +let Uses = [VL] in +multiclass PFCHVbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RVM<opc, (outs), dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RVM<opc, (outs), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RVM<opc, (outs), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +let VE_VLIndex = 2 in +multiclass PFCHVm<string opcStr, bits<8>opc> { + defm rr : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, I64:$sz)>; + let cy = 0 in + defm ir : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, I64:$sz)>; + let cz = 0 in + defm rz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins I64:$sy, zero:$sz)>; + let cy = 0, cz = 0 in + defm iz : PFCHVbm<opcStr, " $sy, $sz", opc, (ins simm7:$sy, zero:$sz)>; +} +let vc = 1, vx = 0 in defm PFCHV : PFCHVm<"pfchv", 0x80>; +let vc = 0, vx = 0 in defm PFCHVNC : PFCHVm<"pfchv.nc", 0x80>; + +// Section 8.9.20 - LSV (Load S to V) +let sx = 0, vx = ?, hasSideEffects = 0 in +multiclass LSVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + def "" : RR<opc, (outs RC:$vx), dag_in, !strconcat(opcStr, " ${vx}", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = "$base", + isCodeGenOnly = 1 in + def _v : RR<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " ${vx}", argStr)>; +} +multiclass LSVm<string opcStr, bits<8>opc, RegisterClass RC> { + defm rr : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, I64:$sz)>; + let cy = 0 in + defm ir : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, I64:$sz)>; + let cz = 0 in + defm rm : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins I64:$sy, mimm:$sz)>; + let cy = 0, cz = 0 in + defm im : LSVbm<opcStr, "(${sy}), $sz", opc, RC, (ins uimm7:$sy, mimm:$sz)>; +} +defm LSV : LSVm<"lsv", 0x8e, V64>; + +// Section 8.9.21 - LVS (Load V to S) +let cz = 0, sz = 0, vx = ?, hasSideEffects = 0 in +multiclass LVSm<string opcStr, bits<8>opc, RegisterClass RC> { + def vr : RR<opc, (outs I64:$sx), (ins RC:$vx, I64:$sy), + opcStr#" $sx, ${vx}(${sy})">; + let cy = 0 in + def vi : RR<opc, (outs I64:$sx), (ins RC:$vx, uimm7:$sy), + opcStr#" $sx, ${vx}(${sy})">; +} +defm LVS : LVSm<"lvs", 0x9e, V64>; + +// Section 8.9.22 - LVM (Load VM) +let sx = 0, vx = ?, hasSideEffects = 0 in +multiclass LVMbm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM, + dag dag_in> { + def "" : RR<opc, (outs RCM:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = "$base", + isCodeGenOnly = 1 in { + def _m : RR<opc, (outs RCM:$vx), !con(dag_in, (ins RCM:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; + } +} +multiclass LVMom<string opcStr, bits<8>opc, RegisterClass RCM> { + defm rr : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, 
I64:$sz)>; + let cy = 0 in + defm ir : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, I64:$sz)>; + let cz = 0 in + defm rm : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins I64:$sy, mimm:$sz)>; + let cy = 0, cz = 0 in + defm im : LVMbm<opcStr, "$sy, $sz", opc, RCM, (ins uimm2:$sy, mimm:$sz)>; +} +multiclass LVMm<string opcStr, bits<8>opc, RegisterClass RCM> { + defm "" : LVMom<opcStr, opc, RCM>; +} +defm LVM : LVMm<"lvm", 0xb7, VM>; + +// Section 8.9.23 - SVM (Save VM) +let cz = 0, sz = 0, vz = ?, hasSideEffects = 0 in +multiclass SVMm<string opcStr, bits<8>opc, RegisterClass RCM> { + def mr : RR<opc, (outs I64:$sx), (ins RCM:$vz, I64:$sy), + opcStr#" $sx, $vz, $sy">; + let cy = 0 in + def mi : RR<opc, (outs I64:$sx), (ins RCM:$vz, uimm2:$sy), + opcStr#" $sx, $vz, $sy">; +} +defm SVM : SVMm<"svm", 0xa7, VM>; + +// Section 8.9.24 - VBRD (Vector Broadcast) +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass VBRDbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; +} +multiclass VBRDlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VBRDbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : VBRDbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass VBRDmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM, dag dag_in> { + defm "" : VBRDlm<opcStr, argStr, opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VBRDlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>; +} +let VE_VLIndex = 2 in +multiclass VBRDm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC, + RegisterClass RCM> { + defm r : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins RC:$sy)>; + let cy = 0 in + defm i : VBRDmm<opcStr, "$sy", opc, VRC, RCM, (ins simm7:$sy)>; +} +let cx = 0, cx2 = 0 in +defm VBRD : VBRDm<"vbrd", 0x8c, V64, I64, VM>; +let cx = 0, cx2 = 1 in +defm VBRDL : VBRDm<"vbrdl", 0x8c, V64, I32, VM>; +let cx = 1, cx2 = 0 in +defm VBRDU : VBRDm<"vbrdu", 0x8c, V64, F32, VM>; +let cx = 1, cx2 = 1 in +defm PVBRD : VBRDm<"pvbrd", 0x8c, V64, I64, VM512>; + +// Section 8.9.25 - VMV (Vector Move) +let vx = ?, vz = ?, hasSideEffects = 0, Uses = [VL] in +multiclass VMVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx, ", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx, ", argStr)>; +} +multiclass VMVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : VMVbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : VMVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass VMVmm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM, dag dag_in> { 
+ defm "" : VMVlm<opcStr, "$sy, $vz", opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : VMVlm<opcStr, "$sy, $vz, $m", opc, RC, !con(dag_in, (ins RCM:$m))>; +} +let VE_VLIndex = 3 in +multiclass VMVm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + defm rv : VMVmm<opcStr, opc, RC, RCM, (ins I64:$sy, RC:$vz)>; + let cy = 0 in + defm iv : VMVmm<opcStr, opc, RC, RCM, (ins uimm7:$sy, RC:$vz)>; +} +defm VMV : VMVm<"vmv", 0x9c, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.10 - Vector Fixed-Point Arithmetic Instructions +//----------------------------------------------------------------------------- + +// Multiclass for generic vector calculation +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx", argStr)>; + let Constraints = "$vx = $base", DisableEncoding = disEnc#"$base", + isCodeGenOnly = 1 in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx", argStr)>; +} +multiclass RVlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : RVbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : RVbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +multiclass RVmm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM, dag dag_in> { + defm "" : RVlm<opcStr, argStr, opc, RC, dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : RVlm<opcStr, argStr#", $m", opc, RC, !con(dag_in, (ins RCM:$m))>; +} +// Generic RV multiclass with 2 arguments. +// e.g. VADD, VSUB, VMPY, and etc. +let VE_VLIndex = 3 in +multiclass RVm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC, + RegisterClass RCM, Operand SIMM = simm7> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>; + let cs = 1, vz = ? in + defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>; + let cs = 1, cy = 0, vz = ? in + defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>; +} +// Special RV multiclass with 2 arguments using cs2. +// e.g. VDIV, VDVS, and VDVX. +let VE_VLIndex = 3 in +multiclass RVDIVm<string opcStr, bits<8>opc, RegisterClass VRC, + RegisterClass RC, RegisterClass RCM, Operand SIMM = simm7> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm vv : RVmm<opcStr, ", $vy, $vz", opc, VRC, RCM, (ins VRC:$vy, VRC:$vz)>; + let cs2 = 1, vy = ? in + defm vr : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, RC:$sy)>; + let cs2 = 1, cy = 0, vy = ? in + defm vi : RVmm<opcStr, ", $vy, $sy", opc, VRC, RCM, (ins VRC:$vy, SIMM:$sy)>; + let cs = 1, vz = ? in + defm rv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins RC:$sy, VRC:$vz)>; + let cs = 1, cy = 0, vz = ? in + defm iv : RVmm<opcStr, ", $sy, $vz", opc, VRC, RCM, (ins SIMM:$sy, VRC:$vz)>; +} +// Generic RV multiclass with 2 arguments for logical operations. +// e.g. VAND, VOR, VXOR, and etc. +let VE_VLIndex = 3 in +multiclass RVLm<string opcStr, bits<8>opc, RegisterClass ScaRC, + RegisterClass RC, RegisterClass RCM> { + let cy = 0, sy = 0, vy = ?, vz = ? 
in + defm vv : RVmm<opcStr, ", $vy, $vz", opc, RC, RCM, (ins RC:$vy, RC:$vz)>; + let cs = 1, vz = ? in + defm rv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins ScaRC:$sy, RC:$vz)>; + let cs = 1, cy = 0, vz = ? in + defm mv : RVmm<opcStr, ", $sy, $vz", opc, RC, RCM, (ins mimm:$sy, RC:$vz)>; +} +// Generic RV multiclass with 1 argument. +// e.g. VLDZ, VPCNT, and VBRV. +let VE_VLIndex = 2 in +multiclass RV1m<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0, vz = ? in + defm v : RVmm<opcStr, ", $vz", opc, RC, RCM, (ins RC:$vz)>; +} +// Generic RV multiclass with no argument. +// e.g. VSEQ. +let VE_VLIndex = 1 in +multiclass RV0m<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0 in + defm "" : RVmm<opcStr, "", opc, RC, RCM, (ins)>; +} +// Generic RV multiclass with 2 arguments for shift operations. +// e.g. VSLL, VSRL, VSLA, and etc. +let VE_VLIndex = 3 in +multiclass RVSm<string opcStr, bits<8>opc, RegisterClass ScaRC, + RegisterClass RC, RegisterClass RCM> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm vv : RVmm<opcStr, ", $vz, $vy", opc, RC, RCM, (ins RC:$vz, RC:$vy)>; + let cs = 1, vz = ? in + defm vr : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, ScaRC:$sy)>; + let cs = 1, cy = 0, vz = ? in + defm vi : RVmm<opcStr, ", $vz, $sy", opc, RC, RCM, (ins RC:$vz, uimm7:$sy)>; +} +// Generic RV multiclass with 3 arguments for shift operations. +// e.g. VSLD and VSRD. +let VE_VLIndex = 4 in +multiclass RVSDm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let vy = ?, vz = ? in + defm vvr : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM, + (ins RC:$vy, RC:$vz, I64:$sy)>; + let cy = 0, vy = ?, vz = ? in + defm vvi : RVmm<opcStr, ", ($vy, ${vz}), $sy", opc, RC, RCM, + (ins RC:$vy, RC:$vz, uimm7:$sy)>; +} +// Special RV multiclass with 3 arguments. +// e.g. VSFA +let VE_VLIndex = 4 in +multiclass RVSAm<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cz = 1, sz = ?, vz = ? in + defm vrr : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, I64:$sy, I64:$sz)>; + let cz = 0, sz = ?, vz = ? in + defm vrm : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, I64:$sy, mimm:$sz)>; + let cy = 0, cz = 1, sz = ?, vz = ? in + defm vir : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, uimm3:$sy, I64:$sz)>; + let cy = 0, cz = 0, sz = ?, vz = ? in + defm vim : RVmm<opcStr, ", $vz, $sy, $sz", opc, RC, RCM, + (ins RC:$vz, uimm3:$sy, mimm:$sz)>; +} +// Generic RV multiclass with 1 argument using vy field. +// e.g. VFSQRT, VRCP, and VRSQRT. +let VE_VLIndex = 2 in +multiclass RVF1m<string opcStr, bits<8>opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0, vy = ? in + defm v : RVmm<opcStr, ", $vy", opc, RC, RCM, (ins RC:$vy)>; +} +// Special RV multiclass with 3 arguments using cs2. +// e.g. VFMAD, VFMSB, VFNMAD, and etc. +let VE_VLIndex = 4 in +multiclass RVMm<string opcStr, bits<8>opc, RegisterClass VRC, RegisterClass RC, + RegisterClass RCM, Operand SIMM = simm7> { + let cy = 0, sy = 0, vy = ?, vz = ?, vw = ? in + defm vvv : RVmm<opcStr, ", $vy, $vz, $vw", opc, VRC, RCM, + (ins VRC:$vy, VRC:$vz, VRC:$vw)>; + let cs2 = 1, vy = ?, vw = ? in + defm vrv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM, + (ins VRC:$vy, RC:$sy, VRC:$vw)>; + let cs2 = 1, cy = 0, vy = ?, vw = ? in + defm viv : RVmm<opcStr, ", $vy, $sy, $vw", opc, VRC, RCM, + (ins VRC:$vy, SIMM:$sy, VRC:$vw)>; + let cs = 1, vz = ?, vw = ? 
in + defm rvv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM, + (ins RC:$sy, VRC:$vz, VRC:$vw)>; + let cs = 1, cy = 0, vz = ?, vw = ? in + defm ivv : RVmm<opcStr, ", $sy, $vz, $vw", opc, VRC, RCM, + (ins SIMM:$sy, VRC:$vz, VRC:$vw)>; +} +// Special RV multiclass with 2 arguments for floating point conversions. +// e.g. VFIX and VFIXX +let hasSideEffects = 0, VE_VLIndex = 3 in +multiclass RVFIXm<string opcStr, bits<8> opc, RegisterClass RC, + RegisterClass RCM> { + let cy = 0, sy = 0, vy = ?, vz = ? in + defm v : RVmm<opcStr#"$vz", ", $vy", opc, RC, RCM, (ins RDOp:$vz, RC:$vy)>; +} +// Multiclass for generic iterative vector calculation +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVIbm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in, string disEnc = ""> { + let DisableEncoding = disEnc in + def "" : RV<opc, (outs RC:$vx), dag_in, + !strconcat(opcStr, " $vx", argStr)>; + let isCodeGenOnly = 1, Constraints = "$vx = $base", DisableEncoding = disEnc#"$base" in + def _v : RV<opc, (outs RC:$vx), !con(dag_in, (ins RC:$base)), + !strconcat(opcStr, " $vx", argStr)>; +} +multiclass RVIlm<string opcStr, string argStr, bits<8>opc, RegisterClass RC, + dag dag_in> { + defm "" : RVIbm<opcStr, argStr, opc, RC, dag_in>; + let isCodeGenOnly = 1, VE_VLInUse = 1 in { + defm l : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins I32:$vl)), + "$vl,">; + defm L : RVIbm<opcStr, argStr, opc, RC, !con(dag_in, (ins VLS:$vl)), + "$vl,">; + } +} +// Generic RV multiclass for iterative operation with 2 argument. +// e.g. VFIA, VFIS, and VFIM +let VE_VLIndex = 3 in +multiclass RVI2m<string opcStr, bits<8>opc, RegisterClass VRC, + RegisterClass RC> { + let vy = ? in + defm vr : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, RC:$sy)>; + let cy = 0, vy = ? in + defm vi : RVIlm<opcStr, ", $vy, $sy", opc, VRC, (ins VRC:$vy, simm7fp:$sy)>; +} +// Generic RV multiclass for iterative operation with 3 argument. +// e.g. VFIAM, VFISM, VFIMA, and etc. +let VE_VLIndex = 4 in +multiclass RVI3m<string opcStr, bits<8>opc, RegisterClass VRC, + RegisterClass RC> { + let vy = ?, vz = ? in + defm vvr : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC, + (ins VRC:$vy, VRC:$vz, RC:$sy)>; + let cy = 0, vy = ?, vz = ? in + defm vvi : RVIlm<opcStr, ", $vy, $vz, $sy", opc, VRC, + (ins VRC:$vy, VRC:$vz, simm7fp:$sy)>; +} +// special RV multiclass with 3 arguments for VSHF. +// e.g. 
VSHF +let vy = ?, vz = ?, VE_VLIndex = 4 in +multiclass RVSHFm<string opcStr, bits<8>opc, RegisterClass RC, + Operand SIMM = uimm4> { + defm vvr : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC, + (ins RC:$vy, RC:$vz, I64:$sy)>; + let cy = 0 in defm vvi : RVlm<opcStr, ", $vy, $vz, $sy", opc, RC, + (ins RC:$vy, RC:$vz, SIMM:$sy)>; +} +// Multiclass for generic mask calculation +let vx = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVMKbm<string opcStr, string argStr, bits<8>opc, dag dag_out, + dag dag_in> { + def "" : RV<opc, dag_out, dag_in, !strconcat(opcStr, argStr)>; + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RV<opc, dag_out, !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, argStr)>; + def L : RV<opc, dag_out, !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, argStr)>; + } +} +multiclass RVMKlm<string opcStr, string argStr, bits<8>opc, RegisterClass RCM, + dag dag_in> { + defm "" : RVMKbm<opcStr, " $vx"#argStr, opc, (outs RCM:$vx), dag_in>; + let m = ?, VE_VLWithMask = 1 in + defm m : RVMKbm<opcStr, " $vx"#argStr#", $m", opc, (outs RCM:$vx), + !con(dag_in, (ins RCM:$m))>; +} +// Generic RV multiclass for mask calculation with a condition. +// e.g. VFMK, VFMS, and VFMF +let cy = 0, sy = 0 in +multiclass RVMKom<string opcStr, bits<8> opc, RegisterClass RC, + RegisterClass RCM> { + let vy = ?, vz = ?, VE_VLIndex = 3 in + defm v : RVMKlm<opcStr#"$vy", ", $vz", opc, RCM, (ins CCOp:$vy, RC:$vz)>; + let vy = 15 /* AT */, VE_VLIndex = 1 in + defm a : RVMKlm<opcStr#"at", "", opc, RCM, (ins)>; + let vy = 0 /* AF */, VE_VLIndex = 1 in + defm na : RVMKlm<opcStr#"af", "", opc, RCM, (ins)>; +} +multiclass RVMKm<string opcStr, bits<8> opc, RegisterClass RC, + RegisterClass RCM> { + defm "" : RVMKom<opcStr, opc, RC, RCM>; +} +// Generic RV multiclass for mask calculation with 2 arguments. +// e.g. ANDM, ORM, XORM, and etc. +let cy = 0, sy = 0, vx = ?, vy = ?, vz = ?, hasSideEffects = 0 in +multiclass RVM2m<string opcStr, bits<8> opc, RegisterClass RCM> { + def mm : RV<opc, (outs RCM:$vx), (ins RCM:$vy, RCM:$vz), + !strconcat(opcStr, " $vx, $vy, $vz")>; +} +// Generic RV multiclass for mask calculation with 1 argument. +// e.g. NEGM +let cy = 0, sy = 0, vx = ?, vy = ?, hasSideEffects = 0 in +multiclass RVM1m<string opcStr, bits<8> opc, RegisterClass RCM> { + def m : RV<opc, (outs RCM:$vx), (ins RCM:$vy), + !strconcat(opcStr, " $vx, $vy")>; +} +// Generic RV multiclass for mask calculation with 1 argument. +// e.g. 
PCVM, LZVM, and TOVM +let cy = 0, sy = 0, vy = ?, hasSideEffects = 0, Uses = [VL] in +multiclass RVMSbm<string opcStr, string argStr, bits<8>opc, dag dag_in> { + def "" : RV<opc, (outs I64:$sx), dag_in, + !strconcat(opcStr, " $sx,", argStr)> { + bits<7> sx; + let Inst{54-48} = sx; + } + let DisableEncoding = "$vl", isCodeGenOnly = 1, VE_VLInUse = 1 in { + def l : RV<opc, (outs I64:$sx), !con(dag_in, (ins I32:$vl)), + !strconcat(opcStr, " $sx,", argStr)> { + bits<7> sx; + let Inst{54-48} = sx; + } + def L : RV<opc, (outs I64:$sx), !con(dag_in, (ins VLS:$vl)), + !strconcat(opcStr, " $sx,", argStr)> { + bits<7> sx; + let Inst{54-48} = sx; + } + } +} +let VE_VLIndex = 2 in +multiclass RVMSm<string opcStr, bits<8> opc, RegisterClass RCM> { + defm m : RVMSbm<opcStr, " $vy", opc, (ins RCM:$vy)>; +} + +// Section 8.10.1 - VADD (Vector Add) +let cx = 0, cx2 = 0 in +defm VADDUL : RVm<"vaddu.l", 0xc8, V64, I64, VM>; +let cx = 0, cx2 = 1 in { + defm PVADDULO : RVm<"pvaddu.lo", 0xc8, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VADDUW : RVm<"vaddu.w", 0xc8, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVADDUUP : RVm<"pvaddu.up", 0xc8, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVADDU : RVm<"pvaddu", 0xc8, V64, I64, VM512>; +def : MnemonicAlias<"vaddu.w", "pvaddu.lo">; + +// Section 8.10.2 - VADS (Vector Add Single) +let cx = 0, cx2 = 0 in +defm VADDSWSX : RVm<"vadds.w.sx", 0xca, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVADDSLO : RVm<"pvadds.lo", 0xca, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VADDSWZX : RVm<"vadds.w.zx", 0xca, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVADDSUP : RVm<"pvadds.up", 0xca, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVADDS : RVm<"pvadds", 0xca, V64, I64, VM512>; +def : MnemonicAlias<"pvadds.lo.sx", "vadds.w.sx">; +def : MnemonicAlias<"vadds.w.zx", "pvadds.lo">; +def : MnemonicAlias<"vadds.w", "pvadds.lo">; +def : MnemonicAlias<"pvadds.lo.zx", "pvadds.lo">; + +// Section 8.10.3 - VADX (Vector Add) +defm VADDSL : RVm<"vadds.l", 0x8b, V64, I64, VM>; + +// Section 8.10.4 - VSUB (Vector Subtract) +let cx = 0, cx2 = 0 in +defm VSUBUL : RVm<"vsubu.l", 0xd8, V64, I64, VM>; +let cx = 0, cx2 = 1 in { + defm PVSUBULO : RVm<"pvsubu.lo", 0xd8, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VSUBUW : RVm<"vsubu.w", 0xd8, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVSUBUUP : RVm<"pvsubu.up", 0xd8, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVSUBU : RVm<"pvsubu", 0xd8, V64, I64, VM512>; +def : MnemonicAlias<"vsubu.w", "pvsubu.lo">; + +// Section 8.10.5 - VSBS (Vector Subtract Single) +let cx = 0, cx2 = 0 in +defm VSUBSWSX : RVm<"vsubs.w.sx", 0xda, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVSUBSLO : RVm<"pvsubs.lo", 0xda, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VSUBSWZX : RVm<"vsubs.w.zx", 0xda, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVSUBSUP : RVm<"pvsubs.up", 0xda, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVSUBS : RVm<"pvsubs", 0xda, V64, I64, VM512>; +def : MnemonicAlias<"pvsubs.lo.sx", "vsubs.w.sx">; +def : MnemonicAlias<"vsubs.w.zx", "pvsubs.lo">; +def : MnemonicAlias<"vsubs.w", "pvsubs.lo">; +def : MnemonicAlias<"pvsubs.lo.zx", "pvsubs.lo">; + +// Section 8.10.6 - VSBX (Vector Subtract) +defm VSUBSL : RVm<"vsubs.l", 0x9b, V64, I64, VM>; + +// Section 8.10.7 - VMPY (Vector Multiply) +let cx2 = 0 in +defm VMULUL : RVm<"vmulu.l", 0xc9, V64, I64, VM>; +let cx2 = 1 in +defm VMULUW : RVm<"vmulu.w", 0xc9, V64, I32, VM>; + +// Section 8.10.8 - VMPS (Vector Multiply Single) +let cx2 = 0 in +defm VMULSWSX : RVm<"vmuls.w.sx", 
0xcb, V64, I32, VM>; +let cx2 = 1 in +defm VMULSWZX : RVm<"vmuls.w.zx", 0xcb, V64, I32, VM>; +def : MnemonicAlias<"vmuls.w", "vmuls.w.zx">; + +// Section 8.10.9 - VMPX (Vector Multiply) +defm VMULSL : RVm<"vmuls.l", 0xdb, V64, I64, VM>; + +// Section 8.10.10 - VMPD (Vector Multiply) +defm VMULSLW : RVm<"vmuls.l.w", 0xd9, V64, I32, VM>; + +// Section 8.10.11 - VDIV (Vector Divide) +let cx2 = 0 in +defm VDIVUL : RVDIVm<"vdivu.l", 0xe9, V64, I64, VM>; +let cx2 = 1 in +defm VDIVUW : RVDIVm<"vdivu.w", 0xe9, V64, I32, VM>; + +// Section 8.10.12 - VDVS (Vector Divide Single) +let cx2 = 0 in +defm VDIVSWSX : RVDIVm<"vdivs.w.sx", 0xeb, V64, I32, VM>; +let cx2 = 1 in +defm VDIVSWZX : RVDIVm<"vdivs.w.zx", 0xeb, V64, I32, VM>; +def : MnemonicAlias<"vdivs.w", "vdivs.w.zx">; + +// Section 8.10.13 - VDVX (Vector Divide) +defm VDIVSL : RVDIVm<"vdivs.l", 0xfb, V64, I64, VM>; + +// Section 8.10.14 - VCMP (Vector Compare) +let cx = 0, cx2 = 0 in +defm VCMPUL : RVm<"vcmpu.l", 0xb9, V64, I64, VM>; +let cx = 0, cx2 = 1 in { + defm PVCMPULO : RVm<"pvcmpu.lo", 0xb9, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VCMPUW : RVm<"vcmpu.w", 0xb9, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVCMPUUP : RVm<"pvcmpu.up", 0xb9, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVCMPU : RVm<"pvcmpu", 0xb9, V64, I64, VM512>; +def : MnemonicAlias<"vcmpu.w", "pvcmpu.lo">; + +// Section 8.10.15 - VCPS (Vector Compare Single) +let cx = 0, cx2 = 0 in +defm VCMPSWSX : RVm<"vcmps.w.sx", 0xfa, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVCMPSLO : RVm<"pvcmps.lo", 0xfa, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VCMPSWZX : RVm<"vcmps.w.zx", 0xfa, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVCMPSUP : RVm<"pvcmps.up", 0xfa, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVCMPS : RVm<"pvcmps", 0xfa, V64, I64, VM512>; +def : MnemonicAlias<"pvcmps.lo.sx", "vcmps.w.sx">; +def : MnemonicAlias<"vcmps.w.zx", "pvcmps.lo">; +def : MnemonicAlias<"vcmps.w", "pvcmps.lo">; +def : MnemonicAlias<"pvcmps.lo.zx", "pvcmps.lo">; + +// Section 8.10.16 - VCPX (Vector Compare) +defm VCMPSL : RVm<"vcmps.l", 0xba, V64, I64, VM>; + +// Section 8.10.17 - VCMS (Vector Compare and Select Maximum/Minimum Single) +let cx = 0, cx2 = 0 in +defm VMAXSWSX : RVm<"vmaxs.w.sx", 0x8a, V64, I32, VM>; +let cx = 0, cx2 = 1 in { + defm PVMAXSLO : RVm<"pvmaxs.lo", 0x8a, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VMAXSWZX : RVm<"vmaxs.w.zx", 0x8a, V64, I32, VM>; +} +let cx = 1, cx2 = 0 in +defm PVMAXSUP : RVm<"pvmaxs.up", 0x8a, V64, I64, VM>; +let cx = 1, cx2 = 1 in +defm PVMAXS : RVm<"pvmaxs", 0x8a, V64, I64, VM512>; +let cs2 = 1 in { + let cx = 0, cx2 = 0 in + defm VMINSWSX : RVm<"vmins.w.sx", 0x8a, V64, I32, VM>; + let cx = 0, cx2 = 1 in { + defm PVMINSLO : RVm<"pvmins.lo", 0x8a, V64, I32, VM>; + let isCodeGenOnly = 1 in + defm VMINSWZX : RVm<"vmins.w.zx", 0x8a, V64, I32, VM>; + } + let cx = 1, cx2 = 0 in + defm PVMINSUP : RVm<"pvmins.up", 0x8a, V64, I64, VM>; + let cx = 1, cx2 = 1 in + defm PVMINS : RVm<"pvmins", 0x8a, V64, I64, VM512>; +} +def : MnemonicAlias<"pvmaxs.lo.sx", "vmaxs.w.sx">; +def : MnemonicAlias<"vmaxs.w.zx", "pvmaxs.lo">; +def : MnemonicAlias<"vmaxs.w", "pvmaxs.lo">; +def : MnemonicAlias<"pvmaxs.lo.zx", "pvmaxs.lo">; +def : MnemonicAlias<"pvmins.lo.sx", "vmins.w.sx">; +def : MnemonicAlias<"vmins.w.zx", "pvmins.lo">; +def : MnemonicAlias<"vmins.w", "pvmins.lo">; +def : MnemonicAlias<"pvmins.lo.zx", "pvmins.lo">; + +// Section 8.10.18 - VCMX (Vector Compare and Select Maximum/Minimum) +defm VMAXSL : RVm<"vmaxs.l", 0x9a, V64, I64, VM>; +let 
cs2 = 1 in +defm VMINSL : RVm<"vmins.l", 0x9a, V64, I64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.11 - Vector Logical Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.11.1 - VAND (Vector And) +let cx = 0, cx2 = 0 in defm VAND : RVLm<"vand", 0xc4, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVANDLO : RVLm<"pvand.lo", 0xc4, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVANDUP : RVLm<"pvand.up", 0xc4, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVAND : RVLm<"pvand", 0xc4, I64, V64, VM512>; + +// Section 8.11.2 - VOR (Vector Or) +let cx = 0, cx2 = 0 in defm VOR : RVLm<"vor", 0xc5, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVORLO : RVLm<"pvor.lo", 0xc5, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVORUP : RVLm<"pvor.up", 0xc5, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVOR : RVLm<"pvor", 0xc5, I64, V64, VM512>; + +// Section 8.11.3 - VXOR (Vector Exclusive Or) +let cx = 0, cx2 = 0 in defm VXOR : RVLm<"vxor", 0xc6, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVXORLO : RVLm<"pvxor.lo", 0xc6, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVXORUP : RVLm<"pvxor.up", 0xc6, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVXOR : RVLm<"pvxor", 0xc6, I64, V64, VM512>; + +// Section 8.11.4 - VEQV (Vector Equivalence) +let cx = 0, cx2 = 0 in defm VEQV : RVLm<"veqv", 0xc7, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVEQVLO : RVLm<"pveqv.lo", 0xc7, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVEQVUP : RVLm<"pveqv.up", 0xc7, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVEQV : RVLm<"pveqv", 0xc7, I64, V64, VM512>; + +// Section 8.11.5 - VLDZ (Vector Leading Zero Count) +let cx = 0, cx2 = 0 in defm VLDZ : RV1m<"vldz", 0xe7, V64, VM>; +let cx = 0, cx2 = 1 in defm PVLDZLO : RV1m<"pvldz.lo", 0xe7, V64, VM>; +let cx = 1, cx2 = 0 in defm PVLDZUP : RV1m<"pvldz.up", 0xe7, V64, VM>; +let cx = 1, cx2 = 1 in defm PVLDZ : RV1m<"pvldz", 0xe7, V64, VM512>; + +// Section 8.11.6 - VPCNT (Vector Population Count) +let cx = 0, cx2 = 0 in defm VPCNT : RV1m<"vpcnt", 0xac, V64, VM>; +let cx = 0, cx2 = 1 in defm PVPCNTLO : RV1m<"pvpcnt.lo", 0xac, V64, VM>; +let cx = 1, cx2 = 0 in defm PVPCNTUP : RV1m<"pvpcnt.up", 0xac, V64, VM>; +let cx = 1, cx2 = 1 in defm PVPCNT : RV1m<"pvpcnt", 0xac, V64, VM512>; + +// Section 8.11.7 - VBRV (Vector Bit Reverse) +let cx = 0, cx2 = 0 in defm VBRV : RV1m<"vbrv", 0xf7, V64, VM>; +let cx = 0, cx2 = 1 in defm PVBRVLO : RV1m<"pvbrv.lo", 0xf7, V64, VM>; +let cx = 1, cx2 = 0 in defm PVBRVUP : RV1m<"pvbrv.up", 0xf7, V64, VM>; +let cx = 1, cx2 = 1 in defm PVBRV : RV1m<"pvbrv", 0xf7, V64, VM512>; + +// Section 8.11.8 - VSEQ (Vector Sequential Number) +let cx = 0, cx2 = 0 in defm VSEQ : RV0m<"vseq", 0x99, V64, VM>; +let cx = 0, cx2 = 1 in defm PVSEQLO : RV0m<"pvseq.lo", 0x99, V64, VM>; +let cx = 1, cx2 = 0 in defm PVSEQUP : RV0m<"pvseq.up", 0x99, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSEQ : RV0m<"pvseq", 0x99, V64, VM512>; + +//----------------------------------------------------------------------------- +// Section 8.12 - Vector Shift Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.12.1 - VSLL (Vector Shift Left Logical) +let cx = 0, cx2 = 0 in defm VSLL : RVSm<"vsll", 0xe5, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVSLLLO : RVSm<"pvsll.lo", 0xe5, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVSLLUP : RVSm<"pvsll.up", 0xe5, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSLL : RVSm<"pvsll", 0xe5, I64, V64, 
VM512>; + +// Section 8.12.2 - VSLD (Vector Shift Left Double) +defm VSLD : RVSDm<"vsld", 0xe4, V64, VM>; + +// Section 8.12.3 - VSRL (Vector Shift Right Logical) +let cx = 0, cx2 = 0 in defm VSRL : RVSm<"vsrl", 0xf5, I64, V64, VM>; +let cx = 0, cx2 = 1 in defm PVSRLLO : RVSm<"pvsrl.lo", 0xf5, I32, V64, VM>; +let cx = 1, cx2 = 0 in defm PVSRLUP : RVSm<"pvsrl.up", 0xf5, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSRL : RVSm<"pvsrl", 0xf5, I64, V64, VM512>; + +// Section 8.12.4 - VSRD (Vector Shift Right Double) +defm VSRD : RVSDm<"vsrd", 0xf4, V64, VM>; + +// Section 8.12.5 - VSLA (Vector Shift Left Arithmetic) +let cx = 0, cx2 = 0 in defm VSLAWSX : RVSm<"vsla.w.sx", 0xe6, I32, V64, VM>; +let cx = 0, cx2 = 1 in { + defm PVSLALO : RVSm<"pvsla.lo", 0xe6, I32, V64, VM>; + let isCodeGenOnly = 1 in defm VSLAWZX : RVSm<"vsla.w.zx", 0xe6, I32, V64, VM>; +} +let cx = 1, cx2 = 0 in defm PVSLAUP : RVSm<"pvsla.up", 0xe6, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSLA : RVSm<"pvsla", 0xe6, I64, V64, VM512>; +def : MnemonicAlias<"pvsla.lo.sx", "vsla.w.sx">; +def : MnemonicAlias<"vsla.w.zx", "pvsla.lo">; +def : MnemonicAlias<"vsla.w", "pvsla.lo">; +def : MnemonicAlias<"pvsla.lo.zx", "pvsla.lo">; + +// Section 8.12.6 - VSLAX (Vector Shift Left Arithmetic) +defm VSLAL : RVSm<"vsla.l", 0xd4, I64, V64, VM>; + +// Section 8.12.7 - VSRA (Vector Shift Right Arithmetic) +let cx = 0, cx2 = 0 in defm VSRAWSX : RVSm<"vsra.w.sx", 0xf6, I32, V64, VM>; +let cx = 0, cx2 = 1 in { + defm PVSRALO : RVSm<"pvsra.lo", 0xf6, I32, V64, VM>; + let isCodeGenOnly = 1 in defm VSRAWZX : RVSm<"vsra.w.zx", 0xf6, I32, V64, VM>; +} +let cx = 1, cx2 = 0 in defm PVSRAUP : RVSm<"pvsra.up", 0xf6, F32, V64, VM>; +let cx = 1, cx2 = 1 in defm PVSRA : RVSm<"pvsra", 0xf6, I64, V64, VM512>; +def : MnemonicAlias<"pvsra.lo.sx", "vsra.w.sx">; +def : MnemonicAlias<"vsra.w.zx", "pvsra.lo">; +def : MnemonicAlias<"vsra.w", "pvsra.lo">; +def : MnemonicAlias<"pvsra.lo.zx", "pvsra.lo">; + +// Section 8.12.8 - VSRAX (Vector Shift Right Arithmetic) +defm VSRAL : RVSm<"vsra.l", 0xd5, I64, V64, VM>; + +// Section 8.12.9 - VSFA (Vector Shift Left and Add) +defm VSFA : RVSAm<"vsfa", 0xd7, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.13 - Vector Floating-Point Arithmetic Instructions +//----------------------------------------------------------------------------- + +// Section 8.13.1 - VFAD (Vector Floating Add) +let cx = 0, cx2 = 0 in +defm VFADDD : RVm<"vfadd.d", 0xcc, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFADDLO : RVm<"pvfadd.lo", 0xcc, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFADDUP : RVm<"pvfadd.up", 0xcc, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFADDS : RVm<"vfadd.s", 0xcc, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFADD : RVm<"pvfadd", 0xcc, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfadd.s", "pvfadd.up">; + +// Section 8.13.2 - VFSB (Vector Floating Subtract) +let cx = 0, cx2 = 0 in +defm VFSUBD : RVm<"vfsub.d", 0xdc, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFSUBLO : RVm<"pvfsub.lo", 0xdc, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFSUBUP : RVm<"pvfsub.up", 0xdc, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFSUBS : RVm<"vfsub.s", 0xdc, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFSUB : RVm<"pvfsub", 0xdc, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfsub.s", "pvfsub.up">; + +// Section 8.13.3 - VFMP (Vector Floating Multiply) +let cx = 0, cx2 = 0 in 
+defm VFMULD : RVm<"vfmul.d", 0xcd, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMULLO : RVm<"pvfmul.lo", 0xcd, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMULUP : RVm<"pvfmul.up", 0xcd, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMULS : RVm<"vfmul.s", 0xcd, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMUL : RVm<"pvfmul", 0xcd, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfmul.s", "pvfmul.up">; + +// Section 8.13.4 - VFDV (Vector Floating Divide) +defm VFDIVD : RVDIVm<"vfdiv.d", 0xdd, V64, I64, VM, simm7fp>; +let cx = 1 in +defm VFDIVS : RVDIVm<"vfdiv.s", 0xdd, V64, F32, VM, simm7fp>; + +// Section 8.13.5 - VFSQRT (Vector Floating Square Root) +defm VFSQRTD : RVF1m<"vfsqrt.d", 0xed, V64, VM>; +let cx = 1 in +defm VFSQRTS : RVF1m<"vfsqrt.s", 0xed, V64, VM>; + +// Section 8.13.6 - VFCP (Vector Floating Compare) +let cx = 0, cx2 = 0 in +defm VFCMPD : RVm<"vfcmp.d", 0xfc, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFCMPLO : RVm<"pvfcmp.lo", 0xfc, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFCMPUP : RVm<"pvfcmp.up", 0xfc, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFCMPS : RVm<"vfcmp.s", 0xfc, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFCMP : RVm<"pvfcmp", 0xfc, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfcmp.s", "pvfcmp.up">; + +// Section 8.13.7 - VFCM (Vector Floating Compare and Select Maximum/Minimum) +let cx = 0, cx2 = 0 in +defm VFMAXD : RVm<"vfmax.d", 0xbd, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMAXLO : RVm<"pvfmax.lo", 0xbd, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMAXUP : RVm<"pvfmax.up", 0xbd, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMAXS : RVm<"vfmax.s", 0xbd, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMAX : RVm<"pvfmax", 0xbd, V64, I64, VM512, simm7fp>; +let cs2 = 1 in { + let cx = 0, cx2 = 0 in + defm VFMIND : RVm<"vfmin.d", 0xbd, V64, I64, VM, simm7fp>; + let cx = 0, cx2 = 1 in + defm PVFMINLO : RVm<"pvfmin.lo", 0xbd, V64, I64, VM, simm7fp>; + let cx = 1, cx2 = 0 in { + defm PVFMINUP : RVm<"pvfmin.up", 0xbd, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMINS : RVm<"vfmin.s", 0xbd, V64, F32, VM, simm7fp>; + } + let cx = 1, cx2 = 1 in + defm PVFMIN : RVm<"pvfmin", 0xbd, V64, I64, VM512, simm7fp>; +} +def : MnemonicAlias<"vfmax.s", "pvfmax.up">; +def : MnemonicAlias<"vfmin.s", "pvfmin.up">; + +// Section 8.13.8 - VFMAD (Vector Floating Fused Multiply Add) +let cx = 0, cx2 = 0 in +defm VFMADD : RVMm<"vfmad.d", 0xe2, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMADLO : RVMm<"pvfmad.lo", 0xe2, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMADUP : RVMm<"pvfmad.up", 0xe2, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMADS : RVMm<"vfmad.s", 0xe2, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMAD : RVMm<"pvfmad", 0xe2, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfmad.s", "pvfmad.up">; + +// Section 8.13.9 - VFMSB (Vector Floating Fused Multiply Subtract) +let cx = 0, cx2 = 0 in +defm VFMSBD : RVMm<"vfmsb.d", 0xf2, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFMSBLO : RVMm<"pvfmsb.lo", 0xf2, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFMSBUP : RVMm<"pvfmsb.up", 0xf2, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFMSBS : RVMm<"vfmsb.s", 0xf2, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFMSB : RVMm<"pvfmsb", 0xf2, V64, I64, VM512, simm7fp>; +def : 
MnemonicAlias<"vfmsb.s", "pvfmsb.up">; + +// Section 8.13.10 - VFNMAD (Vector Floating Fused Negative Multiply Add) +let cx = 0, cx2 = 0 in +defm VFNMADD : RVMm<"vfnmad.d", 0xe3, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFNMADLO : RVMm<"pvfnmad.lo", 0xe3, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFNMADUP : RVMm<"pvfnmad.up", 0xe3, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFNMADS : RVMm<"vfnmad.s", 0xe3, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFNMAD : RVMm<"pvfnmad", 0xe3, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfnmad.s", "pvfnmad.up">; + +// Section 8.13.11 - VFNMSB (Vector Floating Fused Negative Multiply Subtract) +let cx = 0, cx2 = 0 in +defm VFNMSBD : RVMm<"vfnmsb.d", 0xf3, V64, I64, VM, simm7fp>; +let cx = 0, cx2 = 1 in +defm PVFNMSBLO : RVMm<"pvfnmsb.lo", 0xf3, V64, I64, VM, simm7fp>; +let cx = 1, cx2 = 0 in { + defm PVFNMSBUP : RVMm<"pvfnmsb.up", 0xf3, V64, F32, VM, simm7fp>; + let isCodeGenOnly = 1 in + defm VFNMSBS : RVMm<"vfnmsb.s", 0xf3, V64, F32, VM, simm7fp>; +} +let cx = 1, cx2 = 1 in +defm PVFNMSB : RVMm<"pvfnmsb", 0xf3, V64, I64, VM512, simm7fp>; +def : MnemonicAlias<"vfnmsb.s", "pvfnmsb.up">; + +// Section 8.13.12 - VRCP (Vector Floating Reciprocal) +let cx = 0, cx2 = 0 in defm VRCPD : RVF1m<"vrcp.d", 0xe1, V64, VM>; +let cx = 0, cx2 = 1 in defm PVRCPLO : RVF1m<"pvrcp.lo", 0xe1, V64, VM>; +let cx = 1, cx2 = 0 in { + defm PVRCPUP : RVF1m<"pvrcp.up", 0xe1, V64, VM>; + let isCodeGenOnly = 1 in defm VRCPS : RVF1m<"vrcp.s", 0xe1, V64, VM>; +} +let cx = 1, cx2 = 1 in defm PVRCP : RVF1m<"pvrcp", 0xe1, V64, VM512>; +def : MnemonicAlias<"vrcp.s", "pvrcp.up">; + +// Section 8.13.13 - VRSQRT (Vector Floating Reciprocal Square Root) +let cx = 0, cx2 = 0 in defm VRSQRTD : RVF1m<"vrsqrt.d", 0xf1, V64, VM>; +let cx = 0, cx2 = 1 in defm PVRSQRTLO : RVF1m<"pvrsqrt.lo", 0xf1, V64, VM>; +let cx = 1, cx2 = 0 in { + defm PVRSQRTUP : RVF1m<"pvrsqrt.up", 0xf1, V64, VM>; + let isCodeGenOnly = 1 in + defm VRSQRTS : RVF1m<"vrsqrt.s", 0xf1, V64, VM>; +} +let cx = 1, cx2 = 1 in +defm PVRSQRT : RVF1m<"pvrsqrt", 0xf1, V64, VM512>; +let cs2 = 1 in { + let cx = 0, cx2 = 0 in + defm VRSQRTDNEX : RVF1m<"vrsqrt.d.nex", 0xf1, V64, VM>; + let cx = 0, cx2 = 1 in + defm PVRSQRTLONEX : RVF1m<"pvrsqrt.lo.nex", 0xf1, V64, VM>; + let cx = 1, cx2 = 0 in { + defm PVRSQRTUPNEX : RVF1m<"pvrsqrt.up.nex", 0xf1, V64, VM>; + let isCodeGenOnly = 1 in + defm VRSQRTSNEX : RVF1m<"vrsqrt.s.nex", 0xf1, V64, VM>; + } + let cx = 1, cx2 = 1 in + defm PVRSQRTNEX : RVF1m<"pvrsqrt.nex", 0xf1, V64, VM512>; +} +def : MnemonicAlias<"vrsqrt.s", "pvrsqrt.up">; +def : MnemonicAlias<"vrsqrt.s.nex", "pvrsqrt.up.nex">; + +// Section 8.13.14 - VFIX (Vector Convert to Fixed Pointer) +let cx = 0, cx2 = 0, cs2 = 0 in +defm VCVTWDSX : RVFIXm<"vcvt.w.d.sx", 0xe8, V64, VM>; +let cx = 0, cx2 = 1, cs2 = 0 in +defm VCVTWDZX : RVFIXm<"vcvt.w.d.zx", 0xe8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 0 in +defm VCVTWSSX : RVFIXm<"vcvt.w.s.sx", 0xe8, V64, VM>; +let cx = 1, cx2 = 1, cs2 = 0 in +defm VCVTWSZX : RVFIXm<"vcvt.w.s.zx", 0xe8, V64, VM>; +let cx = 0, cx2 = 1, cs2 = 1 in +defm PVCVTWSLO : RVFIXm<"pvcvt.w.s.lo", 0xe8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 1 in +defm PVCVTWSUP : RVFIXm<"pvcvt.w.s.up", 0xe8, V64, VM>; +let cx = 1, cx2 = 1, cs2 = 1 in +defm PVCVTWS : RVFIXm<"pvcvt.w.s", 0xe8, V64, VM512>; + +// Section 8.13.15 - VFIXX (Vector Convert to Fixed Pointer) +defm VCVTLD : RVFIXm<"vcvt.l.d", 0xa8, V64, VM>; + +// Section 8.13.16 - VFLT (Vector Convert to Floating Pointer) 
+let cx = 0, cx2 = 0, cs2 = 0 in +defm VCVTDW : RVF1m<"vcvt.d.w", 0xf8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 0 in +defm VCVTSW : RVF1m<"vcvt.s.w", 0xf8, V64, VM>; +let cx = 0, cx2 = 1, cs2 = 1 in +defm PVCVTSWLO : RVF1m<"pvcvt.s.w.lo", 0xf8, V64, VM>; +let cx = 1, cx2 = 0, cs2 = 1 in +defm PVCVTSWUP : RVF1m<"pvcvt.s.w.up", 0xf8, V64, VM>; +let cx = 1, cx2 = 1, cs2 = 1 in +defm PVCVTSW : RVF1m<"pvcvt.s.w", 0xf8, V64, VM512>; + +// Section 8.13.17 - VFLTX (Vector Convert to Floating Pointer) +defm VCVTDL : RVF1m<"vcvt.d.l", 0xb8, V64, VM>; + +// Section 8.13.18 - VCVS (Vector Convert to Single-format) +defm VCVTSD : RVF1m<"vcvt.s.d", 0x9f, V64, VM>; + +// Section 8.13.19 - VCVD (Vector Convert to Double-format) +defm VCVTDS : RVF1m<"vcvt.d.s", 0x8f, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.14 - Vector Reduction Instructions +//----------------------------------------------------------------------------- + +// Section 8.14.1 - VSUMS (Vector Sum Single) +defm VSUMWSX : RVF1m<"vsum.w.sx", 0xea, V64, VM>; +let cx2 = 1 in defm VSUMWZX : RVF1m<"vsum.w.zx", 0xea, V64, VM>; + +// Section 8.14.2 - VSUMX (Vector Sum) +defm VSUML : RVF1m<"vsum.l", 0xaa, V64, VM>; + +// Section 8.14.3 - VFSUM (Vector Floating Sum) +defm VFSUMD : RVF1m<"vfsum.d", 0xec, V64, VM>; +let cx = 1 in defm VFSUMS : RVF1m<"vfsum.s", 0xec, V64, VM>; + +// Section 8.14.4 - VMAXS (Vector Maximum/Minimum Single) +let cx2 = 0 in defm VRMAXSWFSTSX : RVF1m<"vrmaxs.w.fst.sx", 0xbb, V64, VM>; +let cx2 = 1 in defm VRMAXSWFSTZX : RVF1m<"vrmaxs.w.fst.zx", 0xbb, V64, VM>; +let cs = 1 in { + let cx2 = 0 in + defm VRMAXSWLSTSX : RVF1m<"vrmaxs.w.lst.sx", 0xbb, V64, VM>; + let cx2 = 1 in + defm VRMAXSWLSTZX : RVF1m<"vrmaxs.w.lst.zx", 0xbb, V64, VM>; +} +let cs2 = 1 in { + let cx2 = 0 in + defm VRMINSWFSTSX : RVF1m<"vrmins.w.fst.sx", 0xbb, V64, VM>; + let cx2 = 1 in + defm VRMINSWFSTZX : RVF1m<"vrmins.w.fst.zx", 0xbb, V64, VM>; + let cs = 1 in { + let cx2 = 0 in + defm VRMINSWLSTSX : RVF1m<"vrmins.w.lst.sx", 0xbb, V64, VM>; + let cx2 = 1 in + defm VRMINSWLSTZX : RVF1m<"vrmins.w.lst.zx", 0xbb, V64, VM>; + } +} + +// Section 8.14.5 - VMAXX (Vector Maximum/Minimum) +let cs = 0 in defm VRMAXSLFST : RVF1m<"vrmaxs.l.fst", 0xab, V64, VM>; +let cs = 1 in defm VRMAXSLLST : RVF1m<"vrmaxs.l.lst", 0xab, V64, VM>; +let cs2 = 1 in { + let cs = 0 in defm VRMINSLFST : RVF1m<"vrmins.l.fst", 0xab, V64, VM>; + let cs = 1 in defm VRMINSLLST : RVF1m<"vrmins.l.lst", 0xab, V64, VM>; +} + +// Section 8.14.6 - VFMAX (Vector Floating Maximum/Minimum) +let cs = 0 in defm VFRMAXDFST : RVF1m<"vfrmax.d.fst", 0xad, V64, VM>; +let cs = 1 in defm VFRMAXDLST : RVF1m<"vfrmax.d.lst", 0xad, V64, VM>; +let cs2 = 1 in { + let cs = 0 in defm VFRMINDFST : RVF1m<"vfrmin.d.fst", 0xad, V64, VM>; + let cs = 1 in defm VFRMINDLST : RVF1m<"vfrmin.d.lst", 0xad, V64, VM>; +} +let cx = 1 in { + let cs = 0 in defm VFRMAXSFST : RVF1m<"vfrmax.s.fst", 0xad, V64, VM>; + let cs = 1 in defm VFRMAXSLST : RVF1m<"vfrmax.s.lst", 0xad, V64, VM>; + let cs2 = 1 in { + let cs = 0 in defm VFRMINSFST : RVF1m<"vfrmin.s.fst", 0xad, V64, VM>; + let cs = 1 in defm VFRMINSLST : RVF1m<"vfrmin.s.lst", 0xad, V64, VM>; + } +} + +// Section 8.14.7 - VRAND (Vector Reduction And) +defm VRAND : RVF1m<"vrand", 0x88, V64, VM>; + +// Section 8.14.8 - VROR (Vector Reduction Or) +defm VROR : RVF1m<"vror", 0x98, V64, VM>; + +// Section 8.14.9 - VRXOR (Vector Reduction Exclusive Or) +defm VRXOR : RVF1m<"vrxor", 0x89, V64, VM>; + 
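The reduction maximum/minimum definitions above (Section 8.14.4) lean on three encoding bits: cs2 switches between maximum and minimum, cs between the first and last occurrence, and cx2 between sign- and zero-extension of the 32-bit result. A minimal standalone C++ sketch of that mapping follows (illustrative only; the helper name vrMaxMinMnemonic is made up here and is not part of the backend):

#include <iostream>
#include <string>

// Rebuilds the eight Section 8.14.4 mnemonics from their encoding bits.
static std::string vrMaxMinMnemonic(bool cs2, bool cs, bool cx2) {
  std::string M = cs2 ? "vrmins.w" : "vrmaxs.w"; // cs2 = 1 selects minimum
  M += cs ? ".lst" : ".fst";                     // cs  = 1 selects last occurrence
  M += cx2 ? ".zx" : ".sx";                      // cx2 = 1 selects zero-extension
  return M;
}

int main() {
  for (int cs2 = 0; cs2 < 2; ++cs2)
    for (int cs = 0; cs < 2; ++cs)
      for (int cx2 = 0; cx2 < 2; ++cx2)
        std::cout << vrMaxMinMnemonic(cs2, cs, cx2) << '\n';
}

The same cs/cs2 pattern carries over to the VMAXX (8.14.5) and VFMAX (8.14.6) groups above, with cx selecting the .s forms in the floating-point case.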
+//----------------------------------------------------------------------------- +// Section 8.15 - Vector Iterative Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.15.1 - VFIA (Vector Floating Iteration Add) +let cx = 0 in defm VFIAD : RVI2m<"vfia.d", 0xce, V64, I64>; +let cx = 1 in defm VFIAS : RVI2m<"vfia.s", 0xce, V64, F32>; + +// Section 8.15.2 - VFIS (Vector Floating Iteration Subtract) +let cx = 0 in defm VFISD : RVI2m<"vfis.d", 0xde, V64, I64>; +let cx = 1 in defm VFISS : RVI2m<"vfis.s", 0xde, V64, F32>; + +// Section 8.15.3 - VFIM (Vector Floating Iteration Multiply) +let cx = 0 in defm VFIMD : RVI2m<"vfim.d", 0xcf, V64, I64>; +let cx = 1 in defm VFIMS : RVI2m<"vfim.s", 0xcf, V64, F32>; + +// Section 8.15.4 - VFIAM (Vector Floating Iteration Add and Multiply) +let cx = 0 in defm VFIAMD : RVI3m<"vfiam.d", 0xee, V64, I64>; +let cx = 1 in defm VFIAMS : RVI3m<"vfiam.s", 0xee, V64, F32>; + +// Section 8.15.5 - VFISM (Vector Floating Iteration Subtract and Multiply) +let cx = 0 in defm VFISMD : RVI3m<"vfism.d", 0xfe, V64, I64>; +let cx = 1 in defm VFISMS : RVI3m<"vfism.s", 0xfe, V64, F32>; + +// Section 8.15.6 - VFIMA (Vector Floating Iteration Multiply and Add) +let cx = 0 in defm VFIMAD : RVI3m<"vfima.d", 0xef, V64, I64>; +let cx = 1 in defm VFIMAS : RVI3m<"vfima.s", 0xef, V64, F32>; + +// Section 8.15.7 - VFIMS (Vector Floating Iteration Multiply and Subtract) +let cx = 0 in defm VFIMSD : RVI3m<"vfims.d", 0xff, V64, I64>; +let cx = 1 in defm VFIMSS : RVI3m<"vfims.s", 0xff, V64, F32>; + +//----------------------------------------------------------------------------- +// Section 8.16 - Vector Merger Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.16.1 - VMRG (Vector Merge) +let cx = 0 in defm VMRG : RVm<"vmrg", 0xd6, V64, I64, VM>; +// FIXME: vmrg.w should be called as pvmrg, but following assembly manual. 
+let cx = 1 in defm VMRGW : RVm<"vmrg.w", 0xd6, V64, I64, VM512>; +def : MnemonicAlias<"vmrg.l", "vmrg">; + +// Section 8.16.2 - VSHF (Vector Shuffle) +defm VSHF : RVSHFm<"vshf", 0xbc, V64>; + +// Section 8.16.3 - VCP (Vector Compress) +defm VCP : RV1m<"vcp", 0x8d, V64, VM>; + +// Section 8.16.4 - VEX (Vector Expand) +defm VEX : RV1m<"vex", 0x9d, V64, VM>; + +//----------------------------------------------------------------------------- +// Section 8.17 - Vector Mask Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.17.1 - VFMK (Vector Form Mask) +defm VFMKL : RVMKm<"vfmk.l.", 0xb4, V64, VM>; +def : MnemonicAlias<"vfmk.l", "vfmk.l.at">; + +// Section 8.17.2 - VFMS (Vector Form Mask Single) +defm VFMKW : RVMKm<"vfmk.w.", 0xb5, V64, VM>; +let isCodeGenOnly = 1 in defm PVFMKWLO : RVMKm<"vfmk.w.", 0xb5, V64, VM>; +let cx = 1 in defm PVFMKWUP : RVMKm<"pvfmk.w.up.", 0xb5, V64, VM>; +def : MnemonicAlias<"vfmk.w", "vfmk.w.at">; +def : MnemonicAlias<"pvfmk.w.up", "pvfmk.w.up.at">; +def : MnemonicAlias<"pvfmk.w.lo", "vfmk.w.at">; +foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at" ] in { + def : MnemonicAlias<"pvfmk.w.lo."#CC, "vfmk.w."#CC>; +} + +// Section 8.17.3 - VFMF (Vector Form Mask Floating Point) +defm VFMKD : RVMKm<"vfmk.d.", 0xb6, V64, VM>; +let cx2 = 1 in defm PVFMKSLO : RVMKm<"pvfmk.s.lo.", 0xb6, V64, VM>; +let cx = 1 in { + defm PVFMKSUP : RVMKm<"pvfmk.s.up.", 0xb6, V64, VM>; + let isCodeGenOnly = 1 in defm VFMKS : RVMKm<"vfmk.s.", 0xb6, V64, VM>; +} +def : MnemonicAlias<"vfmk.d", "vfmk.d.at">; +def : MnemonicAlias<"pvfmk.s.lo", "pvfmk.s.lo.at">; +def : MnemonicAlias<"pvfmk.s.up", "pvfmk.s.up.at">; +def : MnemonicAlias<"vfmk.s", "pvfmk.s.up.at">; +foreach CC = [ "af", "gt", "lt", "ne", "eq", "ge", "le", "at", "num", "nan", + "gtnan", "ltnan", "nenan", "eqnan", "genan", "lenan" ] in { + def : MnemonicAlias<"vfmk.s."#CC, "pvfmk.s.up."#CC>; +} + +// Section 8.17.4 - ANDM (And VM) +defm ANDM : RVM2m<"andm", 0x84, VM>; + +// Section 8.17.5 - ORM (Or VM) +defm ORM : RVM2m<"orm", 0x85, VM>; + +// Section 8.17.6 - XORM (Exclusive Or VM) +defm XORM : RVM2m<"xorm", 0x86, VM>; + +// Section 8.17.7 - EQVM (Equivalence VM) +defm EQVM : RVM2m<"eqvm", 0x87, VM>; + +// Section 8.17.8 - NNDM (Negate And VM) +defm NNDM : RVM2m<"nndm", 0x94, VM>; + +// Section 8.17.9 - NEGM (Negate VM) +defm NEGM : RVM1m<"negm", 0x95, VM>; + +// Section 8.17.10 - PCVM (Population Count of VM) +defm PCVM : RVMSm<"pcvm", 0xa4, VM>; + +// Section 8.17.11 - LZVM (Leading Zero of VM) +defm LZVM : RVMSm<"lzvm", 0xa5, VM>; + +// Section 8.17.12 - TOVM (Trailing One of VM) +defm TOVM : RVMSm<"tovm", 0xa6, VM>; + +//----------------------------------------------------------------------------- +// Section 8.18 - Vector Control Instructions +//----------------------------------------------------------------------------- + +// Section 8.18.1 - LVL (Load VL) +let sx = 0, cz = 0, sz = 0, hasSideEffects = 0, Defs = [VL] in { + def LVLr : RR<0xbf, (outs), (ins I64:$sy), "lvl $sy">; + let cy = 0 in def LVLi : RR<0xbf, (outs), (ins simm7:$sy), "lvl $sy">; +} + +// Section 8.18.2 - SVL (Save VL) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0, Uses = [VL] in +def SVL : RR<0x2f, (outs I64:$sx), (ins), "svl $sx">; + +// Section 8.18.3 - SMVL (Save Maximum Vector Length) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in +def SMVL : RR<0x2e, (outs I64:$sx), (ins), "smvl $sx">; + +// Section 8.18.4 - LVIX (Load Vector Data Index) +let sx = 0, cz = 
0, sz = 0, hasSideEffects = 0, Defs = [VIX] in { + def LVIXr : RR<0xaf, (outs), (ins I64:$sy), "lvix $sy">; + let cy = 0 in def LVIXi : RR<0xaf, (outs), (ins uimm6:$sy), "lvix $sy">; +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp index 9815610510e1..bc5577ce4f97 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -51,6 +51,11 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, break; return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand( + MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP); + case MachineOperand::MO_ConstantPoolIndex: + return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP); case MachineOperand::MO_ExternalSymbol: return LowerSymbolOperand( MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); @@ -58,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP); case MachineOperand::MO_Immediate: return MCOperand::createImm(MO.getImm()); - + case MachineOperand::MO_JumpTableIndex: + return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP); case MachineOperand::MO_MachineBasicBlock: return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP); diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp index 5783a8df69d2..d175ad26c742 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -35,6 +36,8 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {} const MCPhysReg * VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { switch (MF->getFunction().getCallingConv()) { + case CallingConv::Fast: + // Being explicit (same as standard CC). default: return CSR_SaveList; case CallingConv::PreserveAll: @@ -45,6 +48,8 @@ VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { switch (CC) { + case CallingConv::Fast: + // Being explicit (same as standard CC). default: return CSR_RegMask; case CallingConv::PreserveAll: @@ -82,10 +87,22 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const { ++ItAlias) Reserved.set(*ItAlias); + // Reserve constant registers. + Reserved.set(VE::VM0); + Reserved.set(VE::VMP0); + return Reserved; } -bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; } +bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { + switch (PhysReg) { + case VE::VM0: + case VE::VMP0: + return true; + default: + return false; + } +} const TargetRegisterClass * VERegisterInfo::getPointerRegClass(const MachineFunction &MF, @@ -93,6 +110,29 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF, return &VE::I64RegClass; } +static unsigned offsetToDisp(MachineInstr &MI) { + // Default offset in instruction's operands (reg+reg+imm). 
+ unsigned OffDisp = 2; + +#define RRCAS_multi_cases(NAME) NAME##rir : case NAME##rii + + { + using namespace llvm::VE; + switch (MI.getOpcode()) { + case RRCAS_multi_cases(TS1AML): + case RRCAS_multi_cases(TS1AMW): + case RRCAS_multi_cases(CASL): + case RRCAS_multi_cases(CASW): + // These instructions use AS format (reg+imm). + OffDisp = 1; + break; + } + } +#undef RRCAS_multi_cases + + return OffDisp; +} + static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, MachineInstr &MI, const DebugLoc &dl, unsigned FIOperandNum, int Offset, Register FrameReg) { @@ -100,7 +140,7 @@ static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, // VE has 32 bit offset field, so no need to expand a target instruction. // Directly encode it. MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); - MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset); } void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, @@ -116,9 +156,41 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register FrameReg; int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); - - Offset += MI.getOperand(FIOperandNum + 2).getImm(); + Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed(); + + Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm(); + + if (MI.getOpcode() == VE::STQrii) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + Register SrcReg = MI.getOperand(3).getReg(); + Register SrcHiReg = getSubReg(SrcReg, VE::sub_even); + Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd); + // VE stores HiReg to 8(addr) and LoReg to 0(addr) + MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii)) + .addReg(FrameReg) + .addImm(0) + .addImm(0) + .addReg(SrcLoReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + MI.setDesc(TII.get(VE::STrii)); + MI.getOperand(3).setReg(SrcHiReg); + Offset += 8; + } else if (MI.getOpcode() == VE::LDQrii) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + Register DestReg = MI.getOperand(0).getReg(); + Register DestHiReg = getSubReg(DestReg, VE::sub_even); + Register DestLoReg = getSubReg(DestReg, VE::sub_odd); + // VE loads HiReg from 8(addr) and LoReg from 0(addr) + MachineInstr *StMI = + BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg) + .addReg(FrameReg) + .addImm(0) + .addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + MI.setDesc(TII.get(VE::LDrii)); + MI.getOperand(0).setReg(DestHiReg); + Offset += 8; + } replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } @@ -126,26 +198,3 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const { return VE::SX9; } - -// VE has no architectural need for stack realignment support, -// except that LLVM unfortunately currently implements overaligned -// stack objects by depending upon stack realignment support. -// If that ever changes, this can probably be deleted. -bool VERegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (!TargetRegisterInfo::canRealignStack(MF)) - return false; - - // VE always has a fixed frame pointer register, so don't need to - // worry about needing to reserve it. [even if we don't have a frame - // pointer for our frame, it still cannot be used for other things, - // or register window traps will be SADNESS.] 
- - // If there's a reserved call frame, we can use VE to access locals. - if (getFrameLowering(MF)->hasReservedCallFrame(MF)) - return true; - - // Otherwise, we'd need a base pointer, but those aren't implemented - // for VE at the moment. - - return false; -} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h index 9a32da16bea6..334fb965a986 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h @@ -40,8 +40,6 @@ public: RegScavenger *RS = nullptr) const override; Register getFrameRegister(const MachineFunction &MF) const override; - - bool canRealignStack(const MachineFunction &MF) const override; }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td index 29708d35c730..70ff104b65b7 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td @@ -26,13 +26,33 @@ class VEMiscReg<bits<6> enc, string n>: Register<n> { let Namespace = "VE"; } +class VEVecReg<bits<8> enc, string n, list<Register> subregs = [], + list<string> altNames = [], list<Register> aliases = []> + : Register<n, altNames> { + let HWEncoding{15-8} = 0; + let HWEncoding{7-0} = enc; + let Namespace = "VE"; + let SubRegs = subregs; + let Aliases = aliases; +} + +class VEMaskReg<bits<4> enc, string n, list<Register> subregs = [], + list<string> altNames = [], list<Register> aliases = []> + : Register<n, altNames> { + let HWEncoding{15-4} = 0; + let HWEncoding{3-0} = enc; + let Namespace = "VE"; + let SubRegs = subregs; + let Aliases = aliases; +} + let Namespace = "VE" in { - def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63) - def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63) def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63) def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31) def sub_even : SubRegIndex<64>; // High 64 bit (0..63) def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127) + def sub_vm_even : SubRegIndex<256>; // High 256 bit (0..255) + def sub_vm_odd : SubRegIndex<256, 256>; // Low 256 bit (256..511) def AsmName : RegAltNameIndex; } @@ -66,26 +86,23 @@ def MISC : RegisterClass<"VE", [i64], 64, def IC : VEMiscReg<62, "ic">; //----------------------------------------------------------------------------- -// Gneric Registers +// Vector Length Register //----------------------------------------------------------------------------- -let RegAltNameIndices = [AsmName] in { +def VL : VEMiscReg<63, "vl">; -// Generic integer registers - 8 bits wide -foreach I = 0-63 in - def SB#I : VEReg<I, "sb"#I, [], ["s"#I]>, DwarfRegNum<[I]>; +// Register classes. 
+def VLS : RegisterClass<"VE", [i32], 64, (add VL)>; -// Generic integer registers - 16 bits wide -let SubRegIndices = [sub_i8] in -foreach I = 0-63 in - def SH#I : VEReg<I, "sh"#I, [!cast<VEReg>("SB"#I)], ["s"#I]>, - DwarfRegNum<[I]>; +//----------------------------------------------------------------------------- +// Generic Registers +//----------------------------------------------------------------------------- + +let RegAltNameIndices = [AsmName] in { // Generic integer registers - 32 bits wide -let SubRegIndices = [sub_i16] in foreach I = 0-63 in - def SW#I : VEReg<I, "sw"#I, [!cast<VEReg>("SH"#I)], ["s"#I]>, - DwarfRegNum<[I]>; + def SW#I : VEReg<I, "sw"#I, [], ["s"#I]>, DwarfRegNum<[I]>; // Generic floating point registers - 32 bits wide // NOTE: Mark SF#I as alias of SW#I temporary to avoid register allocation @@ -95,10 +112,21 @@ foreach I = 0-63 in DwarfRegNum<[I]>; // Generic integer registers - 64 bits wide -let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in -foreach I = 0-63 in - def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)], - ["s"#I]>, DwarfRegNum<[I]>; +let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in { + // Several registers have specific names, so add them to one of aliases. + def SX8 : VEReg<8, "s8", [SW8, SF8], ["s8", "sl"]>, DwarfRegNum<[8]>; + def SX9 : VEReg<9, "s9", [SW9, SF9], ["s9", "fp"]>, DwarfRegNum<[9]>; + def SX10 : VEReg<10, "s10", [SW10, SF10], ["s10", "lr"]>, DwarfRegNum<[10]>; + def SX11 : VEReg<11, "s11", [SW11, SF11], ["s11", "sp"]>, DwarfRegNum<[11]>; + def SX14 : VEReg<14, "s14", [SW14, SF14], ["s14", "tp"]>, DwarfRegNum<[14]>; + def SX15 : VEReg<15, "s15", [SW15, SF15], ["s15", "got"]>, DwarfRegNum<[15]>; + def SX16 : VEReg<16, "s16", [SW16, SF16], ["s16", "plt"]>, DwarfRegNum<[16]>; + + // Other generic registers. + foreach I = { 0-7, 12-13, 17-63 } in + def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)], + ["s"#I]>, DwarfRegNum<[I]>; +} // Aliases of the S* registers used to hold 128-bit for values (long doubles). // Following foreach represents something like: @@ -112,20 +140,31 @@ foreach I = 0-31 in !cast<VEReg>("SX"#!add(!shl(I,1),1))], ["s"#!shl(I,1)]>; +// Vector registers - 64 bits wide 256 elements +foreach I = 0-63 in + def V#I : VEVecReg<I, "v"#I, [], ["v"#I]>, DwarfRegNum<[!add(64,I)]>; + +// Vector Index Register +def VIX : VEVecReg<255, "vix", [], ["vix"]>; + +// Vector mask registers - 256 bits wide +foreach I = 0-15 in + def VM#I : VEMaskReg<I, "vm"#I, [], ["vm"#I]>, DwarfRegNum<[!add(128,I)]>; + +// Aliases of VMs to use as a pair of two VM for packed instructions +let SubRegIndices = [sub_vm_even, sub_vm_odd], CoveredBySubRegs = 1 in +foreach I = 0-7 in + def VMP#I : VEMaskReg<!shl(I,1), "vmp"#I, + [!cast<VEMaskReg>("VM"#!shl(I,1)), + !cast<VEMaskReg>("VM"#!add(!shl(I,1),1))], + ["vm"#!shl(I,1)]>; + } // RegAltNameIndices = [AsmName] // Register classes. // // The register order is defined in terms of the preferred // allocation order. 
-def I8 : RegisterClass<"VE", [i8], 8, - (add (sequence "SB%u", 0, 7), - (sequence "SB%u", 34, 63), - (sequence "SB%u", 8, 33))>; -def I16 : RegisterClass<"VE", [i16], 16, - (add (sequence "SH%u", 0, 7), - (sequence "SH%u", 34, 63), - (sequence "SH%u", 8, 33))>; def I32 : RegisterClass<"VE", [i32], 32, (add (sequence "SW%u", 0, 7), (sequence "SW%u", 34, 63), @@ -142,3 +181,14 @@ def F128 : RegisterClass<"VE", [f128], 128, (add (sequence "Q%u", 0, 3), (sequence "Q%u", 17, 31), (sequence "Q%u", 4, 16))>; + +def V64 : RegisterClass<"VE", + [v256f64, // default type for vector registers + v512i32, v512f32, + v256i64, v256i32, v256f32, /* v256f64, */], 64, + (add (sequence "V%u", 0, 63), + VIX)>; + +// vm0 is reserved for always true +def VM : RegisterClass<"VE", [v256i1], 64, (sequence "VM%u", 0, 15)>; +def VM512 : RegisterClass<"VE", [v512i1], 64, (sequence "VMP%u", 0, 7)>; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp index a0b78d95e3cf..daa6cfb8aa84 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp @@ -27,73 +27,35 @@ void VESubtarget::anchor() {} VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + // Default feature settings + EnableVPU = false; + // Determine default and user specified characteristics std::string CPUName = std::string(CPU); if (CPUName.empty()) - CPUName = "ve"; + CPUName = "generic"; // Parse features string. - ParseSubtargetFeatures(CPUName, FS); + ParseSubtargetFeatures(CPUName, /*TuneCPU=*/CPU, FS); return *this; } VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : VEGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering(*this) {} -int VESubtarget::getAdjustedFrameSize(int frameSize) const { - - // VE stack frame: - // - // +----------------------------------------+ - // | Locals and temporaries | - // +----------------------------------------+ - // | Parameter area for callee | - // 176(fp) | | - // +----------------------------------------+ - // | Register save area (RSA) for callee | - // | | - // 16(fp) | 20 * 8 bytes | - // +----------------------------------------+ - // 8(fp) | Return address | - // +----------------------------------------+ - // 0(fp) | Frame pointer of caller | - // --------+----------------------------------------+-------- - // | Locals and temporaries for callee | - // +----------------------------------------+ - // | Parameter area for callee of callee | - // +----------------------------------------+ - // 16(sp) | RSA for callee of callee | - // +----------------------------------------+ - // 8(sp) | Return address | - // +----------------------------------------+ - // 0(sp) | Frame pointer of callee | - // +----------------------------------------+ - - // RSA frame: - // +----------------------------------------------+ - // 168(fp) | %s33 | - // +----------------------------------------------+ - // | %s19...%s32 | - // +----------------------------------------------+ - // 48(fp) | %s18 | - // +----------------------------------------------+ - // 40(fp) | Linkage area register (%s17) | - // +----------------------------------------------+ - // 32(fp) | Procedure linkage table register (%plt=%s16) | - // 
+----------------------------------------------+ - // 24(fp) | Global offset table register (%got=%s15) | - // +----------------------------------------------+ - // 16(fp) | Thread pointer register (%tp=%s14) | - // +----------------------------------------------+ +uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const { + // Calculate adjusted frame size by adding the size of RSA frame, + // return address, and frame poitner as described in VEFrameLowering.cpp. + const VEFrameLowering *TFL = getFrameLowering(); - frameSize += 176; // for RSA, RA, and FP - frameSize = alignTo(frameSize, 16); // requires 16 bytes alignment + FrameSize += getRsaSize(); + FrameSize = alignTo(FrameSize, TFL->getStackAlign()); - return frameSize; + return FrameSize; } bool VESubtarget::enableMachineScheduler() const { return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h index f3a2c206162e..213aca2ea3f9 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h @@ -32,6 +32,13 @@ class VESubtarget : public VEGenSubtargetInfo { Triple TargetTriple; virtual void anchor(); + /// Features { + + // Emit VPU instructions + bool EnableVPU; + + /// } Features + VEInstrInfo InstrInfo; VETargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; @@ -55,15 +62,21 @@ public: bool enableMachineScheduler() const override; + bool enableVPU() const { return EnableVPU; } + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); VESubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); /// Given a actual stack size as determined by FrameInfo, this function - /// returns adjusted framesize which includes space for register window - /// spills and arguments. - int getAdjustedFrameSize(int stackSize) const; + /// returns adjusted framesize which includes space for RSA, return + /// address, and frame poitner. + uint64_t getAdjustedFrameSize(uint64_t FrameSize) const; + + /// Get the size of RSA, return address, and frame pointer as described + /// in VEFrameLowering.cpp. + unsigned getRsaSize(void) const { return 176; }; bool isTargetLinux() const { return TargetTriple.isOSLinux(); } }; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp index 08b55eebbc98..414ae09431c0 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp @@ -23,7 +23,7 @@ using namespace llvm; #define DEBUG_TYPE "ve" -extern "C" void LLVMInitializeVETarget() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETarget() { // Register the target. RegisterTargetMachine<VETargetMachine> X(getTheVETarget()); } @@ -44,13 +44,24 @@ static std::string computeDataLayout(const Triple &T) { // Stack alignment is 128 bits Ret += "-S128"; + // Vector alignments are 64 bits + // Need to define all of them. Otherwise, each alignment becomes + // the size of each data by default. 
+ Ret += "-v64:64:64"; // for v2f32 + Ret += "-v128:64:64"; + Ret += "-v256:64:64"; + Ret += "-v512:64:64"; + Ret += "-v1024:64:64"; + Ret += "-v2048:64:64"; + Ret += "-v4096:64:64"; + Ret += "-v8192:64:64"; + Ret += "-v16384:64:64"; // for v256f64 + return Ret; } static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { - if (!RM.hasValue()) - return Reloc::Static; - return *RM; + return RM.getValueOr(Reloc::Static); } class VEELFTargetObjectFile : public TargetLoweringObjectFileELF { @@ -96,7 +107,9 @@ public: return getTM<VETargetMachine>(); } + void addIRPasses() override; bool addInstSelector() override; + void addPreEmitPass() override; }; } // namespace @@ -104,7 +117,18 @@ TargetPassConfig *VETargetMachine::createPassConfig(PassManagerBase &PM) { return new VEPassConfig(*this, PM); } +void VEPassConfig::addIRPasses() { + // VE requires atomic expand pass. + addPass(createAtomicExpandPass()); + TargetPassConfig::addIRPasses(); +} + bool VEPassConfig::addInstSelector() { addPass(createVEISelDag(getVETargetMachine())); return false; } + +void VEPassConfig::addPreEmitPass() { + // LVLGen should be called after scheduling and register allocation + addPass(createLVLGenPass()); +} diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h index c267c4d9a578..68af66597485 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetTransformInfo.h @@ -33,16 +33,35 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> { const VESubtarget *getST() const { return ST; } const VETargetLowering *getTLI() const { return TLI; } + bool enableVPU() const { return getST()->enableVPU(); } + public: explicit VETTIImpl(const VETargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; } + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool VectorRegs = (ClassID == 1); + if (VectorRegs) { + // TODO report vregs once vector isel is stable. + return 0; + } + + return 64; + } - unsigned getRegisterBitWidth(bool Vector) const { return 64; } + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + // TODO report vregs once vector isel is stable. + return 0; + } + return 64; + } - unsigned getMinVectorRegisterBitWidth() const { return 64; } + unsigned getMinVectorRegisterBitWidth() const { + // TODO report vregs once vector isel is stable. + return 0; + } }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td new file mode 100644 index 000000000000..2c88d5099a7b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrInfo.td @@ -0,0 +1,46 @@ +//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP +// SDNodes are an intermediate isel layer between the vector SDNodes emitted by +// LLVM and the actual VE vector instructions. 
For example: +// +// ADD(x,y) --> VVP_ADD(x,y,mask,evl) --> VADDSWSXrvml(x,y,mask,evl) +// ^ ^ ^ +// The standard The VVP layer SDNode. The VE vector instruction. +// SDNode. +// +// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is uptream. +//===----------------------------------------------------------------------===// + +// Binary Operators { + +// BinaryOp(x,y,mask,vl) +def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc. + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>, + SDTCisSameNumEltsAs<0, 3>, + IsVLVT<4> +]>; + +// Binary operator commutative pattern. +class vvp_commutative<SDNode RootOp> : + PatFrags< + (ops node:$lhs, node:$rhs, node:$mask, node:$vlen), + [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen), + (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>; + +// VVP node definitions. +def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>; +def c_vvp_add : vvp_commutative<vvp_add>; + +def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>; +def c_vvp_and : vvp_commutative<vvp_and>; + +// } Binary Operators diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td new file mode 100644 index 000000000000..7003fb387670 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPInstrPatternsVec.td @@ -0,0 +1,71 @@ +//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes how VVP_* SDNodes are lowered to machine instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// VVP SDNode definitions. +// +//===----------------------------------------------------------------------===// +include "VVPInstrInfo.td" + +multiclass VectorBinaryArith< + SDPatternOperator OpNode, + ValueType ScalarVT, ValueType DataVT, ValueType MaskVT, + string OpBaseName, + SDPatternOperator ImmOp, SDNodeXForm ImmCast> { + // No mask. + def : Pat<(OpNode + (any_broadcast ScalarVT:$sx), + DataVT:$vy, (MaskVT true_mask), i32:$avl), + (!cast<Instruction>(OpBaseName#"rvl") + ScalarVT:$sx, $vy, $avl)>; + def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl), + (!cast<Instruction>(OpBaseName#"vvl") + $vx, $vy, $avl)>; + + // Mask. + def : Pat<(OpNode + (any_broadcast ScalarVT:$sx), + DataVT:$vy, MaskVT:$mask, i32:$avl), + (!cast<Instruction>(OpBaseName#"rvml") + ScalarVT:$sx, $vy, $mask, $avl)>; + def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl), + (!cast<Instruction>(OpBaseName#"vvml") + $vx, $vy, $mask, $avl)>; + + // TODO We do not specify patterns for the immediate variants here. There + // will be an immediate folding pass that takes care of switching to the + // immediate variant where applicable. + + // TODO Fold vvp_select into passthru. 
+} + +// Expand both 64bit and 32 bit variant (256 elements) +multiclass VectorBinaryArith_ShortLong< + SDPatternOperator OpNode, + ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName, + ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> { + defm : VectorBinaryArith<OpNode, + LongScalarVT, LongDataVT, v256i1, + LongOpBaseName, simm7, LO7>; + defm : VectorBinaryArith<OpNode, + ShortScalarVT, ShortDataVT, v256i1, + ShortOpBaseName, simm7, LO7>; +} + + +defm : VectorBinaryArith_ShortLong<c_vvp_add, + i64, v256i64, "VADDSL", + i32, v256i32, "VADDSWSX">; +defm : VectorBinaryArith_ShortLong<c_vvp_and, + i64, v256i64, "VAND", + i32, v256i32, "PVANDLO">; diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def new file mode 100644 index 000000000000..a68402e9ea10 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/VE/VVPNodes.def @@ -0,0 +1,41 @@ +//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all VVP_* SDNodes and their properties +// +//===----------------------------------------------------------------------===// + +/// HANDLE_VP_TO_VVP(VPOPC, VVPOPC) +/// \p VPOPC is the VP_* SDNode opcode. +/// \p VVPOPC is the VVP_* SDNode opcode. +#ifndef HANDLE_VP_TO_VVP +#define HANDLE_VP_TO_VVP(VPOPC, VVPOPC) +#endif + +/// ADD_VVP_OP(VVPNAME,SDNAME) +/// \p VVPName is a VVP SDNode operator. +/// \p SDNAME is the generic SD opcode corresponding to \p VVPName. +#ifndef ADD_VVP_OP +#define ADD_VVP_OP(X, Y) +#endif + +/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME) +/// \p VVPName is a VVP Binary operator. +/// \p SDNAME is the generic SD opcode corresponding to \p VVPName. +#ifndef ADD_BINARY_VVP_OP +#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X) +#endif + +// Integer arithmetic. +ADD_BINARY_VVP_OP(VVP_ADD,ADD) + +ADD_BINARY_VVP_OP(VVP_AND,AND) + +#undef HANDLE_VP_TO_VVP +#undef ADD_BINARY_VVP_OP +#undef ADD_VVP_OP |
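The VVPNodes.def table added above follows LLVM's usual X-macro convention: a consumer defines whichever of HANDLE_VP_TO_VVP / ADD_VVP_OP / ADD_BINARY_VVP_OP it cares about and then includes the file; the defaults expand the binary entries through ADD_VVP_OP, and the trailing #undef lines reset the macros afterwards. The fragment below is a minimal sketch of that consumption pattern, not a quote of the VE sources; the VEISD enum layout and the getVVPOpcode helper name are assumptions made only for illustration.

    // Sketch only: conventional consumption of an X-macro table such as
    // VVPNodes.def. Names below are illustrative, not the actual VE backend.
    #include "llvm/ADT/Optional.h"
    #include "llvm/CodeGen/ISDOpcodes.h"

    namespace VEISD {
    enum NodeType : unsigned {
      FIRST_NUMBER = llvm::ISD::BUILTIN_OP_END,
      // Emit one enumerator per VVP operator in the table.
    #define ADD_VVP_OP(VVP_NAME, SD_NAME) VVP_NAME,
    #include "VVPNodes.def" // expands to: VVP_ADD, VVP_AND,
    };
    } // namespace VEISD

    // Map a generic SDNode opcode to its VVP counterpart, if one exists.
    static llvm::Optional<unsigned> getVVPOpcode(unsigned Opcode) {
      switch (Opcode) {
        // Emit one case per (VVP opcode, generic SD opcode) pair.
    #define ADD_VVP_OP(VVP_NAME, SD_NAME)                                      \
      case llvm::ISD::SD_NAME:                                                 \
        return VEISD::VVP_NAME;
    #include "VVPNodes.def"
      }
      return llvm::None; // No VVP counterpart for this opcode.
    }

Under these assumptions, lowering code can call getVVPOpcode(ISD::ADD) and, when it yields a value, rebuild the node as the corresponding VVP_* operator with an added mask and vector-length operand, matching the ADD(x,y) -> VVP_ADD(x,y,mask,evl) flow described in the VVPInstrInfo.td header.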