Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmInstrumentation.h | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86Operand.h | 16
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 10
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 39
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 2
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 32
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 12
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 13
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86FixupKinds.h | 4
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 4
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp | 16
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 2
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 2
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h | 2
-rw-r--r--  lib/Target/X86/X86.h | 2
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 69
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 2
-rw-r--r--  lib/Target/X86/X86CallFrameOptimization.cpp | 119
-rw-r--r--  lib/Target/X86/X86CallingConv.h | 2
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 7
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 2
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 45
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 708
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 17
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 794
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 2
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 16
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 16
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 58
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 352
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 4
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 10
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 9
-rw-r--r--  lib/Target/X86/X86IntrinsicsInfo.h | 355
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 10
-rw-r--r--  lib/Target/X86/X86MachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/X86/X86PadShortFunction.cpp | 2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 24
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 13
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 15
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 2
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 64
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 15
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.h | 2
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 2
-rw-r--r--  lib/Target/X86/X86WinEHState.cpp | 17
55 files changed, 1741 insertions(+), 1194 deletions(-)
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 6ba897b8636d..9eee4a0f3d82 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1080,4 +1080,4 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
return new X86AsmInstrumentation(STI);
}
-} // namespace llvm
+} // End llvm namespace
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 341fc81c0480..19ebcc44f61e 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -61,6 +61,6 @@ protected:
unsigned InitialFrameReg;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index b3066efbab24..7ec02408ffa4 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -238,18 +238,34 @@ struct X86Operand : public MCParsedAsmOperand {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
+ bool isMemVX32X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
+ }
bool isMemVY32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
+ bool isMemVY32X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
+ }
bool isMemVX64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
+ bool isMemVX64X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
+ }
bool isMemVY64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
+ bool isMemVY64X() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+ getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
+ }
bool isMemVZ32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
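A minimal sketch, with assumed register numbering rather than the real LLVM enums, of why these new *X predicates exist: EVEX-encoded (AVX-512) forms can use vector index registers 0-31, while the older VEX-only forms stop at 15.

#include <cassert>

// Hypothetical helper: the upper bound on the VSIB index register number
// depends on whether the instruction has an EVEX encoding.
static bool isValidVsibIndex(unsigned RegIdx, bool HasEVEX) {
  return RegIdx <= (HasEVEX ? 31u : 15u);
}

int main() {
  assert(isValidVsibIndex(15, /*HasEVEX=*/false));
  assert(!isValidVsibIndex(31, /*HasEVEX=*/false));
  assert(isValidVsibIndex(31, /*HasEVEX=*/true));
  return 0;
}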
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 5b53fbef3f71..cfc3ee2fb08f 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -69,7 +69,7 @@ namespace X86 {
extern Target TheX86_32Target, TheX86_64Target;
-} // namespace llvm
+}
static bool translateInstruction(MCInst &target,
InternalInstruction &source,
@@ -551,9 +551,15 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if(immediate & 0x80)
+ if (immediate & 0x80)
immediate |= ~(0xffull);
break;
+ case TYPE_REL16:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ if (immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
case TYPE_REL32:
case TYPE_REL64:
isBranch = true;
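A standalone sketch (not taken from the patch) of the sign-extension idiom the new TYPE_REL16 case uses: when the 16-bit immediate's sign bit is set, OR in every bit above bit 15 so the widened value keeps its signed meaning.

#include <cstdint>
#include <cstdio>

// Widen a 16-bit PC-relative displacement to 64 bits, preserving its sign.
static uint64_t signExtend16(uint64_t immediate) {
  if (immediate & 0x8000)      // sign bit of the 16-bit field
    immediate |= ~(0xffffull); // set bits 16..63
  return immediate;
}

int main() {
  // 0xfffe is -2 as a 16-bit value; it must still read as -2 once widened.
  printf("%lld\n", (long long)signExtend16(0xfffe)); // prints -2
  return 0;
}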
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index d990bf3484bf..f73fa75f888e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1165,35 +1165,30 @@ static int readSIB(struct InternalInstruction* insn) {
return -1;
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ // FIXME: The fifth bit (bit index 4) is only to be used for instructions
+ // that understand VSIB indexing. ORing the bit in here is mildly dangerous
+ // because performing math on an 'enum SIBIndex' can produce garbage.
+ // Excluding the "none" value, it should cover 6 spaces of register names:
+ // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI
+ // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX
+ // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX
+ // - 32 possibilities for each of XMM, YMM, ZMM registers
+ // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode,
+ // summing in a fully decoded index between 0 and 31 can end up with a value
+ // that looks like something in the low half of the XMM range.
+ // translateRMMemory() tries to reverse the damage, with only partial success,
+ // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt"
if (insn->vectorExtensionType == TYPE_EVEX)
index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
- switch (index) {
- case 0x4:
+ if (index == 0x4) {
insn->sibIndex = SIB_INDEX_NONE;
- break;
- default:
+ } else {
insn->sibIndex = (SIBIndex)(sibIndexBase + index);
- if (insn->sibIndex == SIB_INDEX_sib ||
- insn->sibIndex == SIB_INDEX_sib64)
- insn->sibIndex = SIB_INDEX_NONE;
- break;
}
- switch (scaleFromSIB(insn->sib)) {
- case 0:
- insn->sibScale = 1;
- break;
- case 1:
- insn->sibScale = 2;
- break;
- case 2:
- insn->sibScale = 4;
- break;
- case 3:
- insn->sibScale = 8;
- break;
- }
+ insn->sibScale = 1 << scaleFromSIB(insn->sib);
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
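The scale switch collapses to a shift because the SIB byte's 2-bit scale field is the log2 of the actual scale. A standalone sketch with the field extraction written out by hand (not using the decoder's accessors):

#include <cassert>
#include <cstdint>

static int decodeSIBScale(uint8_t sib) {
  uint8_t field = (sib >> 6) & 0x3; // scale lives in bits 7:6 of the SIB byte
  return 1 << field;                // 0,1,2,3 -> 1,2,4,8
}

int main() {
  assert(decodeSIBScale(0x00) == 1);
  assert(decodeSIBScale(0x40) == 2);
  assert(decodeSIBScale(0x80) == 4);
  assert(decodeSIBScale(0xC0) == 8);
  return 0;
}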
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index ac484f317276..62b6b73e7864 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -140,6 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 2bee518fed68..6e371da37290 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -159,6 +159,6 @@ public:
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 2d85f84d6669..3e0dc1424609 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -29,13 +29,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// Option to allow disabling arithmetic relaxation to workaround PR9807, which
-// is useful when running bitwise comparison experiments on Darwin. We should be
-// able to remove this once PR9807 is resolved.
-static cl::opt<bool>
-MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
- cl::desc("Disable relaxation of arithmetic instruction for X86"));
-
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default:
@@ -243,29 +236,18 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
- if (MCDisableArithRelaxation)
- return false;
-
// Check if this instruction is ever relaxable.
if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
- // Check if it has an expression and is not RIP relative.
- bool hasExp = false;
- bool hasRIP = false;
- for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
- const MCOperand &Op = Inst.getOperand(i);
- if (Op.isExpr())
- hasExp = true;
-
- if (Op.isReg() && Op.getReg() == X86::RIP)
- hasRIP = true;
- }
+ // Check if the relaxable operand has an expression. For the current set of
+ // relaxable instructions, the relaxable operand is always the last operand.
+ unsigned RelaxableOp = Inst.getNumOperands() - 1;
+ if (Inst.getOperand(RelaxableOp).isExpr())
+ return true;
- // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
- // how we do relaxations?
- return hasExp && !hasRIP;
+ return false;
}
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
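A toy model (invented Operand/Inst shapes, not the real MC classes) of the simplified check above: for the current set of relaxable arithmetic forms the immediate is always the last operand, so only it can hold an unresolved expression that forces relaxation.

#include <vector>

struct Operand { bool IsExpr; };
struct Inst { std::vector<Operand> Ops; };

static bool mayNeedRelaxation(const Inst &I) {
  // Only the trailing (immediate) operand can carry a fixup expression.
  return !I.Ops.empty() && I.Ops.back().IsExpr;
}

int main() {
  Inst CmpImm{{{false}, {false}, {true}}}; // e.g. cmp $sym, %eax
  return mayNeedRelaxation(CmpImm) ? 0 : 1;
}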
@@ -426,7 +408,7 @@ namespace CU {
UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};
-} // namespace CU
+} // end CU namespace
class DarwinX86AsmBackend : public X86AsmBackend {
const MCRegisterInfo &MRI;
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 69e9c7b4a83e..f0d00b0c1bc3 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,7 +41,7 @@ namespace X86 {
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-} // namespace X86
+} // end namespace X86;
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -213,11 +213,7 @@ namespace X86II {
/// the offset from beginning of section.
///
/// This is the TLS offset for the COFF/Windows TLS mechanism.
- MO_SECREL,
-
- /// MO_NOPREFIX - On a symbol operand this indicates that the symbol should
- /// not be mangled with a prefix.
- MO_NOPREFIX,
+ MO_SECREL
};
enum : uint64_t {
@@ -762,8 +758,8 @@ namespace X86II {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-} // namespace X86II
+}
-} // namespace llvm
+} // end namespace llvm;
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 512afebf482e..a33468dc4769 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -28,7 +28,7 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
-} // namespace
+}
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 7c09e5d59580..89f394582631 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -26,14 +26,17 @@ public:
X86_64ELFRelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
const MCExpr *createExprForRelocation(RelocationRef Rel) override {
- uint64_t RelType; Rel.getType(RelType);
- symbol_iterator SymI = Rel.getSymbol();
+ uint64_t RelType = Rel.getType();
+ elf_symbol_iterator SymI = Rel.getSymbol();
+
+ ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+ if (std::error_code EC = SymNameOrErr.getError())
+ report_fatal_error(EC.message());
+ StringRef SymName = *SymNameOrErr;
- StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
uint64_t SymSize = SymI->getSize();
- auto *Obj = cast<ELFObjectFileBase>(Rel.getObjectFile());
- int64_t Addend = *Obj->getRelocationAddend(Rel.getRawDataRefImpl());
+ int64_t Addend = *ELFRelocationRef(Rel).getAddend();
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index a523a32b2a2d..4899900dcef9 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-} // namespace X86
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 020803b57f76..6221baba1793 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -62,7 +62,7 @@ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
/// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
StringRef FS);
-} // namespace X86_MC
+}
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -98,7 +98,7 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
-} // namespace llvm
+} // End llvm namespace
// Defines symbolic names for X86 registers. This defines a mapping from
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index a5aadd6a385e..c9479b62f7b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -25,12 +25,15 @@ public:
X86_64MachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
const MCExpr *createExprForRelocation(RelocationRef Rel) override {
- const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObjectFile());
+ const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObject());
- uint64_t RelType; Rel.getType(RelType);
+ uint64_t RelType = Rel.getType();
symbol_iterator SymI = Rel.getSymbol();
- StringRef SymName; SymI->getName(SymName);
+ ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+ if (std::error_code EC = SymNameOrErr.getError())
+ report_fatal_error(EC.message());
+ StringRef SymName = *SymNameOrErr;
uint64_t SymAddr; SymI->getAddress(SymAddr);
any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
@@ -89,10 +92,11 @@ public:
symbol_iterator RSymI = Rel.getSymbol();
uint64_t RSymAddr;
RSymI->getAddress(RSymAddr);
- StringRef RSymName;
- RSymI->getName(RSymName);
+ ErrorOr<StringRef> RSymName = RSymI->getName();
+ if (std::error_code EC = RSymName.getError())
+ report_fatal_error(EC.message());
- MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName);
+ MCSymbol *RSym = Ctx.getOrCreateSymbol(*RSymName);
if (!RSym->isVariable())
RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx));
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 773fbf41a7b1..9e801fc8f191 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -69,7 +69,7 @@ public:
FixedValue);
}
};
-} // namespace
+}
static bool isFixupKindRIPRel(unsigned Kind) {
return Kind == X86::reloc_riprel_4byte ||
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 7d262cdbf51d..bd1bc9943b6d 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
bool IsCrossSection,
const MCAsmBackend &MAB) const override;
};
-} // namespace
+}
X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index dc6dd66bcd85..92f42b68ae51 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -46,7 +46,7 @@ void X86WinCOFFStreamer::FinishImpl() {
MCWinCOFFStreamer::FinishImpl();
}
-} // namespace
+}
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS,
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1e7d94287c4a..ef3318ba7580 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -431,4 +431,4 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-} // namespace llvm
+} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 0139297fc72d..14b69434806e 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,6 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-} // namespace llvm
+} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 80f457984951..8403ae6101df 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -80,6 +80,6 @@ FunctionPass *createX86WinEHStatePass();
/// must run after prologue/epilogue insertion and before lowering
/// the MachineInstr to MC.
FunctionPass *createX86ExpandPseudoPass();
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 205140144ab5..ba33248d2039 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -581,34 +581,6 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
return AsmPrinter::GetCPISymbol(CPID);
}
-void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
- SmallString<128> Directive;
- raw_svector_ostream OS(Directive);
- StringRef Name = Sym->getName();
- const Triple &TT = TM.getTargetTriple();
-
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << " /EXPORT:";
- else
- OS << " -export:";
-
- if ((TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) &&
- (Name[0] == getDataLayout().getGlobalPrefix()))
- Name = Name.drop_front();
-
- OS << Name;
-
- if (IsData) {
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << ",DATA";
- else
- OS << ",data";
- }
-
- OS.flush();
- OutStreamer->EmitBytes(Directive);
-}
-
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
@@ -692,39 +664,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (TT.isOSBinFormatCOFF()) {
- // Necessary for dllexport support
- std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
- for (const auto &Function : M)
- if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())
- DLLExportedFns.push_back(getSymbol(&Function));
+ std::string Flags;
+ raw_string_ostream FlagsOS(Flags);
+ for (const auto &Function : M)
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function, *Mang);
for (const auto &Global : M.globals())
- if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())
- DLLExportedGlobals.push_back(getSymbol(&Global));
-
- for (const auto &Alias : M.aliases()) {
- if (!Alias.hasDLLExportStorageClass())
- continue;
-
- if (Alias.getType()->getElementType()->isFunctionTy())
- DLLExportedFns.push_back(getSymbol(&Alias));
- else
- DLLExportedGlobals.push_back(getSymbol(&Alias));
- }
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global, *Mang);
+ for (const auto &Alias : M.aliases())
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias, *Mang);
- // Output linker support code for dllexported globals on windows.
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
- const TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+ FlagsOS.flush();
+ // Output collected flags.
+ if (!Flags.empty()) {
OutStreamer->SwitchSection(TLOFCOFF.getDrectveSection());
-
- for (auto & Symbol : DLLExportedGlobals)
- GenerateExportDirective(Symbol, /*IsData=*/true);
- for (auto & Symbol : DLLExportedFns)
- GenerateExportDirective(Symbol, /*IsData=*/false);
+ OutStreamer->EmitBytes(Flags);
}
+
+ SM.serializeToStackMapSection();
}
if (TT.isOSBinFormatELF()) {
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index acba21169c9c..7f5d127c68d5 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -30,8 +30,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
StackMaps SM;
FaultMaps FM;
- void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
-
// This utility class tracks the length of a stackmap instruction's 'shadow'.
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
// invariants (i.e. no other stackmaps, patchpoints, or control flow within
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 6d6831b18b0a..031ba4ba9e66 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -78,7 +78,7 @@ private:
typedef DenseMap<MachineInstr *, CallContext> ContextMap;
bool isLegal(MachineFunction &MF);
-
+
bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap);
void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -90,6 +90,13 @@ private:
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
unsigned Reg);
+ enum InstClassification { Convert, Skip, Exit };
+
+ InstClassification classifyInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const X86RegisterInfo &RegInfo,
+ DenseSet<unsigned int> &UsedRegs);
+
const char *getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII;
@@ -99,13 +106,13 @@ private:
};
char X86CallFrameOptimization::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86CallFrameOptimization() {
return new X86CallFrameOptimization();
}
-// This checks whether the transformation is legal.
+// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
// we don't even want to try.
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
@@ -170,9 +177,8 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
if (!OptForSize)
return false;
-
unsigned StackAlign = TFL->getStackAlignment();
-
+
int64_t Advantage = 0;
for (auto CC : CallSeqMap) {
// Call sites where no parameters are passed on the stack
@@ -205,7 +211,6 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
return (Advantage >= 0);
}
-
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TFL = MF.getSubtarget().getFrameLowering();
@@ -237,6 +242,64 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+X86CallFrameOptimization::InstClassification
+X86CallFrameOptimization::classifyInstruction(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
+ if (MI == MBB.end())
+ return Exit;
+
+ // The instructions we actually care about are movs onto the stack
+ int Opcode = MI->getOpcode();
+ if (Opcode == X86::MOV32mi || Opcode == X86::MOV32mr)
+ return Convert;
+
+ // Not all calling conventions have only stack MOVs between the stack
+ // adjust and the call.
+
+ // We want to tolerate other instructions, to cover more cases.
+ // In particular:
+ // a) PCrel calls, where we expect an additional COPY of the basereg.
+ // b) Passing frame-index addresses.
+ // c) Calling conventions that have inreg parameters. These generate
+ // both copies and movs into registers.
+ // To avoid creating lots of special cases, allow any instruction
+ // that does not write into memory, does not def or use the stack
+ // pointer, and does not def any register that was used by a preceding
+ // push.
+ // (Reading from memory is allowed, even if referenced through a
+ // frame index, since these will get adjusted properly in PEI)
+
+ // The reason for the last condition is that the pushes can't replace
+ // the movs in place, because the order must be reversed.
+ // So if we have a MOV32mr that uses EDX, then an instruction that defs
+ // EDX, and then the call, after the transformation the push will use
+ // the modified version of EDX, and not the original one.
+ // Since we are still in SSA form at this point, we only need to
+ // make sure we don't clobber any *physical* registers that were
+ // used by an earlier mov that will become a push.
+
+ if (MI->isCall() || MI->mayStore())
+ return Exit;
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned int Reg = MO.getReg();
+ if (!RegInfo.isPhysicalRegister(Reg))
+ continue;
+ if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
+ return Exit;
+ if (MO.isDef()) {
+ for (unsigned int U : UsedRegs)
+ if (RegInfo.regsOverlap(Reg, U))
+ return Exit;
+ }
+ }
+
+ return Skip;
+}
+
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
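A toy reduction (hypothetical names) of the clobber rule spelled out in the classifyInstruction comments above: an instruction that defines a register still needed by a collected stack MOV must end the scan, because once that MOV becomes a PUSH it would read the clobbered value.

#include <set>

enum Classification { Convert, Skip, Exit };

static Classification classify(bool MayStore, const std::set<unsigned> &Defs,
                               const std::set<unsigned> &UsedByMovs) {
  if (MayStore)
    return Exit; // could overwrite the stack slots being rewritten
  for (unsigned R : Defs)
    if (UsedByMovs.count(R))
      return Exit; // a pending push still needs the old value of R
  return Skip;
}

int main() {
  std::set<unsigned> Defs = {2}, Used = {2}; // same register, e.g. EDX
  return classify(false, Defs, Used) == Exit ? 0 : 1;
}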
@@ -254,8 +317,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
- unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
-
+ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
+
// A zero adjustment means no stack parameters
if (!MaxAdjust) {
Context.NoStackParams = true;
@@ -284,11 +347,17 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
if (MaxAdjust > 4)
Context.MovVector.resize(MaxAdjust, nullptr);
- do {
- int Opcode = I->getOpcode();
- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
- break;
+ InstClassification Classification;
+ DenseSet<unsigned int> UsedRegs;
+ while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
+ Exit) {
+ if (Classification == Skip) {
+ ++I;
+ continue;
+ }
+
+ // We know the instruction is a MOV32mi/MOV32mr.
// We only want movs of the form:
// movl imm/r32, k(%esp)
// If we run into something else, bail.
@@ -323,24 +392,20 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
return;
Context.MovVector[StackDisp] = I;
- ++I;
- } while (I != MBB.end());
-
- // We now expect the end of the sequence - a call and a stack adjust.
- if (I == MBB.end())
- return;
+ for (const MachineOperand &MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ unsigned int Reg = MO.getReg();
+ if (RegInfo.isPhysicalRegister(Reg))
+ UsedRegs.insert(Reg);
+ }
- // For PCrel calls, we expect an additional COPY of the basereg.
- // If we find one, skip it.
- if (I->isCopy()) {
- if (I->getOperand(1).getReg() ==
- MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
- ++I;
- else
- return;
+ ++I;
}
- if (!I->isCall())
+ // We now expect the end of the sequence. If we stopped early,
+ // or reached the end of the block without finding a call, bail.
+ if (I == MBB.end() || !I->isCall())
return;
Context.Call = I;
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index a377eb6051ae..0eb2494f1d63 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -42,7 +42,7 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3dc75d76cee3..02645460b6a2 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -2821,7 +2822,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool &IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- const char *SymName = CLI.SymName;
+ MCSymbol *Symbol = CLI.Symbol;
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CC);
@@ -3117,8 +3118,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
}
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
- if (SymName)
- MIB.addExternalSymbol(SymName, OpFlags);
+ if (Symbol)
+ MIB.addSym(Symbol, OpFlags);
else
MIB.addGlobalAddress(GV, 0, OpFlags);
}
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 8305a0454c80..5eb4faeedff4 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -91,7 +91,7 @@ private:
const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
-} // namespace
+}
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 6f1d8e523732..40b9c8a863a3 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -279,7 +279,7 @@ namespace {
void setKillFlags(MachineBasicBlock &MBB) const;
};
char FPS::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
@@ -544,7 +544,7 @@ namespace {
return V < TE.from;
}
};
-} // namespace
+}
#ifndef NDEBUG
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
@@ -1530,7 +1530,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
if (Op.isKill())
moveToTop(FPReg, Inst);
else
- duplicateToTop(FPReg, FPReg, Inst);
+ duplicateToTop(FPReg, ScratchFPReg, Inst);
// Emit the call. This will pop the operand.
BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 2858e86cd0e0..c274c8820149 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -153,6 +153,6 @@ private:
bool InEpilogue) const;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index f6785e161188..6b23e62a2d35 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -67,19 +67,19 @@ namespace {
const Constant *CP;
const BlockAddress *BlockAddr;
const char *ES;
+ MCSymbol *MCSym;
int JT;
unsigned Align; // CP alignment.
unsigned char SymbolFlags; // X86II::MO_*
X86ISelAddressMode()
- : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
- JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
- }
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
+ MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
bool hasSymbolicDisplacement() const {
return GV != nullptr || CP != nullptr || ES != nullptr ||
- JT != -1 || BlockAddr != nullptr;
+ MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
}
bool hasBaseOrIndexReg() const {
@@ -134,11 +134,16 @@ namespace {
dbgs() << ES;
else
dbgs() << "nul";
+ dbgs() << " MCSym ";
+ if (MCSym)
+ dbgs() << MCSym;
+ else
+ dbgs() << "nul";
dbgs() << " JT" << JT << " Align" << Align << '\n';
}
#endif
};
-} // namespace
+}
namespace {
//===--------------------------------------------------------------------===//
@@ -258,6 +263,10 @@ namespace {
else if (AM.ES) {
assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+ } else if (AM.MCSym) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
+ assert(AM.SymbolFlags == 0 && "oo");
+ Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
} else if (AM.JT != -1) {
assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
@@ -310,7 +319,7 @@ namespace {
return true;
}
};
-} // namespace
+}
bool
@@ -604,7 +613,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
// Cannot combine ExternalSymbol displacements with integer offsets.
- if (Offset != 0 && AM.ES)
+ if (Offset != 0 && (AM.ES || AM.MCSym))
return true;
int64_t Val = AM.Disp + Offset;
CodeModel::Model M = TM.getCodeModel();
@@ -690,6 +699,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
+ } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
+ AM.MCSym = S->getMCSymbol();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
@@ -728,6 +739,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
+ } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
+ AM.MCSym = S->getMCSymbol();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
@@ -1001,7 +1014,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// FIXME: JumpTable and ExternalSymbol address currently don't like
// displacements. It isn't very important, but this should be fixed for
// consistency.
- if (!AM.ES && AM.JT != -1) return true;
+ if (!(AM.ES || AM.MCSym) && AM.JT != -1)
+ return true;
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
@@ -1013,13 +1027,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
default: break;
case ISD::FRAME_ALLOC_RECOVER: {
if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
- if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0)))
- if (ESNode->getOpcode() == ISD::TargetExternalSymbol) {
- // Use the symbol and don't prefix it.
- AM.ES = ESNode->getSymbol();
- AM.SymbolFlags = X86II::MO_NOPREFIX;
- return false;
- }
+ if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
+ // Use the symbol and don't prefix it.
+ AM.MCSym = ESNode->getMCSymbol();
+ return false;
+ }
break;
}
case ISD::Constant: {
@@ -1473,6 +1485,7 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
N->getOpcode() != ISD::TargetJumpTable &&
N->getOpcode() != ISD::TargetGlobalAddress &&
N->getOpcode() != ISD::TargetExternalSymbol &&
+ N->getOpcode() != ISD::MCSymbol &&
N->getOpcode() != ISD::TargetBlockAddress)
return false;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ce1ca20ee81a..b16bd18aefaa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1111,7 +1111,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
- if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
@@ -6259,42 +6259,6 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return true;
}
-/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane.
-///
-/// This checks a shuffle mask to see if it is performing the same
-/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies
-/// that it is also not lane-crossing. It may however involve a blend from the
-/// same lane of a second vector.
-///
-/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
-/// non-trivial to compute in the face of undef lanes. The representation is
-/// *not* suitable for use with existing 256-bit shuffles as it will contain
-/// entries from both V1 and V2 inputs to the wider mask.
-static bool
-is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
- SmallVectorImpl<int> &RepeatedMask) {
- int LaneSize = 256 / VT.getScalarSizeInBits();
- RepeatedMask.resize(LaneSize, -1);
- int Size = Mask.size();
- for (int i = 0; i < Size; ++i) {
- if (Mask[i] < 0)
- continue;
- if ((Mask[i] % Size) / LaneSize != i / LaneSize)
- // This entry crosses lanes, so there is no way to model this shuffle.
- return false;
-
- // Ok, handle the in-lane shuffles by detecting if and when they repeat.
- if (RepeatedMask[i % LaneSize] == -1)
- // This is the first non-undef entry in this slot of a 256-bit lane.
- RepeatedMask[i % LaneSize] =
- Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
- else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
- // Found a mismatch with the repeated mask.
- return false;
- }
- return true;
-}
-
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
@@ -6354,22 +6318,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
return DAG.getConstant(Imm, DL, MVT::i8);
}
-/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask.
-///
-/// This helper function produces an 8-bit shuffle immediate corresponding to
-/// the ubiquitous shuffle encoding scheme used in x86 instructions for
-/// shuffling 8 lanes.
-static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
- SelectionDAG &DAG) {
- assert(Mask.size() <= 8 &&
- "Up to 8 elts may be in Imm8 1-bit lane shuffle mask");
- unsigned Imm = 0;
- for (unsigned i = 0; i < Mask.size(); ++i)
- if (Mask[i] >= 0)
- Imm |= (Mask[i] % 2) << i;
- return DAG.getConstant(Imm, DL, MVT::i8);
-}
-
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
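For reference, a standalone sketch of the one-bit-per-lane imm8 packing that the deleted get1bitLaneShuffleImm8ForMask helper computed:

#include <cassert>
#include <cstdint>

static uint8_t packLaneBits(const int *Mask, unsigned N) {
  uint8_t Imm = 0;
  for (unsigned i = 0; i < N; ++i)
    if (Mask[i] >= 0)            // undef lanes contribute no bit
      Imm |= (Mask[i] % 2) << i; // bit i: low (0) or high (1) half of lane i
  return Imm;
}

int main() {
  int Mask[4] = {0, 1, 3, 2};           // lanes 1 and 2 pick the odd element
  assert(packLaneBits(Mask, 4) == 0x6); // bits 1 and 2 set
  return 0;
}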
@@ -9385,30 +9333,6 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
DAG.getConstant(PermMask, DL, MVT::i8));
}
-/// \brief Handle lowering 4-lane 128-bit shuffles.
-static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> WidenedMask,
- SelectionDAG &DAG) {
-
- assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
- // form a 128-bit permutation.
- // convert the 64-bit shuffle mask selection values into 128-bit selection
- // bits defined by a vshuf64x2 instruction's immediate control byte.
- unsigned PermMask = 0, Imm = 0;
-
- for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
- if(WidenedMask[i] == SM_SentinelZero)
- return SDValue();
-
- // use first element in place of undef musk
- Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
- PermMask |= (Imm % 4) << (i * 2);
- }
-
- return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
- DAG.getConstant(PermMask, DL, MVT::i8));
-}
-
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
@@ -10144,105 +10068,86 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
-static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
-
- assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN");
- // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right)
- int AlignVal = -1;
- for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) {
- if (Mask[i] < 0)
- continue;
- if (Mask[i] < i)
- return SDValue();
- if (AlignVal == -1)
- AlignVal = Mask[i] - i;
- else if (Mask[i] - i != AlignVal)
- return SDValue();
- }
- // Vector source operands should be swapped
- return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1,
- DAG.getConstant(AlignVal, DL, MVT::i8));
-}
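A standalone sketch of the "sequential, shifted right" pattern the deleted VALIGN helper matched: every defined mask entry must equal its position plus one constant shift amount.

static int matchValignShift(const int *Mask, int N) {
  int AlignVal = -1;
  for (int i = 0; i < N; ++i) {
    if (Mask[i] < 0)
      continue;                 // undef lane imposes no constraint
    if (Mask[i] < i)
      return -1;                // element moves left; not a right shift
    if (AlignVal == -1)
      AlignVal = Mask[i] - i;
    else if (Mask[i] - i != AlignVal)
      return -1;                // shift amount is not uniform
  }
  return AlignVal;
}

int main() {
  int Mask[4] = {2, 3, 4, 5}; // shifted right by 2
  return matchValignShift(Mask, 4) == 2 ? 0 : 1;
}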
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
-static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
- assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+}
- MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
+static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- SmallVector<SDValue, 32> VPermMask;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
- VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) :
- DAG.getConstant(Mask[i], DL,MaskEltVT));
- SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT,
- VPermMask);
- if (isSingleInputShuffleMask(Mask))
- return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 0, 16, 1, 17, 4, 20, 5, 21,
+ // Second 128-bit lane.
+ 8, 24, 9, 25, 12, 28, 13, 29}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 2, 18, 3, 19, 6, 22, 7, 23,
+ // Second 128-bit lane.
+ 10, 26, 11, 27, 14, 30, 15, 31}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
}
-
-/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
+static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- assert((V1.getSimpleValueType() == MVT::v8f64 ||
- V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
- assert((V2.getSimpleValueType() == MVT::v8f64 ||
- V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
+ assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- SmallVector<int, 4> WidenedMask;
- if (canWidenShuffleElements(Mask, WidenedMask))
- if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
- return Op;
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
-
- if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
- return Op;
-
- if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG))
- return Op;
-
- // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
- if (isSingleInputShuffleMask(Mask)) {
- if (!is128BitLaneCrossingShuffleMask(VT, Mask))
- return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
- get1bitLaneShuffleImm8ForMask(Mask, DL, DAG));
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
- SmallVector<int, 4> RepeatedMask;
- if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
- return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
- getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
- }
- return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- assert((V1.getSimpleValueType() == MVT::v16i32 ||
- V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
- assert((V2.getSimpleValueType() == MVT::v16i32 ||
- V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
+ assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -10253,39 +10158,16 @@ static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
0, 16, 1, 17, 4, 20, 5, 21,
// Second 128-bit lane.
8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask,
{// First 128-bit lane.
2, 18, 3, 19, 6, 22, 7, 23,
// Second 128-bit lane.
10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
- 12, 12, 14, 14}))
- return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11,
- 13, 13, 15, 15}))
- return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1);
-
- SmallVector<int, 4> RepeatedMask;
- if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
- if (isSingleInputShuffleMask(Mask)) {
- unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI;
- return DAG.getNode(Opc, DL, VT, V1,
- getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
- }
-
- for (int i = 0; i < 4; ++i)
- if (RepeatedMask[i] >= 16)
- RepeatedMask[i] -= 12;
- return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG);
- }
-
- if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
- return Op;
-
- return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10345,11 +10227,13 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
- case MVT::v8i64:
- return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v16f32:
+ return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i64:
+ return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v16i32:
- return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v32i16:
if (Subtarget->hasBWI())
return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
@@ -10759,11 +10643,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(VecVT.is128BitVector() && "Unexpected vector length");
- if (Subtarget->hasSSE41()) {
- SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
- if (Res.getNode())
+ if (Subtarget->hasSSE41())
+ if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
- }
MVT VT = Op.getSimpleValueType();
// TODO: handle v16i8.
@@ -12253,11 +12135,9 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasFp256()) {
- SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
- if (Res.getNode())
+ if (Subtarget->hasFp256())
+ if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
- }
return SDValue();
}
@@ -12272,11 +12152,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
- if (Subtarget->hasFp256()) {
- SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
- if (Res.getNode())
+ if (Subtarget->hasFp256())
+ if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
- }
assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements());
@@ -15117,6 +14995,54 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
+/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
+/// function or when returning to a parent frame after catching an exception, we
+/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
+/// Here's the math:
+/// RegNodeBase = EntryEBP - RegNodeSize
+/// ParentFP = RegNodeBase - RegNodeFrameOffset
+/// Subtracting RegNodeSize takes us to the offset of the registration node, and
+/// subtracting the offset (negative on x86) takes us back to the parent FP.
+static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
+ SDValue EntryEBP) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc dl;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy();
+
+ // It's possible that the parent function no longer has a personality function
+ // if the exceptional code was optimized away, in which case we just return
+ // the incoming EBP.
+ if (!Fn->hasPersonalityFn())
+ return EntryEBP;
+
+ // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
+ // WinEHStatePass for the full struct definition.
+ int RegNodeSize;
+ switch (classifyEHPersonality(Fn->getPersonalityFn())) {
+ default:
+ report_fatal_error("can only recover FP for MSVC EH personality functions");
+ case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break;
+ case EHPersonality::MSVC_CXX: RegNodeSize = 16; break;
+ }
+
+ // Get an MCSymbol that will ultimately resolve to the frame offset of the EH
+ // registration.
+ MCSymbol *OffsetSym =
+ MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()));
+ SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
+ SDValue RegNodeFrameOffset =
+ DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal);
+
+ // RegNodeBase = EntryEBP - RegNodeSize
+ // ParentFP = RegNodeBase - RegNodeFrameOffset
+ SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
+ DAG.getConstant(RegNodeSize, dl, PtrVT));
+ return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset);
+}
+
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
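A worked instance of the arithmetic in the recoverFramePointer comment above, with made-up example values (real offsets are produced by WinEHStatePass):

#include <cassert>

int main() {
  int EntryEBP = 0x1000;
  int RegNodeSize = 16;         // MSVC_CXX case in the switch above
  int RegNodeFrameOffset = -64; // negative on x86, per the comment
  int RegNodeBase = EntryEBP - RegNodeSize;        // 0x0ff0
  int ParentFP = RegNodeBase - RegNodeFrameOffset; // 0x0ff0 + 64
  assert(ParentFP == 0x1030);
  return 0;
}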
@@ -15206,6 +15132,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1,Src2),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_2OP_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ // We specify 2 possible modes for intrinsics, with/without rounding modes.
+ // First, we check if the intrinsic have rounding mode (6 operands),
+ // if not, we set rounding mode to "current".
+ SDValue Rnd;
+ if (Op.getNumOperands() == 6)
+ Rnd = Op.getOperand(5);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
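A sketch of the operand-count convention the new INTR_TYPE_2OP_MASK_RM case applies; the operand layout is illustrative, and the CUR_DIRECTION value of 4 is assumed to match X86::STATIC_ROUNDING.

#include <cstdint>
#include <vector>

static const uint32_t CUR_DIRECTION = 4; // X86::STATIC_ROUNDING::CUR_DIRECTION

// Assumed layout: [intrinsic id, src1, src2, passthru, mask, (rnd)].
static uint32_t pickRoundingMode(const std::vector<uint32_t> &Operands) {
  return Operands.size() == 6 ? Operands[5] : CUR_DIRECTION;
}

int main() {
  std::vector<uint32_t> NoRnd = {0, 1, 2, 3, 4};
  std::vector<uint32_t> WithRnd = {0, 1, 2, 3, 4, 0 /* TO_NEAREST_INT */};
  return (pickRoundingMode(NoRnd) == CUR_DIRECTION &&
          pickRoundingMode(WithRnd) == 0) ? 0 : 1;
}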
@@ -15230,11 +15173,26 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASK:
+ case FMA_OP_MASK3:
+ case FMA_OP_MASKZ:
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ SDValue PassThru = SDValue();
+
+ // set PassThru element
+ if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+ else if (IntrData->Type == FMA_OP_MASK3)
+ PassThru = Src3;
+ else
+ PassThru = Src1;
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15246,12 +15204,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
- Mask, Src1, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src1, Src2, Src3),
- Mask, Src1, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
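A hedged summary of the pass-through selection above, as a standalone sketch (the enum and names are illustrative, not LLVM's):

// Lanes where the write-mask is zero take this value.
enum class MaskKind { Merge1, Merge3, Zero };
const char *passThruFor(MaskKind K) {
  switch (K) {
  case MaskKind::Zero:   return "zero vector"; // VPERM_3OP_MASKZ / FMA_OP_MASKZ
  case MaskKind::Merge3: return "src3";        // FMA_OP_MASK3
  case MaskKind::Merge1: return "src1";        // FMA_OP_MASK / VPERM_3OP_MASK
  }
  return nullptr;
}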
case CMP_MASK:
case CMP_MASK_CC: {
@@ -15330,18 +15288,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue PassThru = Op.getOperand(2);
if (isAllOnes(Mask)) // return data as is
return Op.getOperand(1);
- EVT VT = Op.getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDLoc dl(Op);
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
- return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
- PassThru);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ DataToCompress),
+ Mask, PassThru, Subtarget, DAG);
}
case BLEND: {
SDValue Mask = Op.getOperand(3);
@@ -15532,15 +15482,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
GlobalValue::getRealLinkageName(Fn->getName()));
- StringRef Name = LSDASym->getName();
- assert(Name.data()[Name.size()] == '\0' && "not null terminated");
// Generate a simple absolute symbol reference. This intrinsic is only
// supported on 32-bit Windows, which isn't PIC.
- SDValue Result =
- DAG.getTargetExternalSymbol(Name.data(), VT, X86II::MO_NOPREFIX);
+ SDValue Result = DAG.getMCSymbol(LSDASym, VT);
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
+
+ case Intrinsic::x86_seh_recoverfp: {
+ SDValue FnOp = Op.getOperand(1);
+ SDValue IncomingFPOp = Op.getOperand(2);
+ GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+ auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+ if (!Fn)
+ report_fatal_error(
+ "llvm.x86.seh.recoverfp must take a function as the first argument");
+ return recoverFramePointer(DAG, Fn, IncomingFPOp);
+ }
}
}
@@ -15550,7 +15508,12 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
+ if (!C)
+ llvm_unreachable("Invalid scale type");
+ unsigned ScaleVal = C->getZExtValue();
+  if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
+ llvm_unreachable("Valid scale values are 1, 2, 4, 8");
+
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
@@ -15558,8 +15521,16 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- else
- MaskInReg = DAG.getBitcast(MaskVT, Mask);
+ else {
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+
+      // When MaskVT is v2i1 or v4i1, the low 2 or 4 elements are extracted
+      // with EXTRACT_SUBVECTOR.
+ MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
+ }
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
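The scale validation added in this hunk reduces to the following standalone predicate (illustrative; x86 addressing can only encode scales of 1, 2, 4, or 8):

bool isValidGatherScale(unsigned Scale) {
  return Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8;
}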
@@ -15576,7 +15547,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
+ if (!C)
+ llvm_unreachable("Invalid scale type");
+ unsigned ScaleVal = C->getZExtValue();
+  if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
+ llvm_unreachable("Valid scale values are 1, 2, 4, 8");
+
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15586,8 +15562,16 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
- else
- MaskInReg = DAG.getBitcast(MaskVT, Mask);
+ else {
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+
+      // When MaskVT is v2i1 or v4i1, the low 2 or 4 elements are extracted
+      // with EXTRACT_SUBVECTOR.
+ MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
+ }
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -15725,37 +15709,38 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
-static SDValue LowerEXCEPTIONINFO(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDLoc dl(Op);
- SDValue FnOp = Op.getOperand(2);
- SDValue FPOp = Op.getOperand(3);
+ SDValue Chain = Op.getOperand(0);
- // Compute the symbol for the parent EH registration. We know it'll get
- // emitted later.
- auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(FnOp)->getGlobal());
- MCSymbol *ParentFrameSym =
- MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
- GlobalValue::getRealLinkageName(Fn->getName()));
- StringRef Name = ParentFrameSym->getName();
- assert(Name.data()[Name.size()] == '\0' && "not null terminated");
-
- // Create a TargetExternalSymbol for the label to avoid any target lowering
- // that would make this PC relative.
- MVT PtrVT = Op.getSimpleValueType();
- SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
- SDValue OffsetVal =
- DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSym);
-
- // Add the offset to the FP.
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, FPOp, OffsetVal);
-
- // Load the second field of the struct, which is 4 bytes in. See
- // WinEHStatePass for more info.
- Add = DAG.getNode(ISD::ADD, dl, PtrVT, Add, DAG.getConstant(4, dl, PtrVT));
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Add, MachinePointerInfo(),
- false, false, false, 0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT VT = TLI.getPointerTy();
+
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+ unsigned SPReg = RegInfo->getStackRegister();
+
+ // Get incoming EBP.
+ SDValue IncomingEBP =
+ DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
+ // Load [EBP-24] into SP.
+ SDValue SPAddr =
+ DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT));
+ SDValue NewSP =
+ DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
+ false, VT.getScalarSizeInBits() / 8);
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
+
+ // FIXME: Restore the base pointer in case of stack realignment!
+
+ // Adjust EBP to point back to the original frame position.
+ SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP);
+ Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
+ return Chain;
}
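A minimal sketch of what the restore sequence assumes about the frame, using the hard-coded offset above (illustrative only; the authoritative layout lives in WinEHStatePass):

#include <cstdint>

// The prologue spills the stack pointer at a fixed slot below EBP; restoring
// the frame reloads it from [EBP - 24].
uintptr_t restoredSP(uintptr_t IncomingEBP) {
  return *reinterpret_cast<uintptr_t *>(IncomingEBP - 24);
}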
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
@@ -15764,8 +15749,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
- if (IntNo == Intrinsic::x86_seh_exceptioninfo)
- return LowerEXCEPTIONINFO(Op, Subtarget, DAG);
+ if (IntNo == llvm::Intrinsic::x86_seh_restoreframe)
+ return LowerSEHRESTOREFRAME(Op, Subtarget, DAG);
return SDValue();
}
@@ -15884,16 +15869,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
-
- SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
- DataToCompress, DAG.getUNDEF(VT));
+ SDValue Compressed =
+ getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
+ Mask, DAG.getUNDEF(VT), Subtarget, DAG);
return DAG.getStore(Chain, dl, Compressed, Addr,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
@@ -15901,7 +15879,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
- SDValue PathThru = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
EVT VT = Op.getValueType();
@@ -15909,21 +15887,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (isAllOnes(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits()/8);
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
false, false, false,
VT.getScalarSizeInBits()/8);
SDValue Results[] = {
- DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
- Chain};
+ getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand),
+ Mask, PassThru, Subtarget, DAG), Chain};
return DAG.getMergeValues(Results, dl);
}
}
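For context, a rough per-lane model of the expand operation these intrinsics select (this models the architectural vexpand* semantics; the DAG code above instead loads a full vector and wraps it in an expand node plus masking):

// Consume consecutive source elements for each set mask bit; unselected
// lanes take the pass-through value.
void expandModel(float *Dst, const float *Mem, const float *PassThru,
                 unsigned Mask, unsigned Lanes) {
  unsigned M = 0; // next packed element to consume
  for (unsigned I = 0; I < Lanes; ++I)
    Dst[I] = ((Mask >> I) & 1) ? Mem[M++] : PassThru[I];
}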
@@ -18476,6 +18447,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UMIN: return "X86ISD::UMIN";
case X86ISD::SMAX: return "X86ISD::SMAX";
case X86ISD::SMIN: return "X86ISD::SMIN";
+ case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
@@ -18618,9 +18590,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
+ case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
- case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
}
@@ -18777,7 +18750,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+ if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
return false;
VT = VT.getScalarType();
@@ -19962,6 +19935,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Replace 213-type (isel default) FMA3 instructions with 231-type for
// accumulator loops. Writing back to the accumulator allows the coalescer
// to remove extra copies in the loop.
+// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -21302,8 +21276,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
- if (LD.getNode())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
return LD;
if (isTargetShuffle(N->getOpcode())) {
@@ -21451,8 +21424,7 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
/// use 64-bit extracts and shifts.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
- if (NewOp.getNode())
+ if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
SDValue InputVector = N->getOperand(0);
@@ -22895,16 +22867,14 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- if (N->getOpcode() == ISD::SHL) {
- SDValue V = PerformSHLCombine(N, DAG);
- if (V.getNode()) return V;
- }
+ if (N->getOpcode() == ISD::SHL)
+ if (SDValue V = PerformSHLCombine(N, DAG))
+ return V;
- if (N->getOpcode() != ISD::SRA) {
- // Try to fold this logical shift into a zero vector.
- SDValue V = performShiftToAllZeros(N, DAG, Subtarget);
- if (V.getNode()) return V;
- }
+ // Try to fold this logical shift into a zero vector.
+ if (N->getOpcode() != ISD::SRA)
+ if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
+ return V;
return SDValue();
}
@@ -23284,8 +23254,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
SDValue N0 = N->getOperand(0);
@@ -23480,11 +23449,9 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- if (Subtarget->hasCMov()) {
- SDValue RV = performIntegerAbsCombine(N, DAG);
- if (RV.getNode())
+ if (Subtarget->hasCMov())
+ if (SDValue RV = performIntegerAbsCombine(N, DAG))
return RV;
- }
return SDValue();
}
@@ -24266,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- if (VT.isVector()) {
- auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+ if (VT.isVector() && Subtarget->hasSSE2()) {
+ auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
- 128 / InVT.getScalarSizeInBits());
- SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+ Size / InVT.getScalarSizeInBits());
+ SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
DAG.getUNDEF(InVT));
Opnds[0] = N;
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
};
+  // If the target size is less than 128 bits, widen the input so that its
+  // sign-extension is 128 bits wide, extend it, and then extract the
+  // subvector of the original target type.
+ if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ unsigned Scale = 128 / VT.getSizeInBits();
+ EVT ExVT =
+ EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
+ SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
// If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
// which ensures lowering to X86ISD::VSEXT (pmovsx*).
if (VT.getSizeInBits() == 128 &&
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
- SDValue ExOp = ExtendToVec128(DL, N0);
+ SDValue ExOp = ExtendVecSize(DL, N0, 128);
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
}
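A worked model of the new sub-128-bit path for, say, sext v4i8 -> v4i16 (a scalar sketch; lane values and names are illustrative):

#include <array>
#include <cstdint>

// Widen v4i8 to v8i8 (undef upper lanes, zeros here), sign-extend per lane
// to v8i16, then extract the low 4 lanes.
std::array<int16_t, 4> sextV4i8(const std::array<int8_t, 4> &In) {
  std::array<int8_t, 8> Wide{};
  for (int I = 0; I < 4; ++I) Wide[I] = In[I];
  std::array<int16_t, 8> Ext{};
  for (int I = 0; I < 8; ++I) Ext[I] = Wide[I]; // implicit sign extension
  return {Ext[0], Ext[1], Ext[2], Ext[3]};
}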
@@ -24301,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
- SrcVec = ExtendToVec128(DL, SrcVec);
+ SrcVec = ExtendVecSize(DL, SrcVec, 128);
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
Opnds.push_back(SrcVec);
}
@@ -24312,11 +24293,9 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasFp256())
return SDValue();
- if (VT.isVector() && VT.getSizeInBits() == 256) {
- SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (VT.isVector() && VT.getSizeInBits() == 256)
+ if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- }
return SDValue();
}
@@ -24332,7 +24311,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
EVT ScalarVT = VT.getScalarType();
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
- (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
+ !Subtarget->hasAVX512()))
return SDValue();
SDValue A = N->getOperand(0);
@@ -24398,11 +24378,10 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(1, dl, VT));
}
}
- if (VT.is256BitVector()) {
- SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
- if (R.getNode())
+
+ if (VT.is256BitVector())
+ if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- }
// (i8,i32 zext (udivrem (i8 x, i8 y)) ->
// (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
@@ -24606,10 +24585,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
if (CC == X86::COND_B)
return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
- SDValue Flags;
-
- Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
- if (Flags.getNode()) {
+ if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
@@ -24628,10 +24604,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
- SDValue Flags;
-
- Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
- if (Flags.getNode()) {
+ if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
Flags);
@@ -24695,16 +24668,18 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
- EVT InVT = Op0->getValueType(0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = Op0.getValueType();
+ EVT InSVT = InVT.getScalarType();
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
- InVT == MVT::v8i16 || InVT == MVT::v4i16) {
+ if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
SDLoc dl(N);
- MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
- return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
@@ -24714,10 +24689,10 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16
- if (N->getValueType(0) == MVT::f16)
+ if (VT == MVT::f16)
return SDValue();
- if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ if (!Ld->isVolatile() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget->is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
@@ -25683,75 +25658,40 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
- if (Res.second->hasType(VT))
+ // MVT::Other is used to specify clobber names.
+ if (Res.second->hasType(VT) || VT == MVT::Other)
return Res; // Correct type already, nothing to do.
- // All of the single-register GCC register classes map their values onto
- // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
- // really want an 8-bit or 32-bit register, map to the appropriate register
- // class and return the appropriate register.
- if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8 || VT == MVT::i1) {
- unsigned DestReg = 0;
- switch (Res.first) {
- default: break;
- case X86::AX: DestReg = X86::AL; break;
- case X86::DX: DestReg = X86::DL; break;
- case X86::CX: DestReg = X86::CL; break;
- case X86::BX: DestReg = X86::BL; break;
- }
- if (DestReg) {
- Res.first = DestReg;
- Res.second = &X86::GR8RegClass;
- }
- } else if (VT == MVT::i32 || VT == MVT::f32) {
- unsigned DestReg = 0;
- switch (Res.first) {
- default: break;
- case X86::AX: DestReg = X86::EAX; break;
- case X86::DX: DestReg = X86::EDX; break;
- case X86::CX: DestReg = X86::ECX; break;
- case X86::BX: DestReg = X86::EBX; break;
- case X86::SI: DestReg = X86::ESI; break;
- case X86::DI: DestReg = X86::EDI; break;
- case X86::BP: DestReg = X86::EBP; break;
- case X86::SP: DestReg = X86::ESP; break;
- }
- if (DestReg) {
- Res.first = DestReg;
- Res.second = &X86::GR32RegClass;
- }
- } else if (VT == MVT::i64 || VT == MVT::f64) {
- unsigned DestReg = 0;
- switch (Res.first) {
- default: break;
- case X86::AX: DestReg = X86::RAX; break;
- case X86::DX: DestReg = X86::RDX; break;
- case X86::CX: DestReg = X86::RCX; break;
- case X86::BX: DestReg = X86::RBX; break;
- case X86::SI: DestReg = X86::RSI; break;
- case X86::DI: DestReg = X86::RDI; break;
- case X86::BP: DestReg = X86::RBP; break;
- case X86::SP: DestReg = X86::RSP; break;
- }
- if (DestReg) {
- Res.first = DestReg;
- Res.second = &X86::GR64RegClass;
- }
- } else if (VT != MVT::Other) {
- // Type mismatch and not a clobber: Return an error;
+  // Get a matching integer register of the correct size, e.g. "ax" with
+  // MVT::i32 should return "eax". This even works for cases like getting a
+  // 64-bit integer register when given an f64 type.
+ const TargetRegisterClass *Class = Res.second;
+ if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass ||
+ Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) {
+ unsigned Size = VT.getSizeInBits();
+ MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8
+ : Size == 16 ? MVT::i16
+ : Size == 32 ? MVT::i32
+ : Size == 64 ? MVT::i64
+ : MVT::Other;
+ unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy);
+ if (DestReg > 0) {
+ Res.first = DestReg;
+ Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass
+ : SimpleTy == MVT::i16 ? &X86::GR16RegClass
+ : SimpleTy == MVT::i32 ? &X86::GR32RegClass
+ : &X86::GR64RegClass;
+ assert(Res.second->contains(Res.first) && "Register in register class");
+ } else {
+ // No register found/type mismatch.
Res.first = 0;
Res.second = nullptr;
}
- } else if (Res.second == &X86::FR32RegClass ||
- Res.second == &X86::FR64RegClass ||
- Res.second == &X86::VR128RegClass ||
- Res.second == &X86::VR256RegClass ||
- Res.second == &X86::FR32XRegClass ||
- Res.second == &X86::FR64XRegClass ||
- Res.second == &X86::VR128XRegClass ||
- Res.second == &X86::VR256XRegClass ||
- Res.second == &X86::VR512RegClass) {
+ } else if (Class == &X86::FR32RegClass || Class == &X86::FR64RegClass ||
+ Class == &X86::VR128RegClass || Class == &X86::VR256RegClass ||
+ Class == &X86::FR32XRegClass || Class == &X86::FR64XRegClass ||
+ Class == &X86::VR128XRegClass || Class == &X86::VR256XRegClass ||
+ Class == &X86::VR512RegClass) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
@@ -25767,15 +25707,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = &X86::VR256RegClass;
else if (X86::VR512RegClass.hasType(VT))
Res.second = &X86::VR512RegClass;
- else if (VT != MVT::Other) {
+ else {
// Type mismatch and not a clobber: Return an error;
Res.first = 0;
Res.second = nullptr;
}
- } else if (VT != MVT::Other) {
- // Type mismatch and not a clobber: Return an error;
- Res.first = 0;
- Res.second = nullptr;
}
return Res;
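The generalized mapping above boils down to picking the sub- or super-register of the requested width; an illustrative sketch for the "ax" family (the real lookup goes through getX86SubSuperRegisterOrZero):

#include <string>

// Hypothetical: map the 16-bit constraint register "ax" to the register of
// the requested bit width; an empty result models the error path above.
std::string mapAXFamily(unsigned Bits) {
  switch (Bits) {
  case 8:  return "al";
  case 16: return "ax";
  case 32: return "eax";
  case 64: return "rax";
  default: return "";
  }
}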
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9c98333776cf..17660891635c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -211,7 +211,8 @@ namespace llvm {
// FP vector get exponent
FGETEXP_RND,
-
+ // FP Scale
+ SCALEF,
// Integer add/sub with unsigned saturation.
ADDUS,
SUBUS,
@@ -238,6 +239,9 @@ namespace llvm {
/// Signed integer max and min.
SMAX, SMIN,
+      /// Integer absolute value.
+ ABS,
+
/// Floating point max and min.
FMAX, FMIN,
@@ -516,7 +520,7 @@ namespace llvm {
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
// thought as target memory ops!
};
- } // namespace X86ISD
+ }
/// Define some predicates that are used for node matching.
namespace X86 {
@@ -583,7 +587,7 @@ namespace llvm {
TO_ZERO = 3,
CUR_DIRECTION = 4
};
- } // namespace X86
+ }
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
@@ -638,9 +642,8 @@ namespace llvm {
/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;
- /// Returns true if the target allows
- /// unaligned memory accesses. of the specified type. Returns whether it
- /// is "fast" by reference in the second argument.
+ /// Returns true if the target allows unaligned memory accesses of the
+ /// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
bool *Fast) const override;
@@ -1120,6 +1123,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-} // namespace llvm
+}
#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index de6a83506b28..b309b8210851 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -274,6 +274,16 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, _, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -3436,7 +3446,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _, bit IsCommutable> {
+ X86VectorVTInfo _> {
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -3446,7 +3456,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _, bit IsCommutable> {
+ X86VectorVTInfo _> {
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -3481,16 +3491,16 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
- defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
- defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
@@ -3513,6 +3523,48 @@ let Predicates = [HasDQI] in {
defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
}
+multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
+ EVEX_4V, EVEX_B;
+  } // let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+}
+defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+
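A simplified per-lane model of the scalef operation these patterns select, assuming the usual description dst = src1 * 2^floor(src2) (rounding-mode and special-value behavior of the real instruction are omitted):

#include <cmath>

double scalefModel(double Src1, double Src2) {
  return std::ldexp(Src1, static_cast<int>(std::floor(Src2)));
}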
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
@@ -3870,6 +3922,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
+
+multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
+
+ let Predicates = [HasVLX, HasBWI] in {
+ defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
+ defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
+ }
+}
+
+defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
+
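For reference, a per-lane model of the byte shuffle being wired up here (one 16-byte lane shown; the AVX-512 forms repeat this per 128-bit lane):

#include <cstdint>

// Each destination byte selects a source byte by the low 4 bits of its
// control byte, or becomes zero if the control byte's top bit is set.
void pshufbLane(uint8_t *Dst, const uint8_t *Src, const uint8_t *Ctl) {
  for (int I = 0; I < 16; ++I)
    Dst[I] = (Ctl[I] & 0x80) ? 0 : Src[Ctl[I] & 0x0F];
}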
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
@@ -3950,188 +4015,295 @@ let Predicates = [HasAVX512] in {
//
let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDPatternOperator OpNode = null_frag> {
+multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
AVX512FMA3Base;
- let mayLoad = 1 in
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
AVX512FMA3Base;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
- }
-} // Constraints = "$src1 = $dst"
+ }
+}
-let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _,
- SDPatternOperator OpNode> {
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
- }
+}
} // Constraints = "$src1 = $dst"
-multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
- X86VectorVTInfo VTI, SDPatternOperator OpNode> {
- defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
- VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
+ }
}
-multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
- string OpcodeStr, X86VectorVTInfo VTI,
- SDPatternOperator OpNode> {
- defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
- VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
- defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
- VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
}
-multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
- string OpcodeStr,
- SDPatternOperator OpNode,
- SDPatternOperator OpNodeRnd> {
-let ExeDomain = SSEPackedSingle in {
- defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v16f32_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr,
- v16f32_info, OpNodeRnd>, EVEX_V512;
- defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f32x_info, OpNode>, EVEX_V256;
- defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f32x_info, OpNode>, EVEX_V128;
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
+
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
+ AVX512FMA3Base;
+
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src2,
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
}
-let ExeDomain = SSEPackedDouble in {
- defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f64_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
- OpNodeRnd>, EVEX_V512, VEX_W;
- defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f64x_info, OpNode>,
- EVEX_V256, VEX_W;
- defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v2f64x_info, OpNode>,
- EVEX_V128, VEX_W;
+}
+
+multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
-defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
-defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
+}
+
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
- _.RC:$src3)))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr,
- ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
- [(set _.RC:$dst,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))),
- _.RC:$src3))]>, EVEX_B;
+multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.RC:$src2),
+ OpcodeStr, "$src2, $src3", "$src3, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ AVX512FMA3Base;
+
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.MemOp:$src2),
+ OpcodeStr, "$src2, $src3", "$src3, $src2",
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>,
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.ScalarMemOp:$src2),
+ OpcodeStr, "${src2}"##_.BroadcastStr##", $src3",
+ "$src3, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ _.RC:$src3))>, AVX512FMA3Base, EVEX_B;
+ }
}
-} // Constraints = "$src1 = $dst"
-multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
+ (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
-let ExeDomain = SSEPackedSingle in {
- defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode,v16f32_info>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v8f32x_info>, EVEX_V256,
- EVEX_CD8<32, CD8VF>;
- defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v4f32x_info>, EVEX_V128,
- EVEX_CD8<32, CD8VF>;
+multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
-let ExeDomain = SSEPackedDouble in {
- defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v8f64_info>, EVEX_V512,
- VEX_W, EVEX_CD8<32, CD8VF>;
- defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v4f64x_info>, EVEX_V256,
- VEX_W, EVEX_CD8<32, CD8VF>;
- defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v2f64x_info>, EVEX_V128,
- VEX_W, EVEX_CD8<32, CD8VF>;
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
-defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
-defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
-defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
-defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
-defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
-defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
+multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+ avx512vl_f32_info>;
+ defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+ avx512vl_f64_info>, VEX_W;
+}
+
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType OpVT,
- X86MemOperand x86memop, Operand memop,
- PatFrag mem_frag> {
- let isCommutable = 1 in
- def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
+ dag RHS_r, dag RHS_m > {
+ defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base;
+
let mayLoad = 1 in
- def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, f128mem:$src3),
- !strconcat(OpcodeStr,
+ defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;
+
+ defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+
+ let isCodeGenOnly = 1 in {
+ def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
+ !strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1,
- (mem_frag addr:$src3))))]>;
+ [RHS_r]>;
+ let mayLoad = 1 in
+ def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [RHS_m]>;
+  } // isCodeGenOnly = 1
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ ,
+ string SUFF> {
+
+ defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1,
+ (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))))),
+ (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3,
+ (i32 imm:$rc))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
+ _.FRC:$src3))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src3))))>;
+
+ defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
+ (_.VT (OpNode _.RC:$src2,
+ (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1)),
+ (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1,
+ (i32 imm:$rc))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
+ _.FRC:$src1))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
+
+ defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)),
+ (_.VT (OpNode _.RC:$src1,
+ (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src2)),
+ (_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2,
+ (i32 imm:$rc))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
+ _.FRC:$src2))),
+ (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
+}
+
+multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{
+ let Predicates = [HasAVX512] in {
+ defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+ OpNodeRnd, f32x_info, "SS">,
+ EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+ OpNodeRnd, f64x_info, "SD">,
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ }
}
-} // Constraints = "$src1 = $dst"
-defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
- f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
- f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
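A hedged reference for the three operand orders instantiated above, written as scalar arithmetic (a = src1, b = src2, c = src3):

double fma132(double A, double B, double C) { return A * C + B; }
double fma213(double A, double B, double C) { return B * A + C; }
double fma231(double A, double B, double C) { return B * C + A; }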
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
@@ -5427,10 +5599,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag GatherNode> {
- let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
+ let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
+ ExeDomain = _.ExeDomain in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
(ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set _.RC:$dst, _.KRCWM:$mask_wb,
(GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
@@ -5438,67 +5611,104 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
EVEX_CD8<_.EltSize, CD8VT1>;
}
-let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
+multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
+ vy32xmem, mgatherv8i32>, EVEX_V512, VEX_W;
+ defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
+ vz64mem, mgatherv8i64>, EVEX_V512, VEX_W;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
+ vx32xmem, mgatherv4i32>, EVEX_V256, VEX_W;
+ defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
+ vy64xmem, mgatherv4i64>, EVEX_V256, VEX_W;
+ defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mgatherv4i32>, EVEX_V128, VEX_W;
+ defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mgatherv2i64>, EVEX_V128, VEX_W;
+}
+}
+
+multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz32mem,
+ mgatherv16i32>, EVEX_V512;
+ defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz64mem,
+ mgatherv8i64>, EVEX_V512;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
+ vy32xmem, mgatherv8i32>, EVEX_V256;
+ defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+ vy64xmem, mgatherv4i64>, EVEX_V256;
+ defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mgatherv4i32>, EVEX_V128;
+ defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mgatherv2i64>, EVEX_V128;
}
-
-let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
}
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
+defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
+ avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
+
+defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
+ avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
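A rough per-lane model of the masked gathers defined here, including the mask write-back ($mask_wb) behavior that clears each lane's bit on completion so a faulted gather can be restarted (illustrative, 32-bit elements):

#include <cstdint>

void gather32Model(uint32_t *Dst, const uint8_t *Base, const int32_t *Index,
                   unsigned Scale, unsigned &Mask, unsigned Lanes) {
  for (unsigned I = 0; I < Lanes; ++I)
    if ((Mask >> I) & 1) {
      const uint8_t *Addr = Base + int64_t(Index[I]) * Scale;
      Dst[I] = *reinterpret_cast<const uint32_t *>(Addr);
      Mask &= ~(1u << I); // completed lane: mask bit cleared
    }
}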
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag ScatterNode> {
-let mayStore = 1, Constraints = "$mask = $mask_wb" in
+let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
(ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
_.KRCWM:$mask, vectoraddr:$dst))]>,
EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
-let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
+multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
+ vy32xmem, mscatterv8i32>, EVEX_V512, VEX_W;
+ defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
+ vz64mem, mscatterv8i64>, EVEX_V512, VEX_W;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
+ vx32xmem, mscatterv4i32>, EVEX_V256, VEX_W;
+ defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
+ vy64xmem, mscatterv4i64>, EVEX_V256, VEX_W;
+ defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mscatterv4i32>, EVEX_V128, VEX_W;
+ defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mscatterv2i64>, EVEX_V128, VEX_W;
+}
+}
+
+multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
+ AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+ defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz32mem,
+ mscatterv16i32>, EVEX_V512;
+ defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz64mem,
+ mscatterv8i64>, EVEX_V512;
+let Predicates = [HasVLX] in {
+ defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
+ vy32xmem, mscatterv8i32>, EVEX_V256;
+ defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+ vy64xmem, mscatterv4i64>, EVEX_V256;
+ defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
+ vx32xmem, mscatterv4i32>, EVEX_V128;
+ defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+ vx64xmem, mscatterv2i64>, EVEX_V128;
}
-
-let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
}
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
-                                          mscatterv8i64>, EVEX_V512, VEX_W;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
-                                          mscatterv8i64>, EVEX_V512;
+defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
+                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
+
+defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
+                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
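
The two-level multiclasses above rely on TableGen's ## operator: each
"defm NAME##D##SUFF##Z256" line splices the outer defm name, an index-width
letter (d/q), the element suffix, and a vector-length tag into one concrete
record name. A minimal C++ sketch of that expansion (the helper and its names
are illustrative only; TableGen does the real splicing):

#include <cstdio>
#include <string>

// Mirrors the TableGen "##" concatenation in "defm NAME##D##SUFF##Z256":
// outer defm name + index-width letter + element suffix + vector-length tag.
static std::string expand(const std::string &Name, char IndexWidth,
                          const std::string &Suff, const std::string &VL) {
  return Name + IndexWidth + Suff + "Z" + VL; // e.g. "VGATHER"+'D'+"PS"+"Z256"
}

int main() {
  const char *VLs[] = {"", "256", "128"};   // EVEX_V512 / EVEX_V256 / EVEX_V128
  const char Widths[] = {'D', 'Q'};         // dword- vs. qword-indexed forms
  for (const char *VL : VLs)
    for (char W : Widths)
      std::printf("%s\n", expand("VGATHER", W, "PS", VL).c_str());
  // Prints VGATHERDPSZ, VGATHERQPSZ, VGATHERDPSZ256, ... which, with their
  // "rm" suffix appended, are the opcodes X86IntrinsicsInfo.h refers to below.
}
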
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
@@ -5599,77 +5809,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
-multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
- RegisterClass KRC, RegisterClass RC,
- X86MemOperand x86memop, X86MemOperand x86scalar_mop,
- string BrdcstStr> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", $dst|$dst, ${src}", BrdcstStr, "}"),
- []>, EVEX, EVEX_B;
- def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
- []>, EVEX, EVEX_B, EVEX_K;
- def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
- (ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
- BrdcstStr, "}"),
- []>, EVEX, EVEX_B, EVEX_KZ;
- }
-}
-
-defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
- i512mem, i32mem, "{1to16}">, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
- i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
-
-def : Pat<(xor
- (bc_v16i32 (v16i1sextv16i32)),
- (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v8i1sextv8i64)),
- (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
- (VPABSQZrr VR512:$src)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
- (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPABSQZrr VR512:$src)>;
-
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
@@ -5868,26 +6007,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
+
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
- def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
- _.ImmAllZerosV)))]>, EVEX_KZ;
-
- let Constraints = "$src0 = $dst" in
- def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
- _.RC:$src0)))]>, EVEX_K;
+ defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
let mayStore = 1 in {
+ def mr : AVX5128I<opc, MRMDestMem, (outs),
+ (ins _.MemOp:$dst, _.RC:$src),
+ OpcodeStr # "\t{$src, $dst |$dst, $src}",
+ []>, EVEX_CD8<_.EltSize, CD8VT1>;
+
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
+ [(store (_.VT (vselect _.KRCWM:$mask,
+ (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
@@ -5915,37 +6052,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
- _.ImmAllZerosV)))]>, EVEX_KZ;
-
- let Constraints = "$src0 = $dst" in
- def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
- (_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
-
- let mayLoad = 1, Constraints = "$src0 = $dst" in
- def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
- (_.VT (bitconvert
- (_.LdFrag addr:$src))),
- _.RC:$src0)))]>,
- EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
let mayLoad = 1 in
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.MemOp:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
- (_.VT (bitconvert (_.LdFrag addr:$src))),
- _.ImmAllZerosV)))]>,
- EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
+ (_.VT (X86expand (_.VT (bitconvert
+ (_.LdFrag addr:$src1)))))>,
+ AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -6175,3 +6291,91 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1), OpcodeStr##_.Suffix,
+ "$src1", "$src1",
+ (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
+
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
+ "$src1", "$src1",
+ (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+ EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> :
+ avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
+ "${src1}"##_.BroadcastStr,
+ "${src1}"##_.BroadcastStr,
+ (_.VT (OpNode (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1))))>,
+ EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+ SDNode OpNode, Predicate prd> {
+ defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
+ prd>, VEX_W;
+ defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+ SDNode OpNode, Predicate prd> {
+ defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
+ defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+ bits<8> opc_d, bits<8> opc_q,
+ string OpcodeStr, SDNode OpNode> {
+ defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+ HasAVX512>,
+ avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+ HasBWI>;
+}
+
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
+
+def : Pat<(xor
+ (bc_v16i32 (v16i1sextv16i32)),
+ (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
+ (VPABSDZrr VR512:$src)>;
+def : Pat<(xor
+ (bc_v8i64 (v8i1sextv8i64)),
+ (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
+ (VPABSQZrr VR512:$src)>;
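
The two Pat<> fragments kept above match the sign-mask idiom for integer
absolute value, per lane: with s = x >> 31 (arithmetic shift), abs(x) equals
(x + s) ^ s. A scalar sketch of the identity (illustrative C++, not LLVM code;
the shift relies on the usual arithmetic-shift behavior for signed values):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// abs(x) == (x + s) ^ s, where s = x >> 31 is the sign mask (all ones for
// negative x, all zeros otherwise). This is exactly the xor(sext, add(x, sext))
// shape matched by the patterns above, one i32 lane at a time.
static int32_t absViaSignMask(int32_t x) {
  int32_t s = x >> 31;   // v16i1sextv16i32 analogue for a single lane
  return (x + s) ^ s;
}

int main() {
  for (int32_t x : {0, 1, -1, 42, -42, INT32_MIN + 1})
    assert(absViaSignMask(x) == std::abs(x));
}
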
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index eb4dc48a7a65..2056056d23a5 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -179,6 +179,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
.addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
}
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 912a0fb356ed..7f850d6830e1 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -869,6 +869,7 @@ def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;
def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
@@ -879,6 +880,8 @@ def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
(ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),
+ (ADD32ri GR32:$src1, mcsym:$src2)>;
def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
(ADD32ri GR32:$src1, tblockaddress:$src2)>;
@@ -886,6 +889,8 @@ def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV32mi addr:$dst, tglobaladdr:$src)>;
def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, mcsym:$src)>;
def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV32mi addr:$dst, tblockaddress:$src)>;
@@ -900,6 +905,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper mcsym:$dst)),
+ (MOV64ri mcsym:$dst)>, Requires<[FarData]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
@@ -914,6 +921,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper mcsym:$dst)),
+ (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
@@ -932,12 +941,15 @@ def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, mcsym:$src)>,
+ Requires<[NearData, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tblockaddress:$src)>,
Requires<[NearData, IsStatic]>;
-def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
+def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;
+def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;
// Calls
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 0dd05d8befd6..49068e9c37d3 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -633,16 +633,16 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
-def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave\t$dst", [], IIC_FXSAVE>, TB;
-def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave64\t$dst", [], IIC_FXSAVE>, TB,
- Requires<[In64BitMode]>;
+def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
+ IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", [], IIC_FXRSTOR>, TB;
+ "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor64\t$src", [], IIC_FXRSTOR>, TB,
- Requires<[In64BitMode]>;
+ "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+ IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
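
Attaching the int_x86_fxsave/int_x86_fxrstor patterns makes these instructions
selectable from the corresponding builtins instead of being assembler-only. A
hedged usage sketch, assuming a compiler that provides _fxsave/_fxrstor via
<immintrin.h> (e.g. built with -mfxsr); the 512-byte, 16-byte-aligned save area
is an architectural requirement of FXSAVE:

// Build (hypothetically): clang++ -mfxsr fxsave_demo.cpp
#include <immintrin.h>

int main() {
  // FXSAVE writes a 512-byte region that must be 16-byte aligned.
  alignas(16) unsigned char State[512] = {};
  _fxsave(State);    // lowers to FXSAVE via the int_x86_fxsave pattern above
  _fxrstor(State);   // restores x87/SSE state via int_x86_fxrstor
}
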
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 16ae77dd81a3..fe245c3a7e38 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -251,6 +251,7 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
+def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -310,6 +311,7 @@ def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
@@ -347,12 +349,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
- SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
-def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 3>,
- SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
@@ -561,6 +561,14 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
+ Mgt->getBasePtr().getValueType() == MVT::v4i32);
+ return false;
+}]>;
+
def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -569,6 +577,20 @@ def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;
}]>;
+def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v2i64);
+ return false;
+}]>;
+def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v4i64);
+ return false;
+}]>;
def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -584,6 +606,30 @@ def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;
}]>;
+def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v2i64 ||
+ Sc->getBasePtr().getValueType() == MVT::v2i64);
+ return false;
+}]>;
+
+def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v4i32 ||
+ Sc->getBasePtr().getValueType() == MVT::v4i32);
+ return false;
+}]>;
+
+def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v4i64 ||
+ Sc->getBasePtr().getValueType() == MVT::v4i64);
+ return false;
+}]>;
+
def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_scatter node:$src1, node:$src2, node:$src3) , [{
if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
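
Every fragment here has the same shape: masked_gather/masked_scatter restricted
by a predicate on the index (or base-pointer) vector type, so each
EVEX_V128/V256/V512 instruction is offered only the index width it can encode.
A toy C++ model of that dispatch (all names below are invented for
illustration; the real selection happens in the SelectionDAG pattern matcher):

#include <cstdio>

// Invented stand-ins for the MVTs tested by the PatFrags above.
enum class MVTKind { v2i64, v4i32, v4i64, v8i32, v8i64, v16i32 };

// Each PatFrag is "masked_gather, but only if the index has this type".
struct GatherVariant {
  const char *Instr;  // instruction chosen when the fragment matches
  MVTKind IndexVT;    // type required by the fragment's predicate
};

// Analogue of mgatherv4i32/mgatherv8i32/... feeding the Z128/Z256/Z512 defs.
static const GatherVariant Variants[] = {
    {"Z128 (dword index)", MVTKind::v4i32},
    {"Z128 (qword index)", MVTKind::v2i64},
    {"Z256 (dword index)", MVTKind::v8i32},
    {"Z256 (qword index)", MVTKind::v4i64},
    {"Z512 (dword index)", MVTKind::v16i32},
    {"Z512 (qword index)", MVTKind::v8i64},
};

static const char *select(MVTKind IndexVT) {
  for (const GatherVariant &V : Variants)
    if (V.IndexVT == IndexVT)   // the predicate check, boiled down
      return V.Instr;
  return "no match";
}

int main() { std::printf("%s\n", select(MVTKind::v4i64)); }
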
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4aa0ae6f1959..b92ba99fb100 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1577,38 +1577,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPXORYrr, X86::VPXORYrm, 0 },
// FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4mr, 0 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, 0 },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, 0 },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, 0 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, 0 },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, 0 },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, 0 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, 0 },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, 0 },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, 0 },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, 0 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, 0 },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, 0 },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, 0 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, 0 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, 0 },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, 0 },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, 0 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, 0 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, 0 },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, 0 },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, 0 },
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_NONE },
// XOP foldable instructions
{ X86::VPCMOVrr, X86::VPCMOVmr, 0 },
@@ -1852,38 +1852,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE },
// FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_NONE },
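
The rewrite above is uniform: every FMA4 load-fold entry drops its alignment
requirement (TB_ALIGN_16/TB_ALIGN_32 become TB_ALIGN_NONE), consistent with
VEX-encoded memory operands not faulting on unaligned addresses. A simplified
model of how a fold table with such flags is consulted (struct and function
names are invented; the real flags and lookup live in X86InstrInfo.cpp):

#include <cstdint>

// Simplified fold-table entry: register-form opcode, memory-form opcode,
// and the minimum alignment the memory form demands (0 = none).
struct FoldEntry {
  unsigned RegOpc;
  unsigned MemOpc;
  uint16_t MinAlign;  // stands in for TB_ALIGN_NONE / TB_ALIGN_16 / ...
};

// Folding a load into MemOpc is legal only if the operand's known alignment
// satisfies the entry's requirement. With MinAlign == 0 (TB_ALIGN_NONE),
// unaligned spill slots and loads fold freely.
static bool canFold(const FoldEntry &E, uint16_t KnownAlign) {
  return E.MinAlign == 0 || KnownAlign >= E.MinAlign;
}

int main() {
  FoldEntry Fma4 = {/*RegOpc=*/1, /*MemOpc=*/2, /*MinAlign=*/0};
  return canFold(Fma4, /*KnownAlign=*/1) ? 0 : 1;  // folds even at align 1
}
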
// XOP foldable instructions
{ X86::VPCMOVrr, X86::VPCMOVrm, 0 },
@@ -5295,21 +5295,57 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Size, Alignment, /*AllowCommute=*/true);
}
-static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
- const MachineFunction &MF) {
+/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
+/// because the latter uses contents that wouldn't be defined in the folded
+/// version. For instance, this transformation isn't legal:
+/// movss (%rdi), %xmm0
+/// addps %xmm0, %xmm0
+/// ->
+/// addps (%rdi), %xmm0
+///
+/// But this one is:
+/// movss (%rdi), %xmm0
+/// addss %xmm0, %xmm0
+/// ->
+/// addss (%rdi), %xmm0
+///
+static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineInstr &UserMI,
+ const MachineFunction &MF) {
unsigned Opc = LoadMI.getOpcode();
+ unsigned UserOpc = UserMI.getOpcode();
unsigned RegSize =
MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
- if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) {
    // These instructions only load 32 bits, so we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return true;
+    // destination register is wider than 32 bits (4 bytes) and the user
+    // instruction isn't scalar (SS).
+ switch (UserOpc) {
+ case X86::ADDSSrr_Int: case X86::VADDSSrr_Int:
+ case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
+ case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
+ case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+ return false;
+ default:
+ return true;
+ }
+ }
- if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) {
    // These instructions only load 64 bits, so we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
- return true;
+    // destination register is wider than 64 bits (8 bytes) and the user
+    // instruction isn't scalar (SD).
+ switch (UserOpc) {
+ case X86::ADDSDrr_Int: case X86::VADDSDrr_Int:
+ case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
+ case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
+ case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+ return false;
+ default:
+ return true;
+ }
+ }
return false;
}
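
Concretely, movss defines only the low 4 bytes of a 16-byte register, so
folding it into a packed user would make the folded instruction read 12 bytes
the original code never loaded; the scalar *SS*_Int/*SD*_Int users touch only
the low element, which is why they are exempt. A lane-level sketch of the
semantics in plain C++ (a standalone model, not compiler code):

#include <cassert>

// Model a 4-lane XMM register and the two user shapes from the comment above.
struct Xmm { float Lane[4]; };

// movss (%rdi), %xmm0: defines lane 0 only; upper lanes keep their old value.
static Xmm movss(const float *Mem, Xmm Prev) {
  Prev.Lane[0] = Mem[0];
  return Prev;
}

int main() {
  float OneFloat = 2.0f;            // memory holds a single valid float
  Xmm X{{1.0f, 9.0f, 9.0f, 9.0f}};
  X = movss(&OneFloat, X);

  // addss reads only Lane[0], so folding is safe: addss (%rdi), %xmm0.
  float ScalarSum = X.Lane[0] + X.Lane[0];
  assert(ScalarSum == 4.0f);

  // addps would read Lane[1..3]; a folded addps (%rdi), %xmm0 would instead
  // load 16 bytes from &OneFloat, bytes the original code never accessed.
}
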
@@ -5321,7 +5357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
- if (isPartialRegisterLoad(*LoadMI, MF))
+ if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
}
@@ -5434,7 +5470,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
break;
}
default: {
- if (isPartialRegisterLoad(*LoadMI, MF))
+ if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.
@@ -6334,22 +6370,11 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel,
return isHighLatencyDef(DefMI->getOpcode());
}
-/// If the input instruction is part of a chain of dependent ops that are
-/// suitable for reassociation, return the earlier instruction in the sequence
-/// that defines its first operand, otherwise return a nullptr.
-/// If the instruction's operands must be commuted to be considered a
-/// reassociation candidate, Commuted will be set to true.
-static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
- unsigned AssocOpcode,
- bool checkPrevOneUse,
- bool &Commuted) {
- if (Inst.getOpcode() != AssocOpcode)
- return nullptr;
-
- MachineOperand Op1 = Inst.getOperand(1);
- MachineOperand Op2 = Inst.getOperand(2);
-
- const MachineBasicBlock *MBB = Inst.getParent();
+static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) {
+ assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
// We need virtual register definitions.
@@ -6359,80 +6384,99 @@ static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
MI1 = MRI.getUniqueVRegDef(Op1.getReg());
if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
MI2 = MRI.getUniqueVRegDef(Op2.getReg());
-
+
// And they need to be in the trace (otherwise, they won't have a depth).
- if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
- return nullptr;
-
- Commuted = false;
- if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+ if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
+ return true;
+
+ return false;
+}
+
+static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned AssocOpcode = Inst.getOpcode();
+
+ // If only one operand has the same opcode and it's the second source operand,
+ // the operands must be commuted.
+ Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ if (Commuted)
std::swap(MI1, MI2);
- Commuted = true;
- }
- // Avoid reassociating operands when it won't provide any benefit. If both
- // operands are produced by instructions of this type, we may already
- // have the optimal sequence.
- if (MI2->getOpcode() == AssocOpcode)
- return nullptr;
-
- // The instruction must only be used by the other instruction that we
- // reassociate with.
- if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
- return nullptr;
-
- // We must match a simple chain of dependent ops.
- // TODO: This check is not necessary for the earliest instruction in the
- // sequence. Instead of a sequence of 3 dependent instructions with the same
- // opcode, we only need to find a sequence of 2 dependent instructions with
- // the same opcode plus 1 other instruction that adds to the height of the
- // trace.
- if (MI1->getOpcode() != AssocOpcode)
- return nullptr;
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 3. The previous instruction's result must only be used by Inst.
+ if (MI1->getOpcode() == AssocOpcode &&
+ hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+ return true;
- return MI1;
+ return false;
}
-/// Select a pattern based on how the operands of each associative operation
-/// need to be commuted.
-static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
- bool CommuteRoot) {
- if (CommutePrev) {
- if (CommuteRoot)
- return MachineCombinerPattern::MC_REASSOC_XA_YB;
- return MachineCombinerPattern::MC_REASSOC_XA_BY;
- } else {
- if (CommuteRoot)
- return MachineCombinerPattern::MC_REASSOC_AX_YB;
- return MachineCombinerPattern::MC_REASSOC_AX_BY;
- }
+/// Return true if the input instruction is part of a chain of dependent ops
+/// that are suitable for reassociation, otherwise return false.
+/// If the instruction's operands must be commuted to have a previous
+/// instruction of the same type define the first source operand, Commuted will
+/// be set to true.
+static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode,
+ bool &Commuted) {
+ // 1. The instruction must have the correct type.
+ // 2. The instruction must have virtual register definitions for its
+ // operands in the same basic block.
+ // 3. The instruction must have a reassociatable sibling.
+ if (Inst.getOpcode() == AssocOpcode &&
+ hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
+ hasReassocSibling(Inst, Commuted))
+ return true;
+
+ return false;
}
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+// instruction is known to not increase the critical path, then don't match
+// that pattern.
bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
return false;
+ // TODO: There is nothing x86-specific here except the instruction type.
+ // This logic could be hoisted into the machine combiner pass itself.
+
+ // Look for this reassociation pattern:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+
// TODO: There are many more associative instruction types to match:
// 1. Other forms of scalar FP add (non-AVX)
// 2. Other data types (double, integer, vectors)
// 3. Other math / logic operations (mul, and, or)
unsigned AssocOpcode = X86::VADDSSrr;
- // TODO: There is nothing x86-specific here except the instruction type.
- // This logic could be hoisted into the machine combiner pass itself.
- bool CommuteRoot;
- if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true,
- CommuteRoot)) {
- bool CommutePrev;
- if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) {
- // We found a sequence of instructions that may be suitable for a
- // reassociation of operands to increase ILP.
- Patterns.push_back(getPattern(CommutePrev, CommuteRoot));
- return true;
+ bool Commute = false;
+ if (isReassocCandidate(Root, AssocOpcode, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
}
+ return true;
}
-
+
return false;
}
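
The pattern being collected rewrites the serial chain "B = A op X; C = B op Y"
into "C = A op (X op Y)", so the two operations no longer form a dependency
chain; since FP addition is not associative, the whole transform is gated on
UnsafeFPMath. A scalar sketch (illustrative; the two results are only
guaranteed equal under exact or fast-math arithmetic):

#include <cstdio>

int main() {
  float A = 1.0f, X = 2.0f, Y = 3.0f;

  // Original: two dependent adds, a critical path of length 2.
  float B = A + X;   // Prev
  float C = B + Y;   // Root

  // Reassociated: (X + Y) is independent of A and can execute in parallel
  // with whatever produces A, shortening the critical path.
  float C2 = A + (X + Y);

  // Bitwise equality is only guaranteed with exact arithmetic; that is why
  // this combiner pattern requires UnsafeFPMath.
  std::printf("%f %f\n", C, C2);
}
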
@@ -6525,14 +6569,16 @@ void X86InstrInfo::genAlternativeCodeSequence(
// Select the previous instruction in the sequence based on the input pattern.
MachineInstr *Prev = nullptr;
- if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
- Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
- Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
- Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
- Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
- else
- llvm_unreachable("Unknown pattern for machine combiner");
+ switch (Pattern) {
+ case MachineCombinerPattern::MC_REASSOC_AX_BY:
+ case MachineCombinerPattern::MC_REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::MC_REASSOC_AX_YB:
+ case MachineCombinerPattern::MC_REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ }
+ assert(Prev && "Unknown pattern for machine combiner");
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
return;
@@ -6604,7 +6650,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char CGBR::ID = 0;
FunctionPass*
@@ -6716,7 +6762,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // namespace
+}
char LDTLSCleanup::ID = 0;
FunctionPass*
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4912951140d9..bf63336c7005 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -90,7 +90,7 @@ namespace X86 {
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(CondCode CC);
-} // namespace X86
+} // end namespace X86;
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -512,6 +512,6 @@ private:
int &FrameIndex) const;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e936b4bc466e..6f38cb8eaf33 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -282,6 +282,10 @@ let RenderMethod = "addMemOperands" in {
def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; }
def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; }
def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; }
+ def X86MemVX32XOperand : AsmOperandClass { let Name = "MemVX32X"; }
+ def X86MemVY32XOperand : AsmOperandClass { let Name = "MemVY32X"; }
+ def X86MemVX64XOperand : AsmOperandClass { let Name = "MemVX64X"; }
+ def X86MemVY64XOperand : AsmOperandClass { let Name = "MemVY64X"; }
}
def X86AbsMemAsmOperand : AsmOperandClass {
@@ -332,7 +336,11 @@ def vx32mem : X86VMemOperand<VR128, "printi32mem", X86MemVX32Operand>;
def vy32mem : X86VMemOperand<VR256, "printi32mem", X86MemVY32Operand>;
def vx64mem : X86VMemOperand<VR128, "printi64mem", X86MemVX64Operand>;
def vy64mem : X86VMemOperand<VR256, "printi64mem", X86MemVY64Operand>;
-def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>;
+
+def vx32xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX32XOperand>;
+def vx64xmem  : X86VMemOperand<VR128X, "printi64mem", X86MemVX64XOperand>;
+def vy32xmem : X86VMemOperand<VR256X, "printi32mem", X86MemVY32XOperand>;
+def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;
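
The new VR128X/VR256X operand classes are what allow the 128- and 256-bit
gathers and scatters defined earlier to encode an XMM/YMM VSIB index under
EVEX. A hedged usage sketch from the C side, assuming a compiler exposing the
AVX512F+AVX512VL gather intrinsics (run only on matching hardware):

// Build (hypothetically): clang++ -mavx512f -mavx512vl gather_demo.cpp
#include <immintrin.h>

int main() {
  alignas(16) float Table[8] = {0, 10, 20, 30, 40, 50, 60, 70};
  __m128i Index = _mm_set_epi32(6, 4, 2, 0);  // dword indices -> vx32xmem form
  __m128  Src   = _mm_setzero_ps();           // pass-through for masked lanes
  __mmask8 K    = 0x0F;                       // gather all four lanes

  // Lowers to VGATHERDPSZ128rm: xmm destination, xmm index, k write-mask.
  __m128 G = _mm_mmask_i32gather_ps(Src, K, Index, Table, /*scale=*/4);
  (void)G;
}
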
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 95629184f2cf..2a896dfe8aa8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7860,10 +7860,11 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
int_x86_avx2_vbroadcast_sd_pd_256,
WriteFShuffle256>, VEX_L;
-let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256,
- i128mem, v4i64, loadv2i64,
- WriteLoad>, VEX_L;
+let mayLoad = 1, Predicates = [HasAVX2] in
+def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
+ (ins i128mem:$src),
+ "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteLoad]>, VEX, VEX_L;
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 2b829301e327..61a33484b8bf 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,8 +21,9 @@ enum IntrinsicType {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
- INTR_TYPE_3OP_MASK, FMA_OP_MASK,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
+ INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+ VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
};
@@ -55,6 +56,22 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
@@ -129,15 +146,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
-
- X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
- X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
- X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
- X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm),
- X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH,
- X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
- X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
- X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
+ X86::VSCATTERPF1DPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
+ X86::VSCATTERPF1DPSm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
+ X86::VSCATTERPF1QPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
+ X86::VSCATTERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
@@ -251,6 +283,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+
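
FMA_OP_MASK3 marks the mask3 intrinsic family, in which the accumulator
operand c doubles as the masked destination: lanes with the mask bit set
receive a*b+c (or the subtract/alternating variant), lanes with it clear keep
c. A hedged usage sketch, assuming AVX-512F intrinsics and hardware:

// Build (hypothetically): clang++ -mavx512f fmadd_mask3_demo.cpp
#include <immintrin.h>

int main() {
  __m512d A = _mm512_set1_pd(2.0);
  __m512d B = _mm512_set1_pd(3.0);
  __m512d C = _mm512_set1_pd(1.0);
  __mmask8 K = 0x0F;  // low four lanes active

  // mask3 form: result lane = K ? A*B + C : C (C is the pass-through),
  // matching the FMA_OP_MASK3 lowering added above.
  __m512d R = _mm512_mask3_fmadd_pd(A, B, C, K);
  (void)R;  // low lanes 7.0, high lanes 1.0
}
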
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
@@ -382,9 +460,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
@@ -393,7 +471,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
- X86ISD::FMAX_RND),
+ X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
@@ -405,7 +483,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
- X86ISD::FMIN_RND),
+ X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
@@ -428,6 +506,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
@@ -581,6 +671,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
@@ -633,6 +729,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
@@ -667,12 +775,158 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
@@ -696,54 +973,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
- X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
- X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
- X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
- X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
- X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
- X86ISD::FMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
- X86ISD::FMSUBADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
- X86ISD::FMSUBADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
- X86ISD::FNMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
- X86ISD::FNMADD_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
- X86ISD::FNMSUB_RND),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
- X86ISD::FNMSUB_RND),
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 64135e0f53e5..3415cedc6fea 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -112,7 +112,7 @@ namespace llvm {
OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo());
}
-} // namespace llvm
+} // end llvm namespace
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
@@ -159,10 +159,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const GlobalValue *GV = MO.getGlobal();
AsmPrinter.getNameWithPrefix(Name, GV);
} else if (MO.isSymbol()) {
- if (MO.getTargetFlags() == X86II::MO_NOPREFIX)
- Name += MO.getSymbolName();
- else
- getMang()->getNameWithPrefix(Name, MO.getSymbolName());
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
} else if (MO.isMBB()) {
assert(Suffix.empty());
Sym = MO.getMBB()->getSymbol();
@@ -241,7 +238,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
- case X86II::MO_NOPREFIX:
break;
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
@@ -423,6 +419,8 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ case MachineOperand::MO_MCSymbol:
+ return LowerSymbolOperand(MO, MO.getMCSymbol());
case MachineOperand::MO_JumpTableIndex:
return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
case MachineOperand::MO_ConstantPoolIndex:
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 342d26ab1fbb..d598b55aae3e 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -179,6 +179,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 33aa78ffdf8a..143e70bda9e7 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -84,7 +84,7 @@ namespace {
};
char PadShortFunc::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86PadShortFunctions() {
return new PadShortFunc();
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 00e213423974..0033b5058187 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -598,10 +598,10 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
- bool High) {
+unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
switch (VT) {
- default: llvm_unreachable("Unexpected VT");
+ default: return 0;
case MVT::i8:
if (High) {
switch (Reg) {
@@ -625,7 +625,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
} else {
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -662,7 +662,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
case MVT::i16:
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -698,7 +698,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
case MVT::i32:
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -734,7 +734,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
case MVT::i64:
switch (Reg) {
- default: llvm_unreachable("Unexpected register");
+ default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -771,6 +771,14 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
}
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
+ unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High);
+ if (Res == 0)
+ llvm_unreachable("Unexpected register or VT");
+ return Res;
+}
+
unsigned get512BitSuperRegister(unsigned Reg) {
if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
return X86::ZMM0 + (Reg - X86::XMM0);
@@ -781,4 +789,4 @@ unsigned get512BitSuperRegister(unsigned Reg) {
llvm_unreachable("Unexpected SIMD register");
}
-} // namespace llvm
+}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 459ecf7fff72..8de1d0bf8ec8 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -128,14 +128,19 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-// getX86SubSuperRegister - X86 utility function. It returns the sub or super
-// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
+/// Returns the sub or super register of a specific X86 register.
+/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+/// Aborts on error.
unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
+/// Returns the sub or super register of a specific X86 register.
+/// Like getX86SubSuperRegister() but returns 0 on error.
+unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType,
+ bool High = false);
+
//get512BitRegister - X86 utility - returns 512-bit super register
unsigned get512BitSuperRegister(unsigned Reg);
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 25606d3f5df3..eb7e0ed9de6c 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -48,6 +48,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6934061c6922..d420abbe1433 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -490,6 +490,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 3d6eb4f7ce02..fb9cb4ba4c86 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -110,12 +110,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
if (Subtarget.isTargetWin64())
this->Options.TrapUnreachable = true;
- // TODO: By default, all reciprocal estimate operations are off because
- // that matches the behavior before TargetRecip was added (except for btver2
- // which used subtarget features to enable this type of codegen).
- // We should change this to match GCC behavior where everything but
- // scalar division estimates are turned on by default with -ffast-math.
- this->Options.Reciprocals.setDefaults("all", false, 1);
+ // By default (and when -ffast-math is on), enable estimate codegen for
+ // everything except scalar division. By default, use 1 refinement step for
+ // all operations. Defaults may be overridden by using command-line options.
+ // Scalar division estimates are disabled because they break too much
+ // real-world code. These defaults match GCC behavior.
+ this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
+ this->Options.Reciprocals.setDefaults("divf", false, 1);
+ this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
+ this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
initAsmInfo();
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index be56888b75f4..262955698e44 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -44,6 +44,6 @@ public:
}
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index f9f62904b64b..6f900ea351ef 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -131,52 +131,44 @@ static std::string APIntToHexString(const APInt &AI) {
return HexString;
}
-
static std::string scalarConstantToHexString(const Constant *C) {
Type *Ty = C->getType();
- APInt AI;
if (isa<UndefValue>(C)) {
- AI = APInt(Ty->getPrimitiveSizeInBits(), /*val=*/0);
- } else if (Ty->isFloatTy() || Ty->isDoubleTy()) {
- const auto *CFP = cast<ConstantFP>(C);
- AI = CFP->getValueAPF().bitcastToAPInt();
- } else if (Ty->isIntegerTy()) {
- const auto *CI = cast<ConstantInt>(C);
- AI = CI->getValue();
+ return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+ } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
+ return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
+ return APIntToHexString(CI->getValue());
} else {
- llvm_unreachable("unexpected constant pool element type!");
+ unsigned NumElements;
+ if (isa<VectorType>(Ty))
+ NumElements = Ty->getVectorNumElements();
+ else
+ NumElements = Ty->getArrayNumElements();
+ std::string HexString;
+ for (int I = NumElements - 1, E = -1; I != E; --I)
+ HexString += scalarConstantToHexString(C->getAggregateElement(I));
+ return HexString;
}
- return APIntToHexString(AI);
}
MCSection *
X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
- if (Kind.isReadOnly()) {
- if (C) {
- Type *Ty = C->getType();
- SmallString<32> COMDATSymName;
- if (Ty->isFloatTy() || Ty->isDoubleTy()) {
- COMDATSymName = "__real@";
- COMDATSymName += scalarConstantToHexString(C);
- } else if (const auto *VTy = dyn_cast<VectorType>(Ty)) {
- uint64_t NumBits = VTy->getBitWidth();
- if (NumBits == 128 || NumBits == 256) {
- COMDATSymName = NumBits == 128 ? "__xmm@" : "__ymm@";
- for (int I = VTy->getNumElements() - 1, E = -1; I != E; --I)
- COMDATSymName +=
- scalarConstantToHexString(C->getAggregateElement(I));
- }
- }
- if (!COMDATSymName.empty()) {
- unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_LNK_COMDAT;
- return getContext().getCOFFSection(".rdata", Characteristics, Kind,
- COMDATSymName,
- COFF::IMAGE_COMDAT_SELECT_ANY);
- }
- }
+ if (Kind.isMergeableConst() && C) {
+ const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT;
+ std::string COMDATSymName;
+ if (Kind.isMergeableConst4() || Kind.isMergeableConst8())
+ COMDATSymName = "__real@" + scalarConstantToHexString(C);
+ else if (Kind.isMergeableConst16())
+ COMDATSymName = "__xmm@" + scalarConstantToHexString(C);
+
+ if (!COMDATSymName.empty())
+ return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+ COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ANY);
}
return TargetLoweringObjectFile::getSectionForConstant(Kind, C);
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 13384fab5985..0c82a700952b 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1130,3 +1130,18 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
return isLegalMaskedLoad(DataType, Consecutive);
}
+bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ // Work this as a subsetting of subtarget features.
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // FIXME: This is likely too limiting as it will include subtarget features
+ // that we might not care about for inlining, but it is conservatively
+ // correct.
+ return (CallerBits & CalleeBits) == CalleeBits;
+}
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index e570bb55710a..a83158440193 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -103,6 +103,8 @@ public:
Type *Ty);
bool isLegalMaskedLoad(Type *DataType, int Consecutive);
bool isLegalMaskedStore(Type *DataType, int Consecutive);
+ bool hasCompatibleFunctionAttributes(const Function *Caller,
+ const Function *Callee) const;
/// @}
};
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 71ce45b0bc2e..6925b272b4a5 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -86,7 +86,7 @@ namespace {
};
char VZeroUpperInserter::ID = 0;
-} // namespace
+}
FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index c9e80945549b..90357257b9ef 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -105,7 +105,7 @@ private:
/// The linked list node subobject inside of RegNode.
Value *Link = nullptr;
};
-} // namespace
+}
FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
@@ -398,6 +398,7 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
// Set up RegNodeEscapeIndex
int RegNodeEscapeIndex = escapeRegNode(F);
+ FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
// Only insert stores in catch handlers.
Constant *FI8 =
@@ -480,8 +481,8 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
// Remember and return the index that we used. We save it in WinEHFuncInfo so
- // that we can lower llvm.x86.seh.exceptioninfo later in filter functions
- // without too much trouble.
+ // that we can lower llvm.x86.seh.recoverfp later in filter functions without
+ // too much trouble.
int RegNodeEscapeIndex = escapeRegNode(F);
FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
@@ -528,14 +529,12 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
}
}
- // Insert llvm.stackrestore into each __except block.
- Function *StackRestore =
- Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore);
+ // Insert llvm.x86.seh.restoreframe() into each __except block.
+ Function *RestoreFrame =
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe);
for (BasicBlock *ExceptBB : ExceptBlocks) {
IRBuilder<> Builder(ExceptBB->begin());
- Value *SP =
- Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
- Builder.CreateCall(StackRestore, {SP});
+ Builder.CreateCall(RestoreFrame, {});
}
}