author    Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
commit    cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree      209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/X86/X86InstrInfo.cpp
parent    706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Vendor import of llvm-project master 2e10b7a39b9, the last commit before
the llvmorg-12-init tag, from which release/11.x was branched.
Notes:
    svn path=/vendor/llvm-project/master/; revision=363578
    svn path=/vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9/; revision=363579; tag=vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp  1104
1 file changed, 920 insertions, 184 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 245346d82731..42c111173570 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -88,7 +88,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: break;
@@ -135,13 +135,497 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
return false;
}
+bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ // By default, assume that the instruction is not data invariant.
+ return false;
+
+ // Some target-independent operations that trivially lower to data-invariant
+ // instructions.
+ case TargetOpcode::COPY:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ return true;
+
+ // On x86 the imul instructions are believed to be constant time w.r.t. their
+ // inputs. However, they set flags and are perhaps the most surprisingly
+ // constant-time operations, so we call them out here separately.
+ case X86::IMUL16rr:
+ case X86::IMUL16rri8:
+ case X86::IMUL16rri:
+ case X86::IMUL32rr:
+ case X86::IMUL32rri8:
+ case X86::IMUL32rri:
+ case X86::IMUL64rr:
+ case X86::IMUL64rri32:
+ case X86::IMUL64rri8:
+
+ // Bit scanning and counting instructions that are somewhat surprisingly
+ // constant time as they scan across bits and do other fairly complex
+ // operations like popcnt, but are believed to be constant time on x86.
+ // However, these set flags.
+ case X86::BSF16rr:
+ case X86::BSF32rr:
+ case X86::BSF64rr:
+ case X86::BSR16rr:
+ case X86::BSR32rr:
+ case X86::BSR64rr:
+ case X86::LZCNT16rr:
+ case X86::LZCNT32rr:
+ case X86::LZCNT64rr:
+ case X86::POPCNT16rr:
+ case X86::POPCNT32rr:
+ case X86::POPCNT64rr:
+ case X86::TZCNT16rr:
+ case X86::TZCNT32rr:
+ case X86::TZCNT64rr:
+
+ // Bit manipulation instructions are effectively combinations of basic
+ // arithmetic ops, and should still execute in constant time. These also
+ // set flags.
+ case X86::BLCFILL32rr:
+ case X86::BLCFILL64rr:
+ case X86::BLCI32rr:
+ case X86::BLCI64rr:
+ case X86::BLCIC32rr:
+ case X86::BLCIC64rr:
+ case X86::BLCMSK32rr:
+ case X86::BLCMSK64rr:
+ case X86::BLCS32rr:
+ case X86::BLCS64rr:
+ case X86::BLSFILL32rr:
+ case X86::BLSFILL64rr:
+ case X86::BLSI32rr:
+ case X86::BLSI64rr:
+ case X86::BLSIC32rr:
+ case X86::BLSIC64rr:
+ case X86::BLSMSK32rr:
+ case X86::BLSMSK64rr:
+ case X86::BLSR32rr:
+ case X86::BLSR64rr:
+ case X86::TZMSK32rr:
+ case X86::TZMSK64rr:
+
+ // Bit extracting and clearing instructions should execute in constant time,
+ // and set flags.
+ case X86::BEXTR32rr:
+ case X86::BEXTR64rr:
+ case X86::BEXTRI32ri:
+ case X86::BEXTRI64ri:
+ case X86::BZHI32rr:
+ case X86::BZHI64rr:
+
+ // Shift and rotate.
+ case X86::ROL8r1:
+ case X86::ROL16r1:
+ case X86::ROL32r1:
+ case X86::ROL64r1:
+ case X86::ROL8rCL:
+ case X86::ROL16rCL:
+ case X86::ROL32rCL:
+ case X86::ROL64rCL:
+ case X86::ROL8ri:
+ case X86::ROL16ri:
+ case X86::ROL32ri:
+ case X86::ROL64ri:
+ case X86::ROR8r1:
+ case X86::ROR16r1:
+ case X86::ROR32r1:
+ case X86::ROR64r1:
+ case X86::ROR8rCL:
+ case X86::ROR16rCL:
+ case X86::ROR32rCL:
+ case X86::ROR64rCL:
+ case X86::ROR8ri:
+ case X86::ROR16ri:
+ case X86::ROR32ri:
+ case X86::ROR64ri:
+ case X86::SAR8r1:
+ case X86::SAR16r1:
+ case X86::SAR32r1:
+ case X86::SAR64r1:
+ case X86::SAR8rCL:
+ case X86::SAR16rCL:
+ case X86::SAR32rCL:
+ case X86::SAR64rCL:
+ case X86::SAR8ri:
+ case X86::SAR16ri:
+ case X86::SAR32ri:
+ case X86::SAR64ri:
+ case X86::SHL8r1:
+ case X86::SHL16r1:
+ case X86::SHL32r1:
+ case X86::SHL64r1:
+ case X86::SHL8rCL:
+ case X86::SHL16rCL:
+ case X86::SHL32rCL:
+ case X86::SHL64rCL:
+ case X86::SHL8ri:
+ case X86::SHL16ri:
+ case X86::SHL32ri:
+ case X86::SHL64ri:
+ case X86::SHR8r1:
+ case X86::SHR16r1:
+ case X86::SHR32r1:
+ case X86::SHR64r1:
+ case X86::SHR8rCL:
+ case X86::SHR16rCL:
+ case X86::SHR32rCL:
+ case X86::SHR64rCL:
+ case X86::SHR8ri:
+ case X86::SHR16ri:
+ case X86::SHR32ri:
+ case X86::SHR64ri:
+ case X86::SHLD16rrCL:
+ case X86::SHLD32rrCL:
+ case X86::SHLD64rrCL:
+ case X86::SHLD16rri8:
+ case X86::SHLD32rri8:
+ case X86::SHLD64rri8:
+ case X86::SHRD16rrCL:
+ case X86::SHRD32rrCL:
+ case X86::SHRD64rrCL:
+ case X86::SHRD16rri8:
+ case X86::SHRD32rri8:
+ case X86::SHRD64rri8:
+
+ // Basic arithmetic is constant time on the input but does set flags.
+ case X86::ADC8rr:
+ case X86::ADC8ri:
+ case X86::ADC16rr:
+ case X86::ADC16ri:
+ case X86::ADC16ri8:
+ case X86::ADC32rr:
+ case X86::ADC32ri:
+ case X86::ADC32ri8:
+ case X86::ADC64rr:
+ case X86::ADC64ri8:
+ case X86::ADC64ri32:
+ case X86::ADD8rr:
+ case X86::ADD8ri:
+ case X86::ADD16rr:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD32rr:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD64rr:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32:
+ case X86::AND8rr:
+ case X86::AND8ri:
+ case X86::AND16rr:
+ case X86::AND16ri:
+ case X86::AND16ri8:
+ case X86::AND32rr:
+ case X86::AND32ri:
+ case X86::AND32ri8:
+ case X86::AND64rr:
+ case X86::AND64ri8:
+ case X86::AND64ri32:
+ case X86::OR8rr:
+ case X86::OR8ri:
+ case X86::OR16rr:
+ case X86::OR16ri:
+ case X86::OR16ri8:
+ case X86::OR32rr:
+ case X86::OR32ri:
+ case X86::OR32ri8:
+ case X86::OR64rr:
+ case X86::OR64ri8:
+ case X86::OR64ri32:
+ case X86::SBB8rr:
+ case X86::SBB8ri:
+ case X86::SBB16rr:
+ case X86::SBB16ri:
+ case X86::SBB16ri8:
+ case X86::SBB32rr:
+ case X86::SBB32ri:
+ case X86::SBB32ri8:
+ case X86::SBB64rr:
+ case X86::SBB64ri8:
+ case X86::SBB64ri32:
+ case X86::SUB8rr:
+ case X86::SUB8ri:
+ case X86::SUB16rr:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB32rr:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB64rr:
+ case X86::SUB64ri8:
+ case X86::SUB64ri32:
+ case X86::XOR8rr:
+ case X86::XOR8ri:
+ case X86::XOR16rr:
+ case X86::XOR16ri:
+ case X86::XOR16ri8:
+ case X86::XOR32rr:
+ case X86::XOR32ri:
+ case X86::XOR32ri8:
+ case X86::XOR64rr:
+ case X86::XOR64ri8:
+ case X86::XOR64ri32:
+ // Arithmetic with just 32-bit and 64-bit variants and no immediates.
+ case X86::ADCX32rr:
+ case X86::ADCX64rr:
+ case X86::ADOX32rr:
+ case X86::ADOX64rr:
+ case X86::ANDN32rr:
+ case X86::ANDN64rr:
+ // Unary arithmetic operations.
+ case X86::DEC8r:
+ case X86::DEC16r:
+ case X86::DEC32r:
+ case X86::DEC64r:
+ case X86::INC8r:
+ case X86::INC16r:
+ case X86::INC32r:
+ case X86::INC64r:
+ case X86::NEG8r:
+ case X86::NEG16r:
+ case X86::NEG32r:
+ case X86::NEG64r:
+
+ // Unlike other arithmetic, NOT doesn't set EFLAGS.
+ case X86::NOT8r:
+ case X86::NOT16r:
+ case X86::NOT32r:
+ case X86::NOT64r:
+
+ // Various move instructions used to zero or sign extend things. Note that we
+ // intentionally don't support the _NOREX variants as we can't handle that
+ // register constraint anyways.
+ case X86::MOVSX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVSX32rr16:
+ case X86::MOVSX64rr8:
+ case X86::MOVSX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX16rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVZX32rr16:
+ case X86::MOVZX64rr8:
+ case X86::MOVZX64rr16:
+ case X86::MOV32rr:
+
+ // Arithmetic instructions that are both constant time and don't set flags.
+ case X86::RORX32ri:
+ case X86::RORX64ri:
+ case X86::SARX32rr:
+ case X86::SARX64rr:
+ case X86::SHLX32rr:
+ case X86::SHLX64rr:
+ case X86::SHRX32rr:
+ case X86::SHRX64rr:
+
+ // LEA doesn't actually access memory, and its arithmetic is constant time.
+ case X86::LEA16r:
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ case X86::LEA64r:
+ return true;
+ }
+}
+
+bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ // By default, assume that the load will immediately leak.
+ return false;
+
+ // On x86 the imul instructions are believed to be constant time w.r.t. the
+ // loaded data. However, they set flags and are perhaps the most surprisingly
+ // constant-time operations, so we call them out here separately.
+ case X86::IMUL16rm:
+ case X86::IMUL16rmi8:
+ case X86::IMUL16rmi:
+ case X86::IMUL32rm:
+ case X86::IMUL32rmi8:
+ case X86::IMUL32rmi:
+ case X86::IMUL64rm:
+ case X86::IMUL64rmi32:
+ case X86::IMUL64rmi8:
+
+ // Bit scanning and counting instructions that are somewhat surprisingly
+ // constant time as they scan across bits and do other fairly complex
+ // operations like popcnt, but are believed to be constant time on x86.
+ // However, these set flags.
+ case X86::BSF16rm:
+ case X86::BSF32rm:
+ case X86::BSF64rm:
+ case X86::BSR16rm:
+ case X86::BSR32rm:
+ case X86::BSR64rm:
+ case X86::LZCNT16rm:
+ case X86::LZCNT32rm:
+ case X86::LZCNT64rm:
+ case X86::POPCNT16rm:
+ case X86::POPCNT32rm:
+ case X86::POPCNT64rm:
+ case X86::TZCNT16rm:
+ case X86::TZCNT32rm:
+ case X86::TZCNT64rm:
+
+ // Bit manipulation instructions are effectively combinations of basic
+ // arithmetic ops, and should still execute in constant time. These also
+ // set flags.
+ case X86::BLCFILL32rm:
+ case X86::BLCFILL64rm:
+ case X86::BLCI32rm:
+ case X86::BLCI64rm:
+ case X86::BLCIC32rm:
+ case X86::BLCIC64rm:
+ case X86::BLCMSK32rm:
+ case X86::BLCMSK64rm:
+ case X86::BLCS32rm:
+ case X86::BLCS64rm:
+ case X86::BLSFILL32rm:
+ case X86::BLSFILL64rm:
+ case X86::BLSI32rm:
+ case X86::BLSI64rm:
+ case X86::BLSIC32rm:
+ case X86::BLSIC64rm:
+ case X86::BLSMSK32rm:
+ case X86::BLSMSK64rm:
+ case X86::BLSR32rm:
+ case X86::BLSR64rm:
+ case X86::TZMSK32rm:
+ case X86::TZMSK64rm:
+
+ // Bit extracting and clearing instructions should execute in constant time,
+ // and set flags.
+ case X86::BEXTR32rm:
+ case X86::BEXTR64rm:
+ case X86::BEXTRI32mi:
+ case X86::BEXTRI64mi:
+ case X86::BZHI32rm:
+ case X86::BZHI64rm:
+
+ // Basic arithmetic is constant time on the input but does set flags.
+ case X86::ADC8rm:
+ case X86::ADC16rm:
+ case X86::ADC32rm:
+ case X86::ADC64rm:
+ case X86::ADCX32rm:
+ case X86::ADCX64rm:
+ case X86::ADD8rm:
+ case X86::ADD16rm:
+ case X86::ADD32rm:
+ case X86::ADD64rm:
+ case X86::ADOX32rm:
+ case X86::ADOX64rm:
+ case X86::AND8rm:
+ case X86::AND16rm:
+ case X86::AND32rm:
+ case X86::AND64rm:
+ case X86::ANDN32rm:
+ case X86::ANDN64rm:
+ case X86::OR8rm:
+ case X86::OR16rm:
+ case X86::OR32rm:
+ case X86::OR64rm:
+ case X86::SBB8rm:
+ case X86::SBB16rm:
+ case X86::SBB32rm:
+ case X86::SBB64rm:
+ case X86::SUB8rm:
+ case X86::SUB16rm:
+ case X86::SUB32rm:
+ case X86::SUB64rm:
+ case X86::XOR8rm:
+ case X86::XOR16rm:
+ case X86::XOR32rm:
+ case X86::XOR64rm:
+
+ // Integer multiply w/o affecting flags is still believed to be constant
+ // time on x86. Called out separately as this is among the most surprising
+ // instructions to exhibit that behavior.
+ case X86::MULX32rm:
+ case X86::MULX64rm:
+
+ // Arithmetic instructions that are both constant time and don't set flags.
+ case X86::RORX32mi:
+ case X86::RORX64mi:
+ case X86::SARX32rm:
+ case X86::SARX64rm:
+ case X86::SHLX32rm:
+ case X86::SHLX64rm:
+ case X86::SHRX32rm:
+ case X86::SHRX64rm:
+
+ // Conversions are believed to be constant time and don't set flags.
+ case X86::CVTTSD2SI64rm:
+ case X86::VCVTTSD2SI64rm:
+ case X86::VCVTTSD2SI64Zrm:
+ case X86::CVTTSD2SIrm:
+ case X86::VCVTTSD2SIrm:
+ case X86::VCVTTSD2SIZrm:
+ case X86::CVTTSS2SI64rm:
+ case X86::VCVTTSS2SI64rm:
+ case X86::VCVTTSS2SI64Zrm:
+ case X86::CVTTSS2SIrm:
+ case X86::VCVTTSS2SIrm:
+ case X86::VCVTTSS2SIZrm:
+ case X86::CVTSI2SDrm:
+ case X86::VCVTSI2SDrm:
+ case X86::VCVTSI2SDZrm:
+ case X86::CVTSI2SSrm:
+ case X86::VCVTSI2SSrm:
+ case X86::VCVTSI2SSZrm:
+ case X86::CVTSI642SDrm:
+ case X86::VCVTSI642SDrm:
+ case X86::VCVTSI642SDZrm:
+ case X86::CVTSI642SSrm:
+ case X86::VCVTSI642SSrm:
+ case X86::VCVTSI642SSZrm:
+ case X86::CVTSS2SDrm:
+ case X86::VCVTSS2SDrm:
+ case X86::VCVTSS2SDZrm:
+ case X86::CVTSD2SSrm:
+ case X86::VCVTSD2SSrm:
+ case X86::VCVTSD2SSZrm:
+ // AVX512 added unsigned integer conversions.
+ case X86::VCVTTSD2USI64Zrm:
+ case X86::VCVTTSD2USIZrm:
+ case X86::VCVTTSS2USI64Zrm:
+ case X86::VCVTTSS2USIZrm:
+ case X86::VCVTUSI2SDZrm:
+ case X86::VCVTUSI642SDZrm:
+ case X86::VCVTUSI2SSZrm:
+ case X86::VCVTUSI642SSZrm:
+
+ // Loads to register don't set flags.
+ case X86::MOV8rm:
+ case X86::MOV8rm_NOREX:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::MOVSX16rm8:
+ case X86::MOVSX32rm16:
+ case X86::MOVSX32rm8:
+ case X86::MOVSX32rm8_NOREX:
+ case X86::MOVSX64rm16:
+ case X86::MOVSX64rm32:
+ case X86::MOVSX64rm8:
+ case X86::MOVZX16rm8:
+ case X86::MOVZX32rm16:
+ case X86::MOVZX32rm8:
+ case X86::MOVZX32rm8_NOREX:
+ case X86::MOVZX64rm16:
+ case X86::MOVZX64rm8:
+ return true;
+ }
+}
+
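
The two predicates added above only classify opcodes; the patch itself does not show the caller. A minimal, hedged sketch of how a hardening-style pass might consult them (the loop and surrounding logic are illustrative, not part of this commit):

  // Sketch: skip instructions whose timing does not depend on their
  // (possibly attacker-controlled) inputs; harden everything else.
  for (MachineInstr &MI : MBB) {
    if (MI.mayLoad() ? X86InstrInfo::isDataInvariantLoad(MI)
                     : X86InstrInfo::isDataInvariant(MI))
      continue; // No mitigation needed for this instruction.
    // ... otherwise insert whatever mitigation the pass requires ...
  }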
int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
if (isFrameInstr(MI)) {
- unsigned StackAlign = TFI->getStackAlignment();
- int SPAdj = alignTo(getFrameSize(MI), StackAlign);
+ int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
SPAdj -= getFrameAdjustment(MI);
if (!isFrameSetup(MI))
SPAdj = -SPAdj;
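
The getStackAlignment() to getStackAlign() change is part of the wider migration to the llvm::Align type; alignTo() accepts an Align directly, so the rounding stays a one-liner. A small illustration with made-up values (assuming llvm/Support/Alignment.h):

  Align StackAlign = TFI->getStackAlign();            // e.g. Align(16)
  int SPAdj = alignTo(/*FrameSize=*/20, StackAlign);  // rounds 20 up to 32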
@@ -639,7 +1123,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
@@ -1182,61 +1666,61 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
- case X86::VBROADCASTSDZ256mk:
- case X86::VBROADCASTSDZmk:
- case X86::VBROADCASTSSZ128mk:
- case X86::VBROADCASTSSZ256mk:
- case X86::VBROADCASTSSZmk:
- case X86::VPBROADCASTDZ128mk:
- case X86::VPBROADCASTDZ256mk:
- case X86::VPBROADCASTDZmk:
- case X86::VPBROADCASTQZ128mk:
- case X86::VPBROADCASTQZ256mk:
- case X86::VPBROADCASTQZmk: {
+ case X86::VBROADCASTSDZ256rmk:
+ case X86::VBROADCASTSDZrmk:
+ case X86::VBROADCASTSSZ128rmk:
+ case X86::VBROADCASTSSZ256rmk:
+ case X86::VBROADCASTSSZrmk:
+ case X86::VPBROADCASTDZ128rmk:
+ case X86::VPBROADCASTDZ256rmk:
+ case X86::VPBROADCASTDZrmk:
+ case X86::VPBROADCASTQZ128rmk:
+ case X86::VPBROADCASTQZ256rmk:
+ case X86::VPBROADCASTQZrmk: {
unsigned Opc;
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
- case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
- case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
- case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
- case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
- case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
- case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
- case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break;
- case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break;
- case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break;
- case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break;
- case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break;
- case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break;
- case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break;
- case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break;
- case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break;
- case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break;
- case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break;
+ case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
+ case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
+ case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
+ case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
+ case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
+ case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
+ case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VBROADCASTSDZ256rmk: Opc = X86::VBLENDMPDZ256rmbk; break;
+ case X86::VBROADCASTSDZrmk: Opc = X86::VBLENDMPDZrmbk; break;
+ case X86::VBROADCASTSSZ128rmk: Opc = X86::VBLENDMPSZ128rmbk; break;
+ case X86::VBROADCASTSSZ256rmk: Opc = X86::VBLENDMPSZ256rmbk; break;
+ case X86::VBROADCASTSSZrmk: Opc = X86::VBLENDMPSZrmbk; break;
+ case X86::VPBROADCASTDZ128rmk: Opc = X86::VPBLENDMDZ128rmbk; break;
+ case X86::VPBROADCASTDZ256rmk: Opc = X86::VPBLENDMDZ256rmbk; break;
+ case X86::VPBROADCASTDZrmk: Opc = X86::VPBLENDMDZrmbk; break;
+ case X86::VPBROADCASTQZ128rmk: Opc = X86::VPBLENDMQZ128rmbk; break;
+ case X86::VPBROADCASTQZ256rmk: Opc = X86::VPBLENDMQZ256rmbk; break;
+ case X86::VPBROADCASTQZrmk: Opc = X86::VPBLENDMQZrmbk; break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1883,7 +2367,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
unsigned KMaskOp = -1U;
if (X86II::isKMasked(TSFlags)) {
// For k-zero-masked operations it is Ok to commute the first vector
- // operand.
+ // operand. Unless this is an intrinsic instruction.
// For regular k-masked operations a conservative choice is done as the
// elements of the first vector operand, for which the corresponding bit
// in the k-mask operand is set to 0, are copied to the result of the
@@ -1902,7 +2386,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
// The operand with index = 1 is used as a source for those elements for
// which the corresponding bit in the k-mask is set to 0.
- if (X86II::isKMergeMasked(TSFlags))
+ if (X86II::isKMergeMasked(TSFlags) || IsIntrinsic)
FirstCommutableVecOp = 3;
LastCommutableVecOp++;
@@ -2379,17 +2863,6 @@ unsigned X86::getSwappedVCMPImm(unsigned Imm) {
return Imm;
}
-bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
- if (!MI.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MI.isBranch() && !MI.isBarrier())
- return true;
- if (!MI.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case X86::TCRETURNdi:
@@ -2826,11 +3299,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
return Count;
}
-bool X86InstrInfo::
-canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
if (!Subtarget.hasCMov())
return false;
@@ -2865,9 +3338,9 @@ canInsertSelect(const MachineBasicBlock &MBB,
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const {
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
@@ -3189,8 +3662,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
-bool X86InstrInfo::getMemOperandWithOffset(
- const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset,
+bool X86InstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
@@ -3199,7 +3673,8 @@ bool X86InstrInfo::getMemOperandWithOffset(
MemRefBegin += X86II::getOperandBias(Desc);
- BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
+ const MachineOperand *BaseOp =
+ &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
if (!BaseOp->isReg()) // Can be an MO_FrameIndex
return false;
@@ -3221,6 +3696,13 @@ bool X86InstrInfo::getMemOperandWithOffset(
if (!BaseOp->isReg())
return false;
+ OffsetIsScalable = false;
+ // FIXME: Relying on memoperands() may not be the right thing to do here.
+ // Check with X86 maintainers and fix it accordingly. For now it is OK,
+ // since there is no use of `Width` in the X86 back-end at the moment.
+ Width =
+ !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
+ BaseOps.push_back(BaseOp);
return true;
}
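
The hook now reports a list of base operands plus an access width instead of a single base operand. A hedged sketch of the calling side, assuming the generic TargetInstrInfo interface of this LLVM revision (variable names are illustrative):

  SmallVector<const MachineOperand *, 2> BaseOps;
  int64_t Offset;
  bool OffsetIsScalable;
  unsigned Width;
  if (TII->getMemOperandsWithOffsetWidth(MemMI, BaseOps, Offset,
                                         OffsetIsScalable, Width, TRI)) {
    // On X86 exactly one base operand is returned; Offset is the byte
    // displacement and Width comes from the first memoperand (or 0).
    const MachineOperand *Base = BaseOps.front();
    (void)Base;
  }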
@@ -3241,7 +3723,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
+ Register SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
@@ -3249,7 +3731,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
- (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
@@ -3258,20 +3740,20 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
+ Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
- (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
-bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
@@ -3358,7 +3840,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
- unsigned SrcReg, unsigned SrcReg2,
+ Register SrcReg, Register SrcReg2,
int ImmMask, int ImmValue,
const MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
@@ -3547,8 +4029,8 @@ static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
/// Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
-bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask,
+bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int CmpMask,
int CmpValue,
const MachineRegisterInfo *MRI) const {
// Check whether we can replace SUB with CMP.
@@ -3875,15 +4357,15 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
// implicit operands.
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
// But we don't trust that.
- assert(MIB->getOperand(1).getReg() == Reg &&
- MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
+ assert(MIB.getReg(1) == Reg &&
+ MIB.getReg(2) == Reg && "Misplaced operand");
return true;
}
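
Several hunks below replace MIB->getOperand(N).getReg() with MIB.getReg(N). MachineInstrBuilder::getReg(N) appears to be a pure convenience accessor, so the two spellings should be interchangeable; a small sketch of the equivalence:

  Register R1 = MIB->getOperand(0).getReg();
  Register R2 = MIB.getReg(0);
  assert(R1 == R2 && "getReg(0) is shorthand for getOperand(0).getReg()");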
@@ -3905,7 +4387,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
bool MinusOne) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
// Insert the XOR.
BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
@@ -3949,13 +4431,15 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP64r));
MIB->getOperand(0)
- .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+ .setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
} else {
assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
StackAdjustment = 4;
BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP32r));
}
+ MIB->RemoveOperand(1);
+ MIB->addImplicitDefUseOperands(*MBB.getParent());
// Build CFI if necessary.
MachineFunction &MF = *MBB.getParent();
@@ -3979,14 +4463,14 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
auto Flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
MachineBasicBlock::iterator I = MIB.getInstr();
BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
@@ -4017,7 +4501,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
const MCInstrDesc &LoadDesc,
const MCInstrDesc &BroadcastDesc,
unsigned SubIdx) {
- Register DestReg = MIB->getOperand(0).getReg();
+ Register DestReg = MIB.getReg(0);
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(DestReg) < 16) {
// We can use a normal VEX encoded load.
@@ -4040,7 +4524,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
const MCInstrDesc &StoreDesc,
const MCInstrDesc &ExtractDesc,
unsigned SubIdx) {
- Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
+ Register SrcReg = MIB.getReg(X86::AddrNumOperands);
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(SrcReg) < 16) {
// We can use a normal VEX encoded store.
@@ -4063,7 +4547,7 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
// Temporarily remove the immediate so we can add another source register.
MIB->RemoveOperand(2);
// Add the register. Don't copy the kill flag if there is one.
- MIB.addReg(MIB->getOperand(1).getReg(),
+ MIB.addReg(MIB.getReg(1),
getUndefRegState(MIB->getOperand(1).isUndef()));
// Add back the immediate.
MIB.addImm(ShiftAmt);
@@ -4083,10 +4567,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::MOV32ImmSExti8:
case X86::MOV64ImmSExti8:
return ExpandMOVImmSExti8(MIB, *this, Subtarget);
- case X86::SETB_C8r:
- return Expand2AddrUndef(MIB, get(X86::SBB8rr));
- case X86::SETB_C16r:
- return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:
return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:
@@ -4101,7 +4581,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
const TargetRegisterInfo *TRI = &getRegisterInfo();
- Register SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB.getReg(0);
Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
Expand2AddrUndef(MIB, get(X86::VXORPSrr));
@@ -4113,7 +4593,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0F128: {
bool HasVLX = Subtarget.hasVLX();
- Register SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB.getReg(0);
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
return Expand2AddrUndef(MIB,
@@ -4127,7 +4607,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0: {
bool HasVLX = Subtarget.hasVLX();
- Register SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB.getReg(0);
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
@@ -4150,14 +4630,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::AVX1_SETALLONES: {
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
// VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
MIB->setDesc(get(X86::VCMPPSYrri));
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
return true;
}
case X86::AVX512_512_SETALLONES: {
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
MIB->setDesc(get(X86::VPTERNLOGDZrri));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
@@ -4167,8 +4647,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::AVX512_512_SEXT_MASK_32:
case X86::AVX512_512_SEXT_MASK_64: {
- Register Reg = MIB->getOperand(0).getReg();
- Register MaskReg = MIB->getOperand(1).getReg();
+ Register Reg = MIB.getReg(0);
+ Register MaskReg = MIB.getReg(1);
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
@@ -4205,7 +4685,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::MOV32ri64: {
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
MIB->getOperand(0).setReg(Reg32);
@@ -4358,11 +4838,105 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// Return true for any instruction that copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
+// Also returns true for instructions that have two inputs where one may
+// be undef and we want it to use the same register as the other input.
+static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
bool ForLoadFold = false) {
// Set the OpNum parameter to the first source operand.
- OpNum = 1;
switch (Opcode) {
+ case X86::MMX_PUNPCKHBWirr:
+ case X86::MMX_PUNPCKHWDirr:
+ case X86::MMX_PUNPCKHDQirr:
+ case X86::MMX_PUNPCKLBWirr:
+ case X86::MMX_PUNPCKLWDirr:
+ case X86::MMX_PUNPCKLDQirr:
+ case X86::MOVHLPSrr:
+ case X86::PACKSSWBrr:
+ case X86::PACKUSWBrr:
+ case X86::PACKSSDWrr:
+ case X86::PACKUSDWrr:
+ case X86::PUNPCKHBWrr:
+ case X86::PUNPCKLBWrr:
+ case X86::PUNPCKHWDrr:
+ case X86::PUNPCKLWDrr:
+ case X86::PUNPCKHDQrr:
+ case X86::PUNPCKLDQrr:
+ case X86::PUNPCKHQDQrr:
+ case X86::PUNPCKLQDQrr:
+ case X86::SHUFPDrri:
+ case X86::SHUFPSrri:
+ // These instructions are sometimes used with an undef first or second
+ // source. Return true here so BreakFalseDeps will assign this source to the
+ // same register as the first source to avoid a false dependency.
+ // Operand 1 of these instructions is tied so they're separate from their
+ // VEX counterparts.
+ return OpNum == 2 && !ForLoadFold;
+
+ case X86::VMOVLHPSrr:
+ case X86::VMOVLHPSZrr:
+ case X86::VPACKSSWBrr:
+ case X86::VPACKUSWBrr:
+ case X86::VPACKSSDWrr:
+ case X86::VPACKUSDWrr:
+ case X86::VPACKSSWBZ128rr:
+ case X86::VPACKUSWBZ128rr:
+ case X86::VPACKSSDWZ128rr:
+ case X86::VPACKUSDWZ128rr:
+ case X86::VPERM2F128rr:
+ case X86::VPERM2I128rr:
+ case X86::VSHUFF32X4Z256rri:
+ case X86::VSHUFF32X4Zrri:
+ case X86::VSHUFF64X2Z256rri:
+ case X86::VSHUFF64X2Zrri:
+ case X86::VSHUFI32X4Z256rri:
+ case X86::VSHUFI32X4Zrri:
+ case X86::VSHUFI64X2Z256rri:
+ case X86::VSHUFI64X2Zrri:
+ case X86::VPUNPCKHBWrr:
+ case X86::VPUNPCKLBWrr:
+ case X86::VPUNPCKHBWYrr:
+ case X86::VPUNPCKLBWYrr:
+ case X86::VPUNPCKHBWZ128rr:
+ case X86::VPUNPCKLBWZ128rr:
+ case X86::VPUNPCKHBWZ256rr:
+ case X86::VPUNPCKLBWZ256rr:
+ case X86::VPUNPCKHBWZrr:
+ case X86::VPUNPCKLBWZrr:
+ case X86::VPUNPCKHWDrr:
+ case X86::VPUNPCKLWDrr:
+ case X86::VPUNPCKHWDYrr:
+ case X86::VPUNPCKLWDYrr:
+ case X86::VPUNPCKHWDZ128rr:
+ case X86::VPUNPCKLWDZ128rr:
+ case X86::VPUNPCKHWDZ256rr:
+ case X86::VPUNPCKLWDZ256rr:
+ case X86::VPUNPCKHWDZrr:
+ case X86::VPUNPCKLWDZrr:
+ case X86::VPUNPCKHDQrr:
+ case X86::VPUNPCKLDQrr:
+ case X86::VPUNPCKHDQYrr:
+ case X86::VPUNPCKLDQYrr:
+ case X86::VPUNPCKHDQZ128rr:
+ case X86::VPUNPCKLDQZ128rr:
+ case X86::VPUNPCKHDQZ256rr:
+ case X86::VPUNPCKLDQZ256rr:
+ case X86::VPUNPCKHDQZrr:
+ case X86::VPUNPCKLDQZrr:
+ case X86::VPUNPCKHQDQrr:
+ case X86::VPUNPCKLQDQrr:
+ case X86::VPUNPCKHQDQYrr:
+ case X86::VPUNPCKLQDQYrr:
+ case X86::VPUNPCKHQDQZ128rr:
+ case X86::VPUNPCKLQDQZ128rr:
+ case X86::VPUNPCKHQDQZ256rr:
+ case X86::VPUNPCKLQDQZ256rr:
+ case X86::VPUNPCKHQDQZrr:
+ case X86::VPUNPCKLQDQZrr:
+ // These instructions are sometimes used with an undef first or second
+ // source. Return true here so BreakFalseDeps will assign this source to the
+ // same register as the first source to avoid a false dependency.
+ return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
+
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
case X86::VCVTSI2SSrr_Int:
@@ -4420,7 +4994,7 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
case X86::VCVTUSI642SDZrm_Int:
// Load folding won't affect the undef register update since the input is
// a GPR.
- return !ForLoadFold;
+ return OpNum == 1 && !ForLoadFold;
case X86::VCVTSD2SSrr:
case X86::VCVTSD2SSrm:
case X86::VCVTSD2SSrr_Int:
@@ -4519,15 +5093,13 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
case X86::VSQRTSDZrb_Int:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
- return true;
+ return OpNum == 1;
case X86::VMOVSSZrrk:
case X86::VMOVSDZrrk:
- OpNum = 3;
- return true;
+ return OpNum == 3 && !ForLoadFold;
case X86::VMOVSSZrrkz:
case X86::VMOVSDZrrkz:
- OpNum = 2;
- return true;
+ return OpNum == 2 && !ForLoadFold;
}
return false;
@@ -4550,13 +5122,17 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
unsigned
X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
const TargetRegisterInfo *TRI) const {
- if (!hasUndefRegUpdate(MI.getOpcode(), OpNum))
- return 0;
-
- const MachineOperand &MO = MI.getOperand(OpNum);
- if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) {
- return UndefRegClearance;
+ for (unsigned i = MI.getNumExplicitDefs(), e = MI.getNumExplicitOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isUndef() &&
+ Register::isPhysicalRegister(MO.getReg()) &&
+ hasUndefRegUpdate(MI.getOpcode(), i)) {
+ OpNum = i;
+ return UndefRegClearance;
+ }
}
+
return 0;
}
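
getUndefRegClearance() now scans every explicit use operand rather than assuming a single hard-coded index, so the caller receives the index of whichever undef physical-register input was found. A rough sketch of the calling side (this paraphrases BreakFalseDeps-style usage and is not quoted from this patch):

  unsigned OpNum = 0;
  if (unsigned Clearance = TII->getUndefRegClearance(MI, OpNum, TRI)) {
    // MI.getOperand(OpNum) is an undef physreg use. If its register was not
    // written within the last `Clearance` instructions, pick a different
    // register or insert a dependency-breaking def to avoid a false
    // dependency.
  }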
@@ -4727,7 +5303,7 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
- unsigned Size, unsigned Align) const {
+ unsigned Size, Align Alignment) const {
switch (MI.getOpcode()) {
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
@@ -4743,7 +5319,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && 4 <= Align) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(4)) {
int PtrOffset = SrcIdx * 4;
unsigned NewImm = (DstIdx << 4) | ZMask;
unsigned NewOpCode =
@@ -4767,7 +5343,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && 8 <= Align) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
unsigned NewOpCode =
(MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
(MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
@@ -4786,7 +5362,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
MachineInstr *NewMI =
FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
return NewMI;
@@ -4800,8 +5376,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
MachineInstr &MI) {
- unsigned Ignored;
- if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) ||
+ if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
@@ -4818,11 +5393,10 @@ static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
return VRegDef && VRegDef->isImplicitDef();
}
-
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
- unsigned Size, unsigned Align, bool AllowCommute) const {
+ unsigned Size, Align Alignment, bool AllowCommute) const {
bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
bool isTwoAddrFold = false;
@@ -4862,8 +5436,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineInstr *NewMI = nullptr;
// Attempt to fold any custom cases we have.
- if (MachineInstr *CustomMI =
- foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align))
+ if (MachineInstr *CustomMI = foldMemoryOperandCustom(
+ MF, MI, OpNum, MOs, InsertPt, Size, Alignment))
return CustomMI;
const X86MemoryFoldTableEntry *I = nullptr;
@@ -4890,9 +5464,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (I != nullptr) {
unsigned Opcode = I->DstOp;
- unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
- MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0;
- if (Align < MinAlign)
+ MaybeAlign MinAlign =
+ decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
+ if (MinAlign && Alignment < *MinAlign)
return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
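
decodeMaybeAlign() reverses the log2-plus-one encoding used by the fold tables, which is exactly what the deleted manual shift computed. For example (assuming llvm/Support/Alignment.h):

  MaybeAlign M0 = decodeMaybeAlign(0); // 0 means no alignment requirement
  MaybeAlign M3 = decodeMaybeAlign(3); // 1 << (3 - 1)  ==  Align(4)
  assert(!M0 && M3 && *M3 == Align(4));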
@@ -4967,8 +5541,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
}
// Attempt to fold with the commuted version of the instruction.
- NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt,
- Size, Align, /*AllowCommute=*/false);
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size,
+ Alignment, /*AllowCommute=*/false);
if (NewMI)
return NewMI;
@@ -5022,12 +5596,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
- unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
+ Align Alignment = MFI.getObjectAlign(FrameIndex);
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
Alignment =
- std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
+ std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -5085,12 +5659,31 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// destination register is wider than 32 bits (4 bytes), and its user
// instruction isn't scalar (SS).
switch (UserOpc) {
+ case X86::CVTSS2SDrr_Int:
+ case X86::VCVTSS2SDrr_Int:
+ case X86::VCVTSS2SDZrr_Int:
+ case X86::VCVTSS2SDZrr_Intk:
+ case X86::VCVTSS2SDZrr_Intkz:
+ case X86::CVTSS2SIrr_Int: case X86::CVTSS2SI64rr_Int:
+ case X86::VCVTSS2SIrr_Int: case X86::VCVTSS2SI64rr_Int:
+ case X86::VCVTSS2SIZrr_Int: case X86::VCVTSS2SI64Zrr_Int:
+ case X86::CVTTSS2SIrr_Int: case X86::CVTTSS2SI64rr_Int:
+ case X86::VCVTTSS2SIrr_Int: case X86::VCVTTSS2SI64rr_Int:
+ case X86::VCVTTSS2SIZrr_Int: case X86::VCVTTSS2SI64Zrr_Int:
+ case X86::VCVTSS2USIZrr_Int: case X86::VCVTSS2USI64Zrr_Int:
+ case X86::VCVTTSS2USIZrr_Int: case X86::VCVTTSS2USI64Zrr_Int:
+ case X86::RCPSSr_Int: case X86::VRCPSSr_Int:
+ case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int:
+ case X86::ROUNDSSr_Int: case X86::VROUNDSSr_Int:
+ case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int:
+ case X86::UCOMISSrr_Int:case X86::VUCOMISSrr_Int:case X86::VUCOMISSZrr_Int:
case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int:
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
+ case X86::SQRTSSr_Int: case X86::VSQRTSSr_Int: case X86::VSQRTSSZr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
case X86::VCMPSSZrr_Intk:
@@ -5098,6 +5691,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
+ case X86::VSQRTSSZr_Intk: case X86::VSQRTSSZr_Intkz:
case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
@@ -5125,6 +5719,41 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
+ case X86::VFIXUPIMMSSZrri:
+ case X86::VFIXUPIMMSSZrrik:
+ case X86::VFIXUPIMMSSZrrikz:
+ case X86::VFPCLASSSSZrr:
+ case X86::VFPCLASSSSZrrk:
+ case X86::VGETEXPSSZr:
+ case X86::VGETEXPSSZrk:
+ case X86::VGETEXPSSZrkz:
+ case X86::VGETMANTSSZrri:
+ case X86::VGETMANTSSZrrik:
+ case X86::VGETMANTSSZrrikz:
+ case X86::VRANGESSZrri:
+ case X86::VRANGESSZrrik:
+ case X86::VRANGESSZrrikz:
+ case X86::VRCP14SSZrr:
+ case X86::VRCP14SSZrrk:
+ case X86::VRCP14SSZrrkz:
+ case X86::VRCP28SSZr:
+ case X86::VRCP28SSZrk:
+ case X86::VRCP28SSZrkz:
+ case X86::VREDUCESSZrri:
+ case X86::VREDUCESSZrrik:
+ case X86::VREDUCESSZrrikz:
+ case X86::VRNDSCALESSZr_Int:
+ case X86::VRNDSCALESSZr_Intk:
+ case X86::VRNDSCALESSZr_Intkz:
+ case X86::VRSQRT14SSZrr:
+ case X86::VRSQRT14SSZrrk:
+ case X86::VRSQRT14SSZrrkz:
+ case X86::VRSQRT28SSZr:
+ case X86::VRSQRT28SSZrk:
+ case X86::VRSQRT28SSZrkz:
+ case X86::VSCALEFSSZrr:
+ case X86::VSCALEFSSZrrk:
+ case X86::VSCALEFSSZrrkz:
return false;
default:
return true;
@@ -5139,12 +5768,29 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// destination register is wider than 64 bits (8 bytes), and its user
// instruction isn't scalar (SD).
switch (UserOpc) {
+ case X86::CVTSD2SSrr_Int:
+ case X86::VCVTSD2SSrr_Int:
+ case X86::VCVTSD2SSZrr_Int:
+ case X86::VCVTSD2SSZrr_Intk:
+ case X86::VCVTSD2SSZrr_Intkz:
+ case X86::CVTSD2SIrr_Int: case X86::CVTSD2SI64rr_Int:
+ case X86::VCVTSD2SIrr_Int: case X86::VCVTSD2SI64rr_Int:
+ case X86::VCVTSD2SIZrr_Int: case X86::VCVTSD2SI64Zrr_Int:
+ case X86::CVTTSD2SIrr_Int: case X86::CVTTSD2SI64rr_Int:
+ case X86::VCVTTSD2SIrr_Int: case X86::VCVTTSD2SI64rr_Int:
+ case X86::VCVTTSD2SIZrr_Int: case X86::VCVTTSD2SI64Zrr_Int:
+ case X86::VCVTSD2USIZrr_Int: case X86::VCVTSD2USI64Zrr_Int:
+ case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int:
+ case X86::ROUNDSDr_Int: case X86::VROUNDSDr_Int:
+ case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int:
+ case X86::UCOMISDrr_Int:case X86::VUCOMISDrr_Int:case X86::VUCOMISDZrr_Int:
case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int:
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
+ case X86::SQRTSDr_Int: case X86::VSQRTSDr_Int: case X86::VSQRTSDZr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
case X86::VCMPSDZrr_Intk:
@@ -5152,6 +5798,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
+ case X86::VSQRTSDZr_Intk: case X86::VSQRTSDZr_Intkz:
case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
@@ -5179,6 +5826,41 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
+ case X86::VFIXUPIMMSDZrri:
+ case X86::VFIXUPIMMSDZrrik:
+ case X86::VFIXUPIMMSDZrrikz:
+ case X86::VFPCLASSSDZrr:
+ case X86::VFPCLASSSDZrrk:
+ case X86::VGETEXPSDZr:
+ case X86::VGETEXPSDZrk:
+ case X86::VGETEXPSDZrkz:
+ case X86::VGETMANTSDZrri:
+ case X86::VGETMANTSDZrrik:
+ case X86::VGETMANTSDZrrikz:
+ case X86::VRANGESDZrri:
+ case X86::VRANGESDZrrik:
+ case X86::VRANGESDZrrikz:
+ case X86::VRCP14SDZrr:
+ case X86::VRCP14SDZrrk:
+ case X86::VRCP14SDZrrkz:
+ case X86::VRCP28SDZr:
+ case X86::VRCP28SDZrk:
+ case X86::VRCP28SDZrkz:
+ case X86::VREDUCESDZrri:
+ case X86::VREDUCESDZrrik:
+ case X86::VREDUCESDZrrikz:
+ case X86::VRNDSCALESDZr_Int:
+ case X86::VRNDSCALESDZr_Intk:
+ case X86::VRNDSCALESDZr_Intkz:
+ case X86::VRSQRT14SDZrr:
+ case X86::VRSQRT14SDZrrk:
+ case X86::VRSQRT14SDZrrkz:
+ case X86::VRSQRT28SDZr:
+ case X86::VRSQRT28SDZrk:
+ case X86::VRSQRT28SDZrkz:
+ case X86::VSCALEFSDZrr:
+ case X86::VSCALEFSDZrrk:
+ case X86::VSCALEFSDZrrkz:
return false;
default:
return true;
@@ -5219,36 +5901,36 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
// Determine the alignment of the load.
- unsigned Alignment = 0;
+ Align Alignment;
if (LoadMI.hasOneMemOperand())
- Alignment = (*LoadMI.memoperands_begin())->getAlignment();
+ Alignment = (*LoadMI.memoperands_begin())->getAlign();
else
switch (LoadMI.getOpcode()) {
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
- Alignment = 64;
+ Alignment = Align(64);
break;
case X86::AVX2_SETALLONES:
case X86::AVX1_SETALLONES:
case X86::AVX_SET0:
case X86::AVX512_256_SET0:
- Alignment = 32;
+ Alignment = Align(32);
break;
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX512_128_SET0:
case X86::FsFLD0F128:
case X86::AVX512_FsFLD0F128:
- Alignment = 16;
+ Alignment = Align(16);
break;
case X86::MMX_SET0:
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
- Alignment = 8;
+ Alignment = Align(8);
break;
case X86::FsFLD0SS:
case X86::AVX512_FsFLD0SS:
- Alignment = 4;
+ Alignment = Align(4);
break;
default:
return nullptr;
@@ -5323,14 +6005,18 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 8);
else if (Opc == X86::MMX_SET0)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 2);
else
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 4);
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
Opc == X86::AVX512_512_SETALLONES ||
@@ -5416,33 +6102,33 @@ static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
case TB_BCAST_D:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VPBROADCASTDZ128m;
- case 32: return X86::VPBROADCASTDZ256m;
- case 64: return X86::VPBROADCASTDZm;
+ case 16: return X86::VPBROADCASTDZ128rm;
+ case 32: return X86::VPBROADCASTDZ256rm;
+ case 64: return X86::VPBROADCASTDZrm;
}
break;
case TB_BCAST_Q:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VPBROADCASTQZ128m;
- case 32: return X86::VPBROADCASTQZ256m;
- case 64: return X86::VPBROADCASTQZm;
+ case 16: return X86::VPBROADCASTQZ128rm;
+ case 32: return X86::VPBROADCASTQZ256rm;
+ case 64: return X86::VPBROADCASTQZrm;
}
break;
case TB_BCAST_SS:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VBROADCASTSSZ128m;
- case 32: return X86::VBROADCASTSSZ256m;
- case 64: return X86::VBROADCASTSSZm;
+ case 16: return X86::VBROADCASTSSZ128rm;
+ case 32: return X86::VBROADCASTSSZ256rm;
+ case 64: return X86::VBROADCASTSSZrm;
}
break;
case TB_BCAST_SD:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
case 16: return X86::VMOVDDUPZ128rm;
- case 32: return X86::VBROADCASTSDZ256m;
- case 64: return X86::VBROADCASTSDZm;
+ case 32: return X86::VBROADCASTSDZ256rm;
+ case 64: return X86::VBROADCASTSDZrm;
}
break;
}
@@ -5502,7 +6188,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
Opc = getBroadcastOpcode(I, RC, Subtarget);
} else {
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
}
@@ -5579,7 +6265,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -5646,7 +6332,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
Opc = getBroadcastOpcode(I, RC, Subtarget);
} else {
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
}
@@ -5712,7 +6398,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
SDNode *Store =
DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
dl, MVT::Other, AddrOps);
@@ -6122,18 +6808,18 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVSDZrm_alt, X86::VMOVSDZrm_alt, X86::VMOVQI2PQIZrm },
{ X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
{ X86::VMOVSSZrm_alt, X86::VMOVSSZrm_alt, X86::VMOVDI2PDIZrm },
- { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
- { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
- { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
- { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
- { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
- { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
- { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128r },
- { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128m },
- { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
- { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
- { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
- { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
+ { X86::VBROADCASTSSZ128rr,X86::VBROADCASTSSZ128rr,X86::VPBROADCASTDZ128rr },
+ { X86::VBROADCASTSSZ128rm,X86::VBROADCASTSSZ128rm,X86::VPBROADCASTDZ128rm },
+ { X86::VBROADCASTSSZ256rr,X86::VBROADCASTSSZ256rr,X86::VPBROADCASTDZ256rr },
+ { X86::VBROADCASTSSZ256rm,X86::VBROADCASTSSZ256rm,X86::VPBROADCASTDZ256rm },
+ { X86::VBROADCASTSSZrr, X86::VBROADCASTSSZrr, X86::VPBROADCASTDZrr },
+ { X86::VBROADCASTSSZrm, X86::VBROADCASTSSZrm, X86::VPBROADCASTDZrm },
+ { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128rr },
+ { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128rm },
+ { X86::VBROADCASTSDZ256rr,X86::VBROADCASTSDZ256rr,X86::VPBROADCASTQZ256rr },
+ { X86::VBROADCASTSDZ256rm,X86::VBROADCASTSDZ256rm,X86::VPBROADCASTQZ256rm },
+ { X86::VBROADCASTSDZrr, X86::VBROADCASTSDZrr, X86::VPBROADCASTQZrr },
+ { X86::VBROADCASTSDZrm, X86::VBROADCASTSDZrm, X86::VPBROADCASTQZrm },
{ X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
{ X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
{ X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
@@ -6893,7 +7579,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
// Don't change integer Q instructions to D instructions and
- // use D intructions if we started with a PS instruction.
+ // use D instructions if we started with a PS instruction.
if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
Domain = 4;
}
@@ -7550,7 +8236,8 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VMULSSrr:
case X86::VMULSDZrr:
case X86::VMULSSZrr:
- return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
default:
return false;
}
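
Machine-level reassociation is now gated on per-instruction fast-math flags instead of the global UnsafeFPMath option. These MIFlags are normally carried over from the IR fast-math flags (e.g. "reassoc nsz") during instruction selection; a pass creating new instructions could also set them explicitly. A hedged sketch (names are illustrative):

  // Only reassociate when the instruction itself permits it.
  bool CanReassoc = Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
                    Inst.getFlag(MachineInstr::MIFlag::FmNsz);
  // Tagging a newly built instruction:
  NewMI->setFlag(MachineInstr::MIFlag::FmReassoc);
  NewMI->setFlag(MachineInstr::MIFlag::FmNsz);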
@@ -7677,6 +8364,10 @@ X86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
return ParamLoadedValue(*Op, Expr);
}
+ case X86::MOV8ri:
+ case X86::MOV16ri:
+ // TODO: Handle MOV8ri and MOV16ri.
+ return None;
case X86::MOV32ri:
case X86::MOV64ri:
case X86::MOV64ri32:
@@ -7736,6 +8427,20 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
+ // Propagate FP flags from the original instructions.
+ // But clear poison-generating flags because those may not be valid now.
+ // TODO: There should be a helper function for copying only fast-math-flags.
+ uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ NewMI1.setFlags(IntersectedFlags);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+ NewMI2.setFlags(IntersectedFlags);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+
// Integer instructions may define an implicit EFLAGS dest register operand.
MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS);
MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS);
@@ -7955,8 +8660,7 @@ namespace {
}
// Visit the children of this block in the dominator tree.
- for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
- I != E; ++I) {
+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I) {
Changed |= VisitNode(*I, TLSBaseAddrReg);
}
@@ -8071,6 +8775,35 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
return Sum + 1;
});
+ // We check to see if CFI Instructions are present, and if they are
+ // we find the number of CFI Instructions in the candidates.
+ unsigned CFICount = 0;
+ MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
+ for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
+ Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
+ const std::vector<MCCFIInstruction> &CFIInstructions =
+ RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
+ if (MBBI->isCFIInstruction()) {
+ unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
+ MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ CFICount++;
+ }
+ MBBI++;
+ }
+
+ // We compare the number of found CFI Instructions to the number of CFI
+ // instructions in the parent function for each candidate. We must check this
+ // since if we outline one of the CFI instructions in a function, we have to
+ // outline them all for correctness. If we do not, the address offsets will be
+ // incorrect between the two sections of the program.
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ std::vector<MCCFIInstruction> CFIInstructions =
+ C.getMF()->getFrameInstructions();
+
+ if (CFICount > 0 && CFICount != CFIInstructions.size())
+ return outliner::OutlinedFunction();
+ }
+
// FIXME: Use real size in bytes for call and ret instructions.
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
for (outliner::Candidate &C : RepeatedSequenceLocs)
@@ -8082,6 +8815,9 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
);
}
+ if (CFICount > 0)
+ return outliner::OutlinedFunction();
+
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, 1);