path: root/llvm/lib/Target/X86/X86InstrInfo.cpp
author    Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-07-29 20:15:26 +0000
commit    344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree      f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/X86/X86InstrInfo.cpp
parent    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
download  src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz
          src-344a3780b2e33f6ca763666c380202b18aab72a3.zip
the upstream release/13.x branch was created.
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp  186
1 file changed, 126 insertions, 60 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d9bab14f0c08..12a2d92fd888 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -905,7 +905,7 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
- return 1;
+ return MI.getOperand(0).getReg();
}
}
return 0;
@@ -940,7 +940,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
- return 1;
+ return MI.getOperand(X86::AddrNumOperands).getReg();
}
}
return 0;
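The two hunks above change the post-frame-elimination spill hooks to return the register being reloaded or stored rather than a bare 1, which is what the TargetInstrInfo documentation for these hooks asks for. A minimal caller sketch (hypothetical, not part of this commit), assuming a TargetInstrInfo pointer TII and a MachineInstr MI:

    int FrameIndex;
    if (unsigned Reg = TII->isLoadFromStackSlotPostFE(MI, FrameIndex)) {
      // MI reloads Reg from stack slot FrameIndex; the caller can now track
      // the reloaded value itself instead of only learning "this is a reload".
    }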
@@ -1006,6 +1006,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::MOV64ri:
case X86::MOV64ri32:
case X86::MOV8ri:
+ case X86::PTILEZEROV:
return true;
case X86::MOV8rm:
@@ -2669,6 +2670,58 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
}
+static bool isConvertibleLEA(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
+ Opcode != X86::LEA64_32r)
+ return false;
+
+ const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
+
+ if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
+ Scale.getImm() > 1)
+ return false;
+
+ return true;
+}
+
+bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
+ // Currently we're interested in following sequence only.
+ // r3 = lea r1, r2
+ // r5 = add r3, r4
+ // Both r3 and r4 are killed in add, we hope the add instruction has the
+ // operand order
+ // r5 = add r4, r3
+ // So later in X86FixupLEAs the lea instruction can be rewritten as add.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
+ return false;
+
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ Register Reg1 = MI.getOperand(1).getReg();
+ Register Reg2 = MI.getOperand(2).getReg();
+
+ // Check if Reg1 comes from LEA in the same MBB.
+ if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
+ if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
+ Commute = true;
+ return true;
+ }
+ }
+
+ // Check if Reg2 comes from LEA in the same MBB.
+ if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
+ if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
+ Commute = false;
+ return true;
+ }
+ }
+
+ return false;
+}
+
X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
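The comment in the new hasCommutePreference hook already names the pattern it targets; for concreteness, here is a small assumed C++ example (not from the commit) whose x86-64 lowering typically produces that lea-plus-add sequence, so the preference applies:

    // (a + b) usually becomes "r3 = lea r1, r2" and the outer addition
    // "r5 = add r3, r4".  Preferring the commuted form "r5 = add r4, r3"
    // lets X86FixupLEAs later rewrite the lea as an add, as the comment
    // in the hunk describes.
    long sum3(long a, long b, long c) { return (a + b) + c; }

Note that isConvertibleLEA restricts the preference to LEAs that are really just register-register adds: zero displacement, no segment register, and a scale of at most 1.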
@@ -3794,7 +3847,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
"Stack slot too small for store");
if (RC->getID() == X86::TILERegClassID) {
unsigned Opc = X86::TILESTORED;
@@ -3808,15 +3862,11 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineOperand &MO = NewMI->getOperand(2);
MO.setReg(VirtReg);
MO.setIsKill(true);
- } else if (RC->getID() == X86::TILECFGRegClassID) {
- unsigned Opc = X86::PSTTILECFG;
- addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
- .addReg(SrcReg, getKillRegState(isKill));
} else {
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
(Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
- RI.canRealignStack(MF);
+ (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
.addReg(SrcReg, getKillRegState(isKill));
@@ -3840,16 +3890,13 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineOperand &MO = NewMI->getOperand(3);
MO.setReg(VirtReg);
MO.setIsKill(true);
- } else if (RC->getID() == X86::TILECFGRegClassID) {
- unsigned Opc = X86::PLDTILECFG;
- addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
- FrameIdx);
} else {
const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
(Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
- RI.canRealignStack(MF);
+ (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
FrameIdx);
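Both spill hunks tighten the same alignment decision: an aligned spill or reload opcode may be used only when the stack is already aligned enough, or when the slot can still be realigned, and a fixed frame object (an incoming argument slot, for example) never can be, because its offset does not move when the frame is realigned. A hypothetical helper capturing that shared predicate, with assumed names:

    static bool canUseAlignedSpillOpcode(const MachineFunction &MF,
                                         const MachineFrameInfo &MFI,
                                         const X86RegisterInfo &RI,
                                         const TargetFrameLowering &TFL,
                                         unsigned SpillAlign, int FrameIdx) {
      // Aligned access is safe if the stack alignment already covers the
      // spill, or if realignment is possible and the slot is not fixed.
      return TFL.getStackAlign() >= SpillAlign ||
             (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
    }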
@@ -3977,8 +4024,10 @@ inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
/// Check whether the definition can be converted
/// to remove a comparison against zero.
-inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
+ bool &ClearsOverflowFlag) {
NoSignFlag = false;
+ ClearsOverflowFlag = false;
switch (MI.getOpcode()) {
default: return false;
@@ -4013,21 +4062,6 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
- case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
- case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
- case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
- case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
- case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
- case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
- case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
- case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
- case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
- case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
- case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
- case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
- case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
- case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8:
case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr:
@@ -4042,16 +4076,6 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
- case X86::ANDN32rr: case X86::ANDN32rm:
- case X86::ANDN64rr: case X86::ANDN64rm:
- case X86::BLSI32rr: case X86::BLSI32rm:
- case X86::BLSI64rr: case X86::BLSI64rm:
- case X86::BLSMSK32rr:case X86::BLSMSK32rm:
- case X86::BLSMSK64rr:case X86::BLSMSK64rm:
- case X86::BLSR32rr: case X86::BLSR32rm:
- case X86::BLSR64rr: case X86::BLSR64rm:
- case X86::BZHI32rr: case X86::BZHI32rm:
- case X86::BZHI64rr: case X86::BZHI64rm:
case X86::LZCNT16rr: case X86::LZCNT16rm:
case X86::LZCNT32rr: case X86::LZCNT32rm:
case X86::LZCNT64rr: case X86::LZCNT64rm:
@@ -4061,6 +4085,30 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
case X86::TZCNT16rr: case X86::TZCNT16rm:
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
+ return true;
+ case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
+ case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
+ case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
+ case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
+ case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
+ case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
+ case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
+ case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
+ case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
+ case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
+ case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
+ case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
+ case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
+ case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
+ case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+ case X86::ANDN32rr: case X86::ANDN32rm:
+ case X86::ANDN64rr: case X86::ANDN64rm:
+ case X86::BLSI32rr: case X86::BLSI32rm:
+ case X86::BLSI64rr: case X86::BLSI64rm:
+ case X86::BLSMSK32rr: case X86::BLSMSK32rm:
+ case X86::BLSMSK64rr: case X86::BLSMSK64rm:
+ case X86::BLSR32rr: case X86::BLSR32rm:
+ case X86::BLSR64rr: case X86::BLSR64rm:
case X86::BLCFILL32rr: case X86::BLCFILL32rm:
case X86::BLCFILL64rr: case X86::BLCFILL64rm:
case X86::BLCI32rr: case X86::BLCI32rm:
@@ -4075,16 +4123,23 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
case X86::BLSFILL64rr: case X86::BLSFILL64rm:
case X86::BLSIC32rr: case X86::BLSIC32rm:
case X86::BLSIC64rr: case X86::BLSIC64rm:
+ case X86::BZHI32rr: case X86::BZHI32rm:
+ case X86::BZHI64rr: case X86::BZHI64rm:
case X86::T1MSKC32rr: case X86::T1MSKC32rm:
case X86::T1MSKC64rr: case X86::T1MSKC64rm:
case X86::TZMSK32rr: case X86::TZMSK32rm:
case X86::TZMSK64rr: case X86::TZMSK64rm:
+ // These instructions clear the overflow flag just like TEST.
+ // FIXME: These are not the only instructions in this switch that clear the
+ // overflow flag.
+ ClearsOverflowFlag = true;
return true;
case X86::BEXTR32rr: case X86::BEXTR64rr:
case X86::BEXTR32rm: case X86::BEXTR64rm:
case X86::BEXTRI32ri: case X86::BEXTRI32mi:
case X86::BEXTRI64ri: case X86::BEXTRI64mi:
- // BEXTR doesn't update the sign flag so we can't use it.
+ // BEXTR doesn't update the sign flag so we can't use it. It does clear
+ // the overflow flag, but that's not useful without the sign flag.
NoSignFlag = true;
return true;
}
@@ -4179,6 +4234,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
CmpInstr.setDesc(get(NewOpcode));
CmpInstr.RemoveOperand(0);
+ // Mutating this instruction invalidates any debug data associated with it.
+ CmpInstr.dropDebugNumber();
// Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
@@ -4204,8 +4261,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// right way.
bool ShouldUpdateCC = false;
bool NoSignFlag = false;
+ bool ClearsOverflowFlag = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
+ if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
// Scan forward from the use until we hit the use we're looking for or the
// compare instruction.
for (MachineBasicBlock::iterator J = MI;; ++J) {
@@ -4317,11 +4375,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
+ // CF is used, we can't perform this optimization.
+ return false;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO:
- // CF and OF are used, we can't perform this optimization.
- return false;
+ // If OF is used, the instruction needs to clear it like CmpZero does.
+ if (!ClearsOverflowFlag)
+ return false;
+ break;
case X86::COND_S: case X86::COND_NS:
// If SF is used, but the instruction doesn't update the SF, then we
// can't do the optimization.
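The ClearsOverflowFlag threading above is what allows optimizeCompareInstr to delete a compare against zero even when a signed, OF-reading condition consumes the flags, as long as the flag-defining instruction is one of the logical operations that clear OF. A small assumed example (not from the commit) of code where that situation arises:

    // The AND produces the value, sets SF/ZF from the result, and clears
    // OF -- the same flag effect "test %reg, %reg" would have -- so the
    // separate test before the signed-greater check becomes redundant.
    int isPositiveAnd(int a, int b) { return (a & b) > 0; }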
@@ -4490,7 +4552,7 @@ static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
bool MinusOne) {
MachineBasicBlock &MBB = *MIB->getParent();
- DebugLoc DL = MIB->getDebugLoc();
+ const DebugLoc &DL = MIB->getDebugLoc();
Register Reg = MIB.getReg(0);
// Insert the XOR.
@@ -4509,7 +4571,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII,
const X86Subtarget &Subtarget) {
MachineBasicBlock &MBB = *MIB->getParent();
- DebugLoc DL = MIB->getDebugLoc();
+ const DebugLoc &DL = MIB->getDebugLoc();
int64_t Imm = MIB->getOperand(1).getImm();
assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
MachineBasicBlock::iterator I = MIB.getInstr();
@@ -4566,7 +4628,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();
- DebugLoc DL = MIB->getDebugLoc();
+ const DebugLoc &DL = MIB->getDebugLoc();
Register Reg = MIB.getReg(0);
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
@@ -5706,7 +5768,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
Align Alignment = MFI.getObjectAlign(FrameIndex);
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
- if (!RI.needsStackRealignment(MF))
+ if (!RI.hasStackRealignment(MF))
Alignment =
std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
@@ -6090,15 +6152,16 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
- if (MF.getTarget().isPositionIndependent()) {
- if (Subtarget.is64Bit())
- PICBase = X86::RIP;
- else
- // FIXME: PICBase = getGlobalBaseReg(&MF);
- // This doesn't work for several reasons.
- // 1. GlobalBaseReg may have been spilled.
- // 2. It may not be live at MI.
- return nullptr;
+ // Since we're using Small or Kernel code model, we can always use
+ // RIP-relative addressing for a smaller encoding.
+ if (Subtarget.is64Bit()) {
+ PICBase = X86::RIP;
+ } else if (MF.getTarget().isPositionIndependent()) {
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return nullptr;
}
// Create a constant-pool entry.
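The rewritten PIC-base selection rests on the assumption stated in the new comment: this folding path only runs for the small and kernel code models, where a constant-pool entry is always reachable with RIP-relative addressing on x86-64, so position-independent code is no longer a precondition for the fold. An assumed illustration of RIP-relative constant-pool addressing, the addressing mode the fold can now always pick on 64-bit targets (not tied to this exact fold):

    // Floating-point constants live in the constant pool and are read
    // with a RIP-relative operand, e.g. "addsd .LCPI0_0(%rip), %xmm0"
    // (label name assumed).
    double plusHalf(double x) { return x + 0.5; }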
@@ -6348,7 +6411,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
case X86::CMP8ri: {
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
- if (MO1.getImm() == 0) {
+ if (MO1.isImm() && MO1.getImm() == 0) {
unsigned NewOpc;
switch (DataMI->getOpcode()) {
default: llvm_unreachable("Unreachable!");
@@ -6788,7 +6851,8 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// ENDBR instructions should not be scheduled around.
unsigned Opcode = MI.getOpcode();
- if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32)
+ if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
+ Opcode == X86::LDTILECFG)
return true;
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
@@ -7713,8 +7777,10 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
}
/// Return the noop instruction to use for a noop.
-void X86InstrInfo::getNoop(MCInst &NopInst) const {
- NopInst.setOpcode(X86::NOOP);
+MCInst X86InstrInfo::getNop() const {
+ MCInst Nop;
+ Nop.setOpcode(X86::NOOP);
+ return Nop;
}
bool X86InstrInfo::isHighLatencyDef(int opc) const {
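The last hunk turns the nop hook from an out-parameter interface into a by-value return. A minimal sketch of the new call-site shape, with assumed surrounding objects (TII, OutStreamer, STI):

    // Previously: MCInst Nop; TII->getNoop(Nop);
    // Now the instruction is simply returned by value.
    MCInst Nop = TII->getNop();
    OutStreamer->emitInstruction(Nop, *STI);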