diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/X86/X86InstrInfo.cpp | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
download | src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz src-344a3780b2e33f6ca763666c380202b18aab72a3.zip |
Vendor import of llvm-project main 88e66fa60ae5, the last commit before the upstream release/13.x branch was created.
Refs: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5, vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e, vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 186 |
1 files changed, 126 insertions, 60 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d9bab14f0c08..12a2d92fd888 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -905,7 +905,7 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, FrameIndex = cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) ->getFrameIndex(); - return 1; + return MI.getOperand(0).getReg(); } } return 0; @@ -940,7 +940,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, FrameIndex = cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) ->getFrameIndex(); - return 1; + return MI.getOperand(X86::AddrNumOperands).getReg(); } } return 0; @@ -1006,6 +1006,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case X86::MOV64ri: case X86::MOV64ri32: case X86::MOV8ri: + case X86::PTILEZEROV: return true; case X86::MOV8rm: @@ -2669,6 +2670,58 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI, return false; } +static bool isConvertibleLEA(MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + if (Opcode != X86::LEA32r && Opcode != X86::LEA64r && + Opcode != X86::LEA64_32r) + return false; + + const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt); + const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp); + const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg); + + if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 || + Scale.getImm() > 1) + return false; + + return true; +} + +bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const { + // Currently we're interested in following sequence only. + // r3 = lea r1, r2 + // r5 = add r3, r4 + // Both r3 and r4 are killed in add, we hope the add instruction has the + // operand order + // r5 = add r4, r3 + // So later in X86FixupLEAs the lea instruction can be rewritten as add. 
+ unsigned Opcode = MI.getOpcode(); + if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr) + return false; + + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + Register Reg1 = MI.getOperand(1).getReg(); + Register Reg2 = MI.getOperand(2).getReg(); + + // Check if Reg1 comes from LEA in the same MBB. + if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) { + if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) { + Commute = true; + return true; + } + } + + // Check if Reg2 comes from LEA in the same MBB. + if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) { + if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) { + Commute = false; + return true; + } + } + + return false; +} + X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return X86::COND_INVALID; @@ -3794,7 +3847,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && + const MachineFrameInfo &MFI = MF.getFrameInfo(); + assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && "Stack slot too small for store"); if (RC->getID() == X86::TILERegClassID) { unsigned Opc = X86::TILESTORED; @@ -3808,15 +3862,11 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineOperand &MO = NewMI->getOperand(2); MO.setReg(VirtReg); MO.setIsKill(true); - } else if (RC->getID() == X86::TILECFGRegClassID) { - unsigned Opc = X86::PSTTILECFG; - addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx) - .addReg(SrcReg, getKillRegState(isKill)); } else { unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) || - RI.canRealignStack(MF); + (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx)); 
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); @@ -3840,16 +3890,13 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineOperand &MO = NewMI->getOperand(3); MO.setReg(VirtReg); MO.setIsKill(true); - } else if (RC->getID() == X86::TILECFGRegClassID) { - unsigned Opc = X86::PLDTILECFG; - addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), - FrameIdx); } else { const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) || - RI.canRealignStack(MF); + (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx)); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); @@ -3977,8 +4024,10 @@ inline static bool isRedundantFlagInstr(const MachineInstr &FlagI, /// Check whether the definition can be converted /// to remove a comparison against zero. 
-inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { +inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, + bool &ClearsOverflowFlag) { NoSignFlag = false; + ClearsOverflowFlag = false; switch (MI.getOpcode()) { default: return false; @@ -4013,21 +4062,6 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: - case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: - case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: - case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: - case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: - case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: - case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: - case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: - case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: - case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: - case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: - case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: - case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: - case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: - case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: - case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri: case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8: case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr: @@ -4042,16 +4076,6 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: - case X86::ANDN32rr: case X86::ANDN32rm: - case 
X86::ANDN64rr: case X86::ANDN64rm: - case X86::BLSI32rr: case X86::BLSI32rm: - case X86::BLSI64rr: case X86::BLSI64rm: - case X86::BLSMSK32rr:case X86::BLSMSK32rm: - case X86::BLSMSK64rr:case X86::BLSMSK64rm: - case X86::BLSR32rr: case X86::BLSR32rm: - case X86::BLSR64rr: case X86::BLSR64rm: - case X86::BZHI32rr: case X86::BZHI32rm: - case X86::BZHI64rr: case X86::BZHI64rm: case X86::LZCNT16rr: case X86::LZCNT16rm: case X86::LZCNT32rr: case X86::LZCNT32rm: case X86::LZCNT64rr: case X86::LZCNT64rm: @@ -4061,6 +4085,30 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { case X86::TZCNT16rr: case X86::TZCNT16rm: case X86::TZCNT32rr: case X86::TZCNT32rm: case X86::TZCNT64rr: case X86::TZCNT64rm: + return true; + case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: + case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: + case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: + case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: + case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: + case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: + case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: + case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: + case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: + case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: + case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: + case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: + case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: + case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: + case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: + case X86::ANDN32rr: case X86::ANDN32rm: + case X86::ANDN64rr: case X86::ANDN64rm: + case X86::BLSI32rr: case X86::BLSI32rm: + case X86::BLSI64rr: case X86::BLSI64rm: + case X86::BLSMSK32rr: case X86::BLSMSK32rm: + case X86::BLSMSK64rr: case X86::BLSMSK64rm: + case X86::BLSR32rr: case X86::BLSR32rm: + case X86::BLSR64rr: case X86::BLSR64rm: case X86::BLCFILL32rr: case 
X86::BLCFILL32rm: case X86::BLCFILL64rr: case X86::BLCFILL64rm: case X86::BLCI32rr: case X86::BLCI32rm: @@ -4075,16 +4123,23 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { case X86::BLSFILL64rr: case X86::BLSFILL64rm: case X86::BLSIC32rr: case X86::BLSIC32rm: case X86::BLSIC64rr: case X86::BLSIC64rm: + case X86::BZHI32rr: case X86::BZHI32rm: + case X86::BZHI64rr: case X86::BZHI64rm: case X86::T1MSKC32rr: case X86::T1MSKC32rm: case X86::T1MSKC64rr: case X86::T1MSKC64rm: case X86::TZMSK32rr: case X86::TZMSK32rm: case X86::TZMSK64rr: case X86::TZMSK64rm: + // These instructions clear the overflow flag just like TEST. + // FIXME: These are not the only instructions in this switch that clear the + // overflow flag. + ClearsOverflowFlag = true; return true; case X86::BEXTR32rr: case X86::BEXTR64rr: case X86::BEXTR32rm: case X86::BEXTR64rm: case X86::BEXTRI32ri: case X86::BEXTRI32mi: case X86::BEXTRI64ri: case X86::BEXTRI64mi: - // BEXTR doesn't update the sign flag so we can't use it. + // BEXTR doesn't update the sign flag so we can't use it. It does clear + // the overflow flag, but that's not useful without the sign flag. NoSignFlag = true; return true; } @@ -4179,6 +4234,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, } CmpInstr.setDesc(get(NewOpcode)); CmpInstr.RemoveOperand(0); + // Mutating this instruction invalidates any debug data associated with it. + CmpInstr.dropDebugNumber(); // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) @@ -4204,8 +4261,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // right way. 
bool ShouldUpdateCC = false; bool NoSignFlag = false; + bool ClearsOverflowFlag = false; X86::CondCode NewCC = X86::COND_INVALID; - if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) { + if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) { // Scan forward from the use until we hit the use we're looking for or the // compare instruction. for (MachineBasicBlock::iterator J = MI;; ++J) { @@ -4317,11 +4375,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: + // CF is used, we can't perform this optimization. + return false; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: case X86::COND_O: case X86::COND_NO: - // CF and OF are used, we can't perform this optimization. - return false; + // If OF is used, the instruction needs to clear it like CmpZero does. + if (!ClearsOverflowFlag) + return false; + break; case X86::COND_S: case X86::COND_NS: // If SF is used, but the instruction doesn't update the SF, then we // can't do the optimization. @@ -4490,7 +4552,7 @@ static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc, static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne) { MachineBasicBlock &MBB = *MIB->getParent(); - DebugLoc DL = MIB->getDebugLoc(); + const DebugLoc &DL = MIB->getDebugLoc(); Register Reg = MIB.getReg(0); // Insert the XOR. 
@@ -4509,7 +4571,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget) { MachineBasicBlock &MBB = *MIB->getParent(); - DebugLoc DL = MIB->getDebugLoc(); + const DebugLoc &DL = MIB->getDebugLoc(); int64_t Imm = MIB->getOperand(1).getImm(); assert(Imm != 0 && "Using push/pop for 0 is not efficient."); MachineBasicBlock::iterator I = MIB.getInstr(); @@ -4566,7 +4628,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { MachineBasicBlock &MBB = *MIB->getParent(); - DebugLoc DL = MIB->getDebugLoc(); + const DebugLoc &DL = MIB->getDebugLoc(); Register Reg = MIB.getReg(0); const GlobalValue *GV = cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); @@ -5706,7 +5768,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, Align Alignment = MFI.getObjectAlign(FrameIndex); // If the function stack isn't realigned we don't want to fold instructions // that need increased alignment. - if (!RI.needsStackRealignment(MF)) + if (!RI.hasStackRealignment(MF)) Alignment = std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign()); if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { @@ -6090,15 +6152,16 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (MF.getTarget().isPositionIndependent()) { - if (Subtarget.is64Bit()) - PICBase = X86::RIP; - else - // FIXME: PICBase = getGlobalBaseReg(&MF); - // This doesn't work for several reasons. - // 1. GlobalBaseReg may have been spilled. - // 2. It may not be live at MI. - return nullptr; + // Since we're using Small or Kernel code model, we can always use + // RIP-relative addressing for a smaller encoding. 
+ if (Subtarget.is64Bit()) { + PICBase = X86::RIP; + } else if (MF.getTarget().isPositionIndependent()) { + // FIXME: PICBase = getGlobalBaseReg(&MF); + // This doesn't work for several reasons. + // 1. GlobalBaseReg may have been spilled. + // 2. It may not be live at MI. + return nullptr; } // Create a constant-pool entry. @@ -6348,7 +6411,7 @@ bool X86InstrInfo::unfoldMemoryOperand( case X86::CMP8ri: { MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); - if (MO1.getImm() == 0) { + if (MO1.isImm() && MO1.getImm() == 0) { unsigned NewOpc; switch (DataMI->getOpcode()) { default: llvm_unreachable("Unreachable!"); @@ -6788,7 +6851,8 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI, // ENDBR instructions should not be scheduled around. unsigned Opcode = MI.getOpcode(); - if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32) + if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 || + Opcode == X86::LDTILECFG) return true; return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); @@ -7713,8 +7777,10 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { } /// Return the noop instruction to use for a noop. -void X86InstrInfo::getNoop(MCInst &NopInst) const { - NopInst.setOpcode(X86::NOOP); +MCInst X86InstrInfo::getNop() const { + MCInst Nop; + Nop.setOpcode(X86::NOOP); + return Nop; } bool X86InstrInfo::isHighLatencyDef(int opc) const { |