Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 170
1 files changed, 127 insertions, 43 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d97e6a62971b..d53950ca4465 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -85,7 +85,9 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
 
 SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
-    RI(ST), ST(ST) {}
+    RI(ST), ST(ST) {
+  SchedModel.init(&ST);
+}
 
 //===----------------------------------------------------------------------===//
 // TargetInstrInfo callbacks
@@ -260,6 +262,9 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
                                           const MachineOperand *&BaseOp,
                                           int64_t &Offset,
                                           const TargetRegisterInfo *TRI) const {
+  if (!LdSt.mayLoadOrStore())
+    return false;
+
   unsigned Opc = LdSt.getOpcode();
 
   if (isDS(LdSt)) {
@@ -270,12 +275,11 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
       BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
       // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
       // report that here?
-      if (!BaseOp)
+      if (!BaseOp || !BaseOp->isReg())
        return false;
 
       Offset = OffsetImm->getImm();
-      assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
-                                "operands of type register.");
+
       return true;
     }
@@ -307,9 +311,11 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
         EltSize *= 64;
 
       BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+      if (!BaseOp->isReg())
+        return false;
+
       Offset = EltSize * Offset0;
-      assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
-                                "operands of type register.");
+
       return true;
     }
@@ -346,12 +352,12 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
         getNamedOperand(LdSt, AMDGPU::OpName::offset);
     BaseOp = AddrReg;
     Offset = OffsetImm->getImm();
-
     if (SOffset) // soffset can be an inline immediate.
       Offset += SOffset->getImm();
-    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
-                              "operands of type register.");
+    if (!BaseOp->isReg())
+      return false;
+
     return true;
   }
@@ -364,8 +370,9 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
     const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
     BaseOp = SBaseReg;
     Offset = OffsetImm->getImm();
-    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
-                              "operands of type register.");
+    if (!BaseOp->isReg())
+      return false;
+
     return true;
   }
@@ -383,8 +390,8 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
     }
 
     Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
-    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
-                              "operands of type register.");
+    if (!BaseOp->isReg())
+      return false;
 
     return true;
   }
@@ -418,7 +425,7 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
   const MachineFunction &MF = *MI1.getParent()->getParent();
   const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
   Base1 = GetUnderlyingObject(Base1, DL);
-  Base2 = GetUnderlyingObject(Base1, DL);
+  Base2 = GetUnderlyingObject(Base2, DL);
 
   if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
     return false;
@@ -508,8 +515,8 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
 
 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
-                              const DebugLoc &DL, unsigned DestReg,
-                              unsigned SrcReg, bool KillSrc) {
+                              const DebugLoc &DL, MCRegister DestReg,
+                              MCRegister SrcReg, bool KillSrc) {
   MachineFunction *MF = MBB.getParent();
   DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
                                         "illegal SGPR to VGPR copy",
@@ -523,8 +530,8 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
 
 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
-                              const DebugLoc &DL, unsigned DestReg,
-                              unsigned SrcReg, bool KillSrc) const {
+                              const DebugLoc &DL, MCRegister DestReg,
+                              MCRegister SrcReg, bool KillSrc) const {
   const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
 
   if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -542,7 +549,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       RC == &AMDGPU::SReg_32RegClass) {
     if (SrcReg == AMDGPU::SCC) {
       BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
-          .addImm(-1)
+          .addImm(1)
           .addImm(0);
       return;
     }
@@ -840,7 +847,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     Register SReg = MRI.createVirtualRegister(BoolXExecRC);
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                           : AMDGPU::S_CSELECT_B64), SReg)
-        .addImm(-1)
+        .addImm(1)
         .addImm(0);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
@@ -855,7 +862,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                           : AMDGPU::S_CSELECT_B64), SReg)
         .addImm(0)
-        .addImm(-1);
+        .addImm(1);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
       .addReg(FalseReg)
@@ -900,7 +907,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
       .addImm(0);
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                           : AMDGPU::S_CSELECT_B64), SReg)
-        .addImm(-1)
+        .addImm(1)
         .addImm(0);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
@@ -919,7 +926,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                           : AMDGPU::S_CSELECT_B64), SReg)
         .addImm(0)
-        .addImm(-1);
+        .addImm(1);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
       .addReg(FalseReg)
@@ -1062,6 +1069,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 
   if (RI.isSGPRClass(RC)) {
     MFI->setHasSpilledSGPRs();
+    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
 
     // We are only allowed to create one new instruction when spilling
     // registers, so we need to use pseudo instruction for spilling SGPRs.
@@ -1190,6 +1198,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 
   if (RI.isSGPRClass(RC)) {
     MFI->setHasSpilledSGPRs();
+    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
 
     // FIXME: Maybe this should not include a memoperand because it will be
     // lowered to non-memory instructions.
@@ -2542,9 +2551,9 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
 
 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                                   const MachineInstr &MIb) const {
-  assert((MIa.mayLoad() || MIa.mayStore()) &&
+  assert(MIa.mayLoadOrStore() &&
          "MIa must load from or modify a memory location");
-  assert((MIb.mayLoad() || MIb.mayStore()) &&
+  assert(MIb.mayLoadOrStore() &&
          "MIb must load from or modify a memory location");
 
   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
@@ -3921,20 +3930,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
                         ? MRI.getRegClass(Reg)
                         : RI.getPhysRegClass(Reg);
 
-  const SIRegisterInfo *TRI =
-      static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
-  RC = TRI->getSubRegClass(RC, MO.getSubReg());
-
-  // In order to be legal, the common sub-class must be equal to the
-  // class of the current operand. For example:
-  //
-  // v_mov_b32 s0 ; Operand defined as vsrc_b32
-  //              ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
-  //
-  // s_sendmsg 0, s0 ; Operand defined as m0reg
-  //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+  const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+  if (MO.getSubReg()) {
+    const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+    const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
+    if (!SuperRC)
+      return false;
 
-  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+    DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
+    if (!DRC)
+      return false;
+  }
+  return RC->hasSuperClassEq(DRC);
 }
@@ -4451,12 +4458,12 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
   // Update dominators. We know that MBB immediately dominates LoopBB, that
   // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
   // dominates all of the successors transferred to it from MBB that MBB used
-  // to dominate.
+  // to properly dominate.
   if (MDT) {
     MDT->addNewBlock(LoopBB, &MBB);
     MDT->addNewBlock(RemainderBB, LoopBB);
     for (auto &Succ : RemainderBB->successors()) {
-      if (MDT->dominates(&MBB, Succ)) {
+      if (MDT->properlyDominates(&MBB, Succ)) {
         MDT->changeImmediateDominator(Succ, RemainderBB);
       }
     }
@@ -6211,7 +6218,11 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
   if (ST.hasAddNoCarry())
     return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
 
-  Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+  // If available, prefer to use vcc.
+  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
+                             ? Register(RI.getVCC())
+                             : RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+
   // TODO: Users need to deal with this.
   if (!UnusedCarry.isValid())
     return MachineInstrBuilder();
@@ -6329,6 +6340,26 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
   llvm_unreachable("Unknown subtarget generation!");
 }
 
+bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
+  switch(MCOp) {
+  // These opcodes use indirect register addressing so
+  // they need special handling by codegen (currently missing).
+  // Therefore it is too risky to allow these opcodes
+  // to be selected by dpp combiner or sdwa peepholer.
+  case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
+  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
+  case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
+  case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
+  case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
+  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
+  case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
+  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
+    return true;
+  default:
+    return false;
+  }
+}
+
 int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
@@ -6367,6 +6398,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
   if (MCOp == (uint16_t)-1)
     return -1;
 
+  if (isAsmOnlyOpcode(MCOp))
+    return -1;
+
   return MCOp;
 }
@@ -6541,14 +6575,14 @@ MachineInstr *SIInstrInfo::createPHIDestinationCopy(
 
 MachineInstr *SIInstrInfo::createPHISourceCopy(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
-    const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
+    const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const {
   if (InsPt != MBB.end() &&
      (InsPt->getOpcode() == AMDGPU::SI_IF ||
       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
      InsPt->definesRegister(Src)) {
    InsPt++;
-    return BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
+    return BuildMI(MBB, InsPt, DL,
                    get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
                                      : AMDGPU::S_MOV_B64_term),
                    Dst)
@@ -6560,3 +6594,53 @@ MachineInstr *SIInstrInfo::createPHISourceCopy(
 }
 
 bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
+
+MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+    VirtRegMap *VRM) const {
+  // This is a bit of a hack (copied from AArch64). Consider this instruction:
+  //
+  //   %0:sreg_32 = COPY $m0
+  //
+  // We explicitly chose SReg_32 for the virtual register so such a copy might
+  // be eliminated by RegisterCoalescer. However, that may not be possible, and
+  // %0 may even spill. We can't spill $m0 normally (it would require copying to
+  // a numbered SGPR anyway), and since it is in the SReg_32 register class,
+  // TargetInstrInfo::foldMemoryOperand() is going to try.
+  //
+  // To prevent that, constrain the %0 register class here.
+  if (MI.isFullCopy()) {
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
+
+    if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
+      MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+      return nullptr;
+    }
+
+    if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
+      MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+      return nullptr;
+    }
+  }
+
+  return nullptr;
+}
+
+unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                      const MachineInstr &MI,
+                                      unsigned *PredCost) const {
+  if (MI.isBundle()) {
+    MachineBasicBlock::const_instr_iterator I(MI.getIterator());
+    MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end());
+    unsigned Lat = 0, Count = 0;
+    for (++I; I != E && I->isBundledWithPred(); ++I) {
+      ++Count;
+      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
+    }
+    return Lat + Count - 1;
+  }
+
+  return SchedModel.computeInstrLatency(&MI);
+}
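
Note: the new SIInstrInfo::getInstrLatency override reports the latency of a BUNDLE as the maximum latency of the bundled instructions plus one extra cycle per additional instruction in the bundle (the "Lat + Count - 1" expression above). The standalone C++ sketch below restates only that arithmetic outside of LLVM; the helper name bundleLatency and the use of a plain vector of per-instruction latencies are illustrative assumptions and not part of the patch, which instead walks the bundled MachineInstrs and queries TargetSchedModel::computeInstrLatency for each one.

// Illustrative sketch only: mirrors the "Lat + Count - 1" bundle calculation
// from the patch, using plain integers instead of MachineInstr/TargetSchedModel.
#include <algorithm>
#include <cassert>
#include <iostream>
#include <vector>

// Hypothetical helper: Latencies holds the latency of each instruction
// inside one bundle.
static unsigned bundleLatency(const std::vector<unsigned> &Latencies) {
  assert(!Latencies.empty() && "a bundle has at least one instruction");
  unsigned Lat = 0, Count = 0;
  for (unsigned L : Latencies) {
    ++Count;
    Lat = std::max(Lat, L); // the longest individual latency dominates
  }
  return Lat + Count - 1;   // plus one cycle per extra bundled instruction
}

int main() {
  // A bundle with per-instruction latencies {4, 2, 4} is reported as
  // 4 + 3 - 1 = 6 cycles.
  std::cout << bundleLatency({4, 2, 4}) << '\n';
  return 0;
}

For an unbundled instruction the override simply returns SchedModel.computeInstrLatency(&MI), as in the last line of the patch.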