Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp  |  170
1 file changed, 127 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d97e6a62971b..d53950ca4465 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -85,7 +85,9 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- RI(ST), ST(ST) {}
+ RI(ST), ST(ST) {
+ SchedModel.init(&ST);
+}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -260,6 +262,9 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const {
+ if (!LdSt.mayLoadOrStore())
+ return false;
+
unsigned Opc = LdSt.getOpcode();
if (isDS(LdSt)) {
@@ -270,12 +275,11 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
// TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
// report that here?
- if (!BaseOp)
+ if (!BaseOp || !BaseOp->isReg())
return false;
Offset = OffsetImm->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+
return true;
}
@@ -307,9 +311,11 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
EltSize *= 64;
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+ if (!BaseOp->isReg())
+ return false;
+
Offset = EltSize * Offset0;
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+
return true;
}
@@ -346,12 +352,12 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
getNamedOperand(LdSt, AMDGPU::OpName::offset);
BaseOp = AddrReg;
Offset = OffsetImm->getImm();
-
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
+
return true;
}
@@ -364,8 +370,9 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
BaseOp = SBaseReg;
Offset = OffsetImm->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
+
return true;
}
@@ -383,8 +390,8 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
}
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
- assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
- "operands of type register.");
+ if (!BaseOp->isReg())
+ return false;
return true;
}
@@ -418,7 +425,7 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
const MachineFunction &MF = *MI1.getParent()->getParent();
const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
Base1 = GetUnderlyingObject(Base1, DL);
- Base2 = GetUnderlyingObject(Base1, DL);
+ Base2 = GetUnderlyingObject(Base2, DL);
if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
return false;
@@ -508,8 +515,8 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) {
MachineFunction *MF = MBB.getParent();
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
"illegal SGPR to VGPR copy",
@@ -523,8 +530,8 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -542,7 +549,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RC == &AMDGPU::SReg_32RegClass) {
if (SrcReg == AMDGPU::SCC) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
return;
}
@@ -840,7 +847,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
@@ -855,7 +862,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
- .addImm(-1);
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -900,7 +907,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(0);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
@@ -919,7 +926,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
- .addImm(-1);
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -1062,6 +1069,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for spilling SGPRs.
@@ -1190,6 +1198,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
@@ -2542,9 +2551,9 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const {
- assert((MIa.mayLoad() || MIa.mayStore()) &&
+ assert(MIa.mayLoadOrStore() &&
"MIa must load from or modify a memory location");
- assert((MIb.mayLoad() || MIb.mayStore()) &&
+ assert(MIb.mayLoadOrStore() &&
"MIb must load from or modify a memory location");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
@@ -3921,20 +3930,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- RC = TRI->getSubRegClass(RC, MO.getSubReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_b32
- // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+ const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+ if (MO.getSubReg()) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
+ if (!SuperRC)
+ return false;
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
+ if (!DRC)
+ return false;
+ }
+ return RC->hasSuperClassEq(DRC);
}
bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
@@ -4451,12 +4458,12 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// Update dominators. We know that MBB immediately dominates LoopBB, that
// LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
// dominates all of the successors transferred to it from MBB that MBB used
- // to dominate.
+ // to properly dominate.
if (MDT) {
MDT->addNewBlock(LoopBB, &MBB);
MDT->addNewBlock(RemainderBB, LoopBB);
for (auto &Succ : RemainderBB->successors()) {
- if (MDT->dominates(&MBB, Succ)) {
+ if (MDT->properlyDominates(&MBB, Succ)) {
MDT->changeImmediateDominator(Succ, RemainderBB);
}
}
@@ -6211,7 +6218,11 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
if (ST.hasAddNoCarry())
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
- Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+ // If available, prefer to use vcc.
+ Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
+ ? Register(RI.getVCC())
+ : RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+
// TODO: Users need to deal with this.
if (!UnusedCarry.isValid())
return MachineInstrBuilder();
@@ -6329,6 +6340,26 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
llvm_unreachable("Unknown subtarget generation!");
}
+bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
+ switch(MCOp) {
+ // These opcodes use indirect register addressing so
+ // they need special handling by codegen (currently missing).
+ // Therefore it is too risky to allow these opcodes
+ // to be selected by dpp combiner or sdwa peepholer.
+ case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
+ case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
+ case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
+ return true;
+ default:
+ return false;
+ }
+}
+
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
@@ -6367,6 +6398,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
if (MCOp == (uint16_t)-1)
return -1;
+ if (isAsmOnlyOpcode(MCOp))
+ return -1;
+
return MCOp;
}
@@ -6541,14 +6575,14 @@ MachineInstr *SIInstrInfo::createPHIDestinationCopy(
MachineInstr *SIInstrInfo::createPHISourceCopy(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
- const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
+ const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const {
if (InsPt != MBB.end() &&
(InsPt->getOpcode() == AMDGPU::SI_IF ||
InsPt->getOpcode() == AMDGPU::SI_ELSE ||
InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
InsPt->definesRegister(Src)) {
InsPt++;
- return BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
+ return BuildMI(MBB, InsPt, DL,
get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
: AMDGPU::S_MOV_B64_term),
Dst)
@@ -6560,3 +6594,53 @@ MachineInstr *SIInstrInfo::createPHISourceCopy(
}
bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); }
+
+MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
+ // This is a bit of a hack (copied from AArch64). Consider this instruction:
+ //
+ // %0:sreg_32 = COPY $m0
+ //
+ // We explicitly chose SReg_32 for the virtual register so such a copy might
+ // be eliminated by RegisterCoalescer. However, that may not be possible, and
+ // %0 may even spill. We can't spill $m0 normally (it would require copying to
+ // a numbered SGPR anyway), and since it is in the SReg_32 register class,
+ // TargetInstrInfo::foldMemoryOperand() is going to try.
+ //
+ // To prevent that, constrain the %0 register class here.
+ if (MI.isFullCopy()) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
+ MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+ return nullptr;
+ }
+
+ if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
+ MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+ return nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
+unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost) const {
+ if (MI.isBundle()) {
+ MachineBasicBlock::const_instr_iterator I(MI.getIterator());
+ MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end());
+ unsigned Lat = 0, Count = 0;
+ for (++I; I != E && I->isBundledWithPred(); ++I) {
+ ++Count;
+ Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
+ }
+ return Lat + Count - 1;
+ }
+
+ return SchedModel.computeInstrLatency(&MI);
+}