aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIRegisterInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp120
1 files changed, 106 insertions, 14 deletions
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 06cfc95be96a..6fb01a09fe13 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -117,11 +117,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
-unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
- const MachineFunction &MF) const {
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- unsigned RegCount = ST.getMaxNumSGPRs(MF);
+static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
unsigned Reg;
// Try to place it in a hole after PrivateSegmentBufferReg.
@@ -134,9 +130,22 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
// wave offset before it.
Reg = RegCount - 5;
}
+
+ return Reg;
+}
+
+unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}
+unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
+ const MachineFunction &MF) const {
+ return AMDGPU::SGPR32;
+}
+
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
@@ -198,15 +207,33 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
+ unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
+ if (StackPtrReg != AMDGPU::NoRegister) {
+ reserveRegisterTuples(Reserved, StackPtrReg);
+ assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
+ }
+
+ unsigned FrameReg = MFI->getFrameOffsetReg();
+ if (FrameReg != AMDGPU::NoRegister) {
+ reserveRegisterTuples(Reserved, FrameReg);
+ assert(!isSubRegister(ScratchRSrcReg, FrameReg));
+ }
+
return Reserved;
}
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
- return Fn.getFrameInfo().hasStackObjects();
+ const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
+ if (Info->isEntryFunction()) {
+ const MachineFrameInfo &MFI = Fn.getFrameInfo();
+ return MFI.hasStackObjects() || MFI.hasCalls();
+ }
+
+ // May need scavenger for dealing with callee saved registers.
+ return true;
}
-bool
-SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
return MF.getFrameInfo().hasStackObjects();
}
@@ -318,8 +345,11 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
assert(FIOp && FIOp->isFI() && "frame index must be address operand");
-
assert(TII->isMUBUF(MI));
+ assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
+ MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
+ "should only be seeing frame offset relative FrameIndex");
+
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t NewOffset = OffsetOp->getImm() + Offset;
@@ -981,12 +1011,72 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
default: {
- if (TII->isMUBUF(*MI)) {
+ const DebugLoc &DL = MI->getDebugLoc();
+ bool IsMUBUF = TII->isMUBUF(*MI);
+
+ if (!IsMUBUF &&
+ MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
+ // Convert to an absolute stack address by finding the offset from the
+ // scratch wave base and scaling by the wave size.
+ //
+ // In an entry function/kernel the stack address is already the absolute
+ // address relative to the the scratch wave offset.
+
+ unsigned DiffReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
+ unsigned ResultReg = IsCopy ?
+ MI->getOperand(0).getReg() :
+ MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
+ .addReg(MFI->getFrameOffsetReg())
+ .addReg(MFI->getScratchWaveOffsetReg());
+
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (Offset == 0) {
+ // XXX - This never happens because of emergency scavenging slot at 0?
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
+ .addImm(Log2_32(ST.getWavefrontSize()))
+ .addReg(DiffReg);
+ } else {
+ unsigned CarryOut
+ = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned ScaledReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // XXX - Should this use a vector shift?
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
+ .addReg(DiffReg, RegState::Kill)
+ .addImm(Log2_32(ST.getWavefrontSize()));
+
+ // TODO: Fold if use instruction is another add of a constant.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
+ .addReg(CarryOut, RegState::Define | RegState::Dead)
+ .addImm(Offset)
+ .addReg(ScaledReg, RegState::Kill);
+
+ MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
+ }
+
+ // Don't introduce an extra copy if we're just materializing in a mov.
+ if (IsCopy)
+ MI->eraseFromParent();
+ else
+ FIOp.ChangeToRegister(ResultReg, false, false, true);
+ return;
+ }
+
+ if (IsMUBUF) {
// Disable offen so we don't need a 0 vgpr base.
assert(static_cast<int>(FIOperandNum) ==
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::vaddr));
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
+ == MFI->getFrameOffsetReg());
+
int64_t Offset = FrameInfo.getObjectOffset(Index);
int64_t OldImm
= TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
@@ -995,17 +1085,19 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (isUInt<12>(NewOffset) &&
buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
MI->eraseFromParent();
- break;
+ return;
}
}
+ // If the offset is simply too big, don't convert to a scratch wave offset
+ // relative index.
+
int64_t Offset = FrameInfo.getObjectOffset(Index);
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false, false, true);
}
}