aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp62
1 files changed, 47 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 2f1e7823f65c..cd084fd5440a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -192,8 +192,20 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- if (!SPReg)
- SPReg = MIRBuilder.buildCopy(PtrTy, MFI->getStackPtrOffsetReg()).getReg(0);
+ if (!SPReg) {
+ const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
+ if (ST.enableFlatScratch()) {
+ // The stack is accessed unswizzled, so we can use a regular copy.
+ SPReg = MIRBuilder.buildCopy(PtrTy,
+ MFI->getStackPtrOffsetReg()).getReg(0);
+ } else {
+ // The address we produce here, without knowing the use context, is going
+ // to be interpreted as a vector address, so we need to convert to a
+ // swizzled address.
+ SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
+ {MFI->getStackPtrOffsetReg()}).getReg(0);
+ }
+ }
auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);
@@ -615,6 +627,13 @@ bool AMDGPUCallLowering::lowerFormalArguments(
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
+ // FIXME: This probably isn't defined for mesa
+ if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
+ Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(FlatScratchInitReg);
+ }
+
SmallVector<ArgInfo, 32> SplitArgs;
unsigned Idx = 0;
unsigned PSInputNum = 0;
@@ -879,13 +898,17 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
Register InputReg;
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
NeedWorkItemIDX) {
- InputReg = MRI.createGenericVirtualRegister(S32);
- LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
- std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
+ if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
+ InputReg = MRI.createGenericVirtualRegister(S32);
+ LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
+ std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
+ } else {
+ InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
+ }
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
- NeedWorkItemIDY) {
+ NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
std::get<2>(WorkitemIDY));
@@ -895,7 +918,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
- NeedWorkItemIDZ) {
+ NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
std::get<2>(WorkitemIDZ));
@@ -904,16 +927,24 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
}
- if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
+ if (!InputReg &&
+ (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
InputReg = MRI.createGenericVirtualRegister(S32);
-
- // Workitem ids are already packed, any of present incoming arguments will
- // carry all required fields.
- ArgDescriptor IncomingArg = ArgDescriptor::createArg(
- IncomingArgX ? *IncomingArgX :
+ if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
+ // We're in a situation where the outgoing function requires the workitem
+ // ID, but the calling function does not have it (e.g a graphics function
+ // calling a C calling convention function). This is illegal, but we need
+ // to produce something.
+ MIRBuilder.buildUndef(InputReg);
+ } else {
+ // Workitem ids are already packed, any of present incoming arguments will
+ // carry all required fields.
+ ArgDescriptor IncomingArg = ArgDescriptor::createArg(
+ IncomingArgX ? *IncomingArgX :
IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
- LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
- &AMDGPU::VGPR_32RegClass, S32);
+ LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
+ &AMDGPU::VGPR_32RegClass, S32);
+ }
}
if (OutgoingArg->isRegister()) {
@@ -1314,6 +1345,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
+ Info.IsTailCall = CanTailCallOpt;
if (CanTailCallOpt)
return lowerTailCall(MIRBuilder, Info, OutArgs);