diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 62 |
1 files changed, 47 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 2f1e7823f65c..cd084fd5440a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -192,8 +192,20 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler { const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - if (!SPReg) - SPReg = MIRBuilder.buildCopy(PtrTy, MFI->getStackPtrOffsetReg()).getReg(0); + if (!SPReg) { + const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>(); + if (ST.enableFlatScratch()) { + // The stack is accessed unswizzled, so we can use a regular copy. + SPReg = MIRBuilder.buildCopy(PtrTy, + MFI->getStackPtrOffsetReg()).getReg(0); + } else { + // The address we produce here, without knowing the use context, is going + // to be interpreted as a vector address, so we need to convert to a + // swizzled address. + SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy}, + {MFI->getStackPtrOffsetReg()}).getReg(0); + } + } auto OffsetReg = MIRBuilder.buildConstant(S32, Offset); @@ -615,6 +627,13 @@ bool AMDGPUCallLowering::lowerFormalArguments( CCInfo.AllocateReg(ImplicitBufferPtrReg); } + // FIXME: This probably isn't defined for mesa + if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) { + Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI); + MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass); + CCInfo.AllocateReg(FlatScratchInitReg); + } + SmallVector<ArgInfo, 32> SplitArgs; unsigned Idx = 0; unsigned PSInputNum = 0; @@ -879,13 +898,17 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, Register InputReg; if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX && NeedWorkItemIDX) { - InputReg = MRI.createGenericVirtualRegister(S32); - LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX, - std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX)); + if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) { + InputReg = MRI.createGenericVirtualRegister(S32); + LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX, + std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX)); + } else { + InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0); + } } if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY && - NeedWorkItemIDY) { + NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) { Register Y = MRI.createGenericVirtualRegister(S32); LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY), std::get<2>(WorkitemIDY)); @@ -895,7 +918,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, } if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ && - NeedWorkItemIDZ) { + NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) { Register Z = MRI.createGenericVirtualRegister(S32); LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ), std::get<2>(WorkitemIDZ)); @@ -904,16 +927,24 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z; } - if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) { + if (!InputReg && + (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) { InputReg = MRI.createGenericVirtualRegister(S32); - - // Workitem ids are already packed, any of present incoming arguments will - // carry all required fields. - ArgDescriptor IncomingArg = ArgDescriptor::createArg( - IncomingArgX ? *IncomingArgX : + if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) { + // We're in a situation where the outgoing function requires the workitem + // ID, but the calling function does not have it (e.g a graphics function + // calling a C calling convention function). This is illegal, but we need + // to produce something. + MIRBuilder.buildUndef(InputReg); + } else { + // Workitem ids are already packed, any of present incoming arguments will + // carry all required fields. + ArgDescriptor IncomingArg = ArgDescriptor::createArg( + IncomingArgX ? *IncomingArgX : IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u); - LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg, - &AMDGPU::VGPR_32RegClass, S32); + LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg, + &AMDGPU::VGPR_32RegClass, S32); + } } if (OutgoingArg->isRegister()) { @@ -1314,6 +1345,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; } + Info.IsTailCall = CanTailCallOpt; if (CanTailCallOpt) return lowerTailCall(MIRBuilder, Info, OutArgs); |