diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 40 |
1 files changed, 37 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d16da2a8b86b..438e8b200ecc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1664,6 +1664,17 @@ SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG, return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset); } +SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG, + const SDLoc &SL) const { + + Function &F = DAG.getMachineFunction().getFunction(); + Optional<uint32_t> KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.has_value()) + return DAG.getConstant(KnownSize.value(), SL, MVT::i32); + return SDValue(); +} + SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val, bool Signed, @@ -2049,6 +2060,9 @@ void SITargetLowering::allocateSpecialInputSGPRs( if (Info.hasWorkGroupIDZ()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ); + + if (Info.hasLDSKernelId()) + allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId); } // Allocate special inputs passed in user SGPRs. @@ -2102,6 +2116,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(FlatScratchInitReg); } + if (Info.hasLDSKernelId()) { + Register Reg = Info.addLDSKernelId(); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read // these from the dispatch pointer. } @@ -2347,8 +2367,8 @@ SDValue SITargetLowering::LowerFormalArguments( (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && - !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && - !Info->hasWorkItemIDZ()); + !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() && + !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); } if (CallConv == CallingConv::AMDGPU_PS) { @@ -2762,7 +2782,8 @@ void SITargetLowering::passSpecialInputs( {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"}, - {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"} + {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"}, + {AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"}, }; for (auto Attr : ImplicitAttrs) { @@ -2798,6 +2819,13 @@ void SITargetLowering::passSpecialInputs( // The implicit arg ptr is special because it doesn't have a corresponding // input for kernels, and is computed from the kernarg segment pointer. InputReg = getImplicitArgPtr(DAG, DL); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (Id.has_value()) { + InputReg = DAG.getConstant(Id.value(), DL, ArgVT); + } else { + InputReg = DAG.getUNDEF(ArgVT); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. @@ -6887,6 +6915,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_workgroup_id_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: { + if (MFI->isEntryFunction()) + return getLDSKernelId(DAG, DL); + return getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } case Intrinsic::amdgcn_workitem_id_x: return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: |