diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 21 |
1 file changed, 15 insertions, 6 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 0655b4342ba1..cd05797fdbdb 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -413,21 +413,21 @@ bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const { case AMDGPU::V_MAX_I16_e32: case AMDGPU::V_MIN_I16_e64: case AMDGPU::V_MIN_I16_e32: + case AMDGPU::V_MAD_F16_e64: + case AMDGPU::V_MAD_U16_e64: + case AMDGPU::V_MAD_I16_e64: + case AMDGPU::V_FMA_F16_e64: + case AMDGPU::V_DIV_FIXUP_F16_e64: // On gfx10, all 16-bit instructions preserve the high bits. return getGeneration() <= AMDGPUSubtarget::GFX9; - case AMDGPU::V_MAD_F16_e64: case AMDGPU::V_MADAK_F16: case AMDGPU::V_MADMK_F16: case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_MAC_F16_e32: case AMDGPU::V_FMAMK_F16: case AMDGPU::V_FMAAK_F16: - case AMDGPU::V_MAD_U16_e64: - case AMDGPU::V_MAD_I16_e64: - case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMAC_F16_e64: case AMDGPU::V_FMAC_F16_e32: - case AMDGPU::V_DIV_FIXUP_F16_e64: // In gfx9, the preferred handling of the unused high 16-bits changed. Most // instructions maintain the legacy behavior of 0ing. Some instructions // changed to preserving the high bits. @@ -648,9 +648,18 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { } unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const { + assert(AMDGPU::isKernel(F.getCallingConv())); + + // We don't allocate the segment if we know the implicit arguments weren't + // used, even if the ABI implies we need them. 
+ if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr")) + return 0; + if (isMesaKernel(F)) return 16; - return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); + + // Assume all implicit inputs are used by default + return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56); } uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F, |