aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp21
1 files changed, 15 insertions, 6 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0655b4342ba1..cd05797fdbdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -413,21 +413,21 @@ bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
case AMDGPU::V_MAX_I16_e32:
case AMDGPU::V_MIN_I16_e64:
case AMDGPU::V_MIN_I16_e32:
+ case AMDGPU::V_MAD_F16_e64:
+ case AMDGPU::V_MAD_U16_e64:
+ case AMDGPU::V_MAD_I16_e64:
+ case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_DIV_FIXUP_F16_e64:
// On gfx10, all 16-bit instructions preserve the high bits.
return getGeneration() <= AMDGPUSubtarget::GFX9;
- case AMDGPU::V_MAD_F16_e64:
case AMDGPU::V_MADAK_F16:
case AMDGPU::V_MADMK_F16:
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_MAC_F16_e32:
case AMDGPU::V_FMAMK_F16:
case AMDGPU::V_FMAAK_F16:
- case AMDGPU::V_MAD_U16_e64:
- case AMDGPU::V_MAD_I16_e64:
- case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
case AMDGPU::V_FMAC_F16_e32:
- case AMDGPU::V_DIV_FIXUP_F16_e64:
// In gfx9, the preferred handling of the unused high 16-bits changed. Most
// instructions maintain the legacy behavior of 0ing. Some instructions
// changed to preserving the high bits.
@@ -648,9 +648,18 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
}
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
+ assert(AMDGPU::isKernel(F.getCallingConv()));
+
+ // We don't allocate the segment if we know the implicit arguments weren't
+ // used, even if the ABI implies we need them.
+ if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
+ return 0;
+
if (isMesaKernel(F))
return 16;
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
+
+ // Assume all implicit inputs are used by default
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,