Diffstat (limited to 'contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td')
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 142
1 file changed, 90 insertions(+), 52 deletions(-)
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 01380afae006..ac2291f9d43b 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -18,7 +18,7 @@ class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
// Used to tag image and resource intrinsics with information used to generate
// mem operands.
-class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> {
+class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = false> {
int RsrcArg = rsrcarg;
bit IsImage = isimage;
}
@@ -182,6 +182,8 @@ def int_amdgcn_init_exec : Intrinsic<[],
// Set EXEC according to a thread count packed in an SGPR input:
// thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
+// Note: only inreg arguments to the parent function are valid as
+// inputs to this intrinsic; computed values cannot be used.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
[llvm_i32_ty, // 32-bit SGPR input
llvm_i32_ty], // bit offset of the thread count
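A minimal IR sketch of how this might be used, assuming the packed count sits at bit offset 8 (the offset and the calling convention here are illustrative only):

  declare void @llvm.amdgcn.init.exec.from.input(i32, i32)

  define amdgpu_gs void @example(i32 inreg %packed) {
    ; EXEC is set so that (%packed >> 8) & 0x7f lanes become active.
    ; Per the note above, %packed must be an inreg function argument.
    call void @llvm.amdgcn.init.exec.from.input(i32 %packed, i32 8)
    ret void
  }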
@@ -255,7 +257,17 @@ def int_amdgcn_log_clamp : Intrinsic<
def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
+
+// Fused single-precision multiply-add with legacy behaviour for the multiply,
+// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
+// intended for use on subtargets that have the v_fma_legacy_f32 and/or
+// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
+// has a completely different kind of legacy behaviour.)
+def int_amdgcn_fma_legacy :
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
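A small IR sketch of the legacy-multiply semantics described above (the constant operand is chosen purely for illustration):

  declare float @llvm.amdgcn.fmul.legacy(float, float)
  declare float @llvm.amdgcn.fma.legacy(float, float, float)

  define float @mul_legacy(float %x) {
    ; returns +0.0 even when %x is NaN or infinity, because the legacy
    ; multiply defines +/- 0.0 * anything as +0.0
    %r = call float @llvm.amdgcn.fmul.legacy(float 0.0, float %x)
    ret float %r
  }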
def int_amdgcn_rcp : Intrinsic<
@@ -397,11 +409,10 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
-class AMDGPULDSF32Intrin<string clang_builtin> :
- GCCBuiltin<clang_builtin>,
- Intrinsic<[llvm_float_ty],
- [LLVMQualPointerType<llvm_float_ty, 3>,
- llvm_float_ty,
+class AMDGPULDSIntrin :
+ Intrinsic<[llvm_any_ty],
+ [LLVMQualPointerType<LLVMMatchType<0>, 3>,
+ LLVMMatchType<0>,
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
llvm_i1_ty], // isVolatile
@@ -446,9 +457,9 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
-def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
-def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
-def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
+def int_amdgcn_ds_fadd : AMDGPULDSIntrin;
+def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
+def int_amdgcn_ds_fmax : AMDGPULDSIntrin;
} // TargetPrefix = "amdgcn"
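With AMDGPULDSF32Intrin generalized to AMDGPULDSIntrin, the LDS atomics become type-overloaded rather than f32-only. A usage sketch for the float case (the overload suffix and the zeroed ordering/scope operands are assumptions for illustration):

  declare float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)*, float, i32, i32, i1)

  define float @lds_fadd(float addrspace(3)* %ptr, float %v) {
    ; operands after the value are ordering, scope, and isVolatile
    %old = call float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* %ptr, float %v, i32 0, i32 0, i1 false)
    ret float %old
  }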
@@ -545,7 +556,7 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
// {offset} {bias} {z-compare}
list<AMDGPUArg> ExtraAddrArgs = extra_addr;
- bit Gradients = 0;
+ bit Gradients = false;
// Name of the {lod} or {clamp} argument that is appended to the coordinates,
// if any.
@@ -585,7 +596,7 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
}
- let Gradients = 1 in {
+ let Gradients = true in {
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
}
@@ -600,12 +611,12 @@ class AMDGPUDimProfile<string opmod,
string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod
// These are intended to be overwritten by subclasses
- bit IsSample = 0;
- bit IsAtomic = 0;
+ bit IsSample = false;
+ bit IsAtomic = false;
list<LLVMType> RetTypes = [];
list<AMDGPUArg> DataArgs = [];
list<AMDGPUArg> ExtraAddrArgs = [];
- bit Gradients = 0;
+ bit Gradients = false;
string LodClampMip = "";
int NumRetAndDataAnyTypes =
@@ -616,7 +627,7 @@ class AMDGPUDimProfile<string opmod,
arglistconcat<[ExtraAddrArgs,
!if(Gradients, dim.GradientArgs, []),
!listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
- !if(!eq(LodClampMip, ""),
+ !if(!empty(LodClampMip),
[]<AMDGPUArg>,
[AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
NumRetAndDataAnyTypes>.ret;
@@ -646,7 +657,7 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod,
class AMDGPUDimSampleProfile<string opmod,
AMDGPUDimProps dim,
AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
- let IsSample = 1;
+ let IsSample = true;
let RetTypes = [llvm_any_ty];
let ExtraAddrArgs = sample.ExtraAddrArgs;
let Gradients = sample.Gradients;
@@ -657,7 +668,7 @@ class AMDGPUDimNoSampleProfile<string opmod,
AMDGPUDimProps dim,
list<LLVMType> retty,
list<AMDGPUArg> dataargs,
- bit Mip = 0> : AMDGPUDimProfile<opmod, dim> {
+ bit Mip = false> : AMDGPUDimProfile<opmod, dim> {
let RetTypes = retty;
let DataArgs = dataargs;
let LodClampMip = !if(Mip, "mip", "");
@@ -668,7 +679,7 @@ class AMDGPUDimAtomicProfile<string opmod,
list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
let RetTypes = [llvm_anyint_ty];
let DataArgs = dataargs;
- let IsAtomic = 1;
+ let IsAtomic = true;
}
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
@@ -681,13 +692,23 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RES
// Helper class for figuring out image intrinsic argument indexes.
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
int NumDataArgs = !size(P_.DataArgs);
- int NumDmaskArgs = !if(P_.IsAtomic, 0, 1);
+ int NumDmaskArgs = !not(P_.IsAtomic);
+ int NumExtraAddrArgs = !size(P_.ExtraAddrArgs);
int NumVAddrArgs = !size(P_.AddrArgs);
+ int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0);
+ int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs));
int NumRSrcArgs = 1;
int NumSampArgs = !if(P_.IsSample, 2, 0);
int DmaskArgIndex = NumDataArgs;
- int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
- int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
+ int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs);
+ int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs);
+ int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs);
+ int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
+ int MipArgIndex = LodArgIndex;
+ int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs);
+ int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs);
+ int UnormArgIndex = !add(SampArgIndex, 1);
+ int TexFailCtrlArgIndex = !add(SampArgIndex, NumSampArgs);
int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
}
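As a worked example of these indexes, take a hypothetical 2D sample-with-bias intrinsic: NumDataArgs = 0, so DmaskArgIndex = 0 and VAddrArgIndex = 1; the bias is the single extra address argument, so GradientArgIndex = CoordArgIndex = 2; with three vaddr operands (bias, s, t), RsrcArgIndex = 1 + 3 = 4, SampArgIndex = 5, UnormArgIndex = 6, TexFailCtrlArgIndex = 5 + 2 = 7, and CachePolicyArgIndex = 8.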
@@ -738,7 +759,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
list<AMDGPUArg> dataargs,
list<IntrinsicProperty> props,
list<SDNodeProperty> sdnodeprops,
- bit Mip = 0> {
+ bit Mip = false> {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name)
: AMDGPUImageDimIntrinsic<
@@ -752,7 +773,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
list<AMDGPUArg> dataargs,
list<IntrinsicProperty> props,
list<SDNodeProperty> sdnodeprops,
- bit Mip = 0> {
+ bit Mip = false> {
foreach dim = AMDGPUDims.All in {
def !strconcat(NAME, "_", dim.Name)
: AMDGPUImageDimIntrinsic<
@@ -787,7 +808,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
//////////////////////////////////////////////////////////////////////////
multiclass AMDGPUImageDimSampleDims<string opmod,
AMDGPUSampleVariant sample,
- bit NoMem = 0> {
+ bit NoMem = false> {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleProfile<opmod, dim, sample>,
@@ -973,9 +994,9 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
-class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
- [data_ty],
- [LLVMMatchType<0>, // vdata(VGPR)
+class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
+ !if(NoRtn, [], [data_ty]),
+ [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
@@ -1005,9 +1026,12 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
-class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
- [data_ty],
- [LLVMMatchType<0>, // vdata(VGPR)
+// gfx908 intrinsic
+def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
+
+class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
+ !if(NoRtn, [], [data_ty]),
+ [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
@@ -1039,6 +1063,10 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<ArgIndex<6>>, IntrWillReturn], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
+// gfx908 intrinsic
+def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
+
+
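A sketch of calling the returning gfx908 forms added above (overload suffixes assumed; the trailing soffset and cachepolicy operands are zeroed for illustration):

  declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32)
  declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32)

  define float @raw_fadd(float %v, <4 x i32> inreg %rsrc, i32 %off) {
    ; returns the memory contents from before the add
    %old = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %v, <4 x i32> %rsrc, i32 %off, i32 0, i32 0)
    ret float %old
  }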
// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
@@ -1168,6 +1196,19 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
AMDGPURsrcIntrinsic<2, 0>;
def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;
+
+class AMDGPUBufferAtomicFP : Intrinsic <
+ [llvm_anyfloat_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(SGPR/VGPR/imm)
+ llvm_i1_ty], // slc(imm)
+ [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+
+// Legacy form of the intrinsic. The raw and struct forms should be preferred.
+def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
} // defset AMDGPUBufferIntrinsics
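A declaration sketch for that legacy form (overload suffix assumed); per the comment above, new code should use the raw/struct forms instead:

  declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)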
// Uses that do not set the done bit should set IntrWriteMem on the
@@ -1248,7 +1289,7 @@ def int_amdgcn_s_getreg :
def int_amdgcn_s_setreg :
GCCBuiltin<"__builtin_amdgcn_s_setreg">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
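A minimal sketch of the setreg intrinsic after this change (the zero hwreg selector is a placeholder, not a meaningful register encoding):

  declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

  define void @setreg(i32 %val) {
    ; first operand is ImmArg<ArgIndex<0>>: it must be a compile-time immediate
    call void @llvm.amdgcn.s.setreg(i32 0, i32 %val)
    ret void
  }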
// int_amdgcn_s_getpc is provided to allow a specific style of position
@@ -1291,6 +1332,7 @@ def int_amdgcn_interp_p2 :
// See int_amdgcn_v_interp_p1 for why this is IntrNoMem.
// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
+// high selects whether the high or low 16 bits are loaded from LDS
def int_amdgcn_interp_p1_f16 :
GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
Intrinsic<[llvm_float_ty],
@@ -1299,6 +1341,7 @@ def int_amdgcn_interp_p1_f16 :
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
+// high selects whether the high or low 16 bits are loaded from LDS
def int_amdgcn_interp_p2_f16 :
GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
Intrinsic<[llvm_half_ty],
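A sketch of the two-stage f16 interpolation, wiring p1 into p2 (attribute and channel 0 are chosen arbitrarily; i1 true selects the high 16 bits per the comments above):

  declare float @llvm.amdgcn.interp.p1.f16(float, i32 immarg, i32 immarg, i1 immarg, i32)
  declare half @llvm.amdgcn.interp.p2.f16(float, float, i32 immarg, i32 immarg, i1 immarg, i32)

  define half @interp_attr0(float %i, float %j, i32 %m0) {
    %p1 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 0, i32 0, i1 true, i32 %m0)
    %v = call half @llvm.amdgcn.interp.p2.f16(float %p1, float %j, i32 0, i32 0, i1 true, i32 %m0)
    ret half %v
  }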
@@ -1538,6 +1581,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
+def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
+ Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
+>;
+
// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
@@ -1667,10 +1714,19 @@ class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
[vt],
[llvm_anyptr_ty, // vaddr
vt], // vdata(VGPR)
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>], "", [SDNPMemOperand]>;
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
+ [SDNPMemOperand]>;
def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
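A usage sketch for the csub atomic (the pointer overload suffix is an assumption):

  declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)*, i32)

  define i32 @csub(i32 addrspace(1)* %ptr, i32 %v) {
    ; returns the pre-operation value, per the AMDGPUGlobalAtomicRtn profile
    %old = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %v)
    ret i32 %old
  }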
+// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
+// <ray_dir>, <ray_inv_dir>, <texture_descr>
+def int_amdgcn_image_bvh_intersect_ray :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty,
+ LLVMMatchType<1>, llvm_v4i32_ty],
+ [IntrReadMem, IntrWillReturn]>;
+
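A sketch matching the signature above, using the i32 node-pointer and v4f32 ray-direction overloads (the suffix order is an assumption):

  declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)

  define <4 x i32> @trace(i32 %node, float %extent, <4 x float> %origin, <4 x float> %dir, <4 x float> %inv_dir, <4 x i32> inreg %tdescr) {
    %hit = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node, float %extent, <4 x float> %origin, <4 x float> %dir, <4 x float> %inv_dir, <4 x i32> %tdescr)
    ret <4 x i32> %hit
  }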
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
@@ -1786,25 +1842,7 @@ def int_amdgcn_udot8 :
// gfx908 intrinsics
// ===----------------------------------------------------------------------===//
-class AMDGPUBufferAtomicNoRtn : Intrinsic <
- [],
- [llvm_anyfloat_ty, // vdata(VGPR)
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty], // slc(imm)
- [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<1, 0>;
-
-class AMDGPUGlobalAtomicNoRtn : Intrinsic <
- [],
- [llvm_anyptr_ty, // vaddr
- llvm_anyfloat_ty], // vdata(VGPR)
- [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
- [SDNPMemOperand]>;
-
-def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
-def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;
+def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
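With the switch from AMDGPUGlobalAtomicNoRtn to AMDGPUGlobalAtomicRtn, the fadd intrinsic now returns the pre-operation value. A declaration sketch (the exact overload mangling is an assumption):

  declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)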
// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,