about summary refs log tree commit diff
path: root/contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def')
-rw-r--r-- contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def 296
1 files changed, 265 insertions, 31 deletions
diff --git a/contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def b/contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def
index 6b94dd857300..0f2e8260143b 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/contrib/llvm-project/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -17,12 +17,21 @@
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
+#pragma push_macro("SM_53")
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
-#define SM_86 "sm_86"
+#pragma push_macro("SM_87")
+#pragma push_macro("SM_89")
+#pragma push_macro("SM_90")
+#pragma push_macro("SM_90a")
+#define SM_90a "sm_90a"
+#define SM_90 "sm_90|" SM_90a
+#define SM_89 "sm_89|" SM_90
+#define SM_87 "sm_87|" SM_89
+#define SM_86 "sm_86|" SM_87
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
@@ -30,7 +39,9 @@
#pragma push_macro("SM_60")
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
+#define SM_53 "sm_53|" SM_60
+#pragma push_macro("PTX42")
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX63")
@@ -42,7 +53,21 @@
#pragma push_macro("PTX73")
#pragma push_macro("PTX74")
#pragma push_macro("PTX75")
-#define PTX75 "ptx75"
+#pragma push_macro("PTX76")
+#pragma push_macro("PTX77")
+#pragma push_macro("PTX78")
+#pragma push_macro("PTX80")
+#pragma push_macro("PTX81")
+#pragma push_macro("PTX82")
+#pragma push_macro("PTX83")
+#define PTX83 "ptx83"
+#define PTX82 "ptx82|" PTX83
+#define PTX81 "ptx81|" PTX82
+#define PTX80 "ptx80|" PTX81
+#define PTX78 "ptx78|" PTX80
+#define PTX77 "ptx77|" PTX78
+#define PTX76 "ptx76|" PTX77
+#define PTX75 "ptx75|" PTX76
#define PTX74 "ptx74|" PTX75
#define PTX73 "ptx73|" PTX74
#define PTX72 "ptx72|" PTX73
@@ -53,6 +78,7 @@
#define PTX63 "ptx63|" PTX64
#define PTX61 "ptx61|" PTX63
#define PTX60 "ptx60|" PTX61
+#define PTX42 "ptx42|" PTX60
#pragma push_macro("AND")
#define AND(a, b) "(" a "),(" b ")"
@@ -79,6 +105,31 @@ BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
+
BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
@@ -107,13 +158,97 @@ BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
// Min Max
-BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
-BUILTIN(__nvvm_fmax_f, "fff", "")
-BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
-BUILTIN(__nvvm_fmin_f, "fff", "")
+TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
+ AND(SM_86, PTX72))
+BUILTIN(__nvvm_fmin_f, "fff", "")
+BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
+TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+BUILTIN(__nvvm_fmin_d, "ddd", "")
+TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y", "",
+ AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
+ AND(SM_86, PTX72))
+BUILTIN(__nvvm_fmax_f, "fff", "")
+BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
+TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
+TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
BUILTIN(__nvvm_fmax_d, "ddd", "")
-BUILTIN(__nvvm_fmin_d, "ddd", "")
// Multiplication
@@ -202,6 +337,8 @@ BUILTIN(__nvvm_saturate_d, "dd", "")
BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
+TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
+TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
@@ -217,6 +354,22 @@ BUILTIN(__nvvm_cos_approx_f, "ff", "")
// Fma
+TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
+TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
+TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
BUILTIN(__nvvm_fma_rn_f, "ffff", "")
BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
@@ -245,6 +398,8 @@ BUILTIN(__nvvm_rcp_rn_d, "dd", "")
BUILTIN(__nvvm_rcp_rz_d, "dd", "")
BUILTIN(__nvvm_rcp_rm_d, "dd", "")
BUILTIN(__nvvm_rcp_rp_d, "dd", "")
+
+BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
// Sqrt
@@ -402,20 +557,20 @@ BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
BUILTIN(__nvvm_f2h_rn, "Usf", "")
-TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "ZUiff", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "ZUiff", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "ZUiff", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "ZUiff", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_f2bf16_rn, "ZUsf", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "ZUsf", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_f2bf16_rz, "ZUsf", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "ZUsf", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
@@ -441,6 +596,11 @@ TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
+TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v", "n", AND(SM_90,PTX78))
+TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v", "n", AND(SM_90,PTX80))
+TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v", "n", AND(SM_90,PTX78))
+TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v", "n", AND(SM_90,PTX78))
+
// Shuffle
BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
@@ -473,11 +633,11 @@ TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
// Match
-TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60)
-TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60)
+TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
// These return a pair {value, predicate}, which requires custom lowering.
-TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60)
-TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60)
+TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
// Redux
TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
@@ -670,8 +830,50 @@ TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
-// __ldg. This is not implemented as a builtin by nvcc.
+BUILTIN(__nvvm_ldu_c, "ccC*", "")
+BUILTIN(__nvvm_ldu_sc, "ScScC*", "")
+BUILTIN(__nvvm_ldu_s, "ssC*", "")
+BUILTIN(__nvvm_ldu_i, "iiC*", "")
+BUILTIN(__nvvm_ldu_l, "LiLiC*", "")
+BUILTIN(__nvvm_ldu_ll, "LLiLLiC*", "")
+
+BUILTIN(__nvvm_ldu_uc, "UcUcC*", "")
+BUILTIN(__nvvm_ldu_us, "UsUsC*", "")
+BUILTIN(__nvvm_ldu_ui, "UiUiC*", "")
+BUILTIN(__nvvm_ldu_ul, "ULiULiC*", "")
+BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*", "")
+
+BUILTIN(__nvvm_ldu_h, "hhC*", "")
+BUILTIN(__nvvm_ldu_f, "ffC*", "")
+BUILTIN(__nvvm_ldu_d, "ddC*", "")
+
+BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "")
+BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "")
+BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "")
+BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "")
+BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "")
+BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "")
+BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "")
+BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "")
+BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "")
+BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "")
+
+BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "")
+BUILTIN(__nvvm_ldu_uc4, "E4UcE4UcC*", "")
+BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "")
+BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "")
+BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "")
+BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "")
+BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "")
+BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "")
+
+BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "")
+BUILTIN(__nvvm_ldu_f2, "E2fE2fC*", "")
+BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "")
+BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "")
+
BUILTIN(__nvvm_ldg_c, "ccC*", "")
+BUILTIN(__nvvm_ldg_sc, "ScScC*", "")
BUILTIN(__nvvm_ldg_s, "ssC*", "")
BUILTIN(__nvvm_ldg_i, "iiC*", "")
BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
@@ -683,15 +885,19 @@ BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
+BUILTIN(__nvvm_ldg_h, "hhC*", "")
BUILTIN(__nvvm_ldg_f, "ffC*", "")
BUILTIN(__nvvm_ldg_d, "ddC*", "")
BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
+BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "")
BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
+BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "")
BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
+BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "")
BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
@@ -700,8 +906,10 @@ BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
+BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "")
BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
+BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "")
BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
@@ -711,28 +919,29 @@ BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
+TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
// Builtins to support WMMA instructions on sm_70
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
-TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
-TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
@@ -753,7 +962,7 @@ TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX
TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
-TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX71))
+TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
@@ -818,24 +1027,42 @@ TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PT
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
-TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1.", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1.", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
+
+// bf16, bf16x2 abs, neg
+TARGET_BUILTIN(__nvvm_abs_bf16, "yy", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_neg_bf16, "yy", "", AND(SM_80,PTX70))
+TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
+
+TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
+
#undef BUILTIN
#undef TARGET_BUILTIN
#pragma pop_macro("AND")
+#pragma pop_macro("SM_53")
#pragma pop_macro("SM_60")
#pragma pop_macro("SM_70")
#pragma pop_macro("SM_72")
#pragma pop_macro("SM_75")
#pragma pop_macro("SM_80")
#pragma pop_macro("SM_86")
+#pragma pop_macro("SM_87")
+#pragma pop_macro("SM_89")
+#pragma pop_macro("SM_90")
+#pragma pop_macro("SM_90a")
+#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
#pragma pop_macro("PTX63")
@@ -847,3 +1074,10 @@ TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
#pragma pop_macro("PTX73")
#pragma pop_macro("PTX74")
#pragma pop_macro("PTX75")
+#pragma pop_macro("PTX76")
+#pragma pop_macro("PTX77")
+#pragma pop_macro("PTX78")
+#pragma pop_macro("PTX80")
+#pragma pop_macro("PTX81")
+#pragma pop_macro("PTX82")
+#pragma pop_macro("PTX83")