aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXIntrinsics.td')
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td2860
1 files changed, 1767 insertions, 1093 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c81dfa68e4bd..56d8b734bf01 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -69,17 +69,17 @@ def INT_BARRIER0 : NVPTXInst<(outs), (ins),
[(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
"bar.sync \t$src1;",
- [(int_nvvm_barrier_n Int32Regs:$src1)]>;
+ [(int_nvvm_barrier_n i32:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
"bar.sync \t$src1, $src2;",
- [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
+ [(int_nvvm_barrier i32:$src1, i32:$src2)]>;
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
".reg .pred \t%p1; \n\t",
"setp.ne.u32 \t%p1, $pred, 0; \n\t",
"bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
"}}"),
- [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
+ [(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
".reg .pred \t%p1; \n\t",
@@ -88,7 +88,7 @@ def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
"bar.red.and.pred \t%p2, 0, %p1; \n\t",
"selp.u32 \t$dst, 1, 0, %p2; \n\t",
"}}"),
- [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
+ [(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
".reg .pred \t%p1; \n\t",
@@ -97,7 +97,7 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
"bar.red.or.pred \t%p2, 0, %p1; \n\t",
"selp.u32 \t$dst, 1, 0, %p2; \n\t",
"}}"),
- [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
+ [(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>;
def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
[(int_nvvm_bar_sync imm:$i)]>;
@@ -106,27 +106,27 @@ def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i
[(int_nvvm_bar_warp_sync imm:$i)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
- [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
+ [(int_nvvm_bar_warp_sync i32:$i)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
[(int_nvvm_barrier_sync imm:$i)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
- [(int_nvvm_barrier_sync Int32Regs:$i)]>,
+ [(int_nvvm_barrier_sync i32:$i)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
"barrier.sync \t$id, $cnt;",
- [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
+ [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
"barrier.sync \t$id, $cnt;",
- [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
+ [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
"barrier.sync \t$id, $cnt;",
- [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
+ [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
"barrier.sync \t$id, $cnt;",
@@ -217,7 +217,7 @@ foreach sync = [false, true] in {
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
"vote." # mode # " \t$dest, $pred;",
- [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
+ [(set regclass:$dest, (IntOp i1:$pred))]>,
Requires<[hasPTX<60>, hasSM<30>]>;
}
@@ -230,11 +230,11 @@ defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
"vote.sync." # mode # " \t$dest, $pred, $mask;",
- [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
+ [(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
"vote.sync." # mode #" \t$dest, $pred, $mask;",
- [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
+ [(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>,
Requires<[hasPTX<60>, hasSM<30>]>;
}
@@ -243,30 +243,40 @@ defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
+// elect.sync
+def INT_ELECT_SYNC_I : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask),
+ "elect.sync \t$dest|$pred, $mask;",
+ [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+def INT_ELECT_SYNC_R : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask),
+ "elect.sync \t$dest|$pred, $mask;",
+ [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
Operand ImmOp> {
def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
+ [(set i32:$dest, (IntOp imm:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
+ [(set i32:$dest, (IntOp i32:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
+ [(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
+ [(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
}
// activemask.b32
def ACTIVEMASK : NVPTXInst<(outs Int32Regs:$dest), (ins),
"activemask.b32 \t$dest;",
- [(set Int32Regs:$dest, (int_nvvm_activemask))]>,
+ [(set i32:$dest, (int_nvvm_activemask))]>,
Requires<[hasPTX<62>, hasSM<30>]>;
defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
@@ -279,22 +289,22 @@ multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic Int
def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins i32imm:$mask, ImmOp:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
+ [(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins Int32Regs:$mask, ImmOp:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
+ [(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins i32imm:$mask, regclass:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
+ [(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins Int32Regs:$mask, regclass:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
+ [(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
@@ -305,7 +315,7 @@ defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_s
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
"redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
- [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
+ [(set i32:$dst, (Intrin i32:$src, Int32Regs:$mask))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -335,6 +345,48 @@ def INT_FENCE_SC_CLUSTER:
MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
Requires<[hasPTX<78>, hasSM<90>]>;
+// Proxy fence (uni-directional)
+// fence.proxy.tensormap.release variants
+
+class FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<string Scope, Intrinsic Intr> :
+ NVPTXInst<(outs), (ins),
+ "fence.proxy.tensormap::generic.release." # Scope # ";", [(Intr)]>,
+ Requires<[hasPTX<83>, hasSM<90>]>;
+
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CTA:
+ FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"cta",
+ int_nvvm_fence_proxy_tensormap_generic_release_cta>;
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CLUSTER:
+ FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"cluster",
+ int_nvvm_fence_proxy_tensormap_generic_release_cluster>;
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_GPU:
+ FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"gpu",
+ int_nvvm_fence_proxy_tensormap_generic_release_gpu>;
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS:
+ FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"sys",
+ int_nvvm_fence_proxy_tensormap_generic_release_sys>;
+
+// fence.proxy.tensormap.acquire variants
+
+class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<string Scope, Intrinsic Intr> :
+ NVPTXInst<(outs), (ins Int64Regs:$addr),
+ "fence.proxy.tensormap::generic.acquire." # Scope # " [$addr], 128;",
+ [(Intr i64:$addr, (i32 128))]>,
+ Requires<[hasPTX<83>, hasSM<90>]>;
+
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CTA :
+ FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"cta",
+ int_nvvm_fence_proxy_tensormap_generic_acquire_cta>;
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CLUSTER :
+ FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"cluster",
+ int_nvvm_fence_proxy_tensormap_generic_acquire_cluster>;
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_GPU :
+ FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"gpu",
+ int_nvvm_fence_proxy_tensormap_generic_acquire_gpu>;
+def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_SYS :
+ FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"sys",
+ int_nvvm_fence_proxy_tensormap_generic_acquire_sys>;
+
//-----------------------------------
// Async Copy Functions
//-----------------------------------
@@ -342,11 +394,11 @@ def INT_FENCE_SC_CLUSTER:
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
!strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
- [(Intrin Int32Regs:$addr)]>,
+ [(Intrin i32:$addr)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
!strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
- [(Intrin Int64Regs:$addr)]>,
+ [(Intrin i64:$addr)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -362,28 +414,28 @@ defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
!strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
- [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
+ [(Intrin i32:$dst, i32:$src)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
!strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
- [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
+ [(Intrin i64:$dst, i64:$src)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
// Variant with src_size parameter
def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
!strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
- [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
+ [(IntrinS i32:$dst, i32:$src, i32:$src_size)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
!strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
- [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
+ [(IntrinS i32:$dst, i32:$src, imm:$src_size)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
!strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
- [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
+ [(IntrinS i64:$dst, i64:$src, i32:$src_size)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
!strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
- [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
+ [(IntrinS i64:$dst, i64:$src, imm:$src_size)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -409,7 +461,7 @@ def CP_ASYNC_COMMIT_GROUP :
def CP_ASYNC_WAIT_GROUP :
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
- [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
+ [(int_nvvm_cp_async_wait_group timm:$n)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def CP_ASYNC_WAIT_ALL :
@@ -425,14 +477,269 @@ def CP_ASYNC_BULK_COMMIT_GROUP :
def CP_ASYNC_BULK_WAIT_GROUP :
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;",
- [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
+ [(int_nvvm_cp_async_bulk_wait_group timm:$n)]>,
Requires<[hasPTX<80>, hasSM<90>]>;
def CP_ASYNC_BULK_WAIT_GROUP_READ :
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;",
- [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
+ [(int_nvvm_cp_async_bulk_wait_group_read timm:$n)]>,
Requires<[hasPTX<80>, hasSM<90>]>;
+//------------------------------
+// TMA Async Bulk Copy Functions
+//------------------------------
+
+class CpAsyncBulkStr<bit mc, bit ch> {
+ // Shared to Global memory
+ string S2G = "cp.async.bulk.global.shared::cta.bulk_group"
+ # !if(ch, ".L2::cache_hint", "");
+
+ // Global to Shared cluster memory
+ string G2S = "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes"
+ # !if(mc, ".multicast::cluster", "")
+ # !if(ch, ".L2::cache_hint", "");
+
+ // Shared CTA to Cluster memory
+ string C2C = "cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes";
+}
+
+multiclass CP_ASYNC_BULK_S2G<NVPTXRegClass rc> {
+ def NAME: NVPTXInst<(outs),
+ (ins Int64Regs:$dst, rc:$src, Int32Regs:$size),
+ !strconcat(CpAsyncBulkStr<0, 0>.S2G, " [$dst], [$src], $size;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _CH: NVPTXInst<(outs),
+ (ins Int64Regs:$dst, rc:$src, Int32Regs:$size, Int64Regs:$ch),
+ !strconcat(CpAsyncBulkStr<0, 1>.S2G, " [$dst], [$src], $size, $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+defm CP_ASYNC_BULK_S2G : CP_ASYNC_BULK_S2G<Int64Regs>;
+defm CP_ASYNC_BULK_S2G_SHARED32 : CP_ASYNC_BULK_S2G<Int32Regs>;
+
+multiclass CP_ASYNC_BULK_G2S<NVPTXRegClass rc> {
+ def NAME: NVPTXInst<(outs),
+ (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size),
+ !strconcat(CpAsyncBulkStr<0, 0>.G2S, " [$dst], [$src], $size, [$mbar];"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _MC: NVPTXInst<(outs),
+ (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc),
+ !strconcat(CpAsyncBulkStr<1, 0>.G2S, " [$dst], [$src], $size, [$mbar], $mc;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _CH: NVPTXInst<(outs),
+ (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch),
+ !strconcat(CpAsyncBulkStr<0, 1>.G2S, " [$dst], [$src], $size, [$mbar], $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _MC_CH: NVPTXInst<(outs),
+ (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc, Int64Regs:$ch),
+ !strconcat(CpAsyncBulkStr<1, 1>.G2S, " [$dst], [$src], $size, [$mbar], $mc, $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S<Int64Regs>;
+defm CP_ASYNC_BULK_G2S_SHARED32 : CP_ASYNC_BULK_G2S<Int32Regs>;
+
+multiclass CP_ASYNC_BULK_CTA_TO_CLUSTER<NVPTXRegClass rc> {
+ def NAME: NVPTXInst<(outs),
+ (ins rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size),
+ !strconcat(CpAsyncBulkStr<0, 0>.C2C, " [$dst], [$src], $size, [$mbar];"),
+ [(int_nvvm_cp_async_bulk_shared_cta_to_cluster rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size)]>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+defm CP_ASYNC_BULK_CTA_TO_CLUSTER : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int64Regs>;
+defm CP_ASYNC_BULK_CTA_TO_CLUSTER_SHARED32 : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int32Regs>;
+
+//------------------------------
+// Bulk Copy Prefetch Functions
+//------------------------------
+def CP_ASYNC_BULK_PREFETCH : NVPTXInst<(outs),
+ (ins Int64Regs:$src, Int32Regs:$size),
+ "cp.async.bulk.prefetch.L2.global [$src], $size;", []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+
+def CP_ASYNC_BULK_PREFETCH_CH : NVPTXInst<(outs),
+ (ins Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch),
+ "cp.async.bulk.prefetch.L2.global.L2::cache_hint [$src], $size, $ch;", []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+//-------------------------------------
+// TMA Async Bulk Tensor Copy Functions
+//-------------------------------------
+
+// From Global to Shared memory (G2S)
+class G2S_STRINGS<int dim, string mode, bit mc, bit ch, bit is_shared32 = 0> {
+ string prefix = "cp.async.bulk.tensor";
+ string dir = "shared::cluster.global";
+ string completion = "mbarrier::complete_tx::bytes";
+ string inst_name = prefix
+ # "." # dim # "d"
+ # "." # dir
+ # "." # mode
+ # "." # completion
+ # !if(mc, ".multicast::cluster", "")
+ # !if(ch, ".L2::cache_hint", "");
+ string intr_name = "CP_ASYNC_BULK_TENSOR_G2S_"
+ # dim # "D"
+ # !if(is_shared32, "_SHARED32", "")
+ # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
+}
+
+multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode> {
+ defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
+ defvar asm_str_default = " [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]";
+ defvar rc = !if(is_shared32, Int32Regs, Int64Regs);
+
+ defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0);
+ defvar im2col_dag = !if(!eq(mode, "im2col"),
+ !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
+ (ins));
+ defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", ");
+ defvar im2col_asm_str = ", {{" # im2col_str # "}}";
+
+ defvar asm_str = !if(!eq(mode, "im2col"),
+ !strconcat(asm_str_default, im2col_asm_str), asm_str_default);
+
+ def NAME: NVPTXInst<(outs),
+ !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag),
+ !strconcat(G2S_STRINGS<dim, mode, 0, 0>.inst_name, asm_str, ";"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _MC: NVPTXInst<(outs),
+ !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int16Regs:$mc)),
+ !strconcat(G2S_STRINGS<dim, mode, 1, 0>.inst_name, asm_str, ", $mc;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _CH: NVPTXInst<(outs),
+ !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)),
+ !strconcat(G2S_STRINGS<dim, mode, 0, 1>.inst_name, asm_str, ", $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _MC_CH: NVPTXInst<(outs),
+ !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int16Regs:$mc, Int64Regs:$ch)),
+ !strconcat(G2S_STRINGS<dim, mode, 1, 1>.inst_name, asm_str, ", $mc, $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+
+foreach dim = [1, 2, 3, 4, 5] in {
+ foreach shared32 = [true, false] in {
+ foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
+ defm G2S_STRINGS<dim, mode, 0, 0, shared32>.intr_name :
+ CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, shared32, mode>;
+ }
+ }
+}
+
+// From Shared to Global memory (S2G)
+class S2G_STRINGS<int dim, string mode, bit ch,
+ bit is_shared32 = 0, bit is_reduce = 0> {
+ string dir = "global.shared::cta";
+ string completion = "bulk_group";
+ string inst_name = !if(is_reduce, "cp.reduce", "cp")
+ # ".async.bulk.tensor"
+ # "." # dim # "d"
+ # "." # dir
+ # "." # mode
+ # "." # completion
+ # !if(ch, ".L2::cache_hint", "");
+ string intr_name = "CP_ASYNC_BULK_TENSOR_"
+ # !if(is_reduce, "RED_", "S2G_")
+ # dim # "D"
+ # !if(is_shared32, "_SHARED32", "")
+ # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
+}
+
+multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, bit shared32, string mode> {
+ defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
+ defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]";
+ defvar rc = !if(shared32, Int32Regs, Int64Regs);
+
+ def NAME: NVPTXInst<(outs),
+ !con((ins rc:$src, Int64Regs:$tmap), dims_dag),
+ !strconcat(S2G_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _CH: NVPTXInst<(outs),
+ !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch)),
+ !strconcat(S2G_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+
+def TMAReductionFlags : Operand<i32> {
+ let PrintMethod = "printTmaReductionMode";
+}
+
+// TMA Copy from Shared to Global memory with Reduction
+multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, bit shared32, string mode> {
+ defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
+ defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]";
+ defvar rc = !if(shared32, Int32Regs, Int64Regs);
+
+ defvar prefix = "cp.reduce.async.bulk.tensor" # "." # dim # "d" # ".global.shared::cta";
+ defvar suffix = "." # mode # ".bulk_group";
+
+ def NAME: NVPTXInst<(outs),
+ !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)),
+ !strconcat(prefix, "${red_op}", suffix, asm_str, ";"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _CH: NVPTXInst<(outs),
+ !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch, TMAReductionFlags:$red_op)),
+ !strconcat(prefix, "${red_op}", suffix, ".L2::cache_hint", asm_str, ", $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+
+foreach dim = [1, 2, 3, 4, 5] in {
+ foreach shared32 = [true, false] in {
+ foreach mode = !if(!ge(dim, 3), ["tile", "im2col_no_offs"], ["tile"]) in {
+ defm S2G_STRINGS<dim, mode, 0, shared32>.intr_name :
+ CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, shared32, mode>;
+ defm S2G_STRINGS<dim, mode, 0, shared32, 1>.intr_name :
+ CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, shared32, mode>;
+ }
+ }
+}
+
+// TMA Prefetch from Global memory to L2 cache
+class PREFETCH_STRINGS<int dim, string mode, bit ch> {
+ string prefix = "cp.async.bulk.prefetch.tensor";
+ string dir = "L2.global";
+ string inst_name = prefix
+ # "." # dim # "d"
+ # "." # dir
+ # "." # mode
+ # !if(ch, ".L2::cache_hint", "");
+ string intr_name = "CP_ASYNC_BULK_TENSOR_PREFETCH_"
+ # dim # "D"
+ # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
+}
+
+multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
+ defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
+ defvar asm_str_default = " [$tmap, {{" # dims_str # "}}]";
+
+ defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0);
+ defvar im2col_dag = !if(!eq(mode, "im2col"),
+ !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
+ (ins));
+ defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", ");
+ defvar im2col_asm_str = ", {{" # im2col_str # "}}";
+
+ defvar asm_str = !if(!eq(mode, "im2col"),
+ !strconcat(asm_str_default, im2col_asm_str), asm_str_default);
+
+ def NAME: NVPTXInst<(outs),
+ !con((ins Int64Regs:$tmap), dims_dag, im2col_dag),
+ !strconcat(PREFETCH_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+ def NAME # _CH: NVPTXInst<(outs),
+ !con((ins Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)),
+ !strconcat(PREFETCH_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>,
+ Requires<[hasPTX<80>, hasSM<90>]>;
+}
+
+foreach dim = [1, 2, 3, 4, 5] in {
+ foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
+ defm PREFETCH_STRINGS<dim, mode, 0>.intr_name :
+ CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>;
+ }
+}
+
//-----------------------------------
// MBarrier Functions
//-----------------------------------
@@ -440,11 +747,11 @@ def CP_ASYNC_BULK_WAIT_GROUP_READ :
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
- [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
+ [(Intrin i32:$addr, i32:$count)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
- [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
+ [(Intrin i64:$addr, i32:$count)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -455,11 +762,11 @@ defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
!strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
- [(Intrin Int32Regs:$addr)]>,
+ [(Intrin i32:$addr)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
!strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
- [(Intrin Int64Regs:$addr)]>,
+ [(Intrin i64:$addr)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -470,11 +777,11 @@ defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
!strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
- [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
+ [(set i64:$state, (Intrin i32:$addr))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
!strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
- [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
+ [(set i64:$state, (Intrin i64:$addr))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -487,13 +794,13 @@ multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
(ins Int32Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.arrive.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
- [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
+ [(set i64:$state, (Intrin i32:$addr, i32:$count))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
(ins Int64Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.arrive.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
- [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
+ [(set i64:$state, (Intrin i64:$addr, i32:$count))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -506,12 +813,12 @@ multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
!strconcat("mbarrier.arrive_drop", AddrSpace,
".b64 $state, [$addr];"),
- [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
+ [(set i64:$state, (Intrin i32:$addr))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
!strconcat("mbarrier.arrive_drop", AddrSpace,
".b64 $state, [$addr];"),
- [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
+ [(set i64:$state, (Intrin i64:$addr))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -525,13 +832,13 @@ multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
(ins Int32Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
- [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
+ [(set i64:$state, (Intrin i32:$addr, i32:$count))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
(ins Int64Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
- [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
+ [(set i64:$state, (Intrin i64:$addr, i32:$count))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -544,11 +851,11 @@ defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
!strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
- [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
+ [(set i1:$res, (Intrin i32:$addr, i64:$state))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
!strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
- [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
+ [(set i1:$res, (Intrin i64:$addr, i64:$state))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -560,7 +867,7 @@ defm MBARRIER_TEST_WAIT_SHARED :
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
"mbarrier.pending_count.b64 $res, $state;",
- [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
+ [(set i32:$res, (Intrin i64:$state))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
def MBARRIER_PENDING_COUNT :
@@ -577,30 +884,30 @@ def MBARRIER_PENDING_COUNT :
// Same story for fmax, fmin.
def : Pat<(int_nvvm_fmin_f immFloat1,
- (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_f immFloat0, f32:$a)),
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
- (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_f f32:$a, immFloat0)),
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
- (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_f immFloat0, f32:$a), immFloat1),
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
- (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_f f32:$a, immFloat0), immFloat1),
+ (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
- (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_d immDouble0, f64:$a)),
+ (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
- (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_d f64:$a, immDouble0)),
+ (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
- (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_d immDouble0, f64:$a), immDouble1),
+ (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
- (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+ (int_nvvm_fmax_d f64:$a, immDouble0), immDouble1),
+ (CVT_f64_f64 $a, CvtSAT)>;
// We need a full string for OpcStr here because we need to deal with case like
@@ -644,7 +951,7 @@ def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$
[(int_nvvm_nanosleep imm:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;
def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;",
- [(int_nvvm_nanosleep Int32Regs:$i)]>,
+ [(int_nvvm_nanosleep i32:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;
//
// Min Max
@@ -878,6 +1185,18 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
+def : Pat<(int_nvvm_div_full f32:$a, f32:$b),
+ (FDIV32rr $a, $b)>;
+
+def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b),
+ (FDIV32ri $a, f32imm:$b)>;
+
+def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b),
+ (FDIV32rr_ftz $a, $b)>;
+
+def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
+ (FDIV32ri_ftz $a, f32imm:$b)>;
+
//
// Sad
//
@@ -899,19 +1218,19 @@ def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
// Floor Ceil
//
-def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
-def : Pat<(int_nvvm_floor_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_floor_d Float64Regs:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
+def : Pat<(int_nvvm_floor_ftz_f f32:$a),
+ (CVT_f32_f32 $a, CvtRMI_FTZ)>;
+def : Pat<(int_nvvm_floor_f f32:$a),
+ (CVT_f32_f32 $a, CvtRMI)>;
+def : Pat<(int_nvvm_floor_d f64:$a),
+ (CVT_f64_f64 $a, CvtRMI)>;
-def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
-def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
-def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
+def : Pat<(int_nvvm_ceil_ftz_f f32:$a),
+ (CVT_f32_f32 $a, CvtRPI_FTZ)>;
+def : Pat<(int_nvvm_ceil_f f32:$a),
+ (CVT_f32_f32 $a, CvtRPI)>;
+def : Pat<(int_nvvm_ceil_d f64:$a),
+ (CVT_f64_f64 $a, CvtRPI)>;
//
// Abs
@@ -926,6 +1245,22 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_fabs_d>;
//
+// copysign
+//
+
+def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>;
+
+def COPYSIGN_F :
+ NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1),
+ "copysign.f32 \t$dst, $src0, $src1;",
+ [(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>;
+
+def COPYSIGN_D :
+ NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1),
+ "copysign.f64 \t$dst, $src0, $src1;",
+ [(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>;
+
+//
// Abs, Neg bf16, bf16x2
//
@@ -942,34 +1277,34 @@ def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
// Round
//
-def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
-def : Pat<(int_nvvm_round_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_round_d Float64Regs:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+def : Pat<(int_nvvm_round_ftz_f f32:$a),
+ (CVT_f32_f32 $a, CvtRNI_FTZ)>;
+def : Pat<(int_nvvm_round_f f32:$a),
+ (CVT_f32_f32 $a, CvtRNI)>;
+def : Pat<(int_nvvm_round_d f64:$a),
+ (CVT_f64_f64 $a, CvtRNI)>;
//
// Trunc
//
-def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
-def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
+def : Pat<(int_nvvm_trunc_ftz_f f32:$a),
+ (CVT_f32_f32 $a, CvtRZI_FTZ)>;
+def : Pat<(int_nvvm_trunc_f f32:$a),
+ (CVT_f32_f32 $a, CvtRZI)>;
+def : Pat<(int_nvvm_trunc_d f64:$a),
+ (CVT_f64_f64 $a, CvtRZI)>;
//
// Saturate
//
-def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
-def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
-def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
- (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
+def : Pat<(int_nvvm_saturate_ftz_f f32:$a),
+ (CVT_f32_f32 $a, CvtSAT_FTZ)>;
+def : Pat<(int_nvvm_saturate_f f32:$a),
+ (CVT_f32_f32 $a, CvtSAT)>;
+def : Pat<(int_nvvm_saturate_d f64:$a),
+ (CVT_f64_f64 $a, CvtSAT)>;
//
// Exp2 Log2
@@ -981,11 +1316,21 @@ def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
+
def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
+def : Pat<(fexp2 f32:$a),
+ (INT_NVVM_EX2_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
+def : Pat<(fexp2 f32:$a),
+ (INT_NVVM_EX2_APPROX_F $a)>, Requires<[doNoF32FTZ]>;
+def : Pat<(fexp2 f16:$a),
+ (INT_NVVM_EX2_APPROX_F16 $a)>, Requires<[useFP16Math]>;
+def : Pat<(fexp2 v2f16:$a),
+ (INT_NVVM_EX2_APPROX_F16X2 $a)>, Requires<[useFP16Math]>;
+
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
@@ -993,6 +1338,11 @@ def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
+def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>,
+ Requires<[doF32FTZ]>;
+def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>,
+ Requires<[doNoF32FTZ]>;
+
//
// Sin Cos
//
@@ -1155,14 +1505,14 @@ def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_sqrt_rp_d>;
// nvvm_sqrt intrinsic
-def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
- (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
-def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
- (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
-def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
- (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
-def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
- (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
+def : Pat<(int_nvvm_sqrt_f f32:$a),
+ (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f f32:$a),
+ (INT_NVVM_SQRT_RN_F $a)>, Requires<[do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f f32:$a),
+ (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
+def : Pat<(int_nvvm_sqrt_f f32:$a),
+ (INT_NVVM_SQRT_APPROX_F $a)>;
//
// Rsqrt
@@ -1181,25 +1531,25 @@ def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// 1.0f / sqrt_approx -> rsqrt_approx
-def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f Float32Regs:$a)),
- (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f f32:$a)),
+ (INT_NVVM_RSQRT_APPROX_F $a)>,
Requires<[doRsqrtOpt]>;
-def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f Float32Regs:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
+ (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
Requires<[doRsqrtOpt]>;
// same for int_nvvm_sqrt_f when non-precision sqrt is requested
-def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)),
- (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
+ (INT_NVVM_RSQRT_APPROX_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
-def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
+ (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
-def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)),
- (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
+def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
+ (INT_NVVM_RSQRT_APPROX_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
-def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
+def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
+ (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
//
// Add
@@ -1232,145 +1582,159 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
//
+// BFIND
+//
+
+foreach t = [I32RT, I64RT] in {
+ foreach sign = ["s", "u"] in {
+ defvar flo_intrin = !cast<Intrinsic>("int_nvvm_flo_" # sign);
+ def BFIND_ # sign # t.Size
+ : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src),
+ "bfind." # sign # t.Size # " \t$dst, $src;",
+ [(set i32:$dst, (flo_intrin t.Ty:$src, 0))]>;
+
+ def BFIND_SHIFTAMT_ # sign # t.Size
+ : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src),
+ "bfind.shiftamt." # sign # t.Size # " \t$dst, $src;",
+ [(set i32:$dst, (flo_intrin t.Ty:$src, -1))]>;
+ }
+}
+
+//
// Convert
//
-def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
-def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
-def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
-def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
-def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
- (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
- (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
- (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
- (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
- (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
-def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
-def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
-def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
-def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
- (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
-def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
-def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
-def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
-def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
- (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
- (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
- (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
-def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
-def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
-def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
- (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
-
-def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
-def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
-def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
-def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
- (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
-
-def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
-def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
- (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
-
-def CVT_tf32_f32 :
- NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
- "cvt.rna.tf32.f32 \t$dest, $a;",
- [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
+def : Pat<(int_nvvm_d2f_rn_ftz f64:$a),
+ (CVT_f32_f64 $a, CvtRN_FTZ)>;
+def : Pat<(int_nvvm_d2f_rn f64:$a),
+ (CVT_f32_f64 $a, CvtRN)>;
+def : Pat<(int_nvvm_d2f_rz_ftz f64:$a),
+ (CVT_f32_f64 $a, CvtRZ_FTZ)>;
+def : Pat<(int_nvvm_d2f_rz f64:$a),
+ (CVT_f32_f64 $a, CvtRZ)>;
+def : Pat<(int_nvvm_d2f_rm_ftz f64:$a),
+ (CVT_f32_f64 $a, CvtRM_FTZ)>;
+def : Pat<(int_nvvm_d2f_rm f64:$a),
+ (CVT_f32_f64 $a, CvtRM)>;
+def : Pat<(int_nvvm_d2f_rp_ftz f64:$a),
+ (CVT_f32_f64 $a, CvtRP_FTZ)>;
+def : Pat<(int_nvvm_d2f_rp f64:$a),
+ (CVT_f32_f64 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_d2i_rn f64:$a),
+ (CVT_s32_f64 $a, CvtRNI)>;
+def : Pat<(int_nvvm_d2i_rz f64:$a),
+ (CVT_s32_f64 $a, CvtRZI)>;
+def : Pat<(int_nvvm_d2i_rm f64:$a),
+ (CVT_s32_f64 $a, CvtRMI)>;
+def : Pat<(int_nvvm_d2i_rp f64:$a),
+ (CVT_s32_f64 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_d2ui_rn f64:$a),
+ (CVT_u32_f64 $a, CvtRNI)>;
+def : Pat<(int_nvvm_d2ui_rz f64:$a),
+ (CVT_u32_f64 $a, CvtRZI)>;
+def : Pat<(int_nvvm_d2ui_rm f64:$a),
+ (CVT_u32_f64 $a, CvtRMI)>;
+def : Pat<(int_nvvm_d2ui_rp f64:$a),
+ (CVT_u32_f64 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_i2d_rn i32:$a),
+ (CVT_f64_s32 $a, CvtRN)>;
+def : Pat<(int_nvvm_i2d_rz i32:$a),
+ (CVT_f64_s32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_i2d_rm i32:$a),
+ (CVT_f64_s32 $a, CvtRM)>;
+def : Pat<(int_nvvm_i2d_rp i32:$a),
+ (CVT_f64_s32 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_ui2d_rn i32:$a),
+ (CVT_f64_u32 $a, CvtRN)>;
+def : Pat<(int_nvvm_ui2d_rz i32:$a),
+ (CVT_f64_u32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_ui2d_rm i32:$a),
+ (CVT_f64_u32 $a, CvtRM)>;
+def : Pat<(int_nvvm_ui2d_rp i32:$a),
+ (CVT_f64_u32 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_f2i_rn_ftz f32:$a),
+ (CVT_s32_f32 $a, CvtRNI_FTZ)>;
+def : Pat<(int_nvvm_f2i_rn f32:$a),
+ (CVT_s32_f32 $a, CvtRNI)>;
+def : Pat<(int_nvvm_f2i_rz_ftz f32:$a),
+ (CVT_s32_f32 $a, CvtRZI_FTZ)>;
+def : Pat<(int_nvvm_f2i_rz f32:$a),
+ (CVT_s32_f32 $a, CvtRZI)>;
+def : Pat<(int_nvvm_f2i_rm_ftz f32:$a),
+ (CVT_s32_f32 $a, CvtRMI_FTZ)>;
+def : Pat<(int_nvvm_f2i_rm f32:$a),
+ (CVT_s32_f32 $a, CvtRMI)>;
+def : Pat<(int_nvvm_f2i_rp_ftz f32:$a),
+ (CVT_s32_f32 $a, CvtRPI_FTZ)>;
+def : Pat<(int_nvvm_f2i_rp f32:$a),
+ (CVT_s32_f32 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_f2ui_rn_ftz f32:$a),
+ (CVT_u32_f32 $a, CvtRNI_FTZ)>;
+def : Pat<(int_nvvm_f2ui_rn f32:$a),
+ (CVT_u32_f32 $a, CvtRNI)>;
+def : Pat<(int_nvvm_f2ui_rz_ftz f32:$a),
+ (CVT_u32_f32 $a, CvtRZI_FTZ)>;
+def : Pat<(int_nvvm_f2ui_rz f32:$a),
+ (CVT_u32_f32 $a, CvtRZI)>;
+def : Pat<(int_nvvm_f2ui_rm_ftz f32:$a),
+ (CVT_u32_f32 $a, CvtRMI_FTZ)>;
+def : Pat<(int_nvvm_f2ui_rm f32:$a),
+ (CVT_u32_f32 $a, CvtRMI)>;
+def : Pat<(int_nvvm_f2ui_rp_ftz f32:$a),
+ (CVT_u32_f32 $a, CvtRPI_FTZ)>;
+def : Pat<(int_nvvm_f2ui_rp f32:$a),
+ (CVT_u32_f32 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_i2f_rn i32:$a),
+ (CVT_f32_s32 $a, CvtRN)>;
+def : Pat<(int_nvvm_i2f_rz i32:$a),
+ (CVT_f32_s32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_i2f_rm i32:$a),
+ (CVT_f32_s32 $a, CvtRM)>;
+def : Pat<(int_nvvm_i2f_rp i32:$a),
+ (CVT_f32_s32 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_ui2f_rn i32:$a),
+ (CVT_f32_u32 $a, CvtRN)>;
+def : Pat<(int_nvvm_ui2f_rz i32:$a),
+ (CVT_f32_u32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_ui2f_rm i32:$a),
+ (CVT_f32_u32 $a, CvtRM)>;
+def : Pat<(int_nvvm_ui2f_rp i32:$a),
+ (CVT_f32_u32 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b),
+ (CVT_bf16x2_f32 $a, $b, CvtRN)>;
+def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b),
+ (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
+def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b),
+ (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
+def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b),
+ (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;
+
+def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b),
+ (CVT_f16x2_f32 $a, $b, CvtRN)>;
+def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b),
+ (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
+def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b),
+ (CVT_f16x2_f32 $a, $b, CvtRZ)>;
+def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b),
+ (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;
+
+def : Pat<(int_nvvm_f2bf16_rn f32:$a),
+ (CVT_bf16_f32 $a, CvtRN)>;
+def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a),
+ (CVT_bf16_f32 $a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_f2bf16_rz f32:$a),
+ (CVT_bf16_f32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a),
+ (CVT_bf16_f32 $a, CvtRZ_RELU)>;
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
@@ -1388,113 +1752,126 @@ def INT_NVVM_D2I_HI : F_MATH_1<
"}}"),
Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
-def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
-def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
-def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
-def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
-def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
- (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
-def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
-def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
-def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
-def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
- (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
- (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
-def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
-def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
- (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
-
-def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
- (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
- (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
- (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
-
-def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
-def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
-def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
-def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
- (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
-
-
-def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
-def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
- (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
-
-//
-// Bitcast
-//
-
-def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
- Float32Regs, int_nvvm_bitcast_f2i>;
-def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
- Int32Regs, int_nvvm_bitcast_i2f>;
-
-def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
- Int64Regs, int_nvvm_bitcast_ll2d>;
-def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
- Float64Regs, int_nvvm_bitcast_d2ll>;
+def : Pat<(int_nvvm_f2ll_rn_ftz f32:$a),
+ (CVT_s64_f32 $a, CvtRNI_FTZ)>;
+def : Pat<(int_nvvm_f2ll_rn f32:$a),
+ (CVT_s64_f32 $a, CvtRNI)>;
+def : Pat<(int_nvvm_f2ll_rz_ftz f32:$a),
+ (CVT_s64_f32 $a, CvtRZI_FTZ)>;
+def : Pat<(int_nvvm_f2ll_rz f32:$a),
+ (CVT_s64_f32 $a, CvtRZI)>;
+def : Pat<(int_nvvm_f2ll_rm_ftz f32:$a),
+ (CVT_s64_f32 $a, CvtRMI_FTZ)>;
+def : Pat<(int_nvvm_f2ll_rm f32:$a),
+ (CVT_s64_f32 $a, CvtRMI)>;
+def : Pat<(int_nvvm_f2ll_rp_ftz f32:$a),
+ (CVT_s64_f32 $a, CvtRPI_FTZ)>;
+def : Pat<(int_nvvm_f2ll_rp f32:$a),
+ (CVT_s64_f32 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_f2ull_rn_ftz f32:$a),
+ (CVT_u64_f32 $a, CvtRNI_FTZ)>;
+def : Pat<(int_nvvm_f2ull_rn f32:$a),
+ (CVT_u64_f32 $a, CvtRNI)>;
+def : Pat<(int_nvvm_f2ull_rz_ftz f32:$a),
+ (CVT_u64_f32 $a, CvtRZI_FTZ)>;
+def : Pat<(int_nvvm_f2ull_rz f32:$a),
+ (CVT_u64_f32 $a, CvtRZI)>;
+def : Pat<(int_nvvm_f2ull_rm_ftz f32:$a),
+ (CVT_u64_f32 $a, CvtRMI_FTZ)>;
+def : Pat<(int_nvvm_f2ull_rm f32:$a),
+ (CVT_u64_f32 $a, CvtRMI)>;
+def : Pat<(int_nvvm_f2ull_rp_ftz f32:$a),
+ (CVT_u64_f32 $a, CvtRPI_FTZ)>;
+def : Pat<(int_nvvm_f2ull_rp f32:$a),
+ (CVT_u64_f32 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_d2ll_rn f64:$a),
+ (CVT_s64_f64 $a, CvtRNI)>;
+def : Pat<(int_nvvm_d2ll_rz f64:$a),
+ (CVT_s64_f64 $a, CvtRZI)>;
+def : Pat<(int_nvvm_d2ll_rm f64:$a),
+ (CVT_s64_f64 $a, CvtRMI)>;
+def : Pat<(int_nvvm_d2ll_rp f64:$a),
+ (CVT_s64_f64 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_d2ull_rn f64:$a),
+ (CVT_u64_f64 $a, CvtRNI)>;
+def : Pat<(int_nvvm_d2ull_rz f64:$a),
+ (CVT_u64_f64 $a, CvtRZI)>;
+def : Pat<(int_nvvm_d2ull_rm f64:$a),
+ (CVT_u64_f64 $a, CvtRMI)>;
+def : Pat<(int_nvvm_d2ull_rp f64:$a),
+ (CVT_u64_f64 $a, CvtRPI)>;
+
+def : Pat<(int_nvvm_ll2f_rn i64:$a),
+ (CVT_f32_s64 $a, CvtRN)>;
+def : Pat<(int_nvvm_ll2f_rz i64:$a),
+ (CVT_f32_s64 $a, CvtRZ)>;
+def : Pat<(int_nvvm_ll2f_rm i64:$a),
+ (CVT_f32_s64 $a, CvtRM)>;
+def : Pat<(int_nvvm_ll2f_rp i64:$a),
+ (CVT_f32_s64 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_ull2f_rn i64:$a),
+ (CVT_f32_u64 $a, CvtRN)>;
+def : Pat<(int_nvvm_ull2f_rz i64:$a),
+ (CVT_f32_u64 $a, CvtRZ)>;
+def : Pat<(int_nvvm_ull2f_rm i64:$a),
+ (CVT_f32_u64 $a, CvtRM)>;
+def : Pat<(int_nvvm_ull2f_rp i64:$a),
+ (CVT_f32_u64 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_ll2d_rn i64:$a),
+ (CVT_f64_s64 $a, CvtRN)>;
+def : Pat<(int_nvvm_ll2d_rz i64:$a),
+ (CVT_f64_s64 $a, CvtRZ)>;
+def : Pat<(int_nvvm_ll2d_rm i64:$a),
+ (CVT_f64_s64 $a, CvtRM)>;
+def : Pat<(int_nvvm_ll2d_rp i64:$a),
+ (CVT_f64_s64 $a, CvtRP)>;
+
+def : Pat<(int_nvvm_ull2d_rn i64:$a),
+ (CVT_f64_u64 $a, CvtRN)>;
+def : Pat<(int_nvvm_ull2d_rz i64:$a),
+ (CVT_f64_u64 $a, CvtRZ)>;
+def : Pat<(int_nvvm_ull2d_rm i64:$a),
+ (CVT_f64_u64 $a, CvtRM)>;
+def : Pat<(int_nvvm_ull2d_rp i64:$a),
+ (CVT_f64_u64 $a, CvtRP)>;
+
+
+def : Pat<(int_nvvm_f2h_rn_ftz f32:$a),
+ (CVT_f16_f32 $a, CvtRN_FTZ)>;
+def : Pat<(int_nvvm_f2h_rn f32:$a),
+ (CVT_f16_f32 $a, CvtRN)>;
+
+def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
+ (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
+def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
+ (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
+def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
+ (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
+def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
+ (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;
+
+def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn Int32Regs:$a),
+ (CVT_e4m3x2_f16x2 $a, CvtRN)>;
+def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu Int32Regs:$a),
+ (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn Int32Regs:$a),
+ (CVT_e5m2x2_f16x2 $a, CvtRN)>;
+def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu Int32Regs:$a),
+ (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;
+
+def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn Int16Regs:$a),
+ (CVT_f16x2_e4m3x2 $a, CvtRN)>;
+def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu Int16Regs:$a),
+ (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn Int16Regs:$a),
+ (CVT_f16x2_e5m2x2 $a, CvtRN)>;
+def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a),
+ (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
//
// FNS
@@ -1503,23 +1880,23 @@ def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
class INT_FNS_MBO<dag ins, dag Operands>
: NVPTXInst<(outs Int32Regs:$dst), ins,
"fns.b32 \t$dst, $mask, $base, $offset;",
- [(set Int32Regs:$dst, Operands )]>,
+ [(set i32:$dst, Operands)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
- (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+ (int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
- (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
+ (int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
- (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
+ (int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
- (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
+ (int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
- (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+ (int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
- (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
+ (int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
- (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
+ (int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
(int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
@@ -1538,14 +1915,16 @@ multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, SDNode IMM, list<Predicate> Pred> {
- def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
- Requires<Pred>;
- def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
- Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
+ let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
+ Requires<Pred>;
+ def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
+ Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
+ }
}
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
@@ -1561,15 +1940,17 @@ multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
list<Predicate> Pred> {
- def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat(
- "{{ \n\t",
- ".reg \t.s", TypeStr, " temp; \n\t",
- "neg.s", TypeStr, " \ttemp, $b; \n\t",
- "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
- "}}"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
- Requires<Pred>;
+ let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat(
+ "{{ \n\t",
+ ".reg \t.s", TypeStr, " temp; \n\t",
+ "neg.s", TypeStr, " \ttemp, $b; \n\t",
+ "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
+ "}}"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
+ Requires<Pred>;
+ }
}
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
@@ -1584,29 +1965,31 @@ multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, list<Predicate> Pred> {
- def reg : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, regclass:$b, regclass:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
- Requires<Pred>;
-
- def imm1 : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, IMMType:$b, regclass:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
- Requires<Pred>;
-
- def imm2 : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, regclass:$b, IMMType:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
- Requires<Pred>;
-
- def imm3 : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
- Requires<Pred>;
+ let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
+ def reg : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, regclass:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
+ Requires<Pred>;
+
+ def imm1 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, regclass:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
+ Requires<Pred>;
+
+ def imm2 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, IMMType:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
+ Requires<Pred>;
+
+ def imm3 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
+ Requires<Pred>;
+ }
}
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
@@ -1997,6 +2380,12 @@ defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64
// atom_cas
+def atomic_cmp_swap_i16_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_i16_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
+def atomic_cmp_swap_i16_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
+ (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
(atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
@@ -2010,6 +2399,14 @@ def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
(atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
+defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
+ atomic_cmp_swap_i16_g, i16imm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", ".cas",
+ atomic_cmp_swap_i16_s, i16imm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas",
+ atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_CAS_GEN_16_USE_G : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
+ atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
atomic_cmp_swap_i32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
@@ -2037,7 +2434,7 @@ class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Pr
dag ins, dag Operands>
: NVPTXInst<(outs regclass:$result), ins,
AsmStr,
- [(set (regT regclass:$result), Operands)]>,
+ [(set regT:$result, Operands)]>,
Requires<Preds>;
// Define instruction variants for all addressing modes.
@@ -2048,26 +2445,26 @@ multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
let AddedComplexity = 1 in {
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int16Regs:$src, regclass:$b),
- (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
+ (Intr i16:$src, regT:$b)>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b),
- (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
+ (Intr i32:$src, regT:$b)>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b),
- (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
+ (Intr i64:$src, regT:$b)>;
}
// tablegen can't infer argument types from Intrinsic (though it can
// from Instruction) so we have to enforce specific type on
// immediates via explicit cast to ImmTy.
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int16Regs:$src, ImmType:$b),
- (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
+ (Intr i16:$src, (ImmTy Imm:$b))>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b),
- (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
+ (Intr i32:$src, (ImmTy Imm:$b))>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b),
- (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
+ (Intr i64:$src, (ImmTy Imm:$b))>;
}
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
@@ -2078,31 +2475,31 @@ multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
let AddedComplexity = 2 in {
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b, regclass:$c),
- (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
+ (Intr i32:$src, regT:$b, regT:$c)>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b, regclass:$c),
- (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
+ (Intr i64:$src, regT:$b, regT:$c)>;
}
let AddedComplexity = 1 in {
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b, regclass:$c),
- (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
+ (Intr i32:$src, (ImmTy Imm:$b), regT:$c)>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b, regclass:$c),
- (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
+ (Intr i64:$src, (ImmTy Imm:$b), regT:$c)>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b, ImmType:$c),
- (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
+ (Intr i32:$src, regT:$b, (ImmTy Imm:$c))>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b, ImmType:$c),
- (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
+ (Intr i64:$src, regT:$b, (ImmTy Imm:$c))>;
}
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b, ImmType:$c),
- (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ (Intr i32:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b, ImmType:$c),
- (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ (Intr i64:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
// Constructs intrinsic name and instruction asm strings.
@@ -2221,6 +2618,7 @@ multiclass ATOM2_incdec_impl<string OpStr> {
// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
+ defm _b16 : ATOM3S_impl<OpStr, "i", "b16", i16, Int16Regs, i16imm, imm, i16, []>;
defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
@@ -2441,89 +2839,75 @@ defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
-multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> {
+multiclass NG_TO_G<string Str> {
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
- [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+ "cvta." # Str # ".u32 \t$result, $src;", []>;
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
- [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
- def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
- "{{ .reg .b64 %tmp;\n\t"
- #" cvt.u64.u32 \t%tmp, $src;\n\t"
- #" cvta." # Str # ".u64 \t$result, %tmp; }}",
- [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
- Requires<[ShortPtr]>;
+ "cvta." # Str # ".u64 \t$result, $src;", []>;
}
-multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> {
+multiclass G_TO_NG<string Str> {
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
- [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+ "cvta.to." # Str # ".u32 \t$result, $src;", []>;
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
- [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
- def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
- "{{ .reg .b64 %tmp;\n\t"
- #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
- #" cvt.u32.u64 \t$result, %tmp; }}",
- [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
- Requires<[ShortPtr]>;
-}
-
-defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>;
-defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>;
-defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>;
-defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>;
-defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>;
-
-defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>;
-defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>;
-defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>;
-defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>;
+ "cvta.to." # Str # ".u64 \t$result, $src;", []>;
+}
+
+defm cvta_local : NG_TO_G<"local">;
+defm cvta_shared : NG_TO_G<"shared">;
+defm cvta_global : NG_TO_G<"global">;
+defm cvta_const : NG_TO_G<"const">;
+
+defm cvta_to_local : G_TO_NG<"local">;
+defm cvta_to_shared : G_TO_NG<"shared">;
+defm cvta_to_global : G_TO_NG<"global">;
+defm cvta_to_const : G_TO_NG<"const">;
+
+// nvvm.ptr.param.to.gen
+defm cvta_param : NG_TO_G<"param">;
+
+def : Pat<(int_nvvm_ptr_param_to_gen i32:$src),
+ (cvta_param $src)>;
+
+def : Pat<(int_nvvm_ptr_param_to_gen i64:$src),
+ (cvta_param_64 $src)>;
// nvvm.ptr.gen.to.param
-def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
- (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result,
- (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
-def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
- (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result,
- (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
+def : Pat<(int_nvvm_ptr_gen_to_param i32:$src),
+ (i32 Int32Regs:$src)>;
+def : Pat<(int_nvvm_ptr_gen_to_param i64:$src),
+ (i64 Int64Regs:$src)>;
// nvvm.move intrinsicc
def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
"mov.b16 \t$r, $s;",
- [(set Int16Regs:$r,
- (int_nvvm_move_i16 Int16Regs:$s))]>;
+ [(set i16:$r,
+ (int_nvvm_move_i16 i16:$s))]>;
def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
"mov.b32 \t$r, $s;",
- [(set Int32Regs:$r,
- (int_nvvm_move_i32 Int32Regs:$s))]>;
+ [(set i32:$r,
+ (int_nvvm_move_i32 i32:$s))]>;
def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
"mov.b64 \t$r, $s;",
- [(set Int64Regs:$r,
- (int_nvvm_move_i64 Int64Regs:$s))]>;
+ [(set i64:$r,
+ (int_nvvm_move_i64 i64:$s))]>;
def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
"mov.f32 \t$r, $s;",
- [(set Float32Regs:$r,
- (int_nvvm_move_float Float32Regs:$s))]>;
+ [(set f32:$r,
+ (int_nvvm_move_float f32:$s))]>;
def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
"mov.f64 \t$r, $s;",
- [(set Float64Regs:$r,
- (int_nvvm_move_double Float64Regs:$s))]>;
+ [(set f64:$r,
+ (int_nvvm_move_double f64:$s))]>;
def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
"mov.u32 \t$r, $s;",
- [(set Int32Regs:$r,
- (int_nvvm_move_ptr Int32Regs:$s))]>;
+ [(set i32:$r,
+ (int_nvvm_move_ptr i32:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
"mov.u64 \t$r, $s;",
- [(set Int64Regs:$r,
- (int_nvvm_move_ptr Int64Regs:$s))]>;
+ [(set i64:$r,
+ (int_nvvm_move_ptr i64:$s))]>;
// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
@@ -2536,24 +2920,6 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
[(set Int64Regs:$r,
(int_nvvm_move_ptr texternalsym:$s))]>;*/
-
-// MoveParam %r1, param
-// ptr_local_to_gen %r2, %r1
-// ptr_gen_to_local %r3, %r2
-// ->
-// mov %r1, param
-
-// @TODO: Revisit this. There is a type
-// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
-// instructions are not currently defined. However, we can use the ptr
-// variants and the asm printer will do the right thing.
-def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
- (MoveParam texternalsym:$src)))),
- (nvvm_move_ptr64 texternalsym:$src)>;
-def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
- (MoveParam texternalsym:$src)))),
- (nvvm_move_ptr32 texternalsym:$src)>;
-
def texsurf_handles
: NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
"mov.u64 \t$result, $src;", []>;
@@ -2565,16 +2931,16 @@ def texsurf_handles
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
"// llvm.nvvm.compiler.warn()",
- [(int_nvvm_compiler_warn Int32Regs:$a)]>;
+ [(int_nvvm_compiler_warn i32:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
"// llvm.nvvm.compiler.warn()",
- [(int_nvvm_compiler_warn Int64Regs:$a)]>;
+ [(int_nvvm_compiler_warn i64:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
"// llvm.nvvm.compiler.error()",
- [(int_nvvm_compiler_error Int32Regs:$a)]>;
+ [(int_nvvm_compiler_error i32:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
"// llvm.nvvm.compiler.error()",
- [(int_nvvm_compiler_error Int64Regs:$a)]>;
+ [(int_nvvm_compiler_error i64:$a)]>;
// isspacep
@@ -2582,11 +2948,11 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
"isspacep." # suffix # "\t$d, $a;",
- [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
+ [(set i1:$d, (Intr i32:$a))]>,
Requires<Preds>;
def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
"isspacep." # suffix # "\t$d, $a;",
- [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
+ [(set i1:$d, (Intr i64:$a))]>,
Requires<Preds>;
}
@@ -2637,134 +3003,9 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
-// rotate builtin support
-
-def ROTATE_B32_HW_IMM
- : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, i32imm:$amt),
- "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst,
- (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
- Requires<[hasHWROT32]> ;
-
-def ROTATE_B32_HW_REG
- : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$src, Int32Regs:$amt),
- "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst,
- (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
- Requires<[hasHWROT32]> ;
-
-def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
- (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
- Requires<[noHWROT32]> ;
-
-def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
- (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
- Requires<[noHWROT32]> ;
-
-let hasSideEffects = false in {
- def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
- !strconcat("{{\n\t",
- ".reg .b32 %dummy;\n\t",
- "mov.b64 \t{$dst,%dummy}, $src;\n\t",
- "}}"),
- []> ;
-
- def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
- !strconcat("{{\n\t",
- ".reg .b32 %dummy;\n\t",
- "mov.b64 \t{%dummy,$dst}, $src;\n\t",
- "}}"),
- []> ;
-}
-
-let hasSideEffects = false in {
- def PACK_TWO_INT32
- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
- "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
-}
-
-def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
- (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
- (GET_LO_INT64 Int64Regs:$src))> ;
-
-// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
-// no side effects.
-let hasSideEffects = false in {
- def SHF_L_WRAP_B32_IMM
- : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
- "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
- Requires<[hasHWROT32]>;
-
- def SHF_L_WRAP_B32_REG
- : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
- "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
- Requires<[hasHWROT32]>;
-
- def SHF_R_WRAP_B32_IMM
- : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
- "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
- Requires<[hasHWROT32]>;
-
- def SHF_R_WRAP_B32_REG
- : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
- "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
- Requires<[hasHWROT32]>;
-}
-
-// HW version of rotate 64
-def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
- (PACK_TWO_INT32
- (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
- (GET_LO_INT64 Int64Regs:$src), imm:$amt),
- (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
- (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
- Requires<[hasHWROT32]>;
-
-def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
- (PACK_TWO_INT32
- (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
- (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
- (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
- (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
- Requires<[hasHWROT32]>;
-
-
-def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
- (PACK_TWO_INT32
- (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
- (GET_HI_INT64 Int64Regs:$src), imm:$amt),
- (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
- (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
- Requires<[hasHWROT32]>;
-
-def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
- (PACK_TWO_INT32
- (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
- (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
- (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
- (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
- Requires<[hasHWROT32]>;
-
-// SW version of rotate 64
-def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
- (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
- Requires<[noHWROT32]>;
-def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
- (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
- Requires<[noHWROT32]>;
-def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
- (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
- Requires<[noHWROT32]>;
-def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
- (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
- Requires<[noHWROT32]>;
-
+def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
+ (V2I32toI64 (I64toI32H $src),
+ (I64toI32L $src))> ;
//-----------------------------------
// Texture Intrinsics
@@ -2778,16 +3019,19 @@ let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
class TEX_1D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-
-multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
- def _RR : TEX_1D_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ pattern>;
+
+multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
+ Intrinsic intr> {
+ def _RR : TEX_1D_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x))]>;
def _RI : TEX_1D_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_1D_base<inst, outtype, intype,
@@ -2796,25 +3040,34 @@ multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
(ins i64imm:$t, i64imm:$s)>;
}
-defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
-defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
-defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
-defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
-defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
-defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_1d_v4f32_s32>;
+defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_1d_v4f32_f32>;
+defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_1d_v4s32_s32>;
+defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_v4s32_f32>;
+defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_1d_v4u32_s32>;
+defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_v4u32_f32>;
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_1D_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$lod))]>;
def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
@@ -2824,25 +3077,31 @@ multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
}
defm TEX_1D_F32_F32_LEVEL :
- TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_1d_level_v4f32_f32>;
defm TEX_1D_S32_F32_LEVEL :
- TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_level_v4s32_f32>;
defm TEX_1D_U32_F32_LEVEL :
- TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_level_v4u32_f32>;
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
" \\{$gradx\\}, \\{$grady\\};",
- []>;
+ pattern>;
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_1D_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$gradx, intype:$grady))]>;
def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
@@ -2852,24 +3111,30 @@ multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
}
defm TEX_1D_F32_F32_GRAD
- : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_1d_grad_v4f32_f32>;
defm TEX_1D_S32_F32_GRAD
- : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_grad_v4s32_f32>;
defm TEX_1D_U32_F32_GRAD
- : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_grad_v4u32_f32>;
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
- []>;
+ pattern>;
multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_1D_ARRAY_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x))]>;
def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
@@ -2879,31 +3144,40 @@ multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
}
defm TEX_1D_ARRAY_F32_F32
- : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_v4f32_f32>;
defm TEX_1D_ARRAY_F32_S32
- : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_1d_array_v4f32_s32>;
defm TEX_1D_ARRAY_S32_S32
- : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_1d_array_v4s32_s32>;
defm TEX_1D_ARRAY_S32_F32
- : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_v4s32_f32>;
defm TEX_1D_ARRAY_U32_S32
- : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_1d_array_v4u32_s32>;
defm TEX_1D_ARRAY_U32_F32
- : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_v4u32_f32>;
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_1D_ARRAY_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$lod))]>;
def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
@@ -2913,26 +3187,33 @@ multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
}
defm TEX_1D_ARRAY_F32_F32_LEVEL
- : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_level_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_LEVEL
- : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_level_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_LEVEL
- : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_level_v4u32_f32>;
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x,
intype:$gradx, intype:$grady)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
" \\{$gradx\\}, \\{$grady\\};",
- []>;
+ pattern>;
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_1D_ARRAY_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x,
+ intype:$gradx, intype:$grady))]>;
def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
@@ -2942,48 +3223,63 @@ multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
}
defm TEX_1D_ARRAY_F32_F32_GRAD
- : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_grad_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_GRAD
- : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_grad_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_GRAD
- : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_1d_array_grad_v4u32_f32>;
class TEX_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
- []>;
-
-multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
- def _RR : TEX_2D_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ pattern>;
+
+multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
+ Intrinsic intr> {
+ def _RR : TEX_2D_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
-defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
-defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
-defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
-defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
-defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
-defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_2d_v4f32_f32>;
+defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_2d_v4f32_s32>;
+defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_2d_v4s32_s32>;
+defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_v4s32_f32>;
+defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_2d_v4u32_s32>;
+defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_v4u32_f32>;
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_2D_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$lod))]>;
def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
@@ -2993,14 +3289,18 @@ multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
}
defm TEX_2D_F32_F32_LEVEL :
- TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_2d_level_v4f32_f32>;
defm TEX_2D_S32_F32_LEVEL :
- TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_level_v4s32_f32>;
defm TEX_2D_U32_F32_LEVEL :
- TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_level_v4u32_f32>;
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y,
@@ -3008,12 +3308,16 @@ class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
intype:$grady0, intype:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
- []>;
+ pattern>;
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_2D_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1))]>;
def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
@@ -3023,25 +3327,31 @@ multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
}
defm TEX_2D_F32_F32_GRAD :
- TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_2d_grad_v4f32_f32>;
defm TEX_2D_S32_F32_GRAD :
- TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_grad_v4s32_f32>;
defm TEX_2D_U32_F32_GRAD :
- TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_grad_v4u32_f32>;
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
+ pattern>;
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_2D_ARRAY_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y))]>;
def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
@@ -3051,32 +3361,41 @@ multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
}
defm TEX_2D_ARRAY_F32_F32
- : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_v4f32_f32>;
defm TEX_2D_ARRAY_F32_S32
- : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_2d_array_v4f32_s32>;
defm TEX_2D_ARRAY_S32_S32
- : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_2d_array_v4s32_s32>;
defm TEX_2D_ARRAY_S32_F32
- : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_v4s32_f32>;
defm TEX_2D_ARRAY_U32_S32
- : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_2d_array_v4u32_s32>;
defm TEX_2D_ARRAY_U32_F32
- : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_v4u32_f32>;
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_2D_ARRAY_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$lod))]>;
def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
@@ -3086,14 +3405,18 @@ multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
}
defm TEX_2D_ARRAY_F32_F32_LEVEL
- : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_level_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_LEVEL
- : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_level_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_LEVEL
- : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_level_v4u32_f32>;
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
@@ -3102,12 +3425,16 @@ class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
- []>;
+ pattern>;
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_2D_ARRAY_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1))]>;
def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
@@ -3117,24 +3444,30 @@ multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
}
defm TEX_2D_ARRAY_F32_F32_GRAD
- : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_grad_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_GRAD
- : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_grad_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_GRAD
- : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_2d_array_grad_v4u32_f32>;
class TEX_3D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-
-multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
- def _RR : TEX_3D_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ pattern>;
+
+multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
+ Intrinsic intr> {
+ def _RR : TEX_3D_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>;
def _RI : TEX_3D_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_3D_base<inst, outtype, intype,
@@ -3143,27 +3476,37 @@ multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
(ins i64imm:$t, i64imm:$s)>;
}
-defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
-defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
-defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
-defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
-defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
-defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_3d_v4f32_f32>;
+defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_3d_v4f32_s32>;
+defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_3d_v4s32_s32>;
+defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_3d_v4s32_f32>;
+defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_3d_v4u32_s32>;
+defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_3d_v4u32_f32>;
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y, intype:$z,
intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_3D_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
+ intype:$lod))]>;
def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
@@ -3173,14 +3516,18 @@ multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
}
defm TEX_3D_F32_F32_LEVEL
- : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_3d_level_v4f32_f32>;
defm TEX_3D_S32_F32_LEVEL
- : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_3d_level_v4s32_f32>;
defm TEX_3D_U32_F32_LEVEL
- : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_3d_level_v4u32_f32>;
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y, intype:$z,
@@ -3191,12 +3538,16 @@ class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
" [$t, $s, \\{$x, $y, $z, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+ pattern>;
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_3D_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1, intype:$gradx2,
+ intype:$grady0, intype:$grady1, intype:$grady2))]>;
def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
@@ -3206,24 +3557,30 @@ multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
}
defm TEX_3D_F32_F32_GRAD
- : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_3d_grad_v4f32_f32>;
defm TEX_3D_S32_F32_GRAD
- : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_3d_grad_v4s32_f32>;
defm TEX_3D_U32_F32_GRAD
- : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_3d_grad_v4u32_f32>;
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-
-multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
- def _RR : TEX_CUBE_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ pattern>;
+
+multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
+ Intrinsic intr> {
+ def _RR : TEX_CUBE_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>;
def _RI : TEX_CUBE_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_CUBE_base<inst, outtype, intype,
@@ -3233,26 +3590,33 @@ multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
}
defm TEX_CUBE_F32_F32
- : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_cube_v4f32_f32>;
defm TEX_CUBE_S32_F32
- : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_v4s32_f32>;
defm TEX_CUBE_U32_F32
- : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_v4u32_f32>;
class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y, intype:$z,
intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_CUBE_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
+ intype:$lod))]>;
def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
@@ -3262,26 +3626,32 @@ multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
}
defm TEX_CUBE_F32_F32_LEVEL
- : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_cube_level_v4f32_f32>;
defm TEX_CUBE_S32_F32_LEVEL
- : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_level_v4s32_f32>;
defm TEX_CUBE_U32_F32_LEVEL
- : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_level_v4u32_f32>;
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
intype:$z)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
+ pattern>;
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_CUBE_ARRAY_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z))]>;
def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
@@ -3291,26 +3661,33 @@ multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
}
defm TEX_CUBE_ARRAY_F32_F32
- : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_cube_array_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32
- : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_array_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32
- : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_array_v4u32_f32>;
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
intype:$z, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _RR : TEX_CUBE_ARRAY_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z,
+ intype:$lod))]>;
def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
@@ -3321,25 +3698,31 @@ multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
defm TEX_CUBE_ARRAY_F32_F32_LEVEL
: TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_cube_array_level_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL
: TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_array_level_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL
: TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_cube_array_level_v4u32_f32>;
class TLD4_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp>
+ NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$v0, outtype:$v1,
outtype:$v2, outtype:$v3),
!con(texsamp, (ins intype:$x, intype:$y)),
inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
- []>;
-
-multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
- def _RR : TLD4_2D_base<inst, outtype, intype,
- (ins Int64Regs:$t, Int64Regs:$s)>;
+ pattern>;
+
+multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
+ Intrinsic intr> {
+ def _RR : TLD4_2D_base<
+ inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
+ [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
+ (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
def _RI : TLD4_2D_base<inst, outtype, intype,
(ins Int64Regs:$t, i64imm:$s)>;
def _IR : TLD4_2D_base<inst, outtype, intype,
@@ -3349,31 +3732,43 @@ multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
}
defm TLD4_R_2D_F32_F32
- : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_r_2d_v4f32_f32>;
defm TLD4_G_2D_F32_F32
- : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_g_2d_v4f32_f32>;
defm TLD4_B_2D_F32_F32
- : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_b_2d_v4f32_f32>;
defm TLD4_A_2D_F32_F32
- : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_a_2d_v4f32_f32>;
defm TLD4_R_2D_S32_F32
- : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_r_2d_v4s32_f32>;
defm TLD4_G_2D_S32_F32
- : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_g_2d_v4s32_f32>;
defm TLD4_B_2D_S32_F32
- : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_b_2d_v4s32_f32>;
defm TLD4_A_2D_S32_F32
- : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_a_2d_v4s32_f32>;
defm TLD4_R_2D_U32_F32
- : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_r_2d_v4u32_f32>;
defm TLD4_G_2D_U32_F32
- : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_g_2d_v4u32_f32>;
defm TLD4_B_2D_U32_F32
- : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_b_2d_v4u32_f32>;
defm TLD4_A_2D_U32_F32
- : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_a_2d_v4u32_f32>;
}
@@ -3383,206 +3778,268 @@ let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x))]>;
def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_F32_S32
- : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_unified_1d_v4f32_s32>;
defm TEX_UNIFIED_1D_F32_F32
- : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_S32
- : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_1d_v4s32_s32>;
defm TEX_UNIFIED_1D_S32_F32
- : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_S32
- : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_1d_v4u32_s32>;
defm TEX_UNIFIED_1D_U32_F32
- : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_v4u32_f32>;
class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$lod))]>;
def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_F32_F32_LEVEL
- : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_level_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL
- : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_level_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL
- : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_level_v4u32_f32>;
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$gradx, intype:$grady))]>;
def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_F32_F32_GRAD
- : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_grad_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_F32_GRAD
- : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_grad_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_F32_GRAD
- : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_grad_v4u32_f32>;
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_ARRAY_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x))]>;
def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_ARRAY_F32_S32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_unified_1d_array_v4f32_s32>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_1d_array_v4s32_s32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_1d_array_v4u32_s32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_v4u32_f32>;
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$lod))]>;
def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
: TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_level_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
: TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_level_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_level_v4u32_f32>;
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x,
intype:$gradx, intype:$grady)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$gradx, intype:$grady))]>;
def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
: TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_grad_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
: TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_grad_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
: TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_1d_array_grad_v4u32_f32>;
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y))]>;
def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_F32_S32
- : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_unified_2d_v4f32_s32>;
defm TEX_UNIFIED_2D_F32_F32
- : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_S32
- : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_2d_v4s32_s32>;
defm TEX_UNIFIED_2D_S32_F32
- : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_S32
- : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_2d_v4u32_s32>;
defm TEX_UNIFIED_2D_U32_F32
- : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_v4u32_f32>;
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$lod))]>;
def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_F32_F32_LEVEL
- : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_level_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
- : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_level_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
- : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_level_v4u32_f32>;
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y,
@@ -3590,75 +4047,100 @@ class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
intype:$grady0, intype:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1))]>;
def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_F32_F32_GRAD
- : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_grad_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
- : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_grad_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
- : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_grad_v4u32_f32>;
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_ARRAY_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$y))]>;
def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_ARRAY_F32_S32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_unified_2d_array_v4f32_s32>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_2d_array_v4s32_s32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_2d_array_v4u32_s32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_v4u32_f32>;
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$lod))]>;
def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
: TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_level_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
: TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_level_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_level_v4u32_f32>;
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
@@ -3666,74 +4148,98 @@ class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
intype:$grady0, intype:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1))]>;
def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
: TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_grad_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
: TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_grad_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
: TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_2d_array_grad_v4u32_f32>;
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex, list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$z)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_3D_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$z))]>;
def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_3D_F32_S32
- : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs,
+ int_nvvm_tex_unified_3d_v4f32_s32>;
defm TEX_UNIFIED_3D_F32_F32
- : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_S32
- : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_3d_v4s32_s32>;
defm TEX_UNIFIED_3D_S32_F32
- : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_S32
- : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs,
+ int_nvvm_tex_unified_3d_v4u32_s32>;
defm TEX_UNIFIED_3D_U32_F32
- : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_v4u32_f32>;
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_3D_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_3D_F32_F32_LEVEL
- : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_level_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL
- : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_level_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL
- : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_level_v4u32_f32>;
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$z,
@@ -3743,117 +4249,151 @@ class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_3D_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1, intype:$gradx2,
+ intype:$grady0, intype:$grady1, intype:$grady2))]>;
def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_3D_F32_F32_GRAD
- : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_grad_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_F32_GRAD
- : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_grad_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_F32_GRAD
- : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_3d_grad_v4u32_f32>;
class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$z)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$z))]>;
def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_F32_F32
- : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32
- : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32
- : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_v4u32_f32>;
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
: TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_level_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
: TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_level_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
: TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_level_v4u32_f32>;
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z))]>;
def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
- : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
- : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
- : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_v4u32_f32>;
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
intype:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_level_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_level_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_level_v4u32_f32>;
class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$y, intype:$z,
@@ -3863,23 +4403,32 @@ class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1, intype:$gradx2,
+ intype:$grady0, intype:$grady1, intype:$grady2))]>;
def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_F32_F32_GRAD
- : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_GRAD
- : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_GRAD
- : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_grad_v4u32_f32>;
class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
@@ -3889,64 +4438,88 @@ class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+ pattern>;
multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2))]>;
def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
(ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
- Float32Regs, Float32Regs>;
+ Float32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
- Int32Regs, Float32Regs>;
+ Int32Regs, Float32Regs,
+ int_nvvm_tex_unified_cube_array_grad_v4u32_f32>;
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex>
+ NVPTXRegClass intype, dag tex,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$v0, outtype:$v1,
outtype:$v2, outtype:$v3),
!con(tex, (ins intype:$x, intype:$y)),
inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
- []>;
+ pattern>;
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype> {
- def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ NVPTXRegClass intype, Intrinsic intr> {
+ def _R : TLD4_UNIFIED_2D_base<
+ inst, outtype, intype, (ins Int64Regs:$t),
+ [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
+ (intr i64:$t, intype:$x, intype:$y))]>;
def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
defm TLD4_UNIFIED_R_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_unified_r_2d_v4f32_f32>;
defm TLD4_UNIFIED_G_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_unified_g_2d_v4f32_f32>;
defm TLD4_UNIFIED_B_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_unified_b_2d_v4f32_f32>;
defm TLD4_UNIFIED_A_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
+ int_nvvm_tld4_unified_a_2d_v4f32_f32>;
defm TLD4_UNIFIED_R_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_r_2d_v4s32_f32>;
defm TLD4_UNIFIED_G_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_g_2d_v4s32_f32>;
defm TLD4_UNIFIED_B_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_b_2d_v4s32_f32>;
defm TLD4_UNIFIED_A_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_a_2d_v4s32_f32>;
defm TLD4_UNIFIED_R_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_r_2d_v4u32_f32>;
defm TLD4_UNIFIED_G_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_g_2d_v4u32_f32>;
defm TLD4_UNIFIED_B_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_b_2d_v4u32_f32>;
defm TLD4_UNIFIED_A_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
+ int_nvvm_tld4_unified_a_2d_v4u32_f32>;
}
@@ -3956,13 +4529,17 @@ defm TLD4_UNIFIED_A_2D_U32_F32
let IsSuld = true in {
-class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
!con(surf, (ins Int32Regs:$x)),
inst # " \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+ pattern>;
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
- def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, (intr i64:$s, i32:$x))]>;
def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -3981,13 +4558,18 @@ defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
-class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
!con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+ pattern>;
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
- def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r,
+ (intr i64:$s, i32:$l, i32:$x))]>;
def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4018,13 +4600,17 @@ defm SULD_1D_ARRAY_I32_ZERO
defm SULD_1D_ARRAY_I64_ZERO
: SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
-class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
!con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+ pattern>;
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
- def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, (intr i64:$s, i32:$x, i32:$y))]>;
def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4043,13 +4629,18 @@ defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
-class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
!con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+ pattern>;
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
- def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r,
+ (intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4068,13 +4659,18 @@ defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
-class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
!con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+ pattern>;
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
- def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r,
+ (intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4096,13 +4692,18 @@ defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
let IsSuld = 2 in {
-class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
!con(surf, (ins Int32Regs:$x)),
inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
+ pattern>;
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
- def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g,
+ (intr i64:$s, i32:$x))]>;
def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4121,13 +4722,18 @@ defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
-class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
!con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+ pattern>;
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
- def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g,
+ (intr i64:$s, i32:$l, i32:$x))]>;
def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4158,13 +4764,18 @@ defm SULD_1D_ARRAY_V2I32_ZERO
defm SULD_1D_ARRAY_V2I64_ZERO
: SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
-class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
!con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+ pattern>;
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
- def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g,
+ (intr i64:$s, i32:$x, i32:$y))]>;
def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4195,13 +4806,18 @@ defm SULD_2D_V2I32_ZERO
defm SULD_2D_V2I64_ZERO
: SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
-class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
!con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+ pattern>;
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
- def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g,
+ (intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4232,13 +4848,18 @@ defm SULD_2D_ARRAY_V2I32_ZERO
defm SULD_2D_ARRAY_V2I64_ZERO
: SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
-class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
!con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+ pattern>;
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
- def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g,
+ (intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4261,13 +4882,18 @@ defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
let IsSuld = 3 in {
-class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
!con(surf, (ins Int32Regs:$x)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+ pattern>;
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
- def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$s, i32:$x))]>;
def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4283,13 +4909,19 @@ defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
-class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
!con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
- []>;
+ pattern>;
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
- def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b,
+ outtype:$a,
+ (intr i64:$s, i32:$l, i32:$x))]>;
def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4314,13 +4946,18 @@ defm SULD_1D_ARRAY_V4I16_ZERO
defm SULD_1D_ARRAY_V4I32_ZERO
: SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
-class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
!con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
+ pattern>;
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
- def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$s, i32:$x, i32:$y))]>;
def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4336,13 +4973,19 @@ defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
-class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
!con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+ pattern>;
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
- def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b,
+ outtype:$a,
+ (intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4367,13 +5010,18 @@ defm SULD_2D_ARRAY_V4I16_ZERO
defm SULD_2D_ARRAY_V4I32_ZERO
: SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
-class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
+ list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
!con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+ pattern>;
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
- def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
+
+ def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s),
+ [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
+ (intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
@@ -4462,22 +5110,22 @@ def TXQ_NUM_MIPMAP_LEVELS_I
[]>;
}
-def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
- (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
- (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_width Int64Regs:$a),
- (TXQ_WIDTH_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_height Int64Regs:$a),
- (TXQ_HEIGHT_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
- (TXQ_DEPTH_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
- (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
- (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
- (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_channel_order i64:$a),
+ (TXQ_CHANNEL_ORDER_R $a)>;
+def : Pat<(int_nvvm_txq_channel_data_type i64:$a),
+ (TXQ_CHANNEL_DATA_TYPE_R $a)>;
+def : Pat<(int_nvvm_txq_width i64:$a),
+ (TXQ_WIDTH_R $a)>;
+def : Pat<(int_nvvm_txq_height i64:$a),
+ (TXQ_HEIGHT_R $a)>;
+def : Pat<(int_nvvm_txq_depth i64:$a),
+ (TXQ_DEPTH_R $a)>;
+def : Pat<(int_nvvm_txq_array_size i64:$a),
+ (TXQ_ARRAY_SIZE_R $a)>;
+def : Pat<(int_nvvm_txq_num_samples i64:$a),
+ (TXQ_NUM_SAMPLES_R $a)>;
+def : Pat<(int_nvvm_txq_num_mipmap_levels i64:$a),
+ (TXQ_NUM_MIPMAP_LEVELS_R $a)>;
//-----------------------------------
@@ -4535,18 +5183,18 @@ def SUQ_ARRAY_SIZE_I
[]>;
}
-def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
- (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
- (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_suq_width Int64Regs:$a),
- (SUQ_WIDTH_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_suq_height Int64Regs:$a),
- (SUQ_HEIGHT_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
- (SUQ_DEPTH_R Int64Regs:$a)>;
-def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
- (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_channel_order i64:$a),
+ (SUQ_CHANNEL_ORDER_R $a)>;
+def : Pat<(int_nvvm_suq_channel_data_type i64:$a),
+ (SUQ_CHANNEL_DATA_TYPE_R $a)>;
+def : Pat<(int_nvvm_suq_width i64:$a),
+ (SUQ_WIDTH_R $a)>;
+def : Pat<(int_nvvm_suq_height i64:$a),
+ (SUQ_HEIGHT_R $a)>;
+def : Pat<(int_nvvm_suq_depth i64:$a),
+ (SUQ_DEPTH_R $a)>;
+def : Pat<(int_nvvm_suq_array_size i64:$a),
+ (SUQ_ARRAY_SIZE_R $a)>;
//===- Handle Query -------------------------------------------------------===//
@@ -4555,15 +5203,15 @@ def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
def ISTYPEP_SAMPLER
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
"istypep.samplerref \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
+ [(set i1:$d, (int_nvvm_istypep_sampler i64:$a))]>;
def ISTYPEP_SURFACE
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
"istypep.surfref \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
+ [(set i1:$d, (int_nvvm_istypep_surface i64:$a))]>;
def ISTYPEP_TEXTURE
: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
"istypep.texref \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
+ [(set i1:$d, (int_nvvm_istypep_texture i64:$a))]>;
//===- Surface Stores -----------------------------------------------------===//
@@ -6354,13 +7002,13 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
: NVPTXInst<(outs Int64Regs:$d), (ins),
!strconcat("mov.u64 \t$d, %", regname, ";"),
- [(set Int64Regs:$d, (intop))]>,
+ [(set i64:$d, (intop))]>,
Requires<Preds>;
class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
: NVPTXInst<(outs Int32Regs:$d), (ins),
!strconcat("mov.u32 \t$d, %", regname, ";"),
- [(set Int32Regs:$d, (intop))]>,
+ [(set i32:$d, (intop))]>,
Requires<Preds>;
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
@@ -6442,7 +7090,7 @@ def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// handle the constant.
def INT_PTX_SREG_WARPSIZE :
NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
- [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
+ [(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
@@ -6854,19 +7502,19 @@ foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
multiclass MAPA<string suffix, Intrinsic Intr> {
def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
"mapa" # suffix # ".u32\t$d, $a, $b;",
- [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
+ [(set i32:$d, (Intr i32:$a, i32:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
"mapa" # suffix # ".u32\t$d, $a, $b;",
- [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
+ [(set i32:$d, (Intr i32:$a, imm:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
"mapa" # suffix # ".u64\t$d, $a, $b;",
- [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
+ [(set i64:$d, (Intr i64:$a, i32:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
"mapa" # suffix # ".u64\t$d, $a, $b;",
- [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
+ [(set i64:$d, (Intr i64:$a, imm:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
}
@@ -6877,11 +7525,11 @@ defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluste
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"getctarank" # suffix # ".u32\t$d, $a;",
- [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
+ [(set i32:$d, (Intr i32:$a))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"getctarank" # suffix # ".u64\t$d, $a;",
- [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
+ [(set i32:$d, (Intr i64:$a))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
}
@@ -6890,7 +7538,7 @@ defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getcta
def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
"mov.pred\t$d, %is_explicit_cluster;",
- [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
+ [(set i1:$d, (int_nvvm_is_explicit_cluster))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
// setmaxnreg inc/dec intrinsics
@@ -6907,4 +7555,30 @@ defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_align
} // isConvergent
+//
+// WGMMA fence instructions
+//
+let isConvergent = true in {
+def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.fence.sync.aligned;",
+ [(int_nvvm_wgmma_fence_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>;
+
+def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.commit_group.sync.aligned;",
+ [(int_nvvm_wgmma_commit_group_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>;
+
+def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned \t$n;",
+ [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>;
+} // isConvergent = true
+
+def GRIDDEPCONTROL_LAUNCH_DEPENDENTS :
+ NVPTXInst<(outs), (ins),
+ "griddepcontrol.launch_dependents;",
+ [(int_nvvm_griddepcontrol_launch_dependents)]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+
+def GRIDDEPCONTROL_WAIT :
+ NVPTXInst<(outs), (ins),
+ "griddepcontrol.wait;",
+ [(int_nvvm_griddepcontrol_wait)]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+
def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;