diff options
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXIntrinsics.td')
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2860 |
1 files changed, 1767 insertions, 1093 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index c81dfa68e4bd..56d8b734bf01 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -69,17 +69,17 @@ def INT_BARRIER0 : NVPTXInst<(outs), (ins), [(int_nvvm_barrier0)]>; def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1), "bar.sync \t$src1;", - [(int_nvvm_barrier_n Int32Regs:$src1)]>; + [(int_nvvm_barrier_n i32:$src1)]>; def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2), "bar.sync \t$src1, $src2;", - [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>; + [(int_nvvm_barrier i32:$src1, i32:$src2)]>; def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", "setp.ne.u32 \t%p1, $pred, 0; \n\t", "bar.red.popc.u32 \t$dst, 0, %p1; \n\t", "}}"), - [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>; + [(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>; def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", @@ -88,7 +88,7 @@ def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), "bar.red.and.pred \t%p2, 0, %p1; \n\t", "selp.u32 \t$dst, 1, 0, %p2; \n\t", "}}"), - [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>; + [(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>; def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", @@ -97,7 +97,7 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), "bar.red.or.pred \t%p2, 0, %p1; \n\t", "selp.u32 \t$dst, 1, 0, %p2; \n\t", "}}"), - [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>; + [(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>; def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;", [(int_nvvm_bar_sync imm:$i)]>; @@ -106,27 +106,27 @@ def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i [(int_nvvm_bar_warp_sync imm:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;", - [(int_nvvm_bar_warp_sync Int32Regs:$i)]>, + [(int_nvvm_bar_warp_sync i32:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;", [(int_nvvm_barrier_sync imm:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;", - [(int_nvvm_barrier_sync Int32Regs:$i)]>, + [(int_nvvm_barrier_sync i32:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt), "barrier.sync \t$id, $cnt;", - [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>, + [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt), "barrier.sync \t$id, $cnt;", - [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>, + [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt), "barrier.sync \t$id, $cnt;", - [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>, + [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt), "barrier.sync \t$id, $cnt;", @@ -217,7 +217,7 @@ foreach sync = [false, true] in { multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), "vote." # mode # " \t$dest, $pred;", - [(set regclass:$dest, (IntOp Int1Regs:$pred))]>, + [(set regclass:$dest, (IntOp i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; } @@ -230,11 +230,11 @@ defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>; multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred), "vote.sync." # mode # " \t$dest, $pred, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>, + [(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred), "vote.sync." # mode #" \t$dest, $pred, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>, + [(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; } @@ -243,30 +243,40 @@ defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>; defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>; defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>; +// elect.sync +def INT_ELECT_SYNC_I : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask), + "elect.sync \t$dest|$pred, $mask;", + [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>, + Requires<[hasPTX<80>, hasSM<90>]>; +def INT_ELECT_SYNC_R : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask), + "elect.sync \t$dest|$pred, $mask;", + [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>, + Requires<[hasPTX<80>, hasSM<90>]>; + multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp, Operand ImmOp> { def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>, + [(set i32:$dest, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set i32:$dest, (IntOp i32:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>, + [(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; } // activemask.b32 def ACTIVEMASK : NVPTXInst<(outs Int32Regs:$dest), (ins), "activemask.b32 \t$dest;", - [(set Int32Regs:$dest, (int_nvvm_activemask))]>, + [(set i32:$dest, (int_nvvm_activemask))]>, Requires<[hasPTX<62>, hasSM<30>]>; defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, @@ -279,22 +289,22 @@ multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic Int def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; } defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, @@ -305,7 +315,7 @@ defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_s multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> { def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask), "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;", - [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>, + [(set i32:$dst, (Intrin i32:$src, Int32Regs:$mask))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -335,6 +345,48 @@ def INT_FENCE_SC_CLUSTER: MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>, Requires<[hasPTX<78>, hasSM<90>]>; +// Proxy fence (uni-directional) +// fence.proxy.tensormap.release variants + +class FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<string Scope, Intrinsic Intr> : + NVPTXInst<(outs), (ins), + "fence.proxy.tensormap::generic.release." # Scope # ";", [(Intr)]>, + Requires<[hasPTX<83>, hasSM<90>]>; + +def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CTA: + FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"cta", + int_nvvm_fence_proxy_tensormap_generic_release_cta>; +def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CLUSTER: + FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"cluster", + int_nvvm_fence_proxy_tensormap_generic_release_cluster>; +def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_GPU: + FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"gpu", + int_nvvm_fence_proxy_tensormap_generic_release_gpu>; +def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS: + FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"sys", + int_nvvm_fence_proxy_tensormap_generic_release_sys>; + +// fence.proxy.tensormap.acquire variants + +class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<string Scope, Intrinsic Intr> : + NVPTXInst<(outs), (ins Int64Regs:$addr), + "fence.proxy.tensormap::generic.acquire." # Scope # " [$addr], 128;", + [(Intr i64:$addr, (i32 128))]>, + Requires<[hasPTX<83>, hasSM<90>]>; + +def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CTA : + FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"cta", + int_nvvm_fence_proxy_tensormap_generic_acquire_cta>; +def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CLUSTER : + FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"cluster", + int_nvvm_fence_proxy_tensormap_generic_acquire_cluster>; +def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_GPU : + FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"gpu", + int_nvvm_fence_proxy_tensormap_generic_acquire_gpu>; +def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_SYS : + FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"sys", + int_nvvm_fence_proxy_tensormap_generic_acquire_sys>; + //----------------------------------- // Async Copy Functions //----------------------------------- @@ -342,11 +394,11 @@ def INT_FENCE_SC_CLUSTER: multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> { def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"), - [(Intrin Int32Regs:$addr)]>, + [(Intrin i32:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"), - [(Intrin Int64Regs:$addr)]>, + [(Intrin i64:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -362,28 +414,28 @@ defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED : multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> { def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"), - [(Intrin Int32Regs:$dst, Int32Regs:$src)]>, + [(Intrin i32:$dst, i32:$src)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"), - [(Intrin Int64Regs:$dst, Int64Regs:$src)]>, + [(Intrin i64:$dst, i64:$src)]>, Requires<[hasPTX<70>, hasSM<80>]>; // Variant with src_size parameter def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>, + [(IntrinS i32:$dst, i32:$src, i32:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>, + [(IntrinS i32:$dst, i32:$src, imm:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>, + [(IntrinS i64:$dst, i64:$src, i32:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>, + [(IntrinS i64:$dst, i64:$src, imm:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -409,7 +461,7 @@ def CP_ASYNC_COMMIT_GROUP : def CP_ASYNC_WAIT_GROUP : NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;", - [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>, + [(int_nvvm_cp_async_wait_group timm:$n)]>, Requires<[hasPTX<70>, hasSM<80>]>; def CP_ASYNC_WAIT_ALL : @@ -425,14 +477,269 @@ def CP_ASYNC_BULK_COMMIT_GROUP : def CP_ASYNC_BULK_WAIT_GROUP : NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;", - [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>, + [(int_nvvm_cp_async_bulk_wait_group timm:$n)]>, Requires<[hasPTX<80>, hasSM<90>]>; def CP_ASYNC_BULK_WAIT_GROUP_READ : NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;", - [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>, + [(int_nvvm_cp_async_bulk_wait_group_read timm:$n)]>, Requires<[hasPTX<80>, hasSM<90>]>; +//------------------------------ +// TMA Async Bulk Copy Functions +//------------------------------ + +class CpAsyncBulkStr<bit mc, bit ch> { + // Shared to Global memory + string S2G = "cp.async.bulk.global.shared::cta.bulk_group" + # !if(ch, ".L2::cache_hint", ""); + + // Global to Shared cluster memory + string G2S = "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes" + # !if(mc, ".multicast::cluster", "") + # !if(ch, ".L2::cache_hint", ""); + + // Shared CTA to Cluster memory + string C2C = "cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes"; +} + +multiclass CP_ASYNC_BULK_S2G<NVPTXRegClass rc> { + def NAME: NVPTXInst<(outs), + (ins Int64Regs:$dst, rc:$src, Int32Regs:$size), + !strconcat(CpAsyncBulkStr<0, 0>.S2G, " [$dst], [$src], $size;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + (ins Int64Regs:$dst, rc:$src, Int32Regs:$size, Int64Regs:$ch), + !strconcat(CpAsyncBulkStr<0, 1>.S2G, " [$dst], [$src], $size, $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} +defm CP_ASYNC_BULK_S2G : CP_ASYNC_BULK_S2G<Int64Regs>; +defm CP_ASYNC_BULK_S2G_SHARED32 : CP_ASYNC_BULK_S2G<Int32Regs>; + +multiclass CP_ASYNC_BULK_G2S<NVPTXRegClass rc> { + def NAME: NVPTXInst<(outs), + (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size), + !strconcat(CpAsyncBulkStr<0, 0>.G2S, " [$dst], [$src], $size, [$mbar];"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _MC: NVPTXInst<(outs), + (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc), + !strconcat(CpAsyncBulkStr<1, 0>.G2S, " [$dst], [$src], $size, [$mbar], $mc;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch), + !strconcat(CpAsyncBulkStr<0, 1>.G2S, " [$dst], [$src], $size, [$mbar], $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _MC_CH: NVPTXInst<(outs), + (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc, Int64Regs:$ch), + !strconcat(CpAsyncBulkStr<1, 1>.G2S, " [$dst], [$src], $size, [$mbar], $mc, $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} +defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S<Int64Regs>; +defm CP_ASYNC_BULK_G2S_SHARED32 : CP_ASYNC_BULK_G2S<Int32Regs>; + +multiclass CP_ASYNC_BULK_CTA_TO_CLUSTER<NVPTXRegClass rc> { + def NAME: NVPTXInst<(outs), + (ins rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size), + !strconcat(CpAsyncBulkStr<0, 0>.C2C, " [$dst], [$src], $size, [$mbar];"), + [(int_nvvm_cp_async_bulk_shared_cta_to_cluster rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size)]>, + Requires<[hasPTX<80>, hasSM<90>]>; +} +defm CP_ASYNC_BULK_CTA_TO_CLUSTER : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int64Regs>; +defm CP_ASYNC_BULK_CTA_TO_CLUSTER_SHARED32 : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int32Regs>; + +//------------------------------ +// Bulk Copy Prefetch Functions +//------------------------------ +def CP_ASYNC_BULK_PREFETCH : NVPTXInst<(outs), + (ins Int64Regs:$src, Int32Regs:$size), + "cp.async.bulk.prefetch.L2.global [$src], $size;", []>, + Requires<[hasPTX<80>, hasSM<90>]>; + +def CP_ASYNC_BULK_PREFETCH_CH : NVPTXInst<(outs), + (ins Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch), + "cp.async.bulk.prefetch.L2.global.L2::cache_hint [$src], $size, $ch;", []>, + Requires<[hasPTX<80>, hasSM<90>]>; +//------------------------------------- +// TMA Async Bulk Tensor Copy Functions +//------------------------------------- + +// From Global to Shared memory (G2S) +class G2S_STRINGS<int dim, string mode, bit mc, bit ch, bit is_shared32 = 0> { + string prefix = "cp.async.bulk.tensor"; + string dir = "shared::cluster.global"; + string completion = "mbarrier::complete_tx::bytes"; + string inst_name = prefix + # "." # dim # "d" + # "." # dir + # "." # mode + # "." # completion + # !if(mc, ".multicast::cluster", "") + # !if(ch, ".L2::cache_hint", ""); + string intr_name = "CP_ASYNC_BULK_TENSOR_G2S_" + # dim # "D" + # !if(is_shared32, "_SHARED32", "") + # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); +} + +multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode> { + defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar asm_str_default = " [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]"; + defvar rc = !if(is_shared32, Int32Regs, Int64Regs); + + defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); + defvar im2col_dag = !if(!eq(mode, "im2col"), + !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), + (ins)); + defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); + defvar im2col_asm_str = ", {{" # im2col_str # "}}"; + + defvar asm_str = !if(!eq(mode, "im2col"), + !strconcat(asm_str_default, im2col_asm_str), asm_str_default); + + def NAME: NVPTXInst<(outs), + !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag), + !strconcat(G2S_STRINGS<dim, mode, 0, 0>.inst_name, asm_str, ";"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _MC: NVPTXInst<(outs), + !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int16Regs:$mc)), + !strconcat(G2S_STRINGS<dim, mode, 1, 0>.inst_name, asm_str, ", $mc;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)), + !strconcat(G2S_STRINGS<dim, mode, 0, 1>.inst_name, asm_str, ", $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _MC_CH: NVPTXInst<(outs), + !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int16Regs:$mc, Int64Regs:$ch)), + !strconcat(G2S_STRINGS<dim, mode, 1, 1>.inst_name, asm_str, ", $mc, $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} + +foreach dim = [1, 2, 3, 4, 5] in { + foreach shared32 = [true, false] in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + defm G2S_STRINGS<dim, mode, 0, 0, shared32>.intr_name : + CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, shared32, mode>; + } + } +} + +// From Shared to Global memory (S2G) +class S2G_STRINGS<int dim, string mode, bit ch, + bit is_shared32 = 0, bit is_reduce = 0> { + string dir = "global.shared::cta"; + string completion = "bulk_group"; + string inst_name = !if(is_reduce, "cp.reduce", "cp") + # ".async.bulk.tensor" + # "." # dim # "d" + # "." # dir + # "." # mode + # "." # completion + # !if(ch, ".L2::cache_hint", ""); + string intr_name = "CP_ASYNC_BULK_TENSOR_" + # !if(is_reduce, "RED_", "S2G_") + # dim # "D" + # !if(is_shared32, "_SHARED32", "") + # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); +} + +multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, bit shared32, string mode> { + defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; + defvar rc = !if(shared32, Int32Regs, Int64Regs); + + def NAME: NVPTXInst<(outs), + !con((ins rc:$src, Int64Regs:$tmap), dims_dag), + !strconcat(S2G_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch)), + !strconcat(S2G_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} + +def TMAReductionFlags : Operand<i32> { + let PrintMethod = "printTmaReductionMode"; +} + +// TMA Copy from Shared to Global memory with Reduction +multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, bit shared32, string mode> { + defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; + defvar rc = !if(shared32, Int32Regs, Int64Regs); + + defvar prefix = "cp.reduce.async.bulk.tensor" # "." # dim # "d" # ".global.shared::cta"; + defvar suffix = "." # mode # ".bulk_group"; + + def NAME: NVPTXInst<(outs), + !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)), + !strconcat(prefix, "${red_op}", suffix, asm_str, ";"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch, TMAReductionFlags:$red_op)), + !strconcat(prefix, "${red_op}", suffix, ".L2::cache_hint", asm_str, ", $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} + +foreach dim = [1, 2, 3, 4, 5] in { + foreach shared32 = [true, false] in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col_no_offs"], ["tile"]) in { + defm S2G_STRINGS<dim, mode, 0, shared32>.intr_name : + CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, shared32, mode>; + defm S2G_STRINGS<dim, mode, 0, shared32, 1>.intr_name : + CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, shared32, mode>; + } + } +} + +// TMA Prefetch from Global memory to L2 cache +class PREFETCH_STRINGS<int dim, string mode, bit ch> { + string prefix = "cp.async.bulk.prefetch.tensor"; + string dir = "L2.global"; + string inst_name = prefix + # "." # dim # "d" + # "." # dir + # "." # mode + # !if(ch, ".L2::cache_hint", ""); + string intr_name = "CP_ASYNC_BULK_TENSOR_PREFETCH_" + # dim # "D" + # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); +} + +multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> { + defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar asm_str_default = " [$tmap, {{" # dims_str # "}}]"; + + defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); + defvar im2col_dag = !if(!eq(mode, "im2col"), + !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), + (ins)); + defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); + defvar im2col_asm_str = ", {{" # im2col_str # "}}"; + + defvar asm_str = !if(!eq(mode, "im2col"), + !strconcat(asm_str_default, im2col_asm_str), asm_str_default); + + def NAME: NVPTXInst<(outs), + !con((ins Int64Regs:$tmap), dims_dag, im2col_dag), + !strconcat(PREFETCH_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + !con((ins Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)), + !strconcat(PREFETCH_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} + +foreach dim = [1, 2, 3, 4, 5] in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + defm PREFETCH_STRINGS<dim, mode, 0>.intr_name : + CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>; + } +} + //----------------------------------- // MBarrier Functions //----------------------------------- @@ -440,11 +747,11 @@ def CP_ASYNC_BULK_WAIT_GROUP_READ : multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> { def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"), - [(Intrin Int32Regs:$addr, Int32Regs:$count)]>, + [(Intrin i32:$addr, i32:$count)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"), - [(Intrin Int64Regs:$addr, Int32Regs:$count)]>, + [(Intrin i64:$addr, i32:$count)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -455,11 +762,11 @@ defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared", multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> { def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"), - [(Intrin Int32Regs:$addr)]>, + [(Intrin i32:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"), - [(Intrin Int64Regs:$addr)]>, + [(Intrin i64:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -470,11 +777,11 @@ defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared", multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> { def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr), !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>, + [(set i64:$state, (Intrin i32:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr), !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>, + [(set i64:$state, (Intrin i64:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -487,13 +794,13 @@ multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> { (ins Int32Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i32:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i64:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -506,12 +813,12 @@ multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> { def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr), !strconcat("mbarrier.arrive_drop", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>, + [(set i64:$state, (Intrin i32:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr), !strconcat("mbarrier.arrive_drop", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>, + [(set i64:$state, (Intrin i64:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -525,13 +832,13 @@ multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> { (ins Int32Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i32:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i64:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -544,11 +851,11 @@ defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED : multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> { def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state), !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"), - [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>, + [(set i1:$res, (Intrin i32:$addr, i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state), !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"), - [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>, + [(set i1:$res, (Intrin i64:$addr, i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -560,7 +867,7 @@ defm MBARRIER_TEST_WAIT_SHARED : class MBARRIER_PENDING_COUNT<Intrinsic Intrin> : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state), "mbarrier.pending_count.b64 $res, $state;", - [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>, + [(set i32:$res, (Intrin i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; def MBARRIER_PENDING_COUNT : @@ -577,30 +884,30 @@ def MBARRIER_PENDING_COUNT : // Same story for fmax, fmin. def : Pat<(int_nvvm_fmin_f immFloat1, - (int_nvvm_fmax_f immFloat0, Float32Regs:$a)), - (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; + (int_nvvm_fmax_f immFloat0, f32:$a)), + (CVT_f32_f32 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f immFloat1, - (int_nvvm_fmax_f Float32Regs:$a, immFloat0)), - (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; + (int_nvvm_fmax_f f32:$a, immFloat0)), + (CVT_f32_f32 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f - (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1), - (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; + (int_nvvm_fmax_f immFloat0, f32:$a), immFloat1), + (CVT_f32_f32 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f - (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1), - (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; + (int_nvvm_fmax_f f32:$a, immFloat0), immFloat1), + (CVT_f32_f32 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d immDouble1, - (int_nvvm_fmax_d immDouble0, Float64Regs:$a)), - (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; + (int_nvvm_fmax_d immDouble0, f64:$a)), + (CVT_f64_f64 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d immDouble1, - (int_nvvm_fmax_d Float64Regs:$a, immDouble0)), - (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; + (int_nvvm_fmax_d f64:$a, immDouble0)), + (CVT_f64_f64 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d - (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1), - (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; + (int_nvvm_fmax_d immDouble0, f64:$a), immDouble1), + (CVT_f64_f64 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d - (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1), - (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; + (int_nvvm_fmax_d f64:$a, immDouble0), immDouble1), + (CVT_f64_f64 $a, CvtSAT)>; // We need a full string for OpcStr here because we need to deal with case like @@ -644,7 +951,7 @@ def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$ [(int_nvvm_nanosleep imm:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;", - [(int_nvvm_nanosleep Int32Regs:$i)]>, + [(int_nvvm_nanosleep i32:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; // // Min Max @@ -878,6 +1185,18 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; +def : Pat<(int_nvvm_div_full f32:$a, f32:$b), + (FDIV32rr $a, $b)>; + +def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), + (FDIV32ri $a, f32imm:$b)>; + +def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), + (FDIV32rr_ftz $a, $b)>; + +def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), + (FDIV32ri_ftz $a, f32imm:$b)>; + // // Sad // @@ -899,19 +1218,19 @@ def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;", // Floor Ceil // -def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_floor_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_floor_d Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_floor_ftz_f f32:$a), + (CVT_f32_f32 $a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_floor_f f32:$a), + (CVT_f32_f32 $a, CvtRMI)>; +def : Pat<(int_nvvm_floor_d f64:$a), + (CVT_f64_f64 $a, CvtRMI)>; -def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_ceil_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_ceil_d Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; +def : Pat<(int_nvvm_ceil_ftz_f f32:$a), + (CVT_f32_f32 $a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_ceil_f f32:$a), + (CVT_f32_f32 $a, CvtRPI)>; +def : Pat<(int_nvvm_ceil_d f64:$a), + (CVT_f64_f64 $a, CvtRPI)>; // // Abs @@ -926,6 +1245,22 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_fabs_d>; // +// copysign +// + +def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>; + +def COPYSIGN_F : + NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1), + "copysign.f32 \t$dst, $src0, $src1;", + [(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>; + +def COPYSIGN_D : + NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1), + "copysign.f64 \t$dst, $src0, $src1;", + [(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>; + +// // Abs, Neg bf16, bf16x2 // @@ -942,34 +1277,34 @@ def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs, // Round // -def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_round_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_round_d Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_round_ftz_f f32:$a), + (CVT_f32_f32 $a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_round_f f32:$a), + (CVT_f32_f32 $a, CvtRNI)>; +def : Pat<(int_nvvm_round_d f64:$a), + (CVT_f64_f64 $a, CvtRNI)>; // // Trunc // -def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_trunc_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_trunc_d Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_trunc_ftz_f f32:$a), + (CVT_f32_f32 $a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_trunc_f f32:$a), + (CVT_f32_f32 $a, CvtRZI)>; +def : Pat<(int_nvvm_trunc_d f64:$a), + (CVT_f64_f64 $a, CvtRZI)>; // // Saturate // -def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; -def : Pat<(int_nvvm_saturate_f Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; -def : Pat<(int_nvvm_saturate_d Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_saturate_ftz_f f32:$a), + (CVT_f32_f32 $a, CvtSAT_FTZ)>; +def : Pat<(int_nvvm_saturate_f f32:$a), + (CVT_f32_f32 $a, CvtSAT)>; +def : Pat<(int_nvvm_saturate_d f64:$a), + (CVT_f64_f64 $a, CvtSAT)>; // // Exp2 Log2 @@ -981,11 +1316,21 @@ def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; + def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; +def : Pat<(fexp2 f32:$a), + (INT_NVVM_EX2_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>; +def : Pat<(fexp2 f32:$a), + (INT_NVVM_EX2_APPROX_F $a)>, Requires<[doNoF32FTZ]>; +def : Pat<(fexp2 f16:$a), + (INT_NVVM_EX2_APPROX_F16 $a)>, Requires<[useFP16Math]>; +def : Pat<(fexp2 v2f16:$a), + (INT_NVVM_EX2_APPROX_F16X2 $a)>, Requires<[useFP16Math]>; + def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", @@ -993,6 +1338,11 @@ def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; +def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>, + Requires<[doF32FTZ]>; +def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>, + Requires<[doNoF32FTZ]>; + // // Sin Cos // @@ -1155,14 +1505,14 @@ def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>; // nvvm_sqrt intrinsic -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), - (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), - (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), - (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), - (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; +def : Pat<(int_nvvm_sqrt_f f32:$a), + (INT_NVVM_SQRT_RN_FTZ_F $a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f f32:$a), + (INT_NVVM_SQRT_RN_F $a)>, Requires<[do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f f32:$a), + (INT_NVVM_SQRT_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>; +def : Pat<(int_nvvm_sqrt_f f32:$a), + (INT_NVVM_SQRT_APPROX_F $a)>; // // Rsqrt @@ -1181,25 +1531,25 @@ def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>; // 1.0f / sqrt_approx -> rsqrt_approx -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f Float32Regs:$a)), - (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>, +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f f32:$a)), + (INT_NVVM_RSQRT_APPROX_F $a)>, Requires<[doRsqrtOpt]>; -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f Float32Regs:$a)), - (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>, +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f f32:$a)), + (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>, Requires<[doRsqrtOpt]>; // same for int_nvvm_sqrt_f when non-precision sqrt is requested -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)), - (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>, +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)), + (INT_NVVM_RSQRT_APPROX_F $a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>; -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)), - (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>, +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)), + (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>; -def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)), - (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>, +def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)), + (INT_NVVM_RSQRT_APPROX_F $a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>; -def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)), - (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>, +def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)), + (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>; // // Add @@ -1232,145 +1582,159 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; // +// BFIND +// + +foreach t = [I32RT, I64RT] in { + foreach sign = ["s", "u"] in { + defvar flo_intrin = !cast<Intrinsic>("int_nvvm_flo_" # sign); + def BFIND_ # sign # t.Size + : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), + "bfind." # sign # t.Size # " \t$dst, $src;", + [(set i32:$dst, (flo_intrin t.Ty:$src, 0))]>; + + def BFIND_SHIFTAMT_ # sign # t.Size + : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), + "bfind.shiftamt." # sign # t.Size # " \t$dst, $src;", + [(set i32:$dst, (flo_intrin t.Ty:$src, -1))]>; + } +} + +// // Convert // -def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>; -def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>; -def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>; -def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>; -def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), - (CVT_f32_f64 Float64Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), - (CVT_s32_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), - (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), - (CVT_s32_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), - (CVT_s32_f64 Float64Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), - (CVT_u32_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), - (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a), - (CVT_u32_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), - (CVT_u32_f64 Float64Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), - (CVT_f64_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), - (CVT_f64_s32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), - (CVT_f64_s32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), - (CVT_f64_s32 Int32Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), - (CVT_f64_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), - (CVT_f64_u32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), - (CVT_f64_u32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), - (CVT_f64_u32 Int32Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), - (CVT_s32_f32 Float32Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), - (CVT_u32_f32 Float32Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), - (CVT_f32_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), - (CVT_f32_s32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), - (CVT_f32_s32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), - (CVT_f32_s32 Int32Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), - (CVT_f32_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), - (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), - (CVT_f32_u32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), - (CVT_f32_u32 Int32Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b), - (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; -def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b), - (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; -def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b), - (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>; -def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b), - (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>; - -def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b), - (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; -def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b), - (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; -def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b), - (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>; -def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b), - (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>; - -def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a), - (CVT_bf16_f32 Float32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a), - (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>; -def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a), - (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a), - (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>; - -def CVT_tf32_f32 : - NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a), - "cvt.rna.tf32.f32 \t$dest, $a;", - [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>; +def : Pat<(int_nvvm_d2f_rn_ftz f64:$a), + (CVT_f32_f64 $a, CvtRN_FTZ)>; +def : Pat<(int_nvvm_d2f_rn f64:$a), + (CVT_f32_f64 $a, CvtRN)>; +def : Pat<(int_nvvm_d2f_rz_ftz f64:$a), + (CVT_f32_f64 $a, CvtRZ_FTZ)>; +def : Pat<(int_nvvm_d2f_rz f64:$a), + (CVT_f32_f64 $a, CvtRZ)>; +def : Pat<(int_nvvm_d2f_rm_ftz f64:$a), + (CVT_f32_f64 $a, CvtRM_FTZ)>; +def : Pat<(int_nvvm_d2f_rm f64:$a), + (CVT_f32_f64 $a, CvtRM)>; +def : Pat<(int_nvvm_d2f_rp_ftz f64:$a), + (CVT_f32_f64 $a, CvtRP_FTZ)>; +def : Pat<(int_nvvm_d2f_rp f64:$a), + (CVT_f32_f64 $a, CvtRP)>; + +def : Pat<(int_nvvm_d2i_rn f64:$a), + (CVT_s32_f64 $a, CvtRNI)>; +def : Pat<(int_nvvm_d2i_rz f64:$a), + (CVT_s32_f64 $a, CvtRZI)>; +def : Pat<(int_nvvm_d2i_rm f64:$a), + (CVT_s32_f64 $a, CvtRMI)>; +def : Pat<(int_nvvm_d2i_rp f64:$a), + (CVT_s32_f64 $a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ui_rn f64:$a), + (CVT_u32_f64 $a, CvtRNI)>; +def : Pat<(int_nvvm_d2ui_rz f64:$a), + (CVT_u32_f64 $a, CvtRZI)>; +def : Pat<(int_nvvm_d2ui_rm f64:$a), + (CVT_u32_f64 $a, CvtRMI)>; +def : Pat<(int_nvvm_d2ui_rp f64:$a), + (CVT_u32_f64 $a, CvtRPI)>; + +def : Pat<(int_nvvm_i2d_rn i32:$a), + (CVT_f64_s32 $a, CvtRN)>; +def : Pat<(int_nvvm_i2d_rz i32:$a), + (CVT_f64_s32 $a, CvtRZ)>; +def : Pat<(int_nvvm_i2d_rm i32:$a), + (CVT_f64_s32 $a, CvtRM)>; +def : Pat<(int_nvvm_i2d_rp i32:$a), + (CVT_f64_s32 $a, CvtRP)>; + +def : Pat<(int_nvvm_ui2d_rn i32:$a), + (CVT_f64_u32 $a, CvtRN)>; +def : Pat<(int_nvvm_ui2d_rz i32:$a), + (CVT_f64_u32 $a, CvtRZ)>; +def : Pat<(int_nvvm_ui2d_rm i32:$a), + (CVT_f64_u32 $a, CvtRM)>; +def : Pat<(int_nvvm_ui2d_rp i32:$a), + (CVT_f64_u32 $a, CvtRP)>; + +def : Pat<(int_nvvm_f2i_rn_ftz f32:$a), + (CVT_s32_f32 $a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2i_rn f32:$a), + (CVT_s32_f32 $a, CvtRNI)>; +def : Pat<(int_nvvm_f2i_rz_ftz f32:$a), + (CVT_s32_f32 $a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2i_rz f32:$a), + (CVT_s32_f32 $a, CvtRZI)>; +def : Pat<(int_nvvm_f2i_rm_ftz f32:$a), + (CVT_s32_f32 $a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2i_rm f32:$a), + (CVT_s32_f32 $a, CvtRMI)>; +def : Pat<(int_nvvm_f2i_rp_ftz f32:$a), + (CVT_s32_f32 $a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2i_rp f32:$a), + (CVT_s32_f32 $a, CvtRPI)>; + +def : Pat<(int_nvvm_f2ui_rn_ftz f32:$a), + (CVT_u32_f32 $a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rn f32:$a), + (CVT_u32_f32 $a, CvtRNI)>; +def : Pat<(int_nvvm_f2ui_rz_ftz f32:$a), + (CVT_u32_f32 $a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rz f32:$a), + (CVT_u32_f32 $a, CvtRZI)>; +def : Pat<(int_nvvm_f2ui_rm_ftz f32:$a), + (CVT_u32_f32 $a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rm f32:$a), + (CVT_u32_f32 $a, CvtRMI)>; +def : Pat<(int_nvvm_f2ui_rp_ftz f32:$a), + (CVT_u32_f32 $a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rp f32:$a), + (CVT_u32_f32 $a, CvtRPI)>; + +def : Pat<(int_nvvm_i2f_rn i32:$a), + (CVT_f32_s32 $a, CvtRN)>; +def : Pat<(int_nvvm_i2f_rz i32:$a), + (CVT_f32_s32 $a, CvtRZ)>; +def : Pat<(int_nvvm_i2f_rm i32:$a), + (CVT_f32_s32 $a, CvtRM)>; +def : Pat<(int_nvvm_i2f_rp i32:$a), + (CVT_f32_s32 $a, CvtRP)>; + +def : Pat<(int_nvvm_ui2f_rn i32:$a), + (CVT_f32_u32 $a, CvtRN)>; +def : Pat<(int_nvvm_ui2f_rz i32:$a), + (CVT_f32_u32 $a, CvtRZ)>; +def : Pat<(int_nvvm_ui2f_rm i32:$a), + (CVT_f32_u32 $a, CvtRM)>; +def : Pat<(int_nvvm_ui2f_rp i32:$a), + (CVT_f32_u32 $a, CvtRP)>; + +def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b), + (CVT_bf16x2_f32 $a, $b, CvtRN)>; +def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b), + (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>; +def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b), + (CVT_bf16x2_f32 $a, $b, CvtRZ)>; +def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b), + (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>; + +def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b), + (CVT_f16x2_f32 $a, $b, CvtRN)>; +def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b), + (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>; +def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b), + (CVT_f16x2_f32 $a, $b, CvtRZ)>; +def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b), + (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>; + +def : Pat<(int_nvvm_f2bf16_rn f32:$a), + (CVT_bf16_f32 $a, CvtRN)>; +def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), + (CVT_bf16_f32 $a, CvtRN_RELU)>; +def : Pat<(int_nvvm_f2bf16_rz f32:$a), + (CVT_bf16_f32 $a, CvtRZ)>; +def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), + (CVT_bf16_f32 $a, CvtRZ_RELU)>; def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; @@ -1388,113 +1752,126 @@ def INT_NVVM_D2I_HI : F_MATH_1< "}}"), Int32Regs, Float64Regs, int_nvvm_d2i_hi>; -def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), - (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), - (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), - (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), - (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), - (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), - (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), - (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), - (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), - (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), - (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; - -def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), - (CVT_f32_s64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), - (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), - (CVT_f32_s64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), - (CVT_f32_s64 Int64Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), - (CVT_f32_u64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), - (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), - (CVT_f32_u64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), - (CVT_f32_u64 Int64Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), - (CVT_f64_s64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), - (CVT_f64_s64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), - (CVT_f64_s64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), - (CVT_f64_s64 Int64Regs:$a, CvtRP)>; - -def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), - (CVT_f64_u64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), - (CVT_f64_u64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), - (CVT_f64_u64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), - (CVT_f64_u64 Int64Regs:$a, CvtRP)>; - - -def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), - (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>; -def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), - (CVT_f16_f32 Float32Regs:$a, CvtRN)>; - -// -// Bitcast -// - -def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_bitcast_f2i>; -def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_bitcast_i2f>; - -def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_bitcast_ll2d>; -def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_bitcast_d2ll>; +def : Pat<(int_nvvm_f2ll_rn_ftz f32:$a), + (CVT_s64_f32 $a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rn f32:$a), + (CVT_s64_f32 $a, CvtRNI)>; +def : Pat<(int_nvvm_f2ll_rz_ftz f32:$a), + (CVT_s64_f32 $a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rz f32:$a), + (CVT_s64_f32 $a, CvtRZI)>; +def : Pat<(int_nvvm_f2ll_rm_ftz f32:$a), + (CVT_s64_f32 $a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rm f32:$a), + (CVT_s64_f32 $a, CvtRMI)>; +def : Pat<(int_nvvm_f2ll_rp_ftz f32:$a), + (CVT_s64_f32 $a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rp f32:$a), + (CVT_s64_f32 $a, CvtRPI)>; + +def : Pat<(int_nvvm_f2ull_rn_ftz f32:$a), + (CVT_u64_f32 $a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rn f32:$a), + (CVT_u64_f32 $a, CvtRNI)>; +def : Pat<(int_nvvm_f2ull_rz_ftz f32:$a), + (CVT_u64_f32 $a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rz f32:$a), + (CVT_u64_f32 $a, CvtRZI)>; +def : Pat<(int_nvvm_f2ull_rm_ftz f32:$a), + (CVT_u64_f32 $a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rm f32:$a), + (CVT_u64_f32 $a, CvtRMI)>; +def : Pat<(int_nvvm_f2ull_rp_ftz f32:$a), + (CVT_u64_f32 $a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rp f32:$a), + (CVT_u64_f32 $a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ll_rn f64:$a), + (CVT_s64_f64 $a, CvtRNI)>; +def : Pat<(int_nvvm_d2ll_rz f64:$a), + (CVT_s64_f64 $a, CvtRZI)>; +def : Pat<(int_nvvm_d2ll_rm f64:$a), + (CVT_s64_f64 $a, CvtRMI)>; +def : Pat<(int_nvvm_d2ll_rp f64:$a), + (CVT_s64_f64 $a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ull_rn f64:$a), + (CVT_u64_f64 $a, CvtRNI)>; +def : Pat<(int_nvvm_d2ull_rz f64:$a), + (CVT_u64_f64 $a, CvtRZI)>; +def : Pat<(int_nvvm_d2ull_rm f64:$a), + (CVT_u64_f64 $a, CvtRMI)>; +def : Pat<(int_nvvm_d2ull_rp f64:$a), + (CVT_u64_f64 $a, CvtRPI)>; + +def : Pat<(int_nvvm_ll2f_rn i64:$a), + (CVT_f32_s64 $a, CvtRN)>; +def : Pat<(int_nvvm_ll2f_rz i64:$a), + (CVT_f32_s64 $a, CvtRZ)>; +def : Pat<(int_nvvm_ll2f_rm i64:$a), + (CVT_f32_s64 $a, CvtRM)>; +def : Pat<(int_nvvm_ll2f_rp i64:$a), + (CVT_f32_s64 $a, CvtRP)>; + +def : Pat<(int_nvvm_ull2f_rn i64:$a), + (CVT_f32_u64 $a, CvtRN)>; +def : Pat<(int_nvvm_ull2f_rz i64:$a), + (CVT_f32_u64 $a, CvtRZ)>; +def : Pat<(int_nvvm_ull2f_rm i64:$a), + (CVT_f32_u64 $a, CvtRM)>; +def : Pat<(int_nvvm_ull2f_rp i64:$a), + (CVT_f32_u64 $a, CvtRP)>; + +def : Pat<(int_nvvm_ll2d_rn i64:$a), + (CVT_f64_s64 $a, CvtRN)>; +def : Pat<(int_nvvm_ll2d_rz i64:$a), + (CVT_f64_s64 $a, CvtRZ)>; +def : Pat<(int_nvvm_ll2d_rm i64:$a), + (CVT_f64_s64 $a, CvtRM)>; +def : Pat<(int_nvvm_ll2d_rp i64:$a), + (CVT_f64_s64 $a, CvtRP)>; + +def : Pat<(int_nvvm_ull2d_rn i64:$a), + (CVT_f64_u64 $a, CvtRN)>; +def : Pat<(int_nvvm_ull2d_rz i64:$a), + (CVT_f64_u64 $a, CvtRZ)>; +def : Pat<(int_nvvm_ull2d_rm i64:$a), + (CVT_f64_u64 $a, CvtRM)>; +def : Pat<(int_nvvm_ull2d_rp i64:$a), + (CVT_f64_u64 $a, CvtRP)>; + + +def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), + (CVT_f16_f32 $a, CvtRN_FTZ)>; +def : Pat<(int_nvvm_f2h_rn f32:$a), + (CVT_f16_f32 $a, CvtRN)>; + +def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b), + (CVT_e4m3x2_f32 $a, $b, CvtRN)>; +def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b), + (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>; +def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b), + (CVT_e5m2x2_f32 $a, $b, CvtRN)>; +def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b), + (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>; + +def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn Int32Regs:$a), + (CVT_e4m3x2_f16x2 $a, CvtRN)>; +def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu Int32Regs:$a), + (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>; +def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn Int32Regs:$a), + (CVT_e5m2x2_f16x2 $a, CvtRN)>; +def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu Int32Regs:$a), + (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>; + +def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn Int16Regs:$a), + (CVT_f16x2_e4m3x2 $a, CvtRN)>; +def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu Int16Regs:$a), + (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>; +def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn Int16Regs:$a), + (CVT_f16x2_e5m2x2 $a, CvtRN)>; +def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a), + (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>; // // FNS @@ -1503,23 +1880,23 @@ def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, class INT_FNS_MBO<dag ins, dag Operands> : NVPTXInst<(outs Int32Regs:$dst), ins, "fns.b32 \t$dst, $mask, $base, $offset;", - [(set Int32Regs:$dst, Operands )]>, + [(set i32:$dst, Operands)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), - (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; + (int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>; def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), - (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; + (int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>; def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), - (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; + (int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>; def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), - (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>; + (int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>; def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), - (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; + (int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>; def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), - (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; + (int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>; def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), - (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>; + (int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>; def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>; @@ -1538,14 +1915,16 @@ multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass, ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, list<Predicate> Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires<Pred>; - def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, - Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires<Pred>; + def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, + Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>; + } } multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, @@ -1561,15 +1940,17 @@ multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass, ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat( - "{{ \n\t", - ".reg \t.s", TypeStr, " temp; \n\t", - "neg.s", TypeStr, " \ttemp, $b; \n\t", - "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", - "}}"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires<Pred>; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat( + "{{ \n\t", + ".reg \t.s", TypeStr, " temp; \n\t", + "neg.s", TypeStr, " \ttemp, $b; \n\t", + "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", + "}}"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires<Pred>; + } } multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> { @@ -1584,29 +1965,31 @@ multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass, ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred> { - def reg : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, - Requires<Pred>; - - def imm1 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, - Requires<Pred>; - - def imm2 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, - Requires<Pred>; - - def imm3 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, - Requires<Pred>; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, + Requires<Pred>; + + def imm1 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, + Requires<Pred>; + + def imm2 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, + Requires<Pred>; + + def imm3 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, + Requires<Pred>; + } } multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> { @@ -1997,6 +2380,12 @@ defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64 // atom_cas +def atomic_cmp_swap_i16_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_i16_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_i16_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>; def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), @@ -2010,6 +2399,14 @@ def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>; +defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas", + atomic_cmp_swap_i16_g, i16imm, [hasSM<70>, hasPTX<63>]>; +defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", ".cas", + atomic_cmp_swap_i16_s, i16imm, [hasSM<70>, hasPTX<63>]>; +defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas", + atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>; +defm INT_PTX_ATOM_CAS_GEN_16_USE_G : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas", + atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>; defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas", atomic_cmp_swap_i32_g, i32imm>; defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas", @@ -2037,7 +2434,7 @@ class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Pr dag ins, dag Operands> : NVPTXInst<(outs regclass:$result), ins, AsmStr, - [(set (regT regclass:$result), Operands)]>, + [(set regT:$result, Operands)]>, Requires<Preds>; // Define instruction variants for all addressing modes. @@ -2048,26 +2445,26 @@ multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr, let AddedComplexity = 1 in { def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int16Regs:$src, regclass:$b), - (Intr (i16 Int16Regs:$src), (regT regclass:$b))>; + (Intr i16:$src, regT:$b)>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int32Regs:$src, regclass:$b), - (Intr (i32 Int32Regs:$src), (regT regclass:$b))>; + (Intr i32:$src, regT:$b)>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int64Regs:$src, regclass:$b), - (Intr (i64 Int64Regs:$src), (regT regclass:$b))>; + (Intr i64:$src, regT:$b)>; } // tablegen can't infer argument types from Intrinsic (though it can // from Instruction) so we have to enforce specific type on // immediates via explicit cast to ImmTy. def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int16Regs:$src, ImmType:$b), - (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>; + (Intr i16:$src, (ImmTy Imm:$b))>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int32Regs:$src, ImmType:$b), - (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>; + (Intr i32:$src, (ImmTy Imm:$b))>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int64Regs:$src, ImmType:$b), - (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>; + (Intr i64:$src, (ImmTy Imm:$b))>; } multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr, @@ -2078,31 +2475,31 @@ multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr, let AddedComplexity = 2 in { def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int32Regs:$src, regclass:$b, regclass:$c), - (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>; + (Intr i32:$src, regT:$b, regT:$c)>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int64Regs:$src, regclass:$b, regclass:$c), - (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>; + (Intr i64:$src, regT:$b, regT:$c)>; } let AddedComplexity = 1 in { def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int32Regs:$src, ImmType:$b, regclass:$c), - (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>; + (Intr i32:$src, (ImmTy Imm:$b), regT:$c)>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int64Regs:$src, ImmType:$b, regclass:$c), - (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>; + (Intr i64:$src, (ImmTy Imm:$b), regT:$c)>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int32Regs:$src, regclass:$b, ImmType:$c), - (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>; + (Intr i32:$src, regT:$b, (ImmTy Imm:$c))>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int64Regs:$src, regclass:$b, ImmType:$c), - (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>; + (Intr i64:$src, regT:$b, (ImmTy Imm:$c))>; } def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int32Regs:$src, ImmType:$b, ImmType:$c), - (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>; + (Intr i32:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; def : ATOM23_impl<AsmStr, regT, regclass, Preds, (ins Int64Regs:$src, ImmType:$b, ImmType:$c), - (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>; + (Intr i64:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; } // Constructs intrinsic name and instruction asm strings. @@ -2221,6 +2618,7 @@ multiclass ATOM2_incdec_impl<string OpStr> { // atom.cas multiclass ATOM3_cas_impl<string OpStr> { + defm _b16 : ATOM3S_impl<OpStr, "i", "b16", i16, Int16Regs, i16imm, imm, i16, []>; defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>; defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>; } @@ -2441,89 +2839,75 @@ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> { +multiclass NG_TO_G<string Str> { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - !strconcat("cvta.", Str, ".u32 \t$result, $src;"), - [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + "cvta." # Str # ".u32 \t$result, $src;", []>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - !strconcat("cvta.", Str, ".u64 \t$result, $src;"), - [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; - def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), - "{{ .reg .b64 %tmp;\n\t" - #" cvt.u64.u32 \t%tmp, $src;\n\t" - #" cvta." # Str # ".u64 \t$result, %tmp; }}", - [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, - Requires<[ShortPtr]>; + "cvta." # Str # ".u64 \t$result, $src;", []>; } -multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> { +multiclass G_TO_NG<string Str> { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), - [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + "cvta.to." # Str # ".u32 \t$result, $src;", []>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), - [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; - def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), - "{{ .reg .b64 %tmp;\n\t" - #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t" - #" cvt.u32.u64 \t$result, %tmp; }}", - [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, - Requires<[ShortPtr]>; -} - -defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>; -defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>; -defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>; -defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>; -defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>; - -defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>; -defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>; -defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>; -defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>; + "cvta.to." # Str # ".u64 \t$result, $src;", []>; +} + +defm cvta_local : NG_TO_G<"local">; +defm cvta_shared : NG_TO_G<"shared">; +defm cvta_global : NG_TO_G<"global">; +defm cvta_const : NG_TO_G<"const">; + +defm cvta_to_local : G_TO_NG<"local">; +defm cvta_to_shared : G_TO_NG<"shared">; +defm cvta_to_global : G_TO_NG<"global">; +defm cvta_to_const : G_TO_NG<"const">; + +// nvvm.ptr.param.to.gen +defm cvta_param : NG_TO_G<"param">; + +def : Pat<(int_nvvm_ptr_param_to_gen i32:$src), + (cvta_param $src)>; + +def : Pat<(int_nvvm_ptr_param_to_gen i64:$src), + (cvta_param_64 $src)>; // nvvm.ptr.gen.to.param -def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), - (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, - (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; -def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), - (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, - (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; +def : Pat<(int_nvvm_ptr_gen_to_param i32:$src), + (i32 Int32Regs:$src)>; +def : Pat<(int_nvvm_ptr_gen_to_param i64:$src), + (i64 Int64Regs:$src)>; // nvvm.move intrinsicc def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), "mov.b16 \t$r, $s;", - [(set Int16Regs:$r, - (int_nvvm_move_i16 Int16Regs:$s))]>; + [(set i16:$r, + (int_nvvm_move_i16 i16:$s))]>; def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), "mov.b32 \t$r, $s;", - [(set Int32Regs:$r, - (int_nvvm_move_i32 Int32Regs:$s))]>; + [(set i32:$r, + (int_nvvm_move_i32 i32:$s))]>; def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), "mov.b64 \t$r, $s;", - [(set Int64Regs:$r, - (int_nvvm_move_i64 Int64Regs:$s))]>; + [(set i64:$r, + (int_nvvm_move_i64 i64:$s))]>; def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), "mov.f32 \t$r, $s;", - [(set Float32Regs:$r, - (int_nvvm_move_float Float32Regs:$s))]>; + [(set f32:$r, + (int_nvvm_move_float f32:$s))]>; def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), "mov.f64 \t$r, $s;", - [(set Float64Regs:$r, - (int_nvvm_move_double Float64Regs:$s))]>; + [(set f64:$r, + (int_nvvm_move_double f64:$s))]>; def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), "mov.u32 \t$r, $s;", - [(set Int32Regs:$r, - (int_nvvm_move_ptr Int32Regs:$s))]>; + [(set i32:$r, + (int_nvvm_move_ptr i32:$s))]>; def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), "mov.u64 \t$r, $s;", - [(set Int64Regs:$r, - (int_nvvm_move_ptr Int64Regs:$s))]>; + [(set i64:$r, + (int_nvvm_move_ptr i64:$s))]>; // @TODO: Are these actually needed, or will we always just see symbols // copied to registers first? @@ -2536,24 +2920,6 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ - -// MoveParam %r1, param -// ptr_local_to_gen %r2, %r1 -// ptr_gen_to_local %r3, %r2 -// -> -// mov %r1, param - -// @TODO: Revisit this. There is a type -// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym -// instructions are not currently defined. However, we can use the ptr -// variants and the asm printer will do the right thing. -def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen - (MoveParam texternalsym:$src)))), - (nvvm_move_ptr64 texternalsym:$src)>; -def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen - (MoveParam texternalsym:$src)))), - (nvvm_move_ptr32 texternalsym:$src)>; - def texsurf_handles : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), "mov.u64 \t$result, $src;", []>; @@ -2565,16 +2931,16 @@ def texsurf_handles def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), "// llvm.nvvm.compiler.warn()", - [(int_nvvm_compiler_warn Int32Regs:$a)]>; + [(int_nvvm_compiler_warn i32:$a)]>; def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), "// llvm.nvvm.compiler.warn()", - [(int_nvvm_compiler_warn Int64Regs:$a)]>; + [(int_nvvm_compiler_warn i64:$a)]>; def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), "// llvm.nvvm.compiler.error()", - [(int_nvvm_compiler_error Int32Regs:$a)]>; + [(int_nvvm_compiler_error i32:$a)]>; def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), "// llvm.nvvm.compiler.error()", - [(int_nvvm_compiler_error Int64Regs:$a)]>; + [(int_nvvm_compiler_error i64:$a)]>; // isspacep @@ -2582,11 +2948,11 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> { def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), "isspacep." # suffix # "\t$d, $a;", - [(set Int1Regs:$d, (Intr Int32Regs:$a))]>, + [(set i1:$d, (Intr i32:$a))]>, Requires<Preds>; def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "isspacep." # suffix # "\t$d, $a;", - [(set Int1Regs:$d, (Intr Int64Regs:$a))]>, + [(set i1:$d, (Intr i64:$a))]>, Requires<Preds>; } @@ -2637,134 +3003,9 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; -// rotate builtin support - -def ROTATE_B32_HW_IMM - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, i32imm:$amt), - "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, - (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, - Requires<[hasHWROT32]> ; - -def ROTATE_B32_HW_REG - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, Int32Regs:$amt), - "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, - (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[hasHWROT32]> ; - -def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), - (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, - Requires<[noHWROT32]> ; - -def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), - (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, - Requires<[noHWROT32]> ; - -let hasSideEffects = false in { - def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), - !strconcat("{{\n\t", - ".reg .b32 %dummy;\n\t", - "mov.b64 \t{$dst,%dummy}, $src;\n\t", - "}}"), - []> ; - - def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), - !strconcat("{{\n\t", - ".reg .b32 %dummy;\n\t", - "mov.b64 \t{%dummy,$dst}, $src;\n\t", - "}}"), - []> ; -} - -let hasSideEffects = false in { - def PACK_TWO_INT32 - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), - "mov.b64 \t$dst, {{$lo, $hi}};", []> ; -} - -def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), - (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), - (GET_LO_INT64 Int64Regs:$src))> ; - -// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so -// no side effects. -let hasSideEffects = false in { - def SHF_L_WRAP_B32_IMM - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), - "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; - - def SHF_L_WRAP_B32_REG - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; - - def SHF_R_WRAP_B32_IMM - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), - "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; - - def SHF_R_WRAP_B32_REG - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; -} - -// HW version of rotate 64 -def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), - (PACK_TWO_INT32 - (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), - (GET_LO_INT64 Int64Regs:$src), imm:$amt), - (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), - (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, - Requires<[hasHWROT32]>; - -def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), - (PACK_TWO_INT32 - (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), - (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), - (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), - (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, - Requires<[hasHWROT32]>; - - -def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), - (PACK_TWO_INT32 - (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), - (GET_HI_INT64 Int64Regs:$src), imm:$amt), - (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), - (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, - Requires<[hasHWROT32]>; - -def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), - (PACK_TWO_INT32 - (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), - (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), - (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), - (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, - Requires<[hasHWROT32]>; - -// SW version of rotate 64 -def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), - (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, - Requires<[noHWROT32]>; -def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), - (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, - Requires<[noHWROT32]>; -def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), - (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, - Requires<[noHWROT32]>; -def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), - (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, - Requires<[noHWROT32]>; - +def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src), + (V2I32toI64 (I64toI32H $src), + (I64toI32L $src))> ; //----------------------------------- // Texture Intrinsics @@ -2778,16 +3019,19 @@ let IsTex = true, IsTexModeUnified = false in { // Texture fetch instructions using handles class TEX_1D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; - -multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { - def _RR : TEX_1D_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + pattern>; + +multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype, + Intrinsic intr> { + def _RR : TEX_1D_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x))]>; def _RI : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_1D_base<inst, outtype, intype, @@ -2796,25 +3040,34 @@ multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { (ins i64imm:$t, i64imm:$s)>; } -defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; -defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; -defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; -defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; -defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; -defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_1d_v4f32_s32>; +defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_1d_v4f32_f32>; +defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_1d_v4s32_s32>; +defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_v4s32_f32>; +defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_1d_v4u32_s32>; +defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_v4u32_f32>; class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", - []>; + pattern>; multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_1D_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$lod))]>; def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, @@ -2824,25 +3077,31 @@ multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, } defm TEX_1D_F32_F32_LEVEL : - TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; + TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_1d_level_v4f32_f32>; defm TEX_1D_S32_F32_LEVEL : - TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; + TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_level_v4s32_f32>; defm TEX_1D_U32_F32_LEVEL : - TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; + TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_level_v4u32_f32>; class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," " \\{$gradx\\}, \\{$grady\\};", - []>; + pattern>; multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_1D_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_1D_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$gradx, intype:$grady))]>; def _RI : TEX_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_1D_GRAD_base<inst, outtype, intype, @@ -2852,24 +3111,30 @@ multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, } defm TEX_1D_F32_F32_GRAD - : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_1d_grad_v4f32_f32>; defm TEX_1D_S32_F32_GRAD - : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_grad_v4s32_f32>; defm TEX_1D_U32_F32_GRAD - : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_grad_v4u32_f32>; class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", - []>; + pattern>; multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_1D_ARRAY_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x))]>; def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, @@ -2879,31 +3144,40 @@ multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, } defm TEX_1D_ARRAY_F32_F32 - : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_1d_array_v4f32_f32>; defm TEX_1D_ARRAY_F32_S32 - : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_1d_array_v4f32_s32>; defm TEX_1D_ARRAY_S32_S32 - : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_1d_array_v4s32_s32>; defm TEX_1D_ARRAY_S32_F32 - : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_array_v4s32_f32>; defm TEX_1D_ARRAY_U32_S32 - : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_1d_array_v4u32_s32>; defm TEX_1D_ARRAY_U32_F32 - : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_array_v4u32_f32>; class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x\\}], $lod;", - []>; + pattern>; multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_1D_ARRAY_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$lod))]>; def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, @@ -2913,26 +3187,33 @@ multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, } defm TEX_1D_ARRAY_F32_F32_LEVEL - : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_1d_array_level_v4f32_f32>; defm TEX_1D_ARRAY_S32_F32_LEVEL - : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_array_level_v4s32_f32>; defm TEX_1D_ARRAY_U32_F32_LEVEL - : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_array_level_v4u32_f32>; class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}]," " \\{$gradx\\}, \\{$grady\\};", - []>; + pattern>; multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_1D_ARRAY_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, + intype:$gradx, intype:$grady))]>; def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, @@ -2942,48 +3223,63 @@ multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, } defm TEX_1D_ARRAY_F32_F32_GRAD - : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_1d_array_grad_v4f32_f32>; defm TEX_1D_ARRAY_S32_F32_GRAD - : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_array_grad_v4s32_f32>; defm TEX_1D_ARRAY_U32_F32_GRAD - : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_1d_array_grad_v4u32_f32>; class TEX_2D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];", - []>; - -multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { - def _RR : TEX_2D_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + pattern>; + +multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype, + Intrinsic intr> { + def _RR : TEX_2D_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y))]>; def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>; def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>; } -defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; -defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; -defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; -defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; -defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; -defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_2d_v4f32_f32>; +defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_2d_v4f32_s32>; +defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_2d_v4s32_s32>; +defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_v4s32_f32>; +defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_2d_v4u32_s32>; +defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_v4u32_f32>; class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y\\}], $lod;", - []>; + pattern>; multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_2D_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_2D_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$lod))]>; def _RI : TEX_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_2D_LEVEL_base<inst, outtype, intype, @@ -2993,14 +3289,18 @@ multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype, } defm TEX_2D_F32_F32_LEVEL : - TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; + TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_2d_level_v4f32_f32>; defm TEX_2D_S32_F32_LEVEL : - TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; + TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_level_v4s32_f32>; defm TEX_2D_U32_F32_LEVEL : - TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; + TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_level_v4u32_f32>; class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, @@ -3008,12 +3308,16 @@ class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype, intype:$grady0, intype:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", - []>; + pattern>; multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_2D_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_2D_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1))]>; def _RI : TEX_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_2D_GRAD_base<inst, outtype, intype, @@ -3023,25 +3327,31 @@ multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype, } defm TEX_2D_F32_F32_GRAD : - TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; + TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_2d_grad_v4f32_f32>; defm TEX_2D_S32_F32_GRAD : - TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; + TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_grad_v4s32_f32>; defm TEX_2D_U32_F32_GRAD : - TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; + TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_grad_v4u32_f32>; class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $y\\}];", - []>; + pattern>; multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_2D_ARRAY_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_2D_ARRAY_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y))]>; def _RI : TEX_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_2D_ARRAY_base<inst, outtype, intype, @@ -3051,32 +3361,41 @@ multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype, } defm TEX_2D_ARRAY_F32_F32 - : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_2d_array_v4f32_f32>; defm TEX_2D_ARRAY_F32_S32 - : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_2d_array_v4f32_s32>; defm TEX_2D_ARRAY_S32_S32 - : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_2d_array_v4s32_s32>; defm TEX_2D_ARRAY_S32_F32 - : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_array_v4s32_f32>; defm TEX_2D_ARRAY_U32_S32 - : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_2d_array_v4u32_s32>; defm TEX_2D_ARRAY_U32_F32 - : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_array_v4u32_f32>; class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; + pattern>; multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_2D_ARRAY_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$lod))]>; def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, @@ -3086,14 +3405,18 @@ multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, } defm TEX_2D_ARRAY_F32_F32_LEVEL - : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_2d_array_level_v4f32_f32>; defm TEX_2D_ARRAY_S32_F32_LEVEL - : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_array_level_v4s32_f32>; defm TEX_2D_ARRAY_U32_F32_LEVEL - : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_array_level_v4u32_f32>; class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, @@ -3102,12 +3425,16 @@ class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", - []>; + pattern>; multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_2D_ARRAY_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1))]>; def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, @@ -3117,24 +3444,30 @@ multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, } defm TEX_2D_ARRAY_F32_F32_GRAD - : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_2d_array_grad_v4f32_f32>; defm TEX_2D_ARRAY_S32_F32_GRAD - : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_array_grad_v4s32_f32>; defm TEX_2D_ARRAY_U32_F32_GRAD - : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_2d_array_grad_v4u32_f32>; class TEX_3D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}];", - []>; - -multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { - def _RR : TEX_3D_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + pattern>; + +multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype, + Intrinsic intr> { + def _RR : TEX_3D_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>; def _RI : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_3D_base<inst, outtype, intype, @@ -3143,27 +3476,37 @@ multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { (ins i64imm:$t, i64imm:$s)>; } -defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; -defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; -defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; -defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; -defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; -defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_3d_v4f32_f32>; +defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_3d_v4f32_s32>; +defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_3d_v4s32_s32>; +defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_3d_v4s32_f32>; +defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_3d_v4u32_s32>; +defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_3d_v4u32_f32>; class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; + pattern>; multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_3D_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_3D_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z, + intype:$lod))]>; def _RI : TEX_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_3D_LEVEL_base<inst, outtype, intype, @@ -3173,14 +3516,18 @@ multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype, } defm TEX_3D_F32_F32_LEVEL - : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_3d_level_v4f32_f32>; defm TEX_3D_S32_F32_LEVEL - : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_3d_level_v4s32_f32>; defm TEX_3D_U32_F32_LEVEL - : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_3d_level_v4u32_f32>; class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, intype:$z, @@ -3191,12 +3538,16 @@ class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype, " [$t, $s, \\{$x, $y, $z, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", - []>; + pattern>; multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_3D_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_3D_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z, + intype:$gradx0, intype:$gradx1, intype:$gradx2, + intype:$grady0, intype:$grady1, intype:$grady2))]>; def _RI : TEX_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_3D_GRAD_base<inst, outtype, intype, @@ -3206,24 +3557,30 @@ multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype, } defm TEX_3D_F32_F32_GRAD - : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_3d_grad_v4f32_f32>; defm TEX_3D_S32_F32_GRAD - : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_3d_grad_v4s32_f32>; defm TEX_3D_U32_F32_GRAD - : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_3d_grad_v4u32_f32>; class TEX_CUBE_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}];", - []>; - -multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { - def _RR : TEX_CUBE_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + pattern>; + +multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype, + Intrinsic intr> { + def _RR : TEX_CUBE_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>; def _RI : TEX_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_CUBE_base<inst, outtype, intype, @@ -3233,26 +3590,33 @@ multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { } defm TEX_CUBE_F32_F32 - : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_cube_v4f32_f32>; defm TEX_CUBE_S32_F32 - : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_cube_v4s32_f32>; defm TEX_CUBE_U32_F32 - : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_cube_v4u32_f32>; class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; + pattern>; multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_CUBE_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z, + intype:$lod))]>; def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype, @@ -3262,26 +3626,32 @@ multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype, } defm TEX_CUBE_F32_F32_LEVEL - : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_cube_level_v4f32_f32>; defm TEX_CUBE_S32_F32_LEVEL - : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_cube_level_v4s32_f32>; defm TEX_CUBE_U32_F32_LEVEL - : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_cube_level_v4u32_f32>; class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $z\\}];", - []>; + pattern>; multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_CUBE_ARRAY_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z))]>; def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype, @@ -3291,26 +3661,33 @@ multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype, } defm TEX_CUBE_ARRAY_F32_F32 - : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_cube_array_v4f32_f32>; defm TEX_CUBE_ARRAY_S32_F32 - : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_cube_array_v4s32_f32>; defm TEX_CUBE_ARRAY_U32_F32 - : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_cube_array_v4u32_f32>; class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; + pattern>; multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + NVPTXRegClass intype, Intrinsic intr> { + def _RR : TEX_CUBE_ARRAY_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z, + intype:$lod))]>; def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, @@ -3321,25 +3698,31 @@ multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, defm TEX_CUBE_ARRAY_F32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_cube_array_level_v4f32_f32>; defm TEX_CUBE_ARRAY_S32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_cube_array_level_v4s32_f32>; defm TEX_CUBE_ARRAY_U32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_cube_array_level_v4u32_f32>; class TLD4_2D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag texsamp> + NVPTXRegClass intype, dag texsamp, list<dag> pattern = []> : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3), !con(texsamp, (ins intype:$x, intype:$y)), inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];", - []>; - -multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { - def _RR : TLD4_2D_base<inst, outtype, intype, - (ins Int64Regs:$t, Int64Regs:$s)>; + pattern>; + +multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype, + Intrinsic intr> { + def _RR : TLD4_2D_base< + inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), + [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3, + (intr i64:$t, i64:$s, intype:$x, intype:$y))]>; def _RI : TLD4_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; def _IR : TLD4_2D_base<inst, outtype, intype, @@ -3349,31 +3732,43 @@ multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { } defm TLD4_R_2D_F32_F32 - : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_r_2d_v4f32_f32>; defm TLD4_G_2D_F32_F32 - : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_g_2d_v4f32_f32>; defm TLD4_B_2D_F32_F32 - : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_b_2d_v4f32_f32>; defm TLD4_A_2D_F32_F32 - : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_a_2d_v4f32_f32>; defm TLD4_R_2D_S32_F32 - : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_r_2d_v4s32_f32>; defm TLD4_G_2D_S32_F32 - : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_g_2d_v4s32_f32>; defm TLD4_B_2D_S32_F32 - : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_b_2d_v4s32_f32>; defm TLD4_A_2D_S32_F32 - : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_a_2d_v4s32_f32>; defm TLD4_R_2D_U32_F32 - : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_r_2d_v4u32_f32>; defm TLD4_G_2D_U32_F32 - : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_g_2d_v4u32_f32>; defm TLD4_B_2D_U32_F32 - : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_b_2d_v4u32_f32>; defm TLD4_A_2D_U32_F32 - : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_a_2d_v4u32_f32>; } @@ -3383,206 +3778,268 @@ let IsTex = true, IsTexModeUnified = true in { // Texture fetch instructions using handles class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_1D_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x))]>; def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_1D_F32_S32 - : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_unified_1d_v4f32_s32>; defm TEX_UNIFIED_1D_F32_F32 - : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_1d_v4f32_f32>; defm TEX_UNIFIED_1D_S32_S32 - : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_1d_v4s32_s32>; defm TEX_UNIFIED_1D_S32_F32 - : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_v4s32_f32>; defm TEX_UNIFIED_1D_U32_S32 - : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_1d_v4u32_s32>; defm TEX_UNIFIED_1D_U32_F32 - : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_v4u32_f32>; class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_1D_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$lod))]>; def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_1D_F32_F32_LEVEL - : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_1d_level_v4f32_f32>; defm TEX_UNIFIED_1D_S32_F32_LEVEL - : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_level_v4s32_f32>; defm TEX_UNIFIED_1D_U32_F32_LEVEL - : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_level_v4u32_f32>; class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; + pattern>; multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_1D_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$gradx, intype:$grady))]>; def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_1D_F32_F32_GRAD - : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_1d_grad_v4f32_f32>; defm TEX_UNIFIED_1D_S32_F32_GRAD - : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_grad_v4s32_f32>; defm TEX_UNIFIED_1D_U32_F32_GRAD - : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_grad_v4u32_f32>; class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_1D_ARRAY_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x))]>; def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_1D_ARRAY_F32_S32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_unified_1d_array_v4f32_s32>; defm TEX_UNIFIED_1D_ARRAY_F32_F32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_v4f32_f32>; defm TEX_UNIFIED_1D_ARRAY_S32_S32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_1d_array_v4s32_s32>; defm TEX_UNIFIED_1D_ARRAY_S32_F32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_v4s32_f32>; defm TEX_UNIFIED_1D_ARRAY_U32_S32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_1d_array_v4u32_s32>; defm TEX_UNIFIED_1D_ARRAY_U32_F32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_v4u32_f32>; class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$lod))]>; def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_level_v4f32_f32>; defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_level_v4s32_f32>; defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_level_v4u32_f32>; class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; + pattern>; multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$gradx, intype:$grady))]>; def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_grad_v4f32_f32>; defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_grad_v4s32_f32>; defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_1d_array_grad_v4u32_f32>; class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_2D_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y))]>; def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_2D_F32_S32 - : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_unified_2d_v4f32_s32>; defm TEX_UNIFIED_2D_F32_F32 - : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_2d_v4f32_f32>; defm TEX_UNIFIED_2D_S32_S32 - : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_2d_v4s32_s32>; defm TEX_UNIFIED_2D_S32_F32 - : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_v4s32_f32>; defm TEX_UNIFIED_2D_U32_S32 - : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_2d_v4u32_s32>; defm TEX_UNIFIED_2D_U32_F32 - : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_v4u32_f32>; class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_2D_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$lod))]>; def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_2D_F32_F32_LEVEL - : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_2d_level_v4f32_f32>; defm TEX_UNIFIED_2D_S32_F32_LEVEL - : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_level_v4s32_f32>; defm TEX_UNIFIED_2D_U32_F32_LEVEL - : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_level_v4u32_f32>; class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, @@ -3590,75 +4047,100 @@ class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype, intype:$grady0, intype:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", - []>; + pattern>; multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_2D_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1))]>; def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_2D_F32_F32_GRAD - : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_2d_grad_v4f32_f32>; defm TEX_UNIFIED_2D_S32_F32_GRAD - : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_grad_v4s32_f32>; defm TEX_UNIFIED_2D_U32_F32_GRAD - : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_grad_v4u32_f32>; class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_2D_ARRAY_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$y))]>; def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_2D_ARRAY_F32_S32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_unified_2d_array_v4f32_s32>; defm TEX_UNIFIED_2D_ARRAY_F32_F32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_v4f32_f32>; defm TEX_UNIFIED_2D_ARRAY_S32_S32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_2d_array_v4s32_s32>; defm TEX_UNIFIED_2D_ARRAY_S32_F32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_v4s32_f32>; defm TEX_UNIFIED_2D_ARRAY_U32_S32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_2d_array_v4u32_s32>; defm TEX_UNIFIED_2D_ARRAY_U32_F32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_v4u32_f32>; class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$lod))]>; def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_level_v4f32_f32>; defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_level_v4s32_f32>; defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_level_v4u32_f32>; class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, @@ -3666,74 +4148,98 @@ class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, intype:$grady0, intype:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", - []>; + pattern>; multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1))]>; def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_grad_v4f32_f32>; defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_grad_v4s32_f32>; defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_2d_array_grad_v4u32_f32>; class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$z)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_3D_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$z))]>; def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_3D_F32_S32 - : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; + : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs, + int_nvvm_tex_unified_3d_v4f32_s32>; defm TEX_UNIFIED_3D_F32_F32 - : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_3d_v4f32_f32>; defm TEX_UNIFIED_3D_S32_S32 - : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_3d_v4s32_s32>; defm TEX_UNIFIED_3D_S32_F32 - : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_3d_v4s32_f32>; defm TEX_UNIFIED_3D_U32_S32 - : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; + : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs, + int_nvvm_tex_unified_3d_v4u32_s32>; defm TEX_UNIFIED_3D_U32_F32 - : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_3d_v4u32_f32>; class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_3D_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>; def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_3D_F32_F32_LEVEL - : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_3d_level_v4f32_f32>; defm TEX_UNIFIED_3D_S32_F32_LEVEL - : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_3d_level_v4s32_f32>; defm TEX_UNIFIED_3D_U32_F32_LEVEL - : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_3d_level_v4u32_f32>; class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$z, @@ -3743,117 +4249,151 @@ class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", - []>; + pattern>; multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_3D_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$z, + intype:$gradx0, intype:$gradx1, intype:$gradx2, + intype:$grady0, intype:$grady1, intype:$grady2))]>; def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_3D_F32_F32_GRAD - : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_3d_grad_v4f32_f32>; defm TEX_UNIFIED_3D_S32_F32_GRAD - : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_3d_grad_v4s32_f32>; defm TEX_UNIFIED_3D_U32_F32_GRAD - : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_3d_grad_v4u32_f32>; class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$z)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_CUBE_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$z))]>; def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_CUBE_F32_F32 - : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_cube_v4f32_f32>; defm TEX_UNIFIED_CUBE_S32_F32 - : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_v4s32_f32>; defm TEX_UNIFIED_CUBE_U32_F32 - : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_v4u32_f32>; class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_CUBE_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>; def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_CUBE_F32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_cube_level_v4f32_f32>; defm TEX_UNIFIED_CUBE_S32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_level_v4s32_f32>; defm TEX_UNIFIED_CUBE_U32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_level_v4u32_f32>; class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", - []>; + pattern>; multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_CUBE_ARRAY_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z))]>; def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 - : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_v4f32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 - : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_v4s32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 - : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_v4u32_f32>; class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, intype:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; + pattern>; multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, intype:$lod))]>; def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_level_v4f32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_level_v4s32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_level_v4u32_f32>; class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins intype:$x, intype:$y, intype:$z, @@ -3863,23 +4403,32 @@ class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype, inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", - []>; + pattern>; multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_CUBE_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, intype:$x, intype:$y, intype:$z, + intype:$gradx0, intype:$gradx1, intype:$gradx2, + intype:$grady0, intype:$grady1, intype:$grady2))]>; def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_CUBE_F32_F32_GRAD - : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tex_unified_cube_grad_v4f32_f32>; defm TEX_UNIFIED_CUBE_S32_F32_GRAD - : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_grad_v4s32_f32>; defm TEX_UNIFIED_CUBE_U32_F32_GRAD - : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>; + : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_grad_v4u32_f32>; class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, @@ -3889,64 +4438,88 @@ class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", - []>; + pattern>; multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype, - (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, + intype:$gradx0, intype:$gradx1, + intype:$gradx2, intype:$grady0, + intype:$grady1, intype:$grady2))]>; def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32", - Float32Regs, Float32Regs>; + Float32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_grad_v4f32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_grad_v4s32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32", - Int32Regs, Float32Regs>; + Int32Regs, Float32Regs, + int_nvvm_tex_unified_cube_array_grad_v4u32_f32>; class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype, dag tex> + NVPTXRegClass intype, dag tex, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3), !con(tex, (ins intype:$x, intype:$y)), inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", - []>; + pattern>; multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype, - NVPTXRegClass intype> { - def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + NVPTXRegClass intype, Intrinsic intr> { + def _R : TLD4_UNIFIED_2D_base< + inst, outtype, intype, (ins Int64Regs:$t), + [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3, + (intr i64:$t, intype:$x, intype:$y))]>; def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; } defm TLD4_UNIFIED_R_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_unified_r_2d_v4f32_f32>; defm TLD4_UNIFIED_G_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_unified_g_2d_v4f32_f32>; defm TLD4_UNIFIED_B_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_unified_b_2d_v4f32_f32>; defm TLD4_UNIFIED_A_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs, + int_nvvm_tld4_unified_a_2d_v4f32_f32>; defm TLD4_UNIFIED_R_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_r_2d_v4s32_f32>; defm TLD4_UNIFIED_G_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_g_2d_v4s32_f32>; defm TLD4_UNIFIED_B_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_b_2d_v4s32_f32>; defm TLD4_UNIFIED_A_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_a_2d_v4s32_f32>; defm TLD4_UNIFIED_R_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_r_2d_v4u32_f32>; defm TLD4_UNIFIED_G_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_g_2d_v4u32_f32>; defm TLD4_UNIFIED_B_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_b_2d_v4u32_f32>; defm TLD4_UNIFIED_A_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs, + int_nvvm_tld4_unified_a_2d_v4u32_f32>; } @@ -3956,13 +4529,17 @@ defm TLD4_UNIFIED_A_2D_U32_F32 let IsSuld = true in { -class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r), !con(surf, (ins Int32Regs:$x)), inst # " \\{$r\\}, [$s, \\{$x\\}];", - []>; + pattern>; multiclass SULD_1D<string inst, NVPTXRegClass outtype> { - def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, (intr i64:$s, i32:$x))]>; def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>; } @@ -3981,13 +4558,18 @@ defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>; defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>; defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>; -class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r), !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), inst # " \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; + pattern>; multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> { - def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, + (intr i64:$s, i32:$l, i32:$x))]>; def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; } @@ -4018,13 +4600,17 @@ defm SULD_1D_ARRAY_I32_ZERO defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>; -class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r), !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), inst # " \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; + pattern>; multiclass SULD_2D<string inst, NVPTXRegClass outtype> { - def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, (intr i64:$s, i32:$x, i32:$y))]>; def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>; } @@ -4043,13 +4629,18 @@ defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>; defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>; defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>; -class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r), !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; + pattern>; multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> { - def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, + (intr i64:$s, i32:$l, i32:$x, i32:$y))]>; def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; } @@ -4068,13 +4659,18 @@ defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; -class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r), !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; + pattern>; multiclass SULD_3D<string inst, NVPTXRegClass outtype> { - def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, + (intr i64:$s, i32:$x, i32:$y, i32:$z))]>; def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>; } @@ -4096,13 +4692,18 @@ defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; let IsSuld = 2 in { -class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), !con(surf, (ins Int32Regs:$x)), inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; + pattern>; multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> { - def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, + (intr i64:$s, i32:$x))]>; def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>; } @@ -4121,13 +4722,18 @@ defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; -class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; + pattern>; multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> { - def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, + (intr i64:$s, i32:$l, i32:$x))]>; def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; } @@ -4158,13 +4764,18 @@ defm SULD_1D_ARRAY_V2I32_ZERO defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>; -class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; + pattern>; multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> { - def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, + (intr i64:$s, i32:$x, i32:$y))]>; def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>; } @@ -4195,13 +4806,18 @@ defm SULD_2D_V2I32_ZERO defm SULD_2D_V2I64_ZERO : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; -class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; + pattern>; multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> { - def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, + (intr i64:$s, i32:$l, i32:$x, i32:$y))]>; def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; } @@ -4232,13 +4848,18 @@ defm SULD_2D_ARRAY_V2I32_ZERO defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>; -class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; + pattern>; multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> { - def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, + (intr i64:$s, i32:$x, i32:$y, i32:$z))]>; def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>; } @@ -4261,13 +4882,18 @@ defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>; let IsSuld = 3 in { -class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(surf, (ins Int32Regs:$x)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; + pattern>; multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> { - def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$s, i32:$x))]>; def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>; } @@ -4283,13 +4909,19 @@ defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>; defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>; defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>; -class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", - []>; + pattern>; multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> { - def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, + outtype:$a, + (intr i64:$s, i32:$l, i32:$x))]>; def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>; } @@ -4314,13 +4946,18 @@ defm SULD_1D_ARRAY_V4I16_ZERO defm SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>; -class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; + pattern>; multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> { - def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$s, i32:$x, i32:$y))]>; def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>; } @@ -4336,13 +4973,19 @@ defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>; defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>; defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>; -class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; + pattern>; multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> { - def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, + outtype:$a, + (intr i64:$s, i32:$l, i32:$x, i32:$y))]>; def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>; } @@ -4367,13 +5010,18 @@ defm SULD_2D_ARRAY_V4I16_ZERO defm SULD_2D_ARRAY_V4I32_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>; -class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf> +class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf, + list<dag> pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; + pattern>; multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> { - def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>; + defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); + + def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s), + [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, + (intr i64:$s, i32:$x, i32:$y, i32:$z))]>; def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>; } @@ -4462,22 +5110,22 @@ def TXQ_NUM_MIPMAP_LEVELS_I []>; } -def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), - (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), - (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_width Int64Regs:$a), - (TXQ_WIDTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_height Int64Regs:$a), - (TXQ_HEIGHT_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_depth Int64Regs:$a), - (TXQ_DEPTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), - (TXQ_ARRAY_SIZE_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), - (TXQ_NUM_SAMPLES_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), - (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_channel_order i64:$a), + (TXQ_CHANNEL_ORDER_R $a)>; +def : Pat<(int_nvvm_txq_channel_data_type i64:$a), + (TXQ_CHANNEL_DATA_TYPE_R $a)>; +def : Pat<(int_nvvm_txq_width i64:$a), + (TXQ_WIDTH_R $a)>; +def : Pat<(int_nvvm_txq_height i64:$a), + (TXQ_HEIGHT_R $a)>; +def : Pat<(int_nvvm_txq_depth i64:$a), + (TXQ_DEPTH_R $a)>; +def : Pat<(int_nvvm_txq_array_size i64:$a), + (TXQ_ARRAY_SIZE_R $a)>; +def : Pat<(int_nvvm_txq_num_samples i64:$a), + (TXQ_NUM_SAMPLES_R $a)>; +def : Pat<(int_nvvm_txq_num_mipmap_levels i64:$a), + (TXQ_NUM_MIPMAP_LEVELS_R $a)>; //----------------------------------- @@ -4535,18 +5183,18 @@ def SUQ_ARRAY_SIZE_I []>; } -def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), - (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), - (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_width Int64Regs:$a), - (SUQ_WIDTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_height Int64Regs:$a), - (SUQ_HEIGHT_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_depth Int64Regs:$a), - (SUQ_DEPTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), - (SUQ_ARRAY_SIZE_R Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_channel_order i64:$a), + (SUQ_CHANNEL_ORDER_R $a)>; +def : Pat<(int_nvvm_suq_channel_data_type i64:$a), + (SUQ_CHANNEL_DATA_TYPE_R $a)>; +def : Pat<(int_nvvm_suq_width i64:$a), + (SUQ_WIDTH_R $a)>; +def : Pat<(int_nvvm_suq_height i64:$a), + (SUQ_HEIGHT_R $a)>; +def : Pat<(int_nvvm_suq_depth i64:$a), + (SUQ_DEPTH_R $a)>; +def : Pat<(int_nvvm_suq_array_size i64:$a), + (SUQ_ARRAY_SIZE_R $a)>; //===- Handle Query -------------------------------------------------------===// @@ -4555,15 +5203,15 @@ def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), def ISTYPEP_SAMPLER : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "istypep.samplerref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; + [(set i1:$d, (int_nvvm_istypep_sampler i64:$a))]>; def ISTYPEP_SURFACE : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "istypep.surfref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; + [(set i1:$d, (int_nvvm_istypep_surface i64:$a))]>; def ISTYPEP_TEXTURE : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "istypep.texref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; + [(set i1:$d, (int_nvvm_istypep_texture i64:$a))]>; //===- Surface Stores -----------------------------------------------------===// @@ -6354,13 +7002,13 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]> : NVPTXInst<(outs Int64Regs:$d), (ins), !strconcat("mov.u64 \t$d, %", regname, ";"), - [(set Int64Regs:$d, (intop))]>, + [(set i64:$d, (intop))]>, Requires<Preds>; class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]> : NVPTXInst<(outs Int32Regs:$d), (ins), !strconcat("mov.u32 \t$d, %", regname, ";"), - [(set Int32Regs:$d, (intop))]>, + [(set i32:$d, (intop))]>, Requires<Preds>; multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> { @@ -6442,7 +7090,7 @@ def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; // handle the constant. def INT_PTX_SREG_WARPSIZE : NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", - [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; + [(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; // Helper class that represents a 'fragment' of an NVPTX *MMA instruction. // In addition to target-independent fields provided by WMMA_REGS, it adds @@ -6854,19 +7502,19 @@ foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in multiclass MAPA<string suffix, Intrinsic Intr> { def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b), "mapa" # suffix # ".u32\t$d, $a, $b;", - [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>, + [(set i32:$d, (Intr i32:$a, i32:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b), "mapa" # suffix # ".u32\t$d, $a, $b;", - [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>, + [(set i32:$d, (Intr i32:$a, imm:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b), "mapa" # suffix # ".u64\t$d, $a, $b;", - [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>, + [(set i64:$d, (Intr i64:$a, i32:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b), "mapa" # suffix # ".u64\t$d, $a, $b;", - [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>, + [(set i64:$d, (Intr i64:$a, imm:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; } @@ -6877,11 +7525,11 @@ defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluste multiclass GETCTARANK<string suffix, Intrinsic Intr> { def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), "getctarank" # suffix # ".u32\t$d, $a;", - [(set Int32Regs:$d, (Intr Int32Regs:$a))]>, + [(set i32:$d, (Intr i32:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "getctarank" # suffix # ".u64\t$d, $a;", - [(set Int32Regs:$d, (Intr Int64Regs:$a))]>, + [(set i32:$d, (Intr i64:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; } @@ -6890,7 +7538,7 @@ defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getcta def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins), "mov.pred\t$d, %is_explicit_cluster;", - [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>, + [(set i1:$d, (int_nvvm_is_explicit_cluster))]>, Requires<[hasSM<90>, hasPTX<78>]>; // setmaxnreg inc/dec intrinsics @@ -6907,4 +7555,30 @@ defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_align } // isConvergent +// +// WGMMA fence instructions +// +let isConvergent = true in { +def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.fence.sync.aligned;", + [(int_nvvm_wgmma_fence_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>; + +def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.commit_group.sync.aligned;", + [(int_nvvm_wgmma_commit_group_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>; + +def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned \t$n;", + [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>; +} // isConvergent = true + +def GRIDDEPCONTROL_LAUNCH_DEPENDENTS : + NVPTXInst<(outs), (ins), + "griddepcontrol.launch_dependents;", + [(int_nvvm_griddepcontrol_launch_dependents)]>, + Requires<[hasSM<90>, hasPTX<78>]>; + +def GRIDDEPCONTROL_WAIT : + NVPTXInst<(outs), (ins), + "griddepcontrol.wait;", + [(int_nvvm_griddepcontrol_wait)]>, + Requires<[hasSM<90>, hasPTX<78>]>; + def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>; |
