diff options
Diffstat (limited to 'lib/Target/AMDGPU/CIInstructions.td')
-rw-r--r-- | lib/Target/AMDGPU/CIInstructions.td | 148 |
1 files changed, 82 insertions, 66 deletions
diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index c543814cae0d..f9a9f79126bd 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -25,14 +25,6 @@ // BUFFER_LOAD_DWORDX3 // BUFFER_STORE_DWORDX3 - -def isCIVI : Predicate < - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || " - "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS" ->, AssemblerPredicate<"FeatureCIInsts">; - -def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; - //===----------------------------------------------------------------------===// // VOP1 Instructions //===----------------------------------------------------------------------===// @@ -108,9 +100,11 @@ defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>, // MUBUF Instructions //===----------------------------------------------------------------------===// +let DisableSIDecoder = 1 in { defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol >; +} //===----------------------------------------------------------------------===// // Flat Instructions @@ -159,129 +153,114 @@ defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper < flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96 >; defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC < - flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32 + flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat >; defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC < - flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, VReg_64 + flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32, + atomic_cmp_swap_flat, v2i32, VReg_64 >; defm FLAT_ATOMIC_ADD : FLAT_ATOMIC < - flat<0x32, 0x42>, "flat_atomic_add", VGPR_32 + flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat >; defm FLAT_ATOMIC_SUB : FLAT_ATOMIC < - flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32 + flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat >; defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC < - flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32 + flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat >; defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC < - flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32 + flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat >; defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC < - flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32 + flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat >; defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC < - flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32 + flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat >; defm FLAT_ATOMIC_AND : FLAT_ATOMIC < - flat<0x39, 0x48>, "flat_atomic_and", VGPR_32 + flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat >; defm FLAT_ATOMIC_OR : FLAT_ATOMIC < - flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32 + flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat >; defm FLAT_ATOMIC_XOR : FLAT_ATOMIC < - flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32 + flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat >; defm FLAT_ATOMIC_INC : FLAT_ATOMIC < - flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32 + flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat >; defm FLAT_ATOMIC_DEC : FLAT_ATOMIC < - flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32 + flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat >; defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC < - flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64 + flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat >; defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC < - flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, VReg_128 + flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64, + atomic_cmp_swap_flat, v2i64, VReg_128 >; defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC < - flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64 + flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat >; defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC < - flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64 + flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat >; defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC < - flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64 + flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat >; defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC < - flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64 + flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat >; defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC < - flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64 + flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat >; defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC < - flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64 + flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat >; defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC < - flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64 + flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat >; defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC < - flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64 + flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat >; defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC < - flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64 + flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat >; defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC < - flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64 + flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat >; defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC < - flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64 + flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat >; } // End SubtargetPredicate = isCIVI // CI Only flat instructions -let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst in { +let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 in { defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC < - flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, VReg_64 + flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32, + null_frag, v2f32, VReg_64 >; defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC < - flat<0x3f>, "flat_atomic_fmin", VGPR_32 + flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32 >; defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC < - flat<0x40>, "flat_atomic_fmax", VGPR_32 + flat<0x40>, "flat_atomic_fmax", VGPR_32, f32 >; defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC < - flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128 + flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64, + null_frag, v2f64, VReg_128 >; defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC < - flat<0x5f>, "flat_atomic_fmin_x2", VReg_64 + flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64 >; defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC < - flat<0x60>, "flat_atomic_fmax_x2", VReg_64 + flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64 >; -} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst - -let Predicates = [isCI] in { - -// Convert (x - floor(x)) to fract(x) -def : Pat < - (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), - (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), - (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) ->; - -// Convert (x + (-floor(x))) to fract(x) -def : Pat < - (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), - (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), - (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) ->; - -} // End Predicates = [isCI] - +} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 //===----------------------------------------------------------------------===// // Flat Patterns @@ -289,12 +268,17 @@ def : Pat < let Predicates = [isCIVI] in { -// Patterns for global loads with no offset +// Patterns for global loads with no offset. class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (vt (node i64:$addr)), (inst $addr, 0, 0, 0) >; +class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < + (vt (node i64:$addr)), + (inst $addr, 1, 0, 0) +>; + def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>; def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>; def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>; @@ -303,9 +287,20 @@ def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>; def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>; def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>; +def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>; +def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>; + + class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (node vt:$data, i64:$addr), - (inst $data, $addr, 0, 0, 0) + (inst $addr, $data, 0, 0, 0) +>; + +class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < + // atomic store follows atomic binop convention so the address comes + // first. + (node i64:$addr, vt:$data), + (inst $addr, $data, 1, 0, 0) >; def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>; @@ -314,20 +309,41 @@ def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>; def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>; def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>; -class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < - (vt (node i64:$addr, vt:$data)), +def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>; +def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>; + +class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt, + ValueType data_vt = vt> : Pat < + (vt (node i64:$addr, data_vt:$data)), (inst $addr, $data, 0, 0) >; def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>; def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>; +def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>; + } // End Predicates = [isCIVI] |