about summary refs log tree commit diff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td 75
1 files changed, 64 insertions, 11 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 2057cac346d4..69facada2e96 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1,4 +1,4 @@
-//===-- FLATInstructions.td - FLAT Instruction Defintions -----------------===//
+//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -100,7 +100,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
!if(ps.is_flat_scratch, 0b01, 0));
// Signed offset. Highest bit ignored for flat and treated as 12-bit
- // unsigned for flat acceses.
+ // unsigned for flat accesses.
bits<13> offset;
bits<1> nv = 0; // XXX - What does this actually do?
@@ -175,7 +175,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
}
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
@@ -183,8 +183,27 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
}
+class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, // Global load pseudo addressed only through $saddr plus offset.
+ bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo< // NOTE(review): HasSignedOffset is not referenced anywhere in this class body.
+ opName,
+ (outs regClass:$vdst), // single result register
+ !con((ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), // $vdst_in is appended only when HasTiedOutput is set
+ " $vdst, $saddr$offset$glc$slc$dlc"> { // asm operand string; $vdst_in is never printed
+ let is_flat_global = 1;
+ let has_data = 0; // the ins list above carries no $vdata operand
+ let mayLoad = 1;
+ let has_vaddr = 0; // the ins list above carries no vaddr operand
+ let has_saddr = 1; // $saddr is always part of the ins list
+ let enabled_saddr = 1;
+ let maybeAtomic = 1;
+
+ let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); // tie the result to $vdst_in when that input exists
+ let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); // the tied input gets no encoding field of its own
+}
+
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
@@ -192,6 +211,24 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, // Global store pseudo addressed only through $saddr plus offset.
+ bit HasSignedOffset = 0> : FLAT_Pseudo< // NOTE(review): HasSignedOffset is not referenced anywhere in this class body.
+ opName,
+ (outs), // a store defines no result operands
+ !con( // the two ins lists are concatenated into one operand list
+ (ins vdataClass:$vdata, SReg_64:$saddr),
+ (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vdata, $saddr$offset$glc$slc$dlc"> { // asm operand string
+ let is_flat_global = 1;
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_vdst = 0; // matches the empty outs list above
+ let has_vaddr = 0; // the ins list above carries no vaddr operand
+ let has_saddr = 1; // $saddr is always part of the ins list
+ let enabled_saddr = 1;
+ let maybeAtomic = 1;
+}
+
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
bit EnableSaddr = 0>: FLAT_Pseudo<
opName,
@@ -279,6 +316,7 @@ multiclass FLAT_Atomic_Pseudo<
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
let FPAtomic = isFP;
+ let AddedComplexity = -1; // Prefer global atomics if available
}
def _RTN : FLAT_AtomicRet_Pseudo <opName,
@@ -290,6 +328,7 @@ multiclass FLAT_Atomic_Pseudo<
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>{
let FPAtomic = isFP;
+ let AddedComplexity = -1; // Prefer global atomics if available
}
}
@@ -367,10 +406,12 @@ multiclass FLAT_Global_Atomic_Pseudo<
SDPatternOperator atomic_rtn = null_frag,
SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> :
- FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>,
- FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
-
+ RegisterClass data_rc = vdst_rc> {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
+ defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
+ defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
+ }
+}
//===----------------------------------------------------------------------===//
// Flat Instructions
@@ -507,7 +548,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
} // End SubtargetPredicate = isGFX7GFX10
-let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
@@ -523,6 +563,8 @@ defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16"
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
+let OtherPredicates = [HasGFX10_BEncoding] in // brace-less 'let ... in': applies only to the def on the next line
+def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; // plain def of a class: exactly one record, no _SADDR sibling
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
@@ -530,6 +572,8 @@ defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
+let OtherPredicates = [HasGFX10_BEncoding] in // brace-less 'let ... in': applies only to the def on the next line
+def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; // plain def of a class: exactly one record, no _SADDR sibling
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
@@ -615,9 +659,12 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
VReg_64, i64, atomic_dec_global_64>;
+
+let SubtargetPredicate = HasGFX10_BEncoding in // brace-less 'let ... in': applies only to the defm that follows
+defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", // instantiates the _RTN multiclass directly, not FLAT_Global_Atomic_Pseudo
+ VGPR_32, i32, atomic_csub_global_32>;
} // End is_flat_global = 1
-} // End SubtargetPredicate = HasFlatGlobalInsts
let SubtargetPredicate = HasFlatScratchInsts in {
@@ -912,6 +959,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
@@ -1212,6 +1260,9 @@ multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
FLAT_Real_RTN_gfx10<op>,
FLAT_Real_SADDR_RTN_gfx10<op>;
+multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : // GFX10 real encodings built only from the two RTN parent multiclasses below
+ FLAT_Real_RTN_gfx10<op>,
+ FLAT_Real_SADDR_RTN_gfx10<op>; // no body of its own; every def comes from the two parents
// ENC_FLAT.
defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
@@ -1297,6 +1348,7 @@ defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
+defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
@@ -1325,7 +1377,8 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
-
+defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
+defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;