about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/DSInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/DSInstructions.td')
-rw-r--r-- llvm/lib/Target/AMDGPU/DSInstructions.td 197
1 files changed, 113 insertions, 84 deletions
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 328c81005df4..ad9528ece7d0 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -52,32 +52,41 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
-class DS_Real <DS_Pseudo ds> :
- InstSI <ds.OutOperandList, ds.InOperandList, ds.Mnemonic # ds.AsmOperands, []>,
+class DS_Real <DS_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
Enc64 {
let isPseudo = 0;
let isCodeGenOnly = 0;
+ let LGKM_CNT = 1;
let DS = 1;
let UseNamedOperandTable = 1;
// copy relevant pseudo op flags
- let SubtargetPredicate = ds.SubtargetPredicate;
- let OtherPredicates = ds.OtherPredicates;
- let AsmMatchConverter = ds.AsmMatchConverter;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let SchedRW = ps.SchedRW;
+ let mayLoad = ps.mayLoad;
+ let mayStore = ps.mayStore;
+ let IsAtomicRet = ps.IsAtomicRet;
+ let IsAtomicNoRet = ps.IsAtomicNoRet;
// encoding fields
- bits<8> vdst;
+ bits<10> vdst;
bits<1> gds;
bits<8> addr;
- bits<8> data0;
- bits<8> data1;
+ bits<10> data0;
+ bits<10> data1;
bits<8> offset0;
bits<8> offset1;
bits<16> offset;
- let offset0 = !if(ds.has_offset, offset{7-0}, ?);
- let offset1 = !if(ds.has_offset, offset{15-8}, ?);
+ let offset0 = !if(ps.has_offset, offset{7-0}, ?);
+ let offset1 = !if(ps.has_offset, offset{15-8}, ?);
+
+ bits<1> acc = !if(ps.has_vdst, vdst{9},
+ !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
}
@@ -86,7 +95,7 @@ class DS_Real <DS_Pseudo ds> :
class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
- (ins rc:$data0, offset:$offset, gds:$gds),
+ (ins getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds),
" $data0$offset$gds"> {
let has_addr = 0;
@@ -97,11 +106,12 @@ class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
- (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
+ (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds),
" $addr, $data0$offset$gds"> {
let has_data1 = 0;
let has_vdst = 0;
+ let IsAtomicNoRet = 1;
}
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@@ -114,13 +124,22 @@ multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
}
}
-class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
+multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
+ let has_m0_read = 0 in {
+ def "" : DS_1A1D_NORET<opName, rc>,
+ AtomicNoRet<opName, 0>;
+ }
+}
+
+class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
+ RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
: DS_Pseudo<opName,
(outs),
- (ins VGPR_32:$addr, rc:$data0, rc:$data1, offset:$offset, gds:$gds),
+ (ins VGPR_32:$addr, data_op:$data0, data_op:$data1, offset:$offset, gds:$gds),
" $addr, $data0, $data1$offset$gds"> {
let has_vdst = 0;
+ let IsAtomicNoRet = 1;
}
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@@ -133,10 +152,11 @@ multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
}
}
-class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32>
+class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
+ RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
: DS_Pseudo<opName,
(outs),
- (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
offset0:$offset0, offset1:$offset1, gds:$gds),
" $addr, $data0, $data1$offset0$offset1$gds"> {
@@ -153,14 +173,16 @@ multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
}
}
-class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
+class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
+ RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
: DS_Pseudo<opName,
- (outs rc:$vdst),
- (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
+ (outs data_op:$vdst),
+ (ins VGPR_32:$addr, data_op:$data0, offset:$offset, gds:$gds),
" $vdst, $addr, $data0$offset$gds"> {
let hasPostISelHook = 1;
let has_data1 = 0;
+ let IsAtomicRet = 1;
}
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
@@ -175,15 +197,27 @@ multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
}
}
+multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32,
+ string NoRetOp = ""> {
+ let has_m0_read = 0 in {
+ def "" : DS_1A1D_RET<opName, rc>,
+ AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp),
+ !if(!eq(NoRetOp, ""), 0, 1)>;
+ }
+}
+
class DS_1A2D_RET<string opName,
RegisterClass rc = VGPR_32,
- RegisterClass src = rc>
+ RegisterClass src = rc,
+ RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
+ RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
: DS_Pseudo<opName,
- (outs rc:$vdst),
- (ins VGPR_32:$addr, src:$data0, src:$data1, offset:$offset, gds:$gds),
+ (outs dst_op:$vdst),
+ (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, offset:$offset, gds:$gds),
" $vdst, $addr, $data0, $data1$offset$gds"> {
let hasPostISelHook = 1;
+ let IsAtomicRet = 1;
}
multiclass DS_1A2D_RET_mc<string opName,
@@ -201,10 +235,12 @@ multiclass DS_1A2D_RET_mc<string opName,
class DS_1A2D_Off8_RET<string opName,
RegisterClass rc = VGPR_32,
- RegisterClass src = rc>
+ RegisterClass src = rc,
+ RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
+ RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
: DS_Pseudo<opName,
- (outs rc:$vdst),
- (ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
+ (outs dst_op:$vdst),
+ (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
" $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
let has_offset = 0;
@@ -224,11 +260,12 @@ multiclass DS_1A2D_Off8_RET_mc<string opName,
}
-class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset>
+class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset,
+ RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
: DS_Pseudo<opName,
- (outs rc:$vdst),
+ (outs data_op:$vdst),
!if(HasTiedOutput,
- (ins VGPR_32:$addr, ofs:$offset, gds:$gds, rc:$vdst_in),
+ (ins VGPR_32:$addr, ofs:$offset, gds:$gds, data_op:$vdst_in),
(ins VGPR_32:$addr, ofs:$offset, gds:$gds)),
" $vdst, $addr$offset$gds"> {
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
@@ -250,7 +287,7 @@ class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32> :
class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
- (outs rc:$vdst),
+ (outs getLdStRegisterOperand<rc>.ret:$vdst),
(ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
" $vdst, $addr$offset0$offset1$gds"> {
@@ -269,7 +306,7 @@ multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
}
class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
- (outs VGPR_32:$vdst),
+ (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
(ins VGPR_32:$addr, offset:$offset),
" $vdst, $addr$offset gds"> {
@@ -281,7 +318,7 @@ class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
}
class DS_0A_RET <string opName> : DS_Pseudo<opName,
- (outs VGPR_32:$vdst),
+ (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
(ins offset:$offset, gds:$gds),
" $vdst$offset$gds"> {
@@ -336,7 +373,8 @@ class DS_GWS_0D <string opName>
class DS_GWS_1D <string opName>
: DS_GWS<opName,
- (ins VGPR_32:$data0, offset:$offset), " $data0$offset gds"> {
+ (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, offset:$offset),
+ " $data0$offset gds"> {
let has_gws_data0 = 1;
let hasSideEffects = 1;
@@ -360,10 +398,11 @@ class DS_VOID <string opName> : DS_Pseudo<opName,
let has_gds = 0;
}
-class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
+class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
+ RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret>
: DS_Pseudo<opName,
- (outs VGPR_32:$vdst),
- (ins VGPR_32:$addr, VGPR_32:$data0, offset:$offset),
+ (outs data_op:$vdst),
+ (ins VGPR_32:$addr, data_op:$data0, offset:$offset),
" $vdst, $addr, $data0$offset",
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
@@ -420,6 +459,11 @@ def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
} // End mayLoad = 0
+let SubtargetPredicate = isGFX90APlus in {
+ defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
+ defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64, "ds_add_f64">;
+} // End SubtargetPredicate = isGFX90APlus
+
defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
@@ -674,38 +718,6 @@ defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
-let AddedComplexity = 100 in {
-
-foreach vt = VReg_64.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
-}
-
-let SubtargetPredicate = isGFX7Plus in {
-
-foreach vt = VReg_96.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
-}
-
-foreach vt = VReg_128.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
-}
-
-let SubtargetPredicate = HasUnalignedAccessMode in {
-
-foreach vt = VReg_96.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
-}
-
-foreach vt = VReg_128.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B128, vt, "load_local">;
-}
-
-} // End SubtargetPredicate = HasUnalignedAccessMode
-
-} // End SubtargetPredicate = isGFX7Plus
-
-} // End AddedComplexity = 100
-
let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
@@ -829,31 +841,38 @@ foreach vt = VReg_128.RegTypes in {
defm : DS128Bit8ByteAlignedPat_mc<vt>;
}
+// Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
+// being equal, because ds_read/ds_write have a larger immediate offset range.
let AddedComplexity = 100 in {
foreach vt = VReg_64.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
}
let SubtargetPredicate = isGFX7Plus in {
foreach vt = VReg_96.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
}
foreach vt = VReg_128.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
}
let SubtargetPredicate = HasUnalignedAccessMode in {
+// FIXME: From a performance point of view, is ds_read_b96/ds_write_b96 a better
+// choice for unaligned accesses?
foreach vt = VReg_96.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
}
-foreach vt = VReg_128.RegTypes in {
-defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_local">;
-}
+// For performance reasons, *do not* select ds_read_b128/ds_write_b128 for unaligned
+// accesses.
} // End SubtargetPredicate = HasUnalignedAccessMode
@@ -938,6 +957,10 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
+let SubtargetPredicate = isGFX90APlus in {
+def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>;
+}
+
def : Pat <
(SIds_ordered_count i32:$value, i16:$offset),
(DS_ORDERED_COUNT $value, (as_i16imm $offset))
@@ -959,10 +982,10 @@ class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> :
let Inst{17} = !if(ps.has_gds, gds, ps.gdsValue);
let Inst{25-18} = op;
let Inst{31-26} = 0x36;
- let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0, 0));
- let Inst{47-40} = !if(ps.has_data0, data0, 0);
- let Inst{55-48} = !if(ps.has_data1, data1, 0);
- let Inst{63-56} = !if(ps.has_vdst, vdst, 0);
+ let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
+ let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
+ let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
+ let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
}
//===----------------------------------------------------------------------===//
@@ -1166,22 +1189,23 @@ defm DS_MAX_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
-class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
- DS_Real <ds>,
- SIMCInstr <ds.Mnemonic, SIEncodingFamily.VI> {
+class DS_Real_vi <bits<8> op, DS_Pseudo ps> :
+ DS_Real <ps>,
+ SIMCInstr <ps.Mnemonic, SIEncodingFamily.VI> {
let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
// encoding
- let Inst{7-0} = !if(ds.has_offset0, offset0, 0);
- let Inst{15-8} = !if(ds.has_offset1, offset1, 0);
- let Inst{16} = !if(ds.has_gds, gds, ds.gdsValue);
+ let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
+ let Inst{15-8} = !if(ps.has_offset1, offset1, 0);
+ let Inst{16} = !if(ps.has_gds, gds, ps.gdsValue);
let Inst{24-17} = op;
+ let Inst{25} = acc;
let Inst{31-26} = 0x36; // ds prefix
- let Inst{39-32} = !if(ds.has_addr, addr, !if(ds.has_gws_data0, data0, 0));
- let Inst{47-40} = !if(ds.has_data0, data0, 0);
- let Inst{55-48} = !if(ds.has_data1, data1, 0);
- let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
+ let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
+ let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
+ let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
+ let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
}
def DS_ADD_U32_vi : DS_Real_vi<0x0, DS_ADD_U32>;
@@ -1344,3 +1368,8 @@ def DS_WRITE_B96_vi : DS_Real_vi<0xde, DS_WRITE_B96>;
def DS_WRITE_B128_vi : DS_Real_vi<0xdf, DS_WRITE_B128>;
def DS_READ_B96_vi : DS_Real_vi<0xfe, DS_READ_B96>;
def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
+
+let SubtargetPredicate = isGFX90APlus in {
+ def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>;
+ def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
+} // End SubtargetPredicate = isGFX90APlus