diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/DSInstructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 197 |
1 files changed, 113 insertions, 84 deletions
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 328c81005df4..ad9528ece7d0 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -52,32 +52,41 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]); } -class DS_Real <DS_Pseudo ds> : - InstSI <ds.OutOperandList, ds.InOperandList, ds.Mnemonic # ds.AsmOperands, []>, +class DS_Real <DS_Pseudo ps> : + InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, Enc64 { let isPseudo = 0; let isCodeGenOnly = 0; + let LGKM_CNT = 1; let DS = 1; let UseNamedOperandTable = 1; // copy relevant pseudo op flags - let SubtargetPredicate = ds.SubtargetPredicate; - let OtherPredicates = ds.OtherPredicates; - let AsmMatchConverter = ds.AsmMatchConverter; + let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; + let AsmMatchConverter = ps.AsmMatchConverter; + let SchedRW = ps.SchedRW; + let mayLoad = ps.mayLoad; + let mayStore = ps.mayStore; + let IsAtomicRet = ps.IsAtomicRet; + let IsAtomicNoRet = ps.IsAtomicNoRet; // encoding fields - bits<8> vdst; + bits<10> vdst; bits<1> gds; bits<8> addr; - bits<8> data0; - bits<8> data1; + bits<10> data0; + bits<10> data1; bits<8> offset0; bits<8> offset1; bits<16> offset; - let offset0 = !if(ds.has_offset, offset{7-0}, ?); - let offset1 = !if(ds.has_offset, offset{15-8}, ?); + let offset0 = !if(ps.has_offset, offset{7-0}, ?); + let offset1 = !if(ps.has_offset, offset{15-8}, ?); + + bits<1> acc = !if(ps.has_vdst, vdst{9}, + !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0)); } @@ -86,7 +95,7 @@ class DS_Real <DS_Pseudo ds> : class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32> : DS_Pseudo<opName, (outs), - (ins rc:$data0, offset:$offset, gds:$gds), + (ins getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds), " $data0$offset$gds"> { let has_addr = 0; @@ -97,11 +106,12 @@ class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32> class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32> : DS_Pseudo<opName, (outs), - (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds), + (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds), " $addr, $data0$offset$gds"> { let has_data1 = 0; let has_vdst = 0; + let IsAtomicNoRet = 1; } multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> { @@ -114,13 +124,22 @@ multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> { } } -class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32> +multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> { + let has_m0_read = 0 in { + def "" : DS_1A1D_NORET<opName, rc>, + AtomicNoRet<opName, 0>; + } +} + +class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32, + RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> : DS_Pseudo<opName, (outs), - (ins VGPR_32:$addr, rc:$data0, rc:$data1, offset:$offset, gds:$gds), + (ins VGPR_32:$addr, data_op:$data0, data_op:$data1, offset:$offset, gds:$gds), " $addr, $data0, $data1$offset$gds"> { let has_vdst = 0; + let IsAtomicNoRet = 1; } multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> { @@ -133,10 +152,11 @@ multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> { } } -class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32> +class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32, + RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> : DS_Pseudo<opName, (outs), - (ins VGPR_32:$addr, rc:$data0, rc:$data1, + (ins VGPR_32:$addr, data_op:$data0, data_op:$data1, offset0:$offset0, offset1:$offset1, gds:$gds), " $addr, $data0, $data1$offset0$offset1$gds"> { @@ -153,14 +173,16 @@ multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> { } } -class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32> +class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32, + RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> : DS_Pseudo<opName, - (outs rc:$vdst), - (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds), + (outs data_op:$vdst), + (ins VGPR_32:$addr, data_op:$data0, offset:$offset, gds:$gds), " $vdst, $addr, $data0$offset$gds"> { let hasPostISelHook = 1; let has_data1 = 0; + let IsAtomicRet = 1; } multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32, @@ -175,15 +197,27 @@ multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32, } } +multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32, + string NoRetOp = ""> { + let has_m0_read = 0 in { + def "" : DS_1A1D_RET<opName, rc>, + AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp), + !if(!eq(NoRetOp, ""), 0, 1)>; + } +} + class DS_1A2D_RET<string opName, RegisterClass rc = VGPR_32, - RegisterClass src = rc> + RegisterClass src = rc, + RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret, + RegisterOperand src_op = getLdStRegisterOperand<src>.ret> : DS_Pseudo<opName, - (outs rc:$vdst), - (ins VGPR_32:$addr, src:$data0, src:$data1, offset:$offset, gds:$gds), + (outs dst_op:$vdst), + (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, offset:$offset, gds:$gds), " $vdst, $addr, $data0, $data1$offset$gds"> { let hasPostISelHook = 1; + let IsAtomicRet = 1; } multiclass DS_1A2D_RET_mc<string opName, @@ -201,10 +235,12 @@ multiclass DS_1A2D_RET_mc<string opName, class DS_1A2D_Off8_RET<string opName, RegisterClass rc = VGPR_32, - RegisterClass src = rc> + RegisterClass src = rc, + RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret, + RegisterOperand src_op = getLdStRegisterOperand<src>.ret> : DS_Pseudo<opName, - (outs rc:$vdst), - (ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds), + (outs dst_op:$vdst), + (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, offset0:$offset0, offset1:$offset1, gds:$gds), " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> { let has_offset = 0; @@ -224,11 +260,12 @@ multiclass DS_1A2D_Off8_RET_mc<string opName, } -class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset> +class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset, + RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> : DS_Pseudo<opName, - (outs rc:$vdst), + (outs data_op:$vdst), !if(HasTiedOutput, - (ins VGPR_32:$addr, ofs:$offset, gds:$gds, rc:$vdst_in), + (ins VGPR_32:$addr, ofs:$offset, gds:$gds, data_op:$vdst_in), (ins VGPR_32:$addr, ofs:$offset, gds:$gds)), " $vdst, $addr$offset$gds"> { let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); @@ -250,7 +287,7 @@ class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32> : class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32> : DS_Pseudo<opName, - (outs rc:$vdst), + (outs getLdStRegisterOperand<rc>.ret:$vdst), (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds), " $vdst, $addr$offset0$offset1$gds"> { @@ -269,7 +306,7 @@ multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> { } class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName, - (outs VGPR_32:$vdst), + (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst), (ins VGPR_32:$addr, offset:$offset), " $vdst, $addr$offset gds"> { @@ -281,7 +318,7 @@ class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName, } class DS_0A_RET <string opName> : DS_Pseudo<opName, - (outs VGPR_32:$vdst), + (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst), (ins offset:$offset, gds:$gds), " $vdst$offset$gds"> { @@ -336,7 +373,8 @@ class DS_GWS_0D <string opName> class DS_GWS_1D <string opName> : DS_GWS<opName, - (ins VGPR_32:$data0, offset:$offset), " $data0$offset gds"> { + (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, offset:$offset), + " $data0$offset gds"> { let has_gws_data0 = 1; let hasSideEffects = 1; @@ -360,10 +398,11 @@ class DS_VOID <string opName> : DS_Pseudo<opName, let has_gds = 0; } -class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag> +class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag, + RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret> : DS_Pseudo<opName, - (outs VGPR_32:$vdst), - (ins VGPR_32:$addr, VGPR_32:$data0, offset:$offset), + (outs data_op:$vdst), + (ins VGPR_32:$addr, data_op:$data0, offset:$offset), " $vdst, $addr, $data0$offset", [(set i32:$vdst, (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > { @@ -420,6 +459,11 @@ def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">; } // End mayLoad = 0 +let SubtargetPredicate = isGFX90APlus in { + defm DS_ADD_F64 : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>; + defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64, "ds_add_f64">; +} // End SubtargetPredicate = isGFX90APlus + defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">; defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">; defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">; @@ -674,38 +718,6 @@ defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">; defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">; defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">; -let AddedComplexity = 100 in { - -foreach vt = VReg_64.RegTypes in { -defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">; -} - -let SubtargetPredicate = isGFX7Plus in { - -foreach vt = VReg_96.RegTypes in { -defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">; -} - -let SubtargetPredicate = HasUnalignedAccessMode in { - -foreach vt = VReg_96.RegTypes in { -defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">; -} - -foreach vt = VReg_128.RegTypes in { -defm : DSReadPat_mc <DS_READ_B128, vt, "load_local">; -} - -} // End SubtargetPredicate = HasUnalignedAccessMode - -} // End SubtargetPredicate = isGFX7Plus - -} // End AddedComplexity = 100 - let OtherPredicates = [D16PreservesUnusedBits] in { def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>; def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>; @@ -829,31 +841,38 @@ foreach vt = VReg_128.RegTypes in { defm : DS128Bit8ByteAlignedPat_mc<vt>; } +// Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things +// being equal, because it has a larger immediate offset range. let AddedComplexity = 100 in { foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">; defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">; } let SubtargetPredicate = isGFX7Plus in { foreach vt = VReg_96.RegTypes in { +defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">; defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">; } foreach vt = VReg_128.RegTypes in { +defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">; defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">; } let SubtargetPredicate = HasUnalignedAccessMode in { +// FIXME: From performance point of view, is ds_read_b96/ds_write_b96 better choice +// for unaligned accesses? foreach vt = VReg_96.RegTypes in { +defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">; defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">; } -foreach vt = VReg_128.RegTypes in { -defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_local">; -} +// For performance reasons, *do not* select ds_read_b128/ds_write_b128 for unaligned +// accesses. } // End SubtargetPredicate = HasUnalignedAccessMode @@ -938,6 +957,10 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">; defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">; +let SubtargetPredicate = isGFX90APlus in { +def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>; +} + def : Pat < (SIds_ordered_count i32:$value, i16:$offset), (DS_ORDERED_COUNT $value, (as_i16imm $offset)) @@ -959,10 +982,10 @@ class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> : let Inst{17} = !if(ps.has_gds, gds, ps.gdsValue); let Inst{25-18} = op; let Inst{31-26} = 0x36; - let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0, 0)); - let Inst{47-40} = !if(ps.has_data0, data0, 0); - let Inst{55-48} = !if(ps.has_data1, data1, 0); - let Inst{63-56} = !if(ps.has_vdst, vdst, 0); + let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0)); + let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0); + let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0); + let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0); } //===----------------------------------------------------------------------===// @@ -1166,22 +1189,23 @@ defm DS_MAX_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d3>; // GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// -class DS_Real_vi <bits<8> op, DS_Pseudo ds> : - DS_Real <ds>, - SIMCInstr <ds.Mnemonic, SIEncodingFamily.VI> { +class DS_Real_vi <bits<8> op, DS_Pseudo ps> : + DS_Real <ps>, + SIMCInstr <ps.Mnemonic, SIEncodingFamily.VI> { let AssemblerPredicate = isGFX8GFX9; let DecoderNamespace = "GFX8"; // encoding - let Inst{7-0} = !if(ds.has_offset0, offset0, 0); - let Inst{15-8} = !if(ds.has_offset1, offset1, 0); - let Inst{16} = !if(ds.has_gds, gds, ds.gdsValue); + let Inst{7-0} = !if(ps.has_offset0, offset0, 0); + let Inst{15-8} = !if(ps.has_offset1, offset1, 0); + let Inst{16} = !if(ps.has_gds, gds, ps.gdsValue); let Inst{24-17} = op; + let Inst{25} = acc; let Inst{31-26} = 0x36; // ds prefix - let Inst{39-32} = !if(ds.has_addr, addr, !if(ds.has_gws_data0, data0, 0)); - let Inst{47-40} = !if(ds.has_data0, data0, 0); - let Inst{55-48} = !if(ds.has_data1, data1, 0); - let Inst{63-56} = !if(ds.has_vdst, vdst, 0); + let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0)); + let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0); + let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0); + let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0); } def DS_ADD_U32_vi : DS_Real_vi<0x0, DS_ADD_U32>; @@ -1344,3 +1368,8 @@ def DS_WRITE_B96_vi : DS_Real_vi<0xde, DS_WRITE_B96>; def DS_WRITE_B128_vi : DS_Real_vi<0xdf, DS_WRITE_B128>; def DS_READ_B96_vi : DS_Real_vi<0xfe, DS_READ_B96>; def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>; + +let SubtargetPredicate = isGFX90APlus in { + def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>; + def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>; +} // End SubtargetPredicate = isGFX90APlus |