diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/BUFInstructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 609 |
1 files changed, 374 insertions, 235 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 5dc5481df49e..5f43aa8388ee 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -6,17 +6,12 @@ // //===----------------------------------------------------------------------===// -def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">; -def MUBUFAddr64 : ComplexPattern<i64, 9, "SelectMUBUFAddr64">; -def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">; +def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">; +def MUBUFOffset : ComplexPattern<i64, 3, "SelectMUBUFOffset">; def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>; def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>; -def MUBUFOffset : ComplexPattern<i64, 8, "SelectMUBUFOffset">; -def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">; -def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; - def BUFAddrKind { int Offset = 0; int OffEn = 1; @@ -105,6 +100,8 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, bits<1> has_slc = 1; bits<1> has_tfe = 1; bits<4> elements = 0; + bits<1> has_sccb = 1; + bits<1> sccb_value = 0; } class MTBUF_Real <MTBUF_Pseudo ps> : @@ -113,6 +110,10 @@ class MTBUF_Real <MTBUF_Pseudo ps> : let isPseudo = 0; let isCodeGenOnly = 0; + let VM_CNT = 1; + let EXP_CNT = 1; + let MTBUF = 1; + // copy relevant pseudo op flags let UseNamedOperandTable = ps.UseNamedOperandTable; let SubtargetPredicate = ps.SubtargetPredicate; @@ -120,39 +121,47 @@ class MTBUF_Real <MTBUF_Pseudo ps> : let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; + let SchedRW = ps.SchedRW; + let mayLoad = ps.mayLoad; + let mayStore = ps.mayStore; + let IsAtomicRet = ps.IsAtomicRet; + let IsAtomicNoRet = ps.IsAtomicNoRet; bits<12> offset; - bits<1> glc; - bits<1> dlc; + bits<5> cpol; bits<7> format; bits<8> vaddr; - bits<8> vdata; + bits<10> vdata; bits<7> srsrc; - bits<1> slc; bits<1> tfe; bits<8> soffset; bits<4> dfmt = format{3-0}; bits<3> nfmt = format{6-4}; + + // GFX90A+ only: instruction uses AccVGPR for data + // Bit superceedes tfe. + bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); } class getMTBUFInsDA<list<RegisterClass> vdataList, list<RegisterClass> vaddrList=[]> { RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList)); RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret; dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), + offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) + offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz) ); dag InsData = !if(!empty(vaddrList), - (ins vdataClass:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), - (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) + (ins vdata_op:$vdata, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, + TFE:$tfe, SWZ:$swz), + (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, + TFE:$tfe, SWZ:$swz) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -202,9 +211,9 @@ class MTBUF_Load_Pseudo <string opName, // Workaround bug bz30254 int addrKindCopy = addrKind> : MTBUF_Pseudo<opName, - (outs vdataClass:$vdata), + (outs getLdStRegisterOperand<vdataClass>.ret:$vdata), getMTBUFIns<addrKindCopy>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -217,17 +226,11 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass, int elems, ValueType load_vt = i32, SDPatternOperator ld = null_frag> { - def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, - [(set load_vt:$vdata, - (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format, - i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, - MTBUFAddr64Table<0, NAME>; + def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>, + MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, - [(set load_vt:$vdata, - (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, - i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, - MTBUFAddr64Table<1, NAME>; + def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems>, + MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>; @@ -252,7 +255,7 @@ class MTBUF_Store_Pseudo <string opName, : MTBUF_Pseudo<opName, (outs), getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -265,16 +268,10 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass, int elems, ValueType store_vt = i32, SDPatternOperator st = null_frag> { - def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, - [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, - [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; @@ -341,6 +338,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, bits<1> has_slc = 1; bits<1> has_tfe = 1; bits<4> elements = 0; + bits<1> has_sccb = 1; + bits<1> sccb_value = 0; + bits<1> IsBufferInv = 0; } class MUBUF_Real <MUBUF_Pseudo ps> : @@ -349,6 +349,10 @@ class MUBUF_Real <MUBUF_Pseudo ps> : let isPseudo = 0; let isCodeGenOnly = 0; + let VM_CNT = 1; + let EXP_CNT = 1; + let MUBUF = 1; + // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; @@ -357,16 +361,23 @@ class MUBUF_Real <MUBUF_Pseudo ps> : let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; + let SchedRW = ps.SchedRW; + let mayLoad = ps.mayLoad; + let mayStore = ps.mayStore; + let IsAtomicRet = ps.IsAtomicRet; + let IsAtomicNoRet = ps.IsAtomicNoRet; bits<12> offset; - bits<1> glc; - bits<1> dlc; + bits<5> cpol; bits<8> vaddr; - bits<8> vdata; + bits<10> vdata; bits<7> srsrc; - bits<1> slc; bits<1> tfe; bits<8> soffset; + + // GFX90A+ only: instruction uses AccVGPR for data + // Bit superceedes tfe. + bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); } @@ -380,7 +391,8 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> : let mayLoad = 0; let mayStore = 0; - // Set everything to 0. + let IsBufferInv = 1; + // Set everything else to 0. let offen = 0; let idxen = 0; let addr64 = 0; @@ -395,6 +407,8 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> : let has_offset = 0; let has_slc = 0; let has_tfe = 0; + let has_sccb = 0; + let sccb_value = 0; } class getMUBUFInsDA<list<RegisterClass> vdataList, @@ -402,33 +416,31 @@ class getMUBUFInsDA<list<RegisterClass> vdataList, bit isLds = 0> { RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList)); RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret; dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, SLC:$slc), + offset:$offset, CPol_0:$cpol), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, SLC:$slc) + offset:$offset, CPol_0:$cpol) ); dag InsData = !if(!empty(vaddrList), - (ins vdataClass:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc), - (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc) + (ins vdata_op:$vdata, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol), + (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol) ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc,SWZ:$swz)) + !if(isLds, (ins SWZ_0:$swz), (ins TFE_0:$tfe, SWZ_0:$swz)) ); } class getMUBUFElements<ValueType vt> { - // eq does not support ValueType for some reason. - string vtAsStr = !cast<string>(vt); - int ret = - !if(!eq(vtAsStr, "f16"), 1, - !if(!eq(vtAsStr, "v2f16"), 2, - !if(!eq(vtAsStr, "v3f16"), 3, - !if(!eq(vtAsStr, "v4f16"), 4, + !if(!eq(vt, f16), 1, + !if(!eq(vt, v2f16), 2, + !if(!eq(vt, v3f16), 3, + !if(!eq(vt, v4f16), 4, !if(!eq(vt.Size, 32), 1, !if(!eq(vt.Size, 64), 2, !if(!eq(vt.Size, 96), 3, @@ -482,13 +494,15 @@ class MUBUF_Load_Pseudo <string opName, bit isLds = 0, list<dag> pattern=[], // Workaround bug bz30254 - int addrKindCopy = addrKind> + int addrKindCopy = addrKind, + RegisterClass vdata_rc = getVregSrcForVT<vdata_vt>.ret, + RegisterOperand vdata_op = getLdStRegisterOperand<vdata_rc>.ret> : MUBUF_Pseudo<opName, - (outs getVregSrcForVT<vdata_vt>.ret:$vdata), + (outs vdata_op:$vdata), !con(getMUBUFIns<addrKindCopy, [], isLds>.ret, - !if(HasTiedDest, (ins getVregSrcForVT<vdata_vt>.ret:$vdata_in), (ins))), - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe") # "$dlc$swz", + !if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))), + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" # + !if(isLds, " lds", "$tfe") # "$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -506,15 +520,15 @@ class MUBUF_Load_Pseudo <string opName, } class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < - (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset)) >; class MUBUF_Addr64_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < - (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset)) >; multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> { @@ -531,7 +545,7 @@ multiclass MUBUF_Pseudo_Loads<string opName, bit TiedDest = 0, bit isLds = 0> { - defvar legal_load_vt = !if(!eq(!cast<string>(load_vt), !cast<string>(v3f16)), v4f16, load_vt); + defvar legal_load_vt = !if(!eq(load_vt, v3f16), v4f16, load_vt); def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds>, MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>; @@ -567,7 +581,7 @@ class MUBUF_Store_Pseudo <string opName, : MUBUF_Pseudo<opName, (outs), getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -581,16 +595,16 @@ multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32, SDPatternOperator st = null_frag> { - defvar legal_store_vt = !if(!eq(!cast<string>(store_vt), !cast<string>(v3f16)), v4f16, store_vt); + defvar legal_store_vt = !if(!eq(store_vt, v3f16), v4f16, store_vt); def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, [(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + i16:$offset))]>, MUBUFAddr64Table<0, NAME>; def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt, [(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + i16:$offset))]>, MUBUFAddr64Table<1, NAME>; def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>; @@ -608,8 +622,8 @@ multiclass MUBUF_Pseudo_Stores<string opName, class MUBUF_Pseudo_Store_Lds<string opName> : MUBUF_Pseudo<opName, (outs), - (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz), - " $srsrc, $soffset$offset lds$glc$slc$swz"> { + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz), + " $srsrc, $soffset$offset lds$cpol$swz"> { let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; @@ -626,18 +640,19 @@ class MUBUF_Pseudo_Store_Lds<string opName> class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in, list<RegisterClass> vaddrList=[]> { RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret; dag ret = !if(vdata_in, !if(!empty(vaddrList), - (ins vdataClass:$vdata_in, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc), - (ins vdataClass:$vdata_in, vaddrClass:$vaddr, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc) + (ins vdata_op:$vdata_in, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol), + (ins vdata_op:$vdata_in, vaddrClass:$vaddr, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol) ), !if(!empty(vaddrList), - (ins vdataClass:$vdata, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc), - (ins vdataClass:$vdata, vaddrClass:$vaddr, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc) + (ins vdata_op:$vdata, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol), + (ins vdata_op:$vdata, vaddrClass:$vaddr, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol) )); } @@ -678,7 +693,9 @@ class MUBUF_Atomic_Pseudo<string opName, let has_glc = 0; let has_dlc = 0; let has_tfe = 0; + let has_sccb = 1; let maybeAtomic = 1; + let AsmMatchConverter = "cvtMubufAtomic"; } class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind, @@ -690,13 +707,14 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind, : MUBUF_Atomic_Pseudo<opName, addrKindCopy, (outs), getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol", pattern>, AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; let glc_value = 0; let dlc_value = 0; - let AsmMatchConverter = "cvtMubufAtomic"; + let sccb_value = 0; + let IsAtomicNoRet = 1; } class MUBUF_AtomicRet_Pseudo<string opName, int addrKind, @@ -704,19 +722,21 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind, list<dag> pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind, - RegisterClass vdataClassCopy = vdataClass> + RegisterClass vdataClassCopy = vdataClass, + RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : MUBUF_Atomic_Pseudo<opName, addrKindCopy, - (outs vdataClassCopy:$vdata), + (outs vdata_op:$vdata), getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc1$slc", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol", pattern>, AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> { let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret; let glc_value = 1; let dlc_value = 0; + let sccb_value = 0; + let IsAtomicRet = 1; let Constraints = "$vdata = $vdata_in"; let DisableEncoding = "$vdata_in"; - let AsmMatchConverter = "cvtMubufAtomicReturn"; } multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName, @@ -751,15 +771,15 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName, let FPAtomic = isFP in def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, [(set vdataType:$vdata, - (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc), + (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vdataType:$vdata_in))]>, MUBUFAddr64Table <0, NAME # "_RTN">; let FPAtomic = isFP in def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, [(set vdataType:$vdata, - (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), - vdataType:$vdata_in))]>, + (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), + vdataType:$vdata_in))]>, MUBUFAddr64Table <1, NAME # "_RTN">; let FPAtomic = isFP in @@ -1106,6 +1126,15 @@ defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_noret_32 >; + +let OtherPredicates = [isGFX90APlus] in { +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN < + "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32 +>; +defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN < + "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32 +>; +} } // End SubtargetPredicate = HasAtomicFaddInsts //===----------------------------------------------------------------------===// @@ -1154,6 +1183,17 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", } // End let SubtargetPredicate = isGFX7Plus +let SubtargetPredicate = isGFX90APlus in { + def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> { + } + def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> { + } + + defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; + defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>; + defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; +} // End SubtargetPredicate = isGFX90APlus + let SubtargetPredicate = isGFX10Plus in { def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">; def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">; @@ -1169,30 +1209,27 @@ let SubtargetPredicate = isGFX10Plus in { multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode, ValueType memoryVt = vt> { - defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mubuf_intrinsic_load<name, memoryVt>); + defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_load<name, memoryVt>); def : GCNPat< (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1201,8 +1238,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1255,32 +1291,27 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">; multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode, ValueType memoryVt = vt> { - defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mubuf_intrinsic_store<name, memoryVt>); + defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_store<name, memoryVt>); def : GCNPat< (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1289,9 +1320,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact) getVregSrcForVT<vt>.ret:$vdata, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary), + 0, (extract_swz $auxiliary)) >; } @@ -1351,7 +1381,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, timm:$offset, timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_slc $cachepolicy)) + (as_i16timm $offset), (set_glc $cachepolicy)) >; def : GCNPat< @@ -1359,7 +1389,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, timm:$offset, timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_slc $cachepolicy)) + (as_i16timm $offset), (set_glc $cachepolicy)) >; def : GCNPat< @@ -1367,7 +1397,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, i32:$soffset, timm:$offset, timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_slc $cachepolicy)) + (as_i16timm $offset), (set_glc $cachepolicy)) >; def : GCNPat< @@ -1377,7 +1407,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, getVregSrcForVT<vt>.ret:$vdata_in, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_slc $cachepolicy)) + (set_glc $cachepolicy)) >; } @@ -1425,7 +1455,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_slc $cachepolicy)) + (as_i16timm $offset), $cachepolicy) >; def : GCNPat< @@ -1433,7 +1463,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_slc $cachepolicy)) + (as_i16timm $offset), $cachepolicy) >; def : GCNPat< @@ -1441,7 +1471,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_slc $cachepolicy)) + (as_i16timm $offset), $cachepolicy) >; def : GCNPat< @@ -1451,7 +1481,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) getVregSrcForVT<vt>.ret:$vdata_in, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)) + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy) >; } @@ -1460,15 +1490,24 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">; } +let SubtargetPredicate = isGFX90APlus in { + defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">; + defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">; + + defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f64, "BUFFER_ATOMIC_ADD_F64">; + defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_MIN_F64">; + defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_MAX_F64">; +} // End SubtargetPredicate = isGFX90APlus + def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), - (EXTRACT_SUBREG + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_slc $cachepolicy)), sub0) + (set_glc $cachepolicy)), VReg_64)), sub0) >; def : GCNPat< @@ -1476,10 +1515,11 @@ def : GCNPat< i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), - (EXTRACT_SUBREG + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), + VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + (set_glc $cachepolicy)), VReg_64)), sub0) >; @@ -1488,10 +1528,11 @@ def : GCNPat< i32:$data, i32:$cmp, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), - (EXTRACT_SUBREG + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), + VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + (set_glc $cachepolicy)), VReg_64)), sub0) >; @@ -1500,32 +1541,32 @@ def : GCNPat< i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), - (EXTRACT_SUBREG + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + (set_glc $cachepolicy)), VReg_64)), sub0) >; class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt, PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset) >; multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET, ValueType vt, PatFrag atomic_ld> { def : GCNPat < - (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) + (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset) >; def : GCNPat < - (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) + (vt (atomic_ld (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset))), + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset)) >; } @@ -1545,9 +1586,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, PatFrag ld> { def : GCNPat < - (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))), + (Instr_OFFSET $srsrc, $soffset, $offset) >; } @@ -1570,12 +1610,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen, def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) >; } @@ -1585,12 +1625,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen, ValueType vt, PatFrag ld_frag> { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) + (InstrOffset $srsrc, $soffset, $offset, $in) >; } @@ -1635,14 +1675,13 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In ValueType vt, PatFrag atomic_st> { // Store follows atomic op convention so address is first def : GCNPat < - (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) + (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val), + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset) >; def : GCNPat < - (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) + (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset)) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1655,9 +1694,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, PatFrag st> { def : GCNPat < - (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)), - (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset)), + (Instr_OFFSET $vdata, $srsrc, $soffset, $offset) >; } @@ -1671,13 +1709,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen, def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0) >; } @@ -1716,15 +1754,14 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode, ValueType memoryVt = vt> { - defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mtbuf_intrinsic_load<name, memoryVt>); + defvar st = !if(!eq(memoryVt, vt), name, mtbuf_intrinsic_load<name, memoryVt>); def : GCNPat< (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1732,8 +1769,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, timm:$format, timm:$auxiliary, timm)), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1741,8 +1777,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, timm:$format, timm:$auxiliary, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1752,8 +1787,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1784,15 +1818,14 @@ let SubtargetPredicate = HasPackedD16VMem in { multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode, ValueType memoryVt = vt> { - defvar st = !if(!eq(!cast<string>(memoryVt), !cast<string>(vt)), name, mtbuf_intrinsic_store<name, memoryVt>); + defvar st = !if(!eq(memoryVt, vt), name, mtbuf_intrinsic_store<name, memoryVt>); def : GCNPat< (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$format, timm:$auxiliary, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1800,8 +1833,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, timm:$format, timm:$auxiliary, timm), (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1809,8 +1841,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, timm:$format, timm:$auxiliary, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1820,8 +1851,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, getVregSrcForVT<vt>.ret:$vdata, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1863,21 +1893,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> : let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; let Inst{24-18} = op; let Inst{31-26} = 0x38; let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> { - let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); let Inst{25} = op{7}; } @@ -1891,13 +1921,6 @@ class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> : //===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { - multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName, - string asmName> { - def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> { - MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName); - let AsmString = asmName # ps.AsmOperands; - } - } multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> { def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; @@ -1929,16 +1952,33 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> { def _BOTHEN_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx10", 1>; def _IDXEN_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>, + AtomicNoRet<NAME # "_IDXEN_gfx10", 1>; def _OFFEN_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>, + AtomicNoRet<NAME # "_OFFEN_gfx10", 1>; def _OFFSET_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>, + AtomicNoRet<NAME # "_OFFSET_gfx10", 1>; } multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> : - MUBUF_Real_AllAddr_gfx10<op>, MUBUF_Real_Atomics_RTN_gfx10<op>; + MUBUF_Real_Atomics_RTN_gfx10<op> { + def _BOTHEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx10", 0>; + def _IDXEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + AtomicNoRet<NAME # "_IDXEN_gfx10", 0>; + def _OFFEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + AtomicNoRet<NAME # "_OFFEN_gfx10", 0>; + def _OFFSET_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + AtomicNoRet<NAME # "_OFFSET_gfx10", 0>; + } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>; @@ -2018,18 +2058,38 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">; } - multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> : - MUBUF_Real_AllAddr_gfx6_gfx7<op> { + multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> { + def _ADDR64_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>, + AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 0>; + def _BOTHEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 0>; + def _IDXEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 0>; + def _OFFEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 0>; + def _OFFSET_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 0>; + def _ADDR64_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>, + AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 1>; def _BOTHEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 1>; def _IDXEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>, + AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 1>; def _OFFEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>, + AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 1>; def _OFFSET_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>, + AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 1>; } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" @@ -2118,13 +2178,13 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> : let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-16} = op; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } @@ -2135,7 +2195,7 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> : class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> : Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> { - let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); let Inst{25-19} = format; let Inst{53} = op{3}; } @@ -2204,33 +2264,58 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>; // GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// -class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> : +class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc, + bit has_sccb = ps.has_sccb> : MUBUF_Real<ps>, Enc64, - SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> { - let AssemblerPredicate = isGFX8GFX9; - let DecoderNamespace = "GFX8"; + SIMCInstr<ps.PseudoInstr, Enc>, + AtomicNoRet<!subst("_RTN","",NAME), !if(ps.IsAtomicNoRet, 0, + !if(ps.IsAtomicRet, 1, ?))> { let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); + let Inst{15} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); let Inst{16} = ps.lds; - let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } +class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps, bit has_sccb = ps.has_sccb> : + MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.VI, has_sccb> { + let AssemblerPredicate = isGFX8GFX9NotGFX90A; + let DecoderNamespace = "GFX8"; + + let Inst{55} = !if(ps.has_tfe, tfe, ?); +} + +class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps, + bit has_sccb = ps.has_sccb> : + MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX90A, has_sccb> { + let AssemblerPredicate = isGFX90APlus; + let DecoderNamespace = "GFX90A"; + let AsmString = ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb",""), + !subst("$tfe", "", ps.AsmOperands)); + + let Inst{55} = acc; +} + +multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> { + def _vi : MUBUF_Real_vi<op, ps>; + def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>; +} + multiclass MUBUF_Real_AllAddr_vi<bits<7> op> { - def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; - def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; - def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; - def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; + defm _OFFSET : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + defm _OFFEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + defm _IDXEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + defm _BOTHEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; } multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> { @@ -2252,6 +2337,24 @@ multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> { MUBUFLdsTable<1, NAME # "_IDXEN_vi">; def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, MUBUFLdsTable<1, NAME # "_BOTHEN_vi">; + + def _OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + MUBUFLdsTable<0, NAME # "_OFFSET_gfx90a">; + def _OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + MUBUFLdsTable<0, NAME # "_OFFEN_gfx90a">; + def _IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + MUBUFLdsTable<0, NAME # "_IDXEN_gfx90a">; + def _BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + MUBUFLdsTable<0, NAME # "_BOTHEN_gfx90a">; + + def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, + MUBUFLdsTable<1, NAME # "_OFFSET_gfx90a">; + def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, + MUBUFLdsTable<1, NAME # "_OFFEN_gfx90a">; + def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, + MUBUFLdsTable<1, NAME # "_IDXEN_gfx90a">; + def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, + MUBUFLdsTable<1, NAME # "_BOTHEN_gfx90a">; } class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> : @@ -2264,13 +2367,13 @@ class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> : let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; - let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); @@ -2285,10 +2388,10 @@ multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> { multiclass MUBUF_Real_Atomic_vi<bits<7> op> : MUBUF_Real_AllAddr_vi<op> { - def _OFFSET_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; - def _OFFEN_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; - def _IDXEN_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; - def _BOTHEN_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + defm _OFFSET_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + defm _OFFEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + defm _IDXEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + defm _BOTHEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; } defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_vi <0x00>; @@ -2374,46 +2477,79 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_vi <0x6a>; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_vi <0x6b>; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_vi <0x6c>; -def BUFFER_STORE_LDS_DWORD_vi : MUBUF_Real_vi <0x3d, BUFFER_STORE_LDS_DWORD>; +defm BUFFER_STORE_LDS_DWORD : MUBUF_Real_vi_gfx90a <0x3d, BUFFER_STORE_LDS_DWORD>; +let AssemblerPredicate = isGFX8GFX9 in { def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>; def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; +} // End AssemblerPredicate = isGFX8GFX9 let SubtargetPredicate = HasAtomicFaddInsts in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_AllAddr_vi <0x4d>; -defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_AllAddr_vi <0x4e>; +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>; +defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; } // End SubtargetPredicate = HasAtomicFaddInsts -class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> : +let SubtargetPredicate = isGFX90APlus in { + defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>; + defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_vi<0x50>; + defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_vi<0x51>; +} // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus + +def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> { +} +def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>; + +class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> : MTBUF_Real<ps>, Enc64, - SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> { - let AssemblerPredicate = isGFX8GFX9; - let DecoderNamespace = "GFX8"; + SIMCInstr<ps.PseudoInstr, Enc> { let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-15} = op; let Inst{22-19} = dfmt; let Inst{25-23} = nfmt; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{53} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } +class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> : + MTBUF_Real_Base_vi <op, ps, SIEncodingFamily.VI> { + let AssemblerPredicate = isGFX8GFX9NotGFX90A; + let DecoderNamespace = "GFX8"; + + let Inst{55} = !if(ps.has_tfe, tfe, ?); +} + +class MTBUF_Real_gfx90a <bits<4> op, MTBUF_Pseudo ps> : + MTBUF_Real_Base_vi <op, ps, SIEncodingFamily.GFX90A> { + let AssemblerPredicate = isGFX90APlus; + let DecoderNamespace = "GFX90A"; + let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands); + + let Inst{55} = acc; +} + +multiclass MTBUF_Real_vi_gfx90a<bits<4> op, MTBUF_Pseudo ps> { + def _vi : MTBUF_Real_vi<op, ps>; + def _gfx90a : MTBUF_Real_gfx90a<op, ps>; +} + multiclass MTBUF_Real_AllAddr_vi<bits<4> op> { - def _OFFSET_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; - def _OFFEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; - def _IDXEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; - def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; + defm _OFFSET : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; + defm _OFFEN : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; + defm _IDXEN : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; + defm _BOTHEN : MTBUF_Real_vi_gfx90a <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; } class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> : @@ -2426,15 +2562,15 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> : let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-15} = op; let Inst{22-19} = dfmt; let Inst{25-23} = nfmt; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } @@ -2478,7 +2614,10 @@ let SubtargetPredicate = HasPackedD16VMem in { def MUBUFInfoTable : GenericTable { let FilterClass = "MUBUF_Pseudo"; let CppTypeName = "MUBUFInfo"; - let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; + let Fields = [ + "Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset", + "IsBufferInv" + ]; let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getMUBUFOpcodeHelper"; |