diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td | 100 |
1 files changed, 89 insertions, 11 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td index dddd0aacc140..a911483cade5 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag< }]; } +def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel); + let HasClamp = 0; + let HasExtVOP3DPP = 0; +} + +def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 1; + let AsmVOP3OpSel = !subst(", $src2_modifiers", "", + getAsmVOP3OpSel<3, HasClamp, + HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods>.ret); + let HasExtVOP3DPP = 0; +} + let SubtargetPredicate = isGFX9Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; @@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32 let SubtargetPredicate = isGFX940Plus in defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>; +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { + defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; + defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; + } + + // These instructions have non-standard use of op_sel. 
In particular they are + // using op_sel bits 2 and 3 while only having two sources. Therefore a + // dummy src2 operand is used to hold the op_sel value. + let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in { + defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>; + defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>; + } +} + +class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat< + (i32 (node f32:$src0, f32:$src1, i32:$old, index)), + (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0)) +>; + +class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat< + (i32 (node f32:$src0, i32:$src1, i32:$old, index)), + (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, + !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0)) +>; + +foreach Index = [0, -1] in { + def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>; + def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>; +} + +foreach Index = [0, 1, 2, 3] in { + def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>; + def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>; +} + class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat < // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. 
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2), @@ -699,15 +760,19 @@ def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>; } class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> { - // FIXME VOP3 DPP versions are unsupported - let HasExtVOP3DPP = 0; let HasClamp = 0; let HasOMod = 0; - let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, - NumSrcArgs, HasClamp, HasOMod, - !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods), - !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods), - !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret; + // Override modifiers for bf16(i16) (same as float modifiers). + let HasSrc0Mods = 1; + let HasSrc1Mods = 1; + let HasSrc2Mods = 1; + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; + let Src2ModVOP3DPP = FPVRegInputMods; + let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, + HasClamp, HasOMod, FPVRegInputMods, + FPVRegInputMods, FPVRegInputMods>.ret; + let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret; } let SubtargetPredicate = isGFX11Plus in { @@ -723,7 +788,7 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>; } // End SubtargetPredicate = isGFX11Plus -let SubtargetPredicate = HasDot8Insts in { +let SubtargetPredicate = HasDot8Insts, IsDOT=1 in { defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>; defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>; } @@ -848,9 +913,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>; defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>; defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>; -// FIXME VOP3 DPP Dot instructions are unsupported -defm V_DOT2_F16_F16 : 
VOP3_Real_Base_gfx11<0x266>; -defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>; +defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>; +defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; @@ -1161,6 +1225,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> { VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>; } +multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> { + def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> { + let Inst{13} = src2_modifiers{2}; // op_sel(2) + } +} + multiclass VOP3Interp_Real_vi<bits<10> op> { def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>; @@ -1352,3 +1423,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>; defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>; defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; + +let OtherPredicates = [HasFP8Insts] in { +defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>; +defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; +defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; +defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; +} |