aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td100
1 files changed, 89 insertions, 11 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index dddd0aacc140..a911483cade5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag<
}];
}
+def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+ VGPR_32:$vdst_in, op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
+ VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+ FP32InputMods:$src2_modifiers, VGPR_32:$src2,
+ op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasSrc2 = 0;
+ let HasSrc2Mods = 1;
+ let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
+ getAsmVOP3OpSel<3, HasClamp,
+ HasSrc0FloatMods, HasSrc1FloatMods,
+ HasSrc2FloatMods>.ret);
+ let HasExtVOP3DPP = 0;
+}
+
let SubtargetPredicate = isGFX9Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
let SubtargetPredicate = isGFX940Plus in
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
+let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
+ defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>;
+ defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>;
+ }
+
+ // These instructions have non-standard use of op_sel. In particular they are
+ // using op_sel bits 2 and 3 while only having two sources. Therefore dummy
+ // src2 is used to hold the op_sel value.
+ let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
+ defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
+ defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
+ }
+}
+
+class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
+ (i32 (node f32:$src0, f32:$src1, i32:$old, index)),
+ (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0))
+>;
+
+class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
+ (i32 (node f32:$src0, i32:$src1, i32:$old, index)),
+ (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
+ !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0))
+>;
+
+foreach Index = [0, -1] in {
+ def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
+ def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
+}
+
+foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
+ def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
+}
+
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
@@ -699,15 +760,19 @@ def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}
class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
- // FIXME VOP3 DPP versions are unsupported
- let HasExtVOP3DPP = 0;
let HasClamp = 0;
let HasOMod = 0;
- let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs, HasClamp, HasOMod,
- !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods),
- !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods),
- !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret;
+ // Override modifiers for bf16(i16) (same as float modifiers).
+ let HasSrc0Mods = 1;
+ let HasSrc1Mods = 1;
+ let HasSrc2Mods = 1;
+ let Src0ModDPP = FPVRegInputMods;
+ let Src1ModDPP = FPVRegInputMods;
+ let Src2ModVOP3DPP = FPVRegInputMods;
+ let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ HasClamp, HasOMod, FPVRegInputMods,
+ FPVRegInputMods, FPVRegInputMods>.ret;
+ let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret;
}
let SubtargetPredicate = isGFX11Plus in {
@@ -723,7 +788,7 @@ let SubtargetPredicate = isGFX11Plus in {
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
} // End SubtargetPredicate = isGFX11Plus
-let SubtargetPredicate = HasDot8Insts in {
+let SubtargetPredicate = HasDot8Insts, IsDOT=1 in {
defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
}
@@ -848,9 +913,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>;
defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>;
-// FIXME VOP3 DPP Dot instructions are unsupported
-defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>;
-defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>;
+defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>;
+defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>;
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
@@ -1161,6 +1225,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}
+multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
+ let Inst{13} = src2_modifiers{2}; // op_sel(2)
+ }
+}
+
multiclass VOP3Interp_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
@@ -1352,3 +1423,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
+
+let OtherPredicates = [HasFP8Insts] in {
+defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
+defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
+defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
+defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
+}