diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 2614 |
1 files changed, 1490 insertions, 1124 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 32f012033fb0..a3ad0b1c8dd6 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -76,11 +76,11 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc, PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT); PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName); - ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"), - !cast<ComplexPattern>("sse_load_f32"), - !if (!eq (EltTypeName, "f64"), - !cast<ComplexPattern>("sse_load_f64"), - ?)); + PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"), + !cast<PatFrags>("sse_load_f32"), + !if (!eq (EltTypeName, "f64"), + !cast<PatFrags>("sse_load_f64"), + ?)); // The string to specify embedded broadcast in assembly. string BroadcastStr = "{1to" # NumElts # "}"; @@ -169,6 +169,18 @@ def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>; def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>; def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>; +// Used for matching masked operations. Ensures the operation part only has a +// single use. +def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2), + (vselect node:$mask, node:$src1, node:$src2), [{ + return isProfitableToFormMaskedOp(N); +}]>; + +def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2), + (X86selects node:$mask, node:$src1, node:$src2), [{ + return isProfitableToFormMaskedOp(N); +}]>; + // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. // It assumes custom ISel patterns for masking which can be provided as @@ -220,7 +232,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, - SDNode Select = vselect, + SDPatternOperator Select = vselect_mask, string MaskingConstraint = "", bit IsCommutable = 0, bit IsKCommutable = 0, @@ -236,35 +248,36 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the -// perserved vector elements come from a new dummy input operand tied to $dst. +// preserved vector elements come from a new dummy input operand tied to $dst. // This version uses a separate dag for non-masking and masking. multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskRHS, bit IsCommutable = 0, bit IsKCommutable = 0, - SDNode Select = vselect> : + bit IsKZCommutable = IsCommutable> : AVX512_maskable_custom<O, F, Outs, Ins, !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, [(set _.RC:$dst, RHS)], [(set _.RC:$dst, - (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))], + (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))], [(set _.RC:$dst, - (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))], - "$src0 = $dst", IsCommutable, IsKCommutable>; + (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))], + "$src0 = $dst", IsCommutable, IsKCommutable, + IsKZCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the -// perserved vector elements come from a new dummy input operand tied to $dst. +// preserved vector elements come from a new dummy input operand tied to $dst. multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, bit IsCommutable = 0, bit IsKCommutable = 0, bit IsKZCommutable = IsCommutable, - SDNode Select = vselect> : + SDPatternOperator Select = vselect_mask> : AVX512_maskable_common<O, F, _, Outs, Ins, !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), !con((ins _.KRCWM:$mask), Ins), @@ -280,7 +293,7 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _, string AttSrcAsm, string IntelSrcAsm, dag RHS> : AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm, - RHS, 0, 0, 0, X86selects>; + RHS, 0, 0, 0, X86selects_mask>; // Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved @@ -292,7 +305,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _, dag RHS, bit IsCommutable = 0, bit IsKCommutable = 0, - SDNode Select = vselect, + SDPatternOperator Select = vselect_mask, bit MaskOnly = 0> : AVX512_maskable_common<O, F, _, Outs, !con((ins _.RC:$src1), NonTiedIns), @@ -317,9 +330,9 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag), - (vselect InVT.KRCWM:$mask, RHS, + (vselect_mask InVT.KRCWM:$mask, RHS, (bitconvert InVT.RC:$src1)), - vselect, "", IsCommutable>; + vselect_mask, "", IsCommutable>; multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, @@ -330,7 +343,7 @@ multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _, bit MaskOnly = 0> : AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, IsCommutable, IsKCommutable, - X86selects, MaskOnly>; + X86selects_mask, MaskOnly>; multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, @@ -399,6 +412,36 @@ multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (and _.KRCWM:$mask, RHS_su), IsCommutable>; +// Used by conversion instructions. +multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> : + AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, + AttSrcAsm, IntelSrcAsm, + [(set _.RC:$dst, RHS)], + [(set _.RC:$dst, MaskingRHS)], + [(set _.RC:$dst, ZeroMaskingRHS)], + "$src0 = $dst">; + +multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, bit IsCommutable, + bit IsKCommutable> : + AVX512_maskable_custom<O, F, Outs, + !con((ins _.RC:$src1), NonTiedIns), + !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), + !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), + OpcodeStr, AttSrcAsm, IntelSrcAsm, + [(set _.RC:$dst, RHS)], + [(set _.RC:$dst, + (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))], + [(set _.RC:$dst, + (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))], + "", IsCommutable, IsKCommutable>; // Alias instruction that maps zero vector to pxor / xorp* for AVX-512. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then @@ -625,45 +668,45 @@ multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From, list<Predicate> p> { let Predicates = p in { def : Pat<(Cast.VT - (vselect Cast.KRCWM:$mask, - (bitconvert - (vinsert_insert:$ins (To.VT To.RC:$src1), - (From.VT From.RC:$src2), - (iPTR imm))), - Cast.RC:$src0)), + (vselect_mask Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT From.RC:$src2), + (iPTR imm))), + Cast.RC:$src0)), (!cast<Instruction>(InstrStr#"rrk") Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; def : Pat<(Cast.VT - (vselect Cast.KRCWM:$mask, - (bitconvert - (vinsert_insert:$ins (To.VT To.RC:$src1), - (From.VT - (bitconvert - (From.LdFrag addr:$src2))), - (iPTR imm))), - Cast.RC:$src0)), + (vselect_mask Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT + (bitconvert + (From.LdFrag addr:$src2))), + (iPTR imm))), + Cast.RC:$src0)), (!cast<Instruction>(InstrStr#"rmk") Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; def : Pat<(Cast.VT - (vselect Cast.KRCWM:$mask, - (bitconvert - (vinsert_insert:$ins (To.VT To.RC:$src1), - (From.VT From.RC:$src2), - (iPTR imm))), - Cast.ImmAllZerosV)), + (vselect_mask Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT From.RC:$src2), + (iPTR imm))), + Cast.ImmAllZerosV)), (!cast<Instruction>(InstrStr#"rrkz") Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; def : Pat<(Cast.VT - (vselect Cast.KRCWM:$mask, - (bitconvert - (vinsert_insert:$ins (To.VT To.RC:$src1), - (From.VT (From.LdFrag addr:$src2)), - (iPTR imm))), - Cast.ImmAllZerosV)), + (vselect_mask Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT (From.LdFrag addr:$src2)), + (iPTR imm))), + Cast.ImmAllZerosV)), (!cast<Instruction>(InstrStr#"rmkz") Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, (INSERT_get_vinsert_imm To.RC:$ins))>; @@ -981,20 +1024,20 @@ multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From, SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> { let Predicates = p in { - def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, - (bitconvert - (To.VT (vextract_extract:$ext - (From.VT From.RC:$src), (iPTR imm)))), - To.RC:$src0)), + def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask, + (bitconvert + (To.VT (vextract_extract:$ext + (From.VT From.RC:$src), (iPTR imm)))), + To.RC:$src0)), (Cast.VT (!cast<Instruction>(InstrStr#"rrk") Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src, (EXTRACT_get_vextract_imm To.RC:$ext)))>; - def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, - (bitconvert - (To.VT (vextract_extract:$ext - (From.VT From.RC:$src), (iPTR imm)))), - Cast.ImmAllZerosV)), + def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask, + (bitconvert + (To.VT (vextract_extract:$ext + (From.VT From.RC:$src), (iPTR imm)))), + Cast.ImmAllZerosV)), (Cast.VT (!cast<Instruction>(InstrStr#"rrkz") Cast.KRCWM:$mask, From.RC:$src, (EXTRACT_get_vextract_imm To.RC:$ext)))>; @@ -1101,18 +1144,18 @@ multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr, string Name, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)), - (!cast<Instruction>(Name#DestInfo.ZSuffix#r) + (!cast<Instruction>(Name#DestInfo.ZSuffix#rr) (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; - def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, - (X86VBroadcast SrcInfo.FRC:$src), - DestInfo.RC:$src0)), - (!cast<Instruction>(Name#DestInfo.ZSuffix#rk) + def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask, + (X86VBroadcast SrcInfo.FRC:$src), + DestInfo.RC:$src0)), + (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk) DestInfo.RC:$src0, DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; - def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask, - (X86VBroadcast SrcInfo.FRC:$src), - DestInfo.ImmAllZerosV)), - (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz) + def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask, + (X86VBroadcast SrcInfo.FRC:$src), + DestInfo.ImmAllZerosV)), + (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz) DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>; } @@ -1128,83 +1171,83 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr, SDPatternOperator UnmaskedOp = X86VBroadcast, SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> { let hasSideEffects = 0 in - def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set MaskInfo.RC:$dst, - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))], - DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>; - def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), - (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src), - !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", - "${dst} {${mask}} {z}, $src}"), - [(set MaskInfo.RC:$dst, - (vselect MaskInfo.KRCWM:$mask, - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), - MaskInfo.ImmAllZerosV))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; - let Constraints = "$src0 = $dst" in - def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), - (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, - SrcInfo.RC:$src), - !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", - "${dst} {${mask}}, $src}"), + def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set MaskInfo.RC:$dst, - (vselect MaskInfo.KRCWM:$mask, - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), - MaskInfo.RC:$src0))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))], + DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>; + def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), + (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", + "${dst} {${mask}} {z}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect_mask MaskInfo.KRCWM:$mask, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; + let Constraints = "$src0 = $dst" in + def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), + (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, + SrcInfo.RC:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", + "${dst} {${mask}}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect_mask MaskInfo.KRCWM:$mask, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), + MaskInfo.RC:$src0))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; let hasSideEffects = 0, mayLoad = 1 in - def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), - (ins SrcInfo.ScalarMemOp:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set MaskInfo.RC:$dst, - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (UnmaskedBcastOp addr:$src)))))], - DestInfo.ExeDomain>, T8PD, EVEX, - EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; - - def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), - (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src), - !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", - "${dst} {${mask}} {z}, $src}"), - [(set MaskInfo.RC:$dst, - (vselect MaskInfo.KRCWM:$mask, - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (SrcInfo.BroadcastLdFrag addr:$src)))), - MaskInfo.ImmAllZerosV))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, - EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; + def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), + (ins SrcInfo.ScalarMemOp:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set MaskInfo.RC:$dst, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (UnmaskedBcastOp addr:$src)))))], + DestInfo.ExeDomain>, T8PD, EVEX, + EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; + + def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), + (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", + "${dst} {${mask}} {z}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect_mask MaskInfo.KRCWM:$mask, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (SrcInfo.BroadcastLdFrag addr:$src)))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, + EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = IsConvertibleToThreeAddress in - def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), - (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, - SrcInfo.ScalarMemOp:$src), - !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", - "${dst} {${mask}}, $src}"), - [(set MaskInfo.RC:$dst, - (vselect MaskInfo.KRCWM:$mask, - (MaskInfo.VT - (bitconvert - (DestInfo.VT - (SrcInfo.BroadcastLdFrag addr:$src)))), - MaskInfo.RC:$src0))], - DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, - EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; + def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst), + (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, + SrcInfo.ScalarMemOp:$src), + !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|", + "${dst} {${mask}}, $src}"), + [(set MaskInfo.RC:$dst, + (vselect_mask MaskInfo.KRCWM:$mask, + (MaskInfo.VT + (bitconvert + (DestInfo.VT + (SrcInfo.BroadcastLdFrag addr:$src)))), + MaskInfo.RC:$src0))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, + EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>; } // Helper class to force mask and broadcast result to same type. @@ -1267,35 +1310,38 @@ defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC> { + // Fold with a mask even if it has multiple uses since it is cheap. let ExeDomain = _.ExeDomain in - defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins SrcRC:$src), - "vpbroadcast"##_.Suffix, "$src", "$src", - (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX, - Sched<[SchedRR]>; + defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins SrcRC:$src), + "vpbroadcast"#_.Suffix, "$src", "$src", + (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0, + /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>, + T8PD, EVEX, Sched<[SchedRR]>; } multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg> { let hasSideEffects = 0, ExeDomain = _.ExeDomain in - defm r : AVX512_maskable_custom<opc, MRMSrcReg, - (outs _.RC:$dst), (ins GR32:$src), - !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), - !con((ins _.KRCWM:$mask), (ins GR32:$src)), - "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [], - "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; + defm rr : AVX512_maskable_custom<opc, MRMSrcReg, + (outs _.RC:$dst), (ins GR32:$src), + !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), + !con((ins _.KRCWM:$mask), (ins GR32:$src)), + "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [], + "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; def : Pat <(_.VT (OpNode SrcRC:$src)), - (!cast<Instruction>(Name#r) + (!cast<Instruction>(Name#rr) (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + // Fold with a mask even if it has multiple uses since it is cheap. def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), - (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask, + (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask, (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), - (!cast<Instruction>(Name#rkz) _.KRCWM:$mask, + (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask, (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; } @@ -1392,72 +1438,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, AVX5128IBase, EVEX; } -let Predicates = [HasAVX512] in { - // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. - def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), - (VPBROADCASTQZm addr:$src)>; - - // FIXME this is to handle aligned extloads from i8. - def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDZm addr:$src)>; -} - -let Predicates = [HasVLX] in { - // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. - def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), - (VPBROADCASTQZ128m addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), - (VPBROADCASTQZ256m addr:$src)>; - - // FIXME this is to handle aligned extloads from i8. - def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDZ128m addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDZ256m addr:$src)>; -} -let Predicates = [HasVLX, HasBWI] in { - // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. - // This means we'll encounter truncated i32 loads; match that here. - def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), - (VPBROADCASTWZ128m addr:$src)>; - def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), - (VPBROADCASTWZ256m addr:$src)>; - def : Pat<(v8i16 (X86VBroadcast - (i16 (trunc (i32 (extloadi16 addr:$src)))))), - (VPBROADCASTWZ128m addr:$src)>; - def : Pat<(v8i16 (X86VBroadcast - (i16 (trunc (i32 (zextloadi16 addr:$src)))))), - (VPBROADCASTWZ128m addr:$src)>; - def : Pat<(v16i16 (X86VBroadcast - (i16 (trunc (i32 (extloadi16 addr:$src)))))), - (VPBROADCASTWZ256m addr:$src)>; - def : Pat<(v16i16 (X86VBroadcast - (i16 (trunc (i32 (zextloadi16 addr:$src)))))), - (VPBROADCASTWZ256m addr:$src)>; - - // FIXME this is to handle aligned extloads from i8. - def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWZ128m addr:$src)>; - def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWZ256m addr:$src)>; -} -let Predicates = [HasBWI] in { - // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. - // This means we'll encounter truncated i32 loads; match that here. - def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), - (VPBROADCASTWZm addr:$src)>; - def : Pat<(v32i16 (X86VBroadcast - (i16 (trunc (i32 (extloadi16 addr:$src)))))), - (VPBROADCASTWZm addr:$src)>; - def : Pat<(v32i16 (X86VBroadcast - (i16 (trunc (i32 (zextloadi16 addr:$src)))))), - (VPBROADCASTWZm addr:$src)>; - - // FIXME this is to handle aligned extloads from i8. - def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWZm addr:$src)>; -} - //===----------------------------------------------------------------------===// // AVX-512 BROADCAST SUBVECTORS // @@ -1516,38 +1496,38 @@ def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))), (VBROADCASTI32X4rm addr:$src)>; // Patterns for selects of bitcasted operations. -def : Pat<(vselect VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), - (v16f32 immAllZerosV)), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), + (v16f32 immAllZerosV)), (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), + VR512:$src0), (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), - (v16i32 immAllZerosV)), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), + (v16i32 immAllZerosV)), (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), + VR512:$src0), (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), - (v8f64 immAllZerosV)), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), + (v8f64 immAllZerosV)), (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), + VR512:$src0), (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), - (v8i64 immAllZerosV)), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), + (v8i64 immAllZerosV)), (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), + VR512:$src0), (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; } @@ -1569,21 +1549,21 @@ def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))), (VBROADCASTI32X4Z256rm addr:$src)>; // Patterns for selects of bitcasted operations. -def : Pat<(vselect VK8WM:$mask, - (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), - (v8f32 immAllZerosV)), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), + (v8f32 immAllZerosV)), (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), - VR256X:$src0), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), + VR256X:$src0), (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), - (v8i32 immAllZerosV)), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), + (v8i32 immAllZerosV)), (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), - VR256X:$src0), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))), + VR256X:$src0), (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; @@ -1618,21 +1598,21 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2" EVEX_V256, EVEX_CD8<64, CD8VT2>; // Patterns for selects of bitcasted operations. -def : Pat<(vselect VK4WM:$mask, - (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), - (v4f64 immAllZerosV)), +def : Pat<(vselect_mask VK4WM:$mask, + (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), + (v4f64 immAllZerosV)), (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; -def : Pat<(vselect VK4WM:$mask, - (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), - VR256X:$src0), +def : Pat<(vselect_mask VK4WM:$mask, + (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), + VR256X:$src0), (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; -def : Pat<(vselect VK4WM:$mask, - (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), - (v4i64 immAllZerosV)), +def : Pat<(vselect_mask VK4WM:$mask, + (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), + (v4i64 immAllZerosV)), (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; -def : Pat<(vselect VK4WM:$mask, - (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), - VR256X:$src0), +def : Pat<(vselect_mask VK4WM:$mask, + (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), + VR256X:$src0), (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; } @@ -1651,38 +1631,38 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", EVEX_V512, EVEX_CD8<32, CD8VT8>; // Patterns for selects of bitcasted operations. -def : Pat<(vselect VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), - (v16f32 immAllZerosV)), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), + (v16f32 immAllZerosV)), (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), + VR512:$src0), (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), - (v16i32 immAllZerosV)), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), + (v16i32 immAllZerosV)), (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK16WM:$mask, + (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))), + VR512:$src0), (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), - (v8f64 immAllZerosV)), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), + (v8f64 immAllZerosV)), (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), + VR512:$src0), (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), - (v8i64 immAllZerosV)), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), + (v8i64 immAllZerosV)), (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; -def : Pat<(vselect VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), - VR512:$src0), +def : Pat<(vselect_mask VK8WM:$mask, + (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), + VR512:$src0), (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; } @@ -1836,24 +1816,27 @@ defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, X86VectorVTInfo IdxVT, X86VectorVTInfo CastVT> { - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86VPermt2 (_.VT _.RC:$src2), - (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3), - (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, + (X86VPermt2 (_.VT _.RC:$src2), + (IdxVT.VT (bitconvert + (CastVT.VT _.RC:$src1))), + _.RC:$src3), + (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86VPermt2 _.RC:$src2, - (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), - (_.LdFrag addr:$src3)), - (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, + (X86VPermt2 _.RC:$src2, + (IdxVT.VT (bitconvert + (CastVT.VT _.RC:$src1))), + (_.LdFrag addr:$src3)), + (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86VPermt2 _.RC:$src2, - (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), - (_.BroadcastLdFrag addr:$src3)), - (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, + (X86VPermt2 _.RC:$src2, + (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), + (_.BroadcastLdFrag addr:$src3)), + (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; } @@ -2085,9 +2068,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, + (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), timm:$cc), - (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, + (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; @@ -2646,13 +2629,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix# + OpcodeStr#_.Suffix# "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su (_.VT _.RC:$src1), @@ -2660,18 +2643,18 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, EVEX_K, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix## + OpcodeStr#_.Suffix# "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, - (X86Vfpclasss _.ScalarIntMemCPat:$src1, - (i32 timm:$src2)))]>, + (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1), + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix## + OpcodeStr#_.Suffix# "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, - (X86Vfpclasss_su _.ScalarIntMemCPat:$src1, + (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1), (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2686,13 +2669,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix# + OpcodeStr#_.Suffix# "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT _.RC:$src1), @@ -2700,7 +2683,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, EVEX_K, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"{"#mem#"}"# + OpcodeStr#_.Suffix#"{"#mem#"}"# "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT (_.LdFrag addr:$src1)), @@ -2708,7 +2691,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix#"{"#mem#"}"# + OpcodeStr#_.Suffix#"{"#mem#"}"# "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)), @@ -2716,18 +2699,18 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix##"\t{$src2, ${src1}"## - _.BroadcastStr##", $dst|$dst, ${src1}" - ##_.BroadcastStr##", $src2}", + OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# + _.BroadcastStr#", $dst|$dst, ${src1}" + #_.BroadcastStr#", $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT (_.BroadcastLdFrag addr:$src1)), (i32 timm:$src2)))]>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix##"\t{$src2, ${src1}"## - _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## - _.BroadcastStr##", $src2}", + OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# + _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"# + _.BroadcastStr#", $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (_.BroadcastLdFrag addr:$src1)), (i32 timm:$src2))))]>, @@ -2979,6 +2962,8 @@ def : Pat<(vnot VK4:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; def : Pat<(vnot VK2:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; +def : Pat<(vnot VK1:$src), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>; // Mask binary operation // - KAND, KANDN, KOR, KXNOR, KXOR @@ -3008,8 +2993,6 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; } -def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; -def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; // These nodes use 'vnot' instead of 'not' to support vectors. def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; @@ -3022,7 +3005,7 @@ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XM defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; -multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, +multiclass avx512_binop_pat<SDPatternOperator VOpNode, Instruction Inst> { // With AVX512F, 8-bit mask is promoted to 16-bit mask, // for the DQI set, this type is legal and KxxxB instruction is used @@ -3033,25 +3016,25 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; // All types smaller than 8 bits require conversion anyway - def : Pat<(OpNode VK1:$src1, VK1:$src2), + def : Pat<(VOpNode VK1:$src1, VK1:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK1:$src1, VK16), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; def : Pat<(VOpNode VK2:$src1, VK2:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK2:$src1, VK16), - (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>; + (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; def : Pat<(VOpNode VK4:$src1, VK4:$src2), (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS VK4:$src1, VK16), - (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>; + (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; } -defm : avx512_binop_pat<and, and, KANDWrr>; -defm : avx512_binop_pat<vandn, andn, KANDNWrr>; -defm : avx512_binop_pat<or, or, KORWrr>; -defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>; -defm : avx512_binop_pat<xor, xor, KXORWrr>; +defm : avx512_binop_pat<and, KANDWrr>; +defm : avx512_binop_pat<vandn, KANDNWrr>; +defm : avx512_binop_pat<or, KORWrr>; +defm : avx512_binop_pat<vxnor, KXNORWrr>; +defm : avx512_binop_pat<xor, KXORWrr>; // Mask unpacking multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, @@ -3065,7 +3048,7 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, VEX_4V, VEX_L, Sched<[sched]>; def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), - (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>; + (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; } } @@ -3201,8 +3184,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr, X86VectorVTInfo Narrow, X86VectorVTInfo Wide> { -def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -3219,8 +3202,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, timm:$cc), Narrow.KRC)>; // Broadcast load. -def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1), - (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbi") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -3235,8 +3218,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, addr:$src2, timm:$cc), Narrow.KRC)>; // Commuted with broadcast load. -def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), - (Narrow.VT Narrow.RC:$src1), timm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbi") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -3301,7 +3284,7 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> { let Predicates = [HasAVX512] in let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, SchedRW = [WriteZero] in - def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", + def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", [(set KRC:$dst, (VT Val))]>; } @@ -3409,7 +3392,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", "${dst} {${mask}}, $src1}"), [(set _.RC:$dst, (_.VT - (vselect _.KRCWM:$mask, + (vselect_mask _.KRCWM:$mask, (_.VT (ld_frag addr:$src1)), (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.RM]>; @@ -3418,18 +3401,18 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, (ins _.KRCWM:$mask, _.MemOp:$src), OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# "${dst} {${mask}} {z}, $src}", - [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, + [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask, (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; } def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), - (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; + (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), - (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; + (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), - (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0, + (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, _.KRCWM:$mask, addr:$ptr)>; } @@ -4286,6 +4269,17 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))) def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; + +def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))), + (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; +def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))), + (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; + +def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))), + (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; +def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))), + (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; + let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), @@ -4439,8 +4433,6 @@ let Predicates = [HasAVX512] in { (VMOV64toPQIZrr GR64:$src)>; // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. - def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), - (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzload32 addr:$src)), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v8i32 (X86vzload32 addr:$src)), @@ -4624,8 +4616,8 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> { defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, + "${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src2)))>, AVX512BIBase, EVEX_4V, EVEX_B, @@ -4750,8 +4742,8 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2), OpcodeStr, - "${src2}"##_Brdct.BroadcastStr##", $src1", - "$src1, ${src2}"##_Brdct.BroadcastStr, + "${src2}"#_Brdct.BroadcastStr#", $src1", + "$src1, ${src2}"#_Brdct.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, AVX512BIBase, EVEX_4V, EVEX_B, @@ -4822,8 +4814,8 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr, - "${src2}"##_Src.BroadcastStr##", $src1", - "$src1, ${src2}"##_Src.BroadcastStr, + "${src2}"#_Src.BroadcastStr#", $src1", + "$src1, ${src2}"#_Src.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, @@ -5159,26 +5151,26 @@ multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode, X86VectorVTInfo _, X86VectorVTInfo IntInfo> { // Masked register-register logical operations. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), _.RC:$src0)), (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>; // Masked register-memory logical operations. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, (load addr:$src2)))), _.RC:$src0)), (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, (load addr:$src2)))), _.ImmAllZerosV)), @@ -5190,14 +5182,14 @@ multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode, X86VectorVTInfo _, X86VectorVTInfo IntInfo> { // Register-broadcast logical operations. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.RC:$src0)), (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), @@ -5304,7 +5296,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, - _.ScalarIntMemCPat:$src2))>, + (_.ScalarIntMemFrags addr:$src2)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), @@ -5350,7 +5342,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, - _.ScalarIntMemCPat:$src2))>, + (_.ScalarIntMemFrags addr:$src2)))>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let isCodeGenOnly = 1, Predicates = [HasAVX512], @@ -5463,28 +5455,32 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, EVEX_CD8<64, CD8VT1>, SIMD_EXC; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, + SDPatternOperator MaskOpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable, bit IsKCommutable = IsCommutable> { let ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, + defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", - (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, + (_.VT (OpNode _.RC:$src1, _.RC:$src2)), + (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in { - defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, + defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", - (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), + (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; - defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, + defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, + "${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), + (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5496,7 +5492,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix, + (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix, "$rc, $src2, $src1", "$src1, $src2, $rc", (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; @@ -5507,38 +5503,39 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, + (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, EVEX_4V, EVEX_B, Sched<[sched]>; } multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, + SDPatternOperator MaskOpNode, Predicate prd, X86SchedWriteSizes sched, bit IsCommutable = 0, bit IsPD128Commutable = IsCommutable> { let Predicates = [prd] in { - defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, + defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, + defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; } // Define only if AVX512VL feature is present. let Predicates = [prd, HasVLX] in { - defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, + defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, sched.PS.XMM, IsCommutable>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, + defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, sched.PS.YMM, IsCommutable>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, + defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, sched.PD.XMM, IsPD128Commutable, IsCommutable>, EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, + defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -5566,38 +5563,38 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } -defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512, +defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, SchedWriteFAddSizes, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512, +defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512, SchedWriteFMulSizes, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512, SchedWriteFAddSizes>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512, +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512, SchedWriteFDivSizes>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512, SchedWriteFCmpSizes, 0>, avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512, SchedWriteFCmpSizes, 0>, avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; let isCodeGenOnly = 1 in { - defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, + defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512, SchedWriteFCmpSizes, 1>; - defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, + defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512, SchedWriteFCmpSizes, 1>; } let Uses = []<Register>, mayRaiseFPException = 0 in { -defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, +defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI, SchedWriteFLogicSizes, 1>; -defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, +defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI, SchedWriteFLogicSizes, 0>; -defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, +defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI, SchedWriteFLogicSizes, 1>; -defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, +defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI, SchedWriteFLogicSizes, 1>; } @@ -5605,19 +5602,19 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, + (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, EVEX_4V, Sched<[sched]>; defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, + (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, + (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, + "${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr, (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5627,14 +5624,14 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, + (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, Sched<[sched]>; defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix, + (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", - (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>, + (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5648,11 +5645,11 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, - avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, + avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, X86scalefsRnd, sched.Scl>, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, - avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, + avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, X86scalefsRnd, sched.Scl>, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W; @@ -5679,7 +5676,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name> { // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. - // There are just too many permuations due to commutability and bitcasts. + // There are just too many permutations due to commutability and bitcasts. let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, @@ -5701,8 +5698,8 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, + "${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr, (null_frag), (null_frag)>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -5790,7 +5787,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, let ExeDomain = _.ExeDomain in defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, - "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", + "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2", (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, EVEX_B, Sched<[sched.Folded]>; } @@ -5973,8 +5970,8 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, + "${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -6245,8 +6242,8 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, - "${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr, + "${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, @@ -6370,9 +6367,6 @@ defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, let Predicates = [HasAVX512] in { // VMOVHPD patterns - def : Pat<(v2f64 (X86Unpckl VR128X:$src1, - (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), - (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; @@ -6419,29 +6413,33 @@ let Predicates = [HasAVX512] in { // multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, + SDNode MaskOpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, + (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), + (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, AVX512FMA3Base, Sched<[sched]>; - defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, + (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), + (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; - defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, + _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), + (MaskOpNode _.RC:$src2, _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, - AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6450,74 +6448,88 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR] in - defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", + (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched, + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM, - _.info512, Suff>, + defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.ZMM, _.info512, Suff>, avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512, Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM, - _.info256, Suff>, + defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.YMM, _.info256, Suff>, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM, - _.info128, Suff>, + defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.XMM, _.info128, Suff>, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd> { - defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, - SchedWriteFMA, avx512vl_f32_info, "PS">; - defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, - SchedWriteFMA, avx512vl_f64_info, "PD">, - VEX_W; -} - -defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>; -defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>; + SDNode MaskOpNode, SDNode OpNodeRnd> { + defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, + OpNodeRnd, SchedWriteFMA, + avx512vl_f32_info, "PS">; + defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, + OpNodeRnd, SchedWriteFMA, + avx512vl_f64_info, "PD">, VEX_W; +} + +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, + X86Fmadd, X86FmaddRnd>; +defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, + X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, + X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, + X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, + X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, + X86Fnmsub, X86FnmsubRnd>; multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, + SDNode MaskOpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1, - vselect, 1>, AVX512FMA3Base, Sched<[sched]>; + (null_frag), + (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, + AVX512FMA3Base, Sched<[sched]>; - defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, + (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), + (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; - defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), - OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", - "$src2, ${src3}"##_.BroadcastStr, + OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", + "$src2, ${src3}"#_.BroadcastStr, (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), - _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, + _.RC:$src1)), + (_.VT (MaskOpNode _.RC:$src2, + (_.VT (_.BroadcastLdFrag addr:$src3)), + _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6527,77 +6539,89 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR] in - defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", - (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), - 1, 1, vselect, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; + (null_frag), + (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), + 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched, + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM, - _.info512, Suff>, + defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.ZMM, _.info512, Suff>, avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512, Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM, - _.info256, Suff>, + defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.YMM, _.info256, Suff>, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM, - _.info128, Suff>, + defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.XMM, _.info128, Suff>, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd > { - defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, - SchedWriteFMA, avx512vl_f32_info, "PS">; - defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, - SchedWriteFMA, avx512vl_f64_info, "PD">, - VEX_W; -} - -defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>; -defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>; + SDNode MaskOpNode, SDNode OpNodeRnd > { + defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, + OpNodeRnd, SchedWriteFMA, + avx512vl_f32_info, "PS">; + defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, + OpNodeRnd, SchedWriteFMA, + avx512vl_f64_info, "PD">, VEX_W; +} + +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, + X86Fmadd, X86FmaddRnd>; +defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub, + X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, + X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, + X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd, + X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub, + X86Fnmsub, X86FnmsubRnd>; multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, + SDNode MaskOpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>, + (null_frag), + (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>, AVX512FMA3Base, Sched<[sched]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. - defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, + (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), + (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. - defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), - OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", - "$src2, ${src3}"##_.BroadcastStr, + OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", + "$src2, ${src3}"#_.BroadcastStr, (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2)), 1, 0>, + _.RC:$src1, _.RC:$src2)), + (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)), + _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6607,49 +6631,57 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, string Suff> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR] in - defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", - (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), - 1, 1, vselect, 1>, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; + (null_frag), + (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), + 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched, + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suff> { let Predicates = [HasAVX512] in { - defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM, - _.info512, Suff>, + defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.ZMM, _.info512, Suff>, avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512, Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasAVX512] in { - defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM, - _.info256, Suff>, + defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.YMM, _.info256, Suff>, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM, - _.info128, Suff>, + defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.XMM, _.info128, Suff>, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd > { - defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd, - SchedWriteFMA, avx512vl_f32_info, "PS">; - defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd, - SchedWriteFMA, avx512vl_f64_info, "PD">, - VEX_W; -} - -defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>; -defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; -defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; -defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; -defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>; + SDNode MaskOpNode, SDNode OpNodeRnd > { + defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, + OpNodeRnd, SchedWriteFMA, + avx512vl_f32_info, "PS">; + defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, + OpNodeRnd, SchedWriteFMA, + avx512vl_f64_info, "PD">, VEX_W; +} + +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, + X86Fmadd, X86FmaddRnd>; +defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub, + X86Fmsub, X86FmsubRnd>; +defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, + X86Fmaddsub, X86FmaddsubRnd>; +defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, + X86Fmsubadd, X86FmsubaddRnd>; +defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd, + X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub, + X86Fnmsub, X86FnmsubRnd>; // Scalar FMA multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, @@ -6742,11 +6774,12 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, } defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>; -defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; -defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; -defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; +defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>; +defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>; +defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>; -multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, +multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp, + SDNode RndOp, string Prefix, string Suffix, SDNode Move, X86VectorVTInfo _, PatLeaf ZeroFP> { let Predicates = [HasAVX512] in { @@ -6788,8 +6821,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, addr:$src3)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src3), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), @@ -6799,8 +6832,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), (_.ScalarLdFrag addr:$src3)), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), @@ -6809,18 +6842,18 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), - (_.ScalarLdFrag addr:$src3), _.FRC:$src2), + (X86selects_mask VK1WM:$mask, + (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), + (_.ScalarLdFrag addr:$src3), _.FRC:$src2), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, _.FRC:$src3, - (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, _.FRC:$src3, + (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk") VR128X:$src1, VK1WM:$mask, @@ -6828,19 +6861,19 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), - (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), + (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, - (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), - _.FRC:$src3), + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, + (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), + _.FRC:$src3), (_.EltVT ZeroFP)))))), (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz") VR128X:$src1, VK1WM:$mask, @@ -6848,9 +6881,9 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, _.FRC:$src3, - (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, _.FRC:$src3, + (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), (_.EltVT ZeroFP)))))), (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz") VR128X:$src1, VK1WM:$mask, @@ -6858,28 +6891,28 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, - (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), - (_.ScalarLdFrag addr:$src3)), + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, + (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), + (_.ScalarLdFrag addr:$src3)), (_.EltVT ZeroFP)))))), (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), - _.FRC:$src2, (_.ScalarLdFrag addr:$src3)), + (X86selects_mask VK1WM:$mask, + (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), + _.FRC:$src2, (_.ScalarLdFrag addr:$src3)), (_.EltVT ZeroFP)))))), (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, - (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), - (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), + (X86selects_mask VK1WM:$mask, + (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), + (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), (_.EltVT ZeroFP)))))), (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz") VR128X:$src1, VK1WM:$mask, @@ -6903,7 +6936,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, + (X86selects_mask VK1WM:$mask, (RndOp _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src3, (i32 timm:$rc)), @@ -6914,7 +6947,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, + (X86selects_mask VK1WM:$mask, (RndOp _.FRC:$src2, _.FRC:$src3, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), (i32 timm:$rc)), @@ -6925,7 +6958,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, + (X86selects_mask VK1WM:$mask, (RndOp _.FRC:$src2, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src3, (i32 timm:$rc)), @@ -6936,7 +6969,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector - (X86selects VK1WM:$mask, + (X86selects_mask VK1WM:$mask, (RndOp _.FRC:$src2, _.FRC:$src3, (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), (i32 timm:$rc)), @@ -6948,23 +6981,23 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix, } } -defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS", - X86Movss, v4f32x_info, fp32imm0>; -defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS", - X86Movss, v4f32x_info, fp32imm0>; -defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS", - X86Movss, v4f32x_info, fp32imm0>; -defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS", - X86Movss, v4f32x_info, fp32imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD", + "SS", X86Movss, v4f32x_info, fp32imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", + "SS", X86Movss, v4f32x_info, fp32imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", + "SS", X86Movss, v4f32x_info, fp32imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", + "SS", X86Movss, v4f32x_info, fp32imm0>; -defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD", - X86Movsd, v2f64x_info, fp64imm0>; -defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD", - X86Movsd, v2f64x_info, fp64imm0>; -defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD", - X86Movsd, v2f64x_info, fp64imm0>; -defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD", - X86Movsd, v2f64x_info, fp64imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD", + "SD", X86Movsd, v2f64x_info, fp64imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", + "SD", X86Movsd, v2f64x_info, fp64imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", + "SD", X86Movsd, v2f64x_info, fp64imm0>; +defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", + "SD", X86Movsd, v2f64x_info, fp64imm0>; //===----------------------------------------------------------------------===// // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA @@ -7194,7 +7227,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set DstVT.RC:$dst, (OpNode - (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>, + (SrcVT.ScalarIntMemFrags addr:$src)))]>, EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } // Predicates = [HasAVX512] @@ -7233,6 +7266,45 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, + X86VectorVTInfo DstVT, SDNode OpNode, + X86FoldableSchedWrite sched, + string aliasStr> { + let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { + let isCodeGenOnly = 1 in { + def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), + [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>, + EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; + def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), + [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>, + EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + } + } // Predicates = [HasAVX512] +} + +defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, + lrint, WriteCvtSS2I, + "{l}">, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, + llrint, WriteCvtSS2I, + "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, + lrint, WriteCvtSD2I, + "{l}">, XD, EVEX_CD8<64, CD8VT1>; +defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, + llrint, WriteCvtSD2I, + "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; + +let Predicates = [HasAVX512] in { + def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; + def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>; + + def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>; + def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>; +} + // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang // which produce unnecessary vmovs{s,d} instructions let Predicates = [HasAVX512] in { @@ -7347,7 +7419,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in { (ins _SrcRC.IntScalarMemOp:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set _DstRC.RC:$dst, - (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>, + (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>, EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } //HasAVX512 @@ -7404,7 +7476,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _ (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT _.RC:$src1), - (_Src.VT _Src.ScalarIntMemCPat:$src2)))>, + (_Src.ScalarIntMemFrags addr:$src2)))>, EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -7421,7 +7493,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _ } } -// Scalar Coversion with SAE - suppress all exceptions +// Scalar Conversion with SAE - suppress all exceptions multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { @@ -7506,55 +7578,63 @@ def : Pat<(v2f64 (X86Movsd //===----------------------------------------------------------------------===// multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNode, + X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode, X86FoldableSchedWrite sched, string Broadcast = _.BroadcastStr, string Alias = "", X86MemOperand MemOp = _Src.MemOp, RegisterClass MaskRC = _.KRCWM, - dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> { + dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))), + dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> { let Uses = [MXCSR], mayRaiseFPException = 1 in { - defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _Src.RC:$src), (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src), (ins MaskRC:$mask, _Src.RC:$src), OpcodeStr, "$src", "$src", (_.VT (OpNode (_Src.VT _Src.RC:$src))), - (vselect MaskRC:$mask, - (_.VT (OpNode (_Src.VT _Src.RC:$src))), - _.RC:$src0), - vselect, "$src0 = $dst">, + (vselect_mask MaskRC:$mask, + (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), + _.RC:$src0), + (vselect_mask MaskRC:$mask, + (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), + _.ImmAllZerosV)>, EVEX, Sched<[sched]>; - defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins MemOp:$src), (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), (ins MaskRC:$mask, MemOp:$src), OpcodeStr#Alias, "$src", "$src", LdDAG, - (vselect MaskRC:$mask, LdDAG, _.RC:$src0), - vselect, "$src0 = $dst">, + (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0), + (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>, EVEX, Sched<[sched.Folded]>; - defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _Src.ScalarMemOp:$src), (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src), (ins MaskRC:$mask, _Src.ScalarMemOp:$src), OpcodeStr, - "${src}"##Broadcast, "${src}"##Broadcast, + "${src}"#Broadcast, "${src}"#Broadcast, (_.VT (OpNode (_Src.VT (_Src.BroadcastLdFrag addr:$src)) )), - (vselect MaskRC:$mask, - (_.VT - (OpNode - (_Src.VT - (_Src.BroadcastLdFrag addr:$src)))), - _.RC:$src0), - vselect, "$src0 = $dst">, + (vselect_mask MaskRC:$mask, + (_.VT + (MaskOpNode + (_Src.VT + (_Src.BroadcastLdFrag addr:$src)))), + _.RC:$src0), + (vselect_mask MaskRC:$mask, + (_.VT + (MaskOpNode + (_Src.VT + (_Src.BroadcastLdFrag addr:$src)))), + _.ImmAllZerosV)>, EVEX, EVEX_B, Sched<[sched.Folded]>; } } -// Coversion with SAE - suppress all exceptions +// Conversion with SAE - suppress all exceptions multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { @@ -7581,12 +7661,14 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, // Similar to avx512_vcvt_fp, but uses an extload for the memory form. multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNode, + SDNode MaskOpNode, X86FoldableSchedWrite sched, string Broadcast = _.BroadcastStr, string Alias = "", X86MemOperand MemOp = _Src.MemOp, RegisterClass MaskRC = _.KRCWM> - : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias, - MemOp, MaskRC, + : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast, + Alias, MemOp, MaskRC, + (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)), (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>; // Extend Float to Double @@ -7594,69 +7676,72 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info, - any_fpextend, sched.ZMM>, + any_fpextend, fpextend, sched.ZMM>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, X86vfpextSAE, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info, - X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; - defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend, - sched.YMM>, EVEX_V256; + X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", + "", f64mem>, EVEX_V128; + defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, + any_fpextend, fpextend, sched.YMM>, EVEX_V256; } } // Truncate Double to Float multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>, + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, + X86any_vfpround, X86vfpround, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfproundRnd, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, - null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>, - EVEX_V128; - defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround, + null_frag, null_frag, sched.XMM, "{1to2}", "{x}", + f128mem, VK2WM>, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, + X86any_vfpround, X86vfpround, sched.YMM, "{1to4}", "{y}">, EVEX_V256; } - def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|" "$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" "$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">; @@ -7701,81 +7786,91 @@ let Predicates = [HasVLX] in { // Convert Signed/Unsigned Doubleword to Double let Uses = []<Register>, mayRaiseFPException = 0 in multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNode128, + SDNode MaskOpNode128, + X86SchedWriteWidths sched> { // No rounding in this op let Predicates = [HasAVX512] in defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, - sched.ZMM>, EVEX_V512; + MaskOpNode, sched.ZMM>, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, - OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM, + OpNode128, MaskOpNode128, sched.XMM, "{1to2}", + "", i64mem, VK2WM, (v2f64 (OpNode128 (bc_v4i32 (v2i64 + (scalar_to_vector (loadi64 addr:$src)))))), + (v2f64 (MaskOpNode128 (bc_v4i32 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))))>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Signed/Unsigned Doubleword to Float multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, OpNodeRnd, sched.ZMM>, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, - sched.XMM>, EVEX_V128; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, OpNodeSAE, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, - sched.XMM>, EVEX_V128; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, - sched.XMM>, EVEX_V128; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Double to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeSAE, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeSAE, + X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNodeSAE, sched.ZMM>, EVEX_V512; } @@ -7785,50 +7880,50 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, - null_frag, sched.XMM, "{1to2}", "{x}", f128mem, + null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, - sched.YMM, "{1to4}", "{y}">, EVEX_V256; + MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; } - def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">; @@ -7836,10 +7931,11 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // Convert Double to Signed/Unsigned Doubleword multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } @@ -7849,48 +7945,48 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, - null_frag, sched.XMM, "{1to2}", "{x}", f128mem, + null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, - sched.YMM, "{1to4}", "{y}">, EVEX_V256; + MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; } - def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, VK2WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, VK4WM:$mask, f64mem:$src), 0, "att">; @@ -7898,61 +7994,65 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // Convert Double to Signed/Unsigned Quardword multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, - sched.XMM>, EVEX_V128; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Double to Signed/Unsigned Quardword with truncation multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, - sched.XMM>, EVEX_V128; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Double multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, - sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; + MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, - sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; + MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; } } // Convert Float to Signed/Unsigned Quardword multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } @@ -7960,21 +8060,26 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, - sched.XMM, "{1to2}", "", f64mem, VK2WM, + MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, (v2i64 (OpNode (bc_v4f32 (v2f64 + (scalar_to_vector (loadf64 addr:$src)))))), + (v2i64 (MaskOpNode (bc_v4f32 + (v2f64 (scalar_to_vector (loadf64 addr:$src))))))>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword with truncation multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>, + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } @@ -7982,22 +8087,26 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, - sched.XMM, "{1to2}", "", f64mem, VK2WM, + MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, (v2i64 (OpNode (bc_v4f32 (v2f64 + (scalar_to_vector (loadf64 addr:$src)))))), + (v2i64 (MaskOpNode (bc_v4f32 + (v2f64 (scalar_to_vector (loadf64 addr:$src))))))>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, - sched.YMM>, EVEX_V256; + MaskOpNode, sched.YMM>, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Float multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, X86SchedWriteWidths sched> { + SDNode MaskOpNode, SDNode OpNodeRnd, + X86SchedWriteWidths sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode, - sched.ZMM>, + MaskOpNode, sched.ZMM>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNodeRnd, sched.ZMM>, EVEX_V512; } @@ -8007,152 +8116,159 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag, - sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>, + null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>, EVEX_V128, NotEVEX2VEXConvertible; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, - sched.YMM, "{1to4}", "{y}">, EVEX_V256, + MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256, NotEVEX2VEXConvertible; } - def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", + def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, VK2WM:$mask, VR128X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, i64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, VK2WM:$mask, i64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to2}}", (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, VK2WM:$mask, i64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", + def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" "$dst {${mask}}, $src}", (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" "$dst {${mask}} {z}, $src}", (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, VK4WM:$mask, VR256X:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, i64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" "$dst {${mask}}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, VK4WM:$mask, i64mem:$src), 0, "att">; - def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|" + def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" "$dst {${mask}} {z}, ${src}{1to4}}", (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, VK4WM:$mask, i64mem:$src), 0, "att">; } -defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP, +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, + X86any_VSintToFP, X86VSintToFP, SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; -defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, +defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, - X86cvttp2siSAE, SchedWriteCvtPS2DQ>, - XS, EVEX_CD8<32, CD8VF>; + X86cvttp2si, X86cvttp2siSAE, + SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, - X86cvttp2siSAE, SchedWriteCvtPD2DQ>, + X86cvttp2si, X86cvttp2siSAE, + SchedWriteCvtPD2DQ>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, - X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS, - EVEX_CD8<32, CD8VF>; + X86cvttp2ui, X86cvttp2uiSAE, + SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, - X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, + X86cvttp2ui, X86cvttp2uiSAE, + SchedWriteCvtPD2DQ>, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, - X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS, - EVEX_CD8<32, CD8VH>; + uint_to_fp, X86any_VUintToFP, X86VUintToFP, + SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, - X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD, - EVEX_CD8<32, CD8VF>; + uint_to_fp, X86VUintToFpRnd, + SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; -defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, +defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VF>; -defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, +defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, +defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, +defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, PS, EVEX_CD8<64, CD8VF>; -defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, +defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, +defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; -defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, +defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, +defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, - X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W, + X86cvttp2si, X86cvttp2siSAE, + SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, - X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD, + X86cvttp2si, X86cvttp2siSAE, + SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, - X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W, + X86cvttp2ui, X86cvttp2uiSAE, + SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, - X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD, + X86cvttp2ui, X86cvttp2uiSAE, + SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, - X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, - EVEX_CD8<64, CD8VF>; + sint_to_fp, X86VSintToFpRnd, + SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, - X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS, - EVEX_CD8<64, CD8VF>; + uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, + VEX_W, XS, EVEX_CD8<64, CD8VF>; defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp, - X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS, - EVEX_CD8<64, CD8VF>; + sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>, + VEX_W, PS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp, - X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD, - EVEX_CD8<64, CD8VF>; + uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>, + VEX_W, XD, EVEX_CD8<64, CD8VF>; let Predicates = [HasVLX] in { // Special patterns to allow use of X86mcvtp2Int for masking. Instruction @@ -8275,70 +8391,70 @@ let Predicates = [HasVLX] in { let Predicates = [HasDQI, HasVLX] in { def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTPS2QQZ128rm addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - VR128X:$src0)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + VR128X:$src0)), (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - v2i64x_info.ImmAllZerosV)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + v2i64x_info.ImmAllZerosV)), (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTPS2UQQZ128rm addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - VR128X:$src0)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + VR128X:$src0)), (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - v2i64x_info.ImmAllZerosV)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + v2i64x_info.ImmAllZerosV)), (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTTPS2QQZ128rm addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - VR128X:$src0)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + VR128X:$src0)), (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - v2i64x_info.ImmAllZerosV)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + v2i64x_info.ImmAllZerosV)), (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTTPS2UQQZ128rm addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - VR128X:$src0)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + VR128X:$src0)), (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), - v2i64x_info.ImmAllZerosV)), + def : Pat<(v2i64 (vselect_mask VK2WM:$mask, + (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), + v2i64x_info.ImmAllZerosV)), (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTDQ2PDZ128rm addr:$src)>; - def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), - VR128X:$src0)), + def : Pat<(v2f64 (vselect_mask VK2WM:$mask, + (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + VR128X:$src0)), (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), - v2f64x_info.ImmAllZerosV)), + def : Pat<(v2f64 (vselect_mask VK2WM:$mask, + (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + v2f64x_info.ImmAllZerosV)), (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTUDQ2PDZ128rm addr:$src)>; - def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), - VR128X:$src0)), + def : Pat<(v2f64 (vselect_mask VK2WM:$mask, + (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + VR128X:$src0)), (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), - v2f64x_info.ImmAllZerosV)), + def : Pat<(v2f64 (vselect_mask VK2WM:$mask, + (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + v2f64x_info.ImmAllZerosV)), (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -8408,16 +8524,17 @@ let Predicates = [HasDQI, HasVLX] in { let Uses = [MXCSR], mayRaiseFPException = 1 in multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, - X86MemOperand x86memop, PatFrag ld_frag, + X86MemOperand x86memop, dag ld_dag, X86FoldableSchedWrite sched> { - defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), + defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", + (X86any_cvtph2ps (_src.VT _src.RC:$src)), (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD, Sched<[sched]>; - defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), + defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), "vcvtph2ps", "$src", "$src", - (X86cvtph2ps (_src.VT - (ld_frag addr:$src)))>, + (X86any_cvtph2ps (_src.VT ld_dag)), + (X86cvtph2ps (_src.VT ld_dag))>, T8PD, Sched<[sched.Folded]>; } @@ -8432,23 +8549,22 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, } let Predicates = [HasAVX512] in - defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load, - WriteCvtPH2PSZ>, + defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, + (load addr:$src), WriteCvtPH2PSZ>, avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, - load, WriteCvtPH2PSY>, EVEX, EVEX_V256, + (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, - load, WriteCvtPH2PS>, EVEX, EVEX_V128, + (bitconvert (v2i64 (X86vzload64 addr:$src))), + WriteCvtPH2PS>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; // Pattern match vcvtph2ps of a scalar i64 load. - def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), - (VCVTPH2PSZ128rm addr:$src)>; - def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert + def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VCVTPH2PSZ128rm addr:$src)>; } @@ -8460,7 +8576,7 @@ let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _dest.RC:$dst, - (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, + (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, Sched<[RR]>; let Constraints = "$src0 = $dst" in def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), @@ -8505,54 +8621,35 @@ let Predicates = [HasAVX512] in { WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; - let Predicates = [HasVLX] in { - defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, - WriteCvtPS2PHY, WriteCvtPS2PHYSt>, - EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; - defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, - WriteCvtPS2PH, WriteCvtPS2PHSt>, - EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; - } + + def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; +} + +let Predicates = [HasVLX] in { + defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, + WriteCvtPS2PHY, WriteCvtPS2PHYSt>, + EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, + WriteCvtPS2PH, WriteCvtPS2PHSt>, + EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; def : Pat<(store (f64 (extractelt - (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), + (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; def : Pat<(store (i64 (extractelt - (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), + (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; - def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), + def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; - def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), - (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; -} - -// Patterns for matching conversions from float to half-float and vice versa. -let Predicates = [HasVLX] in { - // Use MXCSR.RC for rounding instead of explicitly specifying the default - // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the - // configurations we support (the default). However, falling back to MXCSR is - // more consistent with other instructions, which are always controlled by it. - // It's encoded as 0b100. - def : Pat<(fp_to_f16 FR32X:$src), - (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr - (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>; - - def : Pat<(f16_to_fp GR16:$src), - (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr - (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >; - - def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))), - (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr - (v8i16 (VCVTPS2PHZ128rr - (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >; } // Unordered/Ordered scalar fp compare with Sae and set EFLAGS multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, string OpcodeStr, Domain d, - X86FoldableSchedWrite sched = WriteFCom> { + X86FoldableSchedWrite sched = WriteFComX> { let hasSideEffects = 0, Uses = [MXCSR] in def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, @@ -8613,7 +8710,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), - _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG, + (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8646,7 +8743,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, - "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, + "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, (OpNode (_.VT (_.BroadcastLdFrag addr:$src)))>, EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -8701,7 +8798,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, + (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; } } @@ -8741,7 +8838,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, - "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, + "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, (OpNode (_.VT (_.BroadcastLdFrag addr:$src)))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -8811,20 +8908,21 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", - (_.VT (any_fsqrt _.RC:$src))>, EVEX, + (_.VT (any_fsqrt _.RC:$src)), + (_.VT (fsqrt _.RC:$src))>, EVEX, Sched<[sched]>; - defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", - (any_fsqrt (_.VT - (bitconvert (_.LdFrag addr:$src))))>, EVEX, - Sched<[sched.Folded, sched.ReadAfterFold]>; - defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (any_fsqrt (_.VT (_.LdFrag addr:$src))), + (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX, + Sched<[sched.Folded, sched.ReadAfterFold]>; + defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, - "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, - (any_fsqrt (_.VT - (_.BroadcastLdFrag addr:$src)))>, + "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, + (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))), + (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>, EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8879,7 +8977,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (X86fsqrts (_.VT _.RC:$src1), - _.ScalarIntMemCPat:$src2)>, + (_.ScalarIntMemFrags addr:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let Uses = [MXCSR] in defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -8952,7 +9050,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, - _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, + (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { @@ -8971,13 +9069,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX512] in { def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2), - (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), + (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src1, timm:$src2))>; } let Predicates = [HasAVX512, OptForSize] in { def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), - (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), + (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)), addr:$src1, timm:$src2))>; } } @@ -8996,13 +9094,13 @@ multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, dag OutMask, Predicate BasePredicate> { let Predicates = [BasePredicate] in { - def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, + def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), (extractelt _.VT:$dst, (iPTR 0))))), (!cast<Instruction>("V"#OpcPrefix#r_Intk) _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; - def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask, + def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))), (!cast<Instruction>("V"#OpcPrefix#r_Intkz) @@ -9026,14 +9124,14 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass // either to the multiclasses. def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask), - (vselect node:$mask, - (trunc node:$src), node:$src0)>; + (vselect_mask node:$mask, + (trunc node:$src), node:$src0)>; def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask), - (vselect node:$mask, - (X86vtruncs node:$src), node:$src0)>; + (vselect_mask node:$mask, + (X86vtruncs node:$src), node:$src0)>; def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask), - (vselect node:$mask, - (X86vtruncus node:$src), node:$src0)>; + (vselect_mask node:$mask, + (X86vtruncus node:$src), node:$src0)>; multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, @@ -9083,12 +9181,12 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, string Name> { def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), - (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr) + (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr) addr:$dst, SrcInfo.RC:$src)>; def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, SrcInfo.KRCWM:$mask), - (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk) + (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk) addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; } @@ -9548,6 +9646,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, let Predicates = [HasVLX] in { def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; + def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))), + (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; @@ -9558,6 +9658,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; + def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))), + (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; } @@ -9565,6 +9667,10 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, let Predicates = [HasAVX512] in { def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; + def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), + (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; + def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), + (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; } } @@ -9586,54 +9692,49 @@ def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))), // FIXME: Improve scheduling of gather/scatter instructions. multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86MemOperand memop, PatFrag GatherNode, - RegisterClass MaskRC = _.KRCWM> { + X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> { let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb", - ExeDomain = _.ExeDomain in + ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb), (ins _.RC:$src1, MaskRC:$mask, memop:$src2), !strconcat(OpcodeStr#_.Suffix, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), - [(set _.RC:$dst, MaskRC:$mask_wb, - (GatherNode (_.VT _.RC:$src1), MaskRC:$mask, - vectoraddr:$src2))]>, EVEX, EVEX_K, - EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; + []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; } multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { - defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, - vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W; - defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512, - vz512mem, mgatherv8i64>, EVEX_V512, VEX_W; + defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, + vy512xmem>, EVEX_V512, VEX_W; + defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512, + vz512mem>, EVEX_V512, VEX_W; let Predicates = [HasVLX] in { - defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, - vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W; - defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256, - vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W; - defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, - vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W; - defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, - vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W; + defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256, + vx256xmem>, EVEX_V256, VEX_W; + defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256, + vy256xmem>, EVEX_V256, VEX_W; + defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128, + vx128xmem>, EVEX_V128, VEX_W; + defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128, + vx128xmem>, EVEX_V128, VEX_W; } } multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { - defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem, - mgatherv16i32>, EVEX_V512; - defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem, - mgatherv8i64>, EVEX_V512; + defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>, + EVEX_V512; + defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>, + EVEX_V512; let Predicates = [HasVLX] in { - defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256, - vy256xmem, mgatherv8i32>, EVEX_V256; - defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128, - vy128xmem, mgatherv4i64>, EVEX_V256; - defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128, - vx128xmem, mgatherv4i32>, EVEX_V128; - defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128, - vx64xmem, mgatherv2i64, VK2WM>, - EVEX_V128; + defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256, + vy256xmem>, EVEX_V256; + defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128, + vy128xmem>, EVEX_V256; + defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128, + vx128xmem>, EVEX_V128; + defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128, + vx64xmem, VK2WM>, EVEX_V128; } } @@ -9645,55 +9746,52 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86MemOperand memop, PatFrag ScatterNode, - RegisterClass MaskRC = _.KRCWM> { + X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> { -let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in +let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain, + hasSideEffects = 0 in def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb), (ins memop:$dst, MaskRC:$mask, _.RC:$src), !strconcat(OpcodeStr#_.Suffix, "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"), - [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src), - MaskRC:$mask, vectoraddr:$dst))]>, - EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, + []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteStore]>; } multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { - defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, - vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W; - defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512, - vz512mem, mscatterv8i64>, EVEX_V512, VEX_W; + defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, + vy512xmem>, EVEX_V512, VEX_W; + defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512, + vz512mem>, EVEX_V512, VEX_W; let Predicates = [HasVLX] in { - defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, - vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W; - defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256, - vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W; - defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, - vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W; - defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, - vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W; + defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256, + vx256xmem>, EVEX_V256, VEX_W; + defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256, + vy256xmem>, EVEX_V256, VEX_W; + defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128, + vx128xmem>, EVEX_V128, VEX_W; + defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128, + vx128xmem>, EVEX_V128, VEX_W; } } multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc, AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> { - defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem, - mscatterv16i32>, EVEX_V512; - defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem, - mscatterv8i64>, EVEX_V512; + defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>, + EVEX_V512; + defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>, + EVEX_V512; let Predicates = [HasVLX] in { - defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256, - vy256xmem, mscatterv8i32>, EVEX_V256; - defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128, - vy128xmem, mscatterv4i64>, EVEX_V256; - defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128, - vx128xmem, mscatterv4i32>, EVEX_V128; - defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128, - vx64xmem, mscatterv2i64, VK2WM>, - EVEX_V128; + defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256, + vy256xmem>, EVEX_V256; + defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128, + vy128xmem>, EVEX_V256; + defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128, + vx128xmem>, EVEX_V128; + defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128, + vx64xmem, VK2WM>, EVEX_V128; } } @@ -9762,13 +9860,9 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > { def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), - !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"), + !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"), [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>, EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? - -// Also need a pattern for anyextend. -def : Pat<(Vec.VT (anyext Vec.KRC:$src)), - (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>; } multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, @@ -9842,19 +9936,11 @@ let Predicates = [HasDQI, NoBWI] in { (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; def : Pat<(v16i16 (sext (v16i1 VK16:$src))), (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; - - def : Pat<(v16i8 (anyext (v16i1 VK16:$src))), - (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; - def : Pat<(v16i16 (anyext (v16i1 VK16:$src))), - (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; } let Predicates = [HasDQI, NoBWI, HasVLX] in { def : Pat<(v8i16 (sext (v8i1 VK8:$src))), (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; - - def : Pat<(v8i16 (anyext (v8i1 VK8:$src))), - (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; } //===----------------------------------------------------------------------===// @@ -9885,14 +9971,14 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix##mrk) + (!cast<Instruction>(Name#_.ZSuffix#mrk) addr:$dst, _.KRCWM:$mask, _.RC:$src)>; def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix##rrk) + (!cast<Instruction>(Name#_.ZSuffix#rrk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix##rrkz) + (!cast<Instruction>(Name#_.ZSuffix#rrkz) _.KRCWM:$mask, _.RC:$src)>; } @@ -9940,23 +10026,23 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), - (!cast<Instruction>(Name#_.ZSuffix##rmkz) + (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$src)>; def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), - (!cast<Instruction>(Name#_.ZSuffix##rmkz) + (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$src)>; def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, (_.VT _.RC:$src0))), - (!cast<Instruction>(Name#_.ZSuffix##rmk) + (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, _.KRCWM:$mask, addr:$src)>; def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix##rrk) + (!cast<Instruction>(Name#_.ZSuffix#rrk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix##rrkz) + (!cast<Instruction>(Name#_.ZSuffix#rrkz) _.KRCWM:$mask, _.RC:$src)>; } @@ -9990,26 +10076,33 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, // op(mem_vec,imm) // op(broadcast(eltVt),imm) //all instruction created with FROUND_CURRENT -multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, X86VectorVTInfo _> { +multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode MaskOpNode, + X86FoldableSchedWrite sched, + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", - (OpNode (_.VT _.RC:$src1), - (i32 timm:$src2))>, Sched<[sched]>; - defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)), + (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>, + Sched<[sched]>; + defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", + OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), - (i32 timm:$src2))>, + (i32 timm:$src2)), + (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), + (i32 timm:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>; - defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr, - "${src1}"##_.BroadcastStr##", $src2", + OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr, + "${src1}"#_.BroadcastStr#", $src2", (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), - (i32 timm:$src2))>, EVEX_B, + (i32 timm:$src2)), + (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10021,7 +10114,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, let ExeDomain = _.ExeDomain, Uses = [MXCSR] in defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, i32u8imm:$src2), - OpcodeStr##_.Suffix, "$src2, {sae}, $src1", + OpcodeStr#_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), (i32 timm:$src2))>, @@ -10030,18 +10123,19 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ + SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched, + Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, - _.info512>, + defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.ZMM, _.info512>, avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, - _.info128>, EVEX_V128; - defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, - _.info256>, EVEX_V256; + defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.XMM, _.info128>, EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, + sched.YMM, _.info256>, EVEX_V256; } } @@ -10068,8 +10162,8 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), - OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr##", $src3", + OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr#", $src3", (OpNode (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), (i32 timm:$src3))>, EVEX_B, @@ -10111,8 +10205,8 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, let ExeDomain = _.ExeDomain in defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), - OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr##", $src3", + OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr#", $src3", (OpNode (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), (i8 timm:$src3))>, EVEX_B, @@ -10135,7 +10229,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), - (_.VT _.ScalarIntMemCPat:$src2), + (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10228,24 +10322,26 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, bits<8> opcPs, bits<8> opcPd, SDNode OpNode, - SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ + SDNode MaskOpNode, SDNode OpNodeSAE, + X86SchedWriteWidths sched, Predicate prd>{ defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, - opcPs, OpNode, OpNodeSAE, sched, prd>, + opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, EVEX_CD8<32, CD8VF>; defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, - opcPd, OpNode, OpNodeSAE, sched, prd>, + opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>, EVEX_CD8<64, CD8VF>, VEX_W; } defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, - X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>, - AVX512AIi8Base, EVEX; + X86VReduce, X86VReduce, X86VReduceSAE, + SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX; defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, - X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>, + X86any_VRndScale, X86VRndScale, X86VRndScaleSAE, + SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, - X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>, - AVX512AIi8Base, EVEX; + X86VGetMant, X86VGetMant, X86VGetMantSAE, + SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, X86VRangeSAE, @@ -10302,8 +10398,8 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), - OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr##", $src3", + OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr#", $src3", (_.VT (bitconvert (CastInfo.VT @@ -10391,8 +10487,8 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr, defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), - OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr##", $src3", + OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", + "$src1, ${src2}"#_.BroadcastStr#", $src3", (X86VAlign _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)), (i8 timm:$src3))>, EVEX_B, @@ -10441,40 +10537,40 @@ def ValigndImm8XForm : SDNodeXForm<timm, [{ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, X86VectorVTInfo From, X86VectorVTInfo To, SDNodeXForm ImmXForm> { - def : Pat<(To.VT (vselect To.KRCWM:$mask, - (bitconvert - (From.VT (OpNode From.RC:$src1, From.RC:$src2, - timm:$src3))), - To.RC:$src0)), + def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, From.RC:$src2, + timm:$src3))), + To.RC:$src0)), (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, (ImmXForm timm:$src3))>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, - (bitconvert - (From.VT (OpNode From.RC:$src1, From.RC:$src2, - timm:$src3))), - To.ImmAllZerosV)), + def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, From.RC:$src2, + timm:$src3))), + To.ImmAllZerosV)), (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, (ImmXForm timm:$src3))>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, - (bitconvert - (From.VT (OpNode From.RC:$src1, - (From.LdFrag addr:$src2), - timm:$src3))), - To.RC:$src0)), + def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (From.LdFrag addr:$src2), + timm:$src3))), + To.RC:$src0)), (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, - (bitconvert - (From.VT (OpNode From.RC:$src1, - (From.LdFrag addr:$src2), - timm:$src3))), - To.ImmAllZerosV)), + def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (From.LdFrag addr:$src2), + timm:$src3))), + To.ImmAllZerosV)), (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>; @@ -10491,24 +10587,24 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, - (bitconvert - (From.VT (OpNode From.RC:$src1, - (bitconvert - (To.VT (To.BroadcastLdFrag addr:$src2))), - timm:$src3))), - To.RC:$src0)), + def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (bitconvert + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), + To.RC:$src0)), (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, - (bitconvert - (From.VT (OpNode From.RC:$src1, - (bitconvert - (To.VT (To.BroadcastLdFrag addr:$src2))), - timm:$src3))), - To.ImmAllZerosV)), + def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, + (bitconvert + (From.VT (OpNode From.RC:$src1, + (bitconvert + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), + To.ImmAllZerosV)), (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>; @@ -10567,8 +10663,8 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1), OpcodeStr, - "${src1}"##_.BroadcastStr, - "${src1}"##_.BroadcastStr, + "${src1}"#_.BroadcastStr, + "${src1}"#_.BroadcastStr, (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; @@ -10751,32 +10847,14 @@ defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle> let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), - (VMOVDDUPZ128rm addr:$src)>; -def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), - (VMOVDDUPZ128rm addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), - (v2f64 VR128X:$src0)), +def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), + (v2f64 VR128X:$src0)), (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), - immAllZerosV), +def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), + immAllZerosV), (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; - -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), - (v2f64 VR128X:$src0)), - (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), - immAllZerosV), - (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; - -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), - (v2f64 VR128X:$src0)), - (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), - immAllZerosV), - (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } //===----------------------------------------------------------------------===// @@ -10784,9 +10862,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load //===----------------------------------------------------------------------===// let Uses = []<Register>, mayRaiseFPException = 0 in { -defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512, SchedWriteFShuffleSizes, 0, 1>; -defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512, SchedWriteFShuffleSizes>; } @@ -10945,16 +11023,15 @@ defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, // AVX-512 - Byte shift Left/Right //===----------------------------------------------------------------------===// -// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well? multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, Format MRMm, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ - def rr : AVX512<opc, MRMr, + def ri : AVX512<opc, MRMr, (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; - def rm : AVX512<opc, MRMm, + def mi : AVX512<opc, MRMm, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst,(_.VT (OpNode @@ -11106,8 +11183,8 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), - OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2", - "$src2, ${src3}"##_.BroadcastStr##", $src4", + OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2", + "$src2, ${src3}"#_.BroadcastStr#", $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (_.BroadcastLdFrag addr:$src3)), @@ -11117,12 +11194,12 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, @@ -11141,13 +11218,13 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, // Additional patterns for matching zero masking with loads in other // positions. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), @@ -11156,31 +11233,31 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, // Additional patterns for matching masked loads with different // operand orders. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), @@ -11200,14 +11277,14 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, // Additional patterns for matching zero masking with broadcasts in other // positions. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode (_.BroadcastLdFrag addr:$src3), _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), _.RC:$src2, (i8 timm:$src4)), @@ -11218,32 +11295,32 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, // Additional patterns for matching masked broadcasts with different // operand orders. - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode (_.BroadcastLdFrag addr:$src3), _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, (_.BroadcastLdFrag addr:$src3), (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode _.RC:$src2, (_.BroadcastLdFrag addr:$src3), _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, + def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, (OpNode (_.BroadcastLdFrag addr:$src3), _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), @@ -11288,6 +11365,36 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 timm:$src4))), (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, @@ -11305,6 +11412,66 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v4i32 (X86vpternlog VR128X:$src1, + (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v2i64 (X86vpternlog VR128X:$src1, + (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 timm:$src4))), (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, @@ -11322,6 +11489,36 @@ let Predicates = [HasVLX] in { (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 timm:$src4))), (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, @@ -11338,6 +11535,66 @@ let Predicates = [HasVLX] in { VR256X:$src2, (i8 timm:$src4))), (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i32 (X86vpternlog VR256X:$src1, + (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v4i64 (X86vpternlog VR256X:$src1, + (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; } let Predicates = [HasAVX512] in { @@ -11358,6 +11615,36 @@ let Predicates = [HasAVX512] in { (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, (i8 timm:$src4))), (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, @@ -11371,9 +11658,84 @@ let Predicates = [HasAVX512] in { (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3), - VR512:$src2, (i8 timm:$src4))), + VR512:$src2, (i8 timm:$src4))), (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i32 (X86vpternlog VR512:$src1, + (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i64 (X86vpternlog VR512:$src1, + (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; } // Patterns to implement vnot using vpternlog instead of creating all ones @@ -11484,14 +11846,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, Uses = [MXCSR], mayRaiseFPException = 1 in { defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", + OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), (i32 timm:$src4))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", + OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), @@ -11499,8 +11861,8 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2", - "$src2, ${src3}"##_.BroadcastStr##", $src4", + OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", + "$src2, ${src3}"#_.BroadcastStr#", $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), @@ -11516,7 +11878,7 @@ multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", + OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", "$src2, $src3, {sae}, $src4", (X86VFixupimmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), @@ -11533,7 +11895,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", + OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), @@ -11541,7 +11903,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, let Uses = [MXCSR] in defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", + OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", "$src2, $src3, {sae}, $src4", (X86VFixupimmSAEs (_.VT _.RC:$src1), (_.VT _.RC:$src2), @@ -11550,7 +11912,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), - OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", + OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT (scalar_to_vector @@ -11630,8 +11992,9 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, // TODO: Some canonicalization in lowering would simplify the number of // patterns we have to try to match. -multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode, - X86VectorVTInfo _, PatLeaf ZeroFP> { +multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp, + string OpcPrefix, SDNode MoveNode, + X86VectorVTInfo _, PatLeaf ZeroFP> { let Predicates = [HasAVX512] in { // extracted scalar math op with insert via movss def : Pat<(MoveNode @@ -11639,79 +12002,79 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo (_.VT (scalar_to_vector (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), _.FRC:$src)))), - (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst, + (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst, (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; def : Pat<(MoveNode (_.VT VR128X:$dst), (_.VT (scalar_to_vector (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), (_.ScalarLdFrag addr:$src))))), - (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>; + (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>; // extracted masked scalar math op with insert via movss def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector - (X86selects VK1WM:$mask, - (Op (_.EltVT - (extractelt (_.VT VR128X:$src1), (iPTR 0))), - _.FRC:$src2), + (X86selects_mask VK1WM:$mask, + (MaskedOp (_.EltVT + (extractelt (_.VT VR128X:$src1), (iPTR 0))), + _.FRC:$src2), _.FRC:$src0))), - (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk) + (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk") (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), VK1WM:$mask, _.VT:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector - (X86selects VK1WM:$mask, - (Op (_.EltVT - (extractelt (_.VT VR128X:$src1), (iPTR 0))), - (_.ScalarLdFrag addr:$src2)), + (X86selects_mask VK1WM:$mask, + (MaskedOp (_.EltVT + (extractelt (_.VT VR128X:$src1), (iPTR 0))), + (_.ScalarLdFrag addr:$src2)), _.FRC:$src0))), - (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk) + (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk") (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), VK1WM:$mask, _.VT:$src1, addr:$src2)>; // extracted masked scalar math op with insert via movss def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector - (X86selects VK1WM:$mask, - (Op (_.EltVT - (extractelt (_.VT VR128X:$src1), (iPTR 0))), - _.FRC:$src2), (_.EltVT ZeroFP)))), - (!cast<I>("V"#OpcPrefix#Zrr_Intkz) + (X86selects_mask VK1WM:$mask, + (MaskedOp (_.EltVT + (extractelt (_.VT VR128X:$src1), (iPTR 0))), + _.FRC:$src2), (_.EltVT ZeroFP)))), + (!cast<I>("V"#OpcPrefix#"Zrr_Intkz") VK1WM:$mask, _.VT:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector - (X86selects VK1WM:$mask, - (Op (_.EltVT - (extractelt (_.VT VR128X:$src1), (iPTR 0))), - (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), - (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>; + (X86selects_mask VK1WM:$mask, + (MaskedOp (_.EltVT + (extractelt (_.VT VR128X:$src1), (iPTR 0))), + (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), + (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; } } -defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; -defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; -defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; -defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; -defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; -defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; -defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; -defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; +defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Move, X86VectorVTInfo _> { let Predicates = [HasAVX512] in { def : Pat<(_.VT (Move _.VT:$dst, (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), - (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>; + (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>; } } -defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>; -defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; +defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>; +defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; //===----------------------------------------------------------------------===// // AES instructions @@ -11724,13 +12087,13 @@ multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { loadv2i64, 0, VR128X, i128mem>, EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG; defm Z256 : AESI_binop_rm_int<Op, OpStr, - !cast<Intrinsic>(IntPrefix##"_256"), + !cast<Intrinsic>(IntPrefix#"_256"), loadv4i64, 0, VR256X, i256mem>, EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512, HasVAES] in defm Z : AESI_binop_rm_int<Op, OpStr, - !cast<Intrinsic>(IntPrefix##"_512"), + !cast<Intrinsic>(IntPrefix#"_512"), loadv8i64, 0, VR512, i512mem>, EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG; } @@ -11792,8 +12155,8 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, ExeDomain = VTI.ExeDomain in defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, - "${src3}"##VTI.BroadcastStr##", $src2", - "$src2, ${src3}"##VTI.BroadcastStr, + "${src3}"#VTI.BroadcastStr#", $src2", + "$src2, ${src3}"#VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, AVX512FMA3Base, EVEX_B, @@ -11827,22 +12190,22 @@ multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, } multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, SDNode OpNode, X86SchedWriteWidths sched> { - defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched, + defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; - defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched, + defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; - defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched, + defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, SDNode OpNode, X86SchedWriteWidths sched> { - defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched, + defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched, avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, VEX_W, EVEX_CD8<16, CD8VF>; - defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp, + defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; - defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode, + defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; } @@ -11890,8 +12253,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), - OpStr, "${src3}"##VTI.BroadcastStr##", $src2", - "$src2, ${src3}"##VTI.BroadcastStr, + OpStr, "${src3}"#VTI.BroadcastStr#", $src2", + "$src2, ${src3}"#VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, @@ -12027,8 +12390,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), - OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", - "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", + OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", + "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", (OpNode (VTI.VT VTI.RC:$src1), (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), (i8 timm:$src3))>, EVEX_B, @@ -12184,41 +12547,44 @@ multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, } } +let ExeDomain = SSEPackedSingle in defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", - SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF + SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF avx512vl_f32_info, avx512vl_i16_info, X86cvtne2ps2bf16, HasBF16, 0>, T8XD; // Truncate Float to BFloat16 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { + let ExeDomain = SSEPackedSingle in { let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info, - X86cvtneps2bf16, sched.ZMM>, EVEX_V512; + X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; } let Predicates = [HasBF16, HasVLX] in { let Uses = []<Register>, mayRaiseFPException = 0 in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info, - null_frag, sched.XMM, "{1to4}", "{x}", f128mem, + null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, VK4WM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info, - X86cvtneps2bf16, + X86cvtneps2bf16, X86cvtneps2bf16, sched.YMM, "{1to8}", "{y}">, EVEX_V256; } + } // Predicates = [HasBF16, HasVLX] + } // ExeDomain = SSEPackedSingle - def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", - (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, - VR128X:$src), 0>; - def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", - (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, - f128mem:$src), 0, "intel">; - def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", - (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, - VR256X:$src), 0>; - def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}", - (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, - f256mem:$src), 0, "intel">; - } + def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", + (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, + VR128X:$src), 0>; + def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", + (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, + f128mem:$src), 0, "intel">; + def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", + (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, + VR256X:$src), 0>; + def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", + (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, + f256mem:$src), 0, "intel">; } defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", @@ -12262,25 +12628,24 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo src_v> { defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src2, _.RC:$src3), + (ins src_v.RC:$src2, src_v.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, + (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, EVEX_4V, Sched<[sched]>; defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src2, _.MemOp:$src3), + (ins src_v.RC:$src2, src_v.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (OpNode _.RC:$src1, _.RC:$src2, - (src_v.VT (bitconvert - (src_v.LdFrag addr:$src3)))))>, EVEX_4V, + (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, + (src_v.LdFrag addr:$src3)))>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.RC:$src2, _.ScalarMemOp:$src3), + (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3), OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), - (_.VT (OpNode _.RC:$src1, _.RC:$src2, + (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -12302,6 +12667,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, } } +let ExeDomain = SSEPackedSingle in defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, avx512vl_f32_info, avx512vl_i32_info, HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; |