aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td2614
1 files changed, 1490 insertions, 1124 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 32f012033fb0..a3ad0b1c8dd6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -76,11 +76,11 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
- ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
- !cast<ComplexPattern>("sse_load_f32"),
- !if (!eq (EltTypeName, "f64"),
- !cast<ComplexPattern>("sse_load_f64"),
- ?));
+ PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
+ !cast<PatFrags>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"),
+ !cast<PatFrags>("sse_load_f64"),
+ ?));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
@@ -169,6 +169,18 @@ def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+// Used for matching masked operations. Ensures the operation part only has a
+// single use.
+def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
+ (vselect node:$mask, node:$src1, node:$src2), [{
+ return isProfitableToFormMaskedOp(N);
+}]>;
+
+def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
+ (X86selects node:$mask, node:$src1, node:$src2), [{
+ return isProfitableToFormMaskedOp(N);
+}]>;
+
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -220,7 +232,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
- SDNode Select = vselect,
+ SDPatternOperator Select = vselect_mask,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
@@ -236,35 +248,36 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
-// perserved vector elements come from a new dummy input operand tied to $dst.
+// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskRHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
- SDNode Select = vselect> :
+ bit IsKZCommutable = IsCommutable> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst,
- (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
+ (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
[(set _.RC:$dst,
- (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
- "$src0 = $dst", IsCommutable, IsKCommutable>;
+ (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
+ "$src0 = $dst", IsCommutable, IsKCommutable,
+ IsKZCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
-// perserved vector elements come from a new dummy input operand tied to $dst.
+// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
- SDNode Select = vselect> :
+ SDPatternOperator Select = vselect_mask> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
@@ -280,7 +293,7 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
string AttSrcAsm, string IntelSrcAsm,
dag RHS> :
AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
- RHS, 0, 0, 0, X86selects>;
+ RHS, 0, 0, 0, X86selects_mask>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
@@ -292,7 +305,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag RHS,
bit IsCommutable = 0,
bit IsKCommutable = 0,
- SDNode Select = vselect,
+ SDPatternOperator Select = vselect_mask,
bit MaskOnly = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
@@ -317,9 +330,9 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
- (vselect InVT.KRCWM:$mask, RHS,
+ (vselect_mask InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1)),
- vselect, "", IsCommutable>;
+ vselect_mask, "", IsCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -330,7 +343,7 @@ multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
bit MaskOnly = 0> :
AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
- X86selects, MaskOnly>;
+ X86selects_mask, MaskOnly>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -399,6 +412,36 @@ multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(and _.KRCWM:$mask, RHS_su), IsCommutable>;
+// Used by conversion instructions.
+multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst, MaskingRHS)],
+ [(set _.RC:$dst, ZeroMaskingRHS)],
+ "$src0 = $dst">;
+
+multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS, bit IsCommutable,
+ bit IsKCommutable> :
+ AVX512_maskable_custom<O, F, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst,
+ (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
+ [(set _.RC:$dst,
+ (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
+ "", IsCommutable, IsKCommutable>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
@@ -625,45 +668,45 @@ multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
list<Predicate> p> {
let Predicates = p in {
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm))),
- Cast.RC:$src0)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))),
+ Cast.RC:$src0)),
(!cast<Instruction>(InstrStr#"rrk")
Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT
- (bitconvert
- (From.LdFrag addr:$src2))),
- (iPTR imm))),
- Cast.RC:$src0)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT
+ (bitconvert
+ (From.LdFrag addr:$src2))),
+ (iPTR imm))),
+ Cast.RC:$src0)),
(!cast<Instruction>(InstrStr#"rmk")
Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm))),
- Cast.ImmAllZerosV)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))),
+ Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT (From.LdFrag addr:$src2)),
- (iPTR imm))),
- Cast.ImmAllZerosV)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT (From.LdFrag addr:$src2)),
+ (iPTR imm))),
+ Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rmkz")
Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
@@ -981,20 +1024,20 @@ multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
SDNodeXForm EXTRACT_get_vextract_imm,
list<Predicate> p> {
let Predicates = p in {
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (vextract_extract:$ext
- (From.VT From.RC:$src), (iPTR imm)))),
- To.RC:$src0)),
+ def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (To.VT (vextract_extract:$ext
+ (From.VT From.RC:$src), (iPTR imm)))),
+ To.RC:$src0)),
(Cast.VT (!cast<Instruction>(InstrStr#"rrk")
Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (vextract_extract:$ext
- (From.VT From.RC:$src), (iPTR imm)))),
- Cast.ImmAllZerosV)),
+ def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (To.VT (vextract_extract:$ext
+ (From.VT From.RC:$src), (iPTR imm)))),
+ Cast.ImmAllZerosV)),
(Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, From.RC:$src,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
@@ -1101,18 +1144,18 @@ multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
string Name,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
+ (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
- def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
- (X86VBroadcast SrcInfo.FRC:$src),
- DestInfo.RC:$src0)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
+ def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
- def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
- (X86VBroadcast SrcInfo.FRC:$src),
- DestInfo.ImmAllZerosV)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
+ def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
@@ -1128,83 +1171,83 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
SDPatternOperator UnmaskedOp = X86VBroadcast,
SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
let hasSideEffects = 0 in
- def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set MaskInfo.RC:$dst,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
- DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
- def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
- MaskInfo.ImmAllZerosV))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
- let Constraints = "$src0 = $dst" in
- def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
- SrcInfo.RC:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src}"),
+ def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
- MaskInfo.RC:$src0))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
+ DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
+ def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ MaskInfo.ImmAllZerosV))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
+ SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ MaskInfo.RC:$src0))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
let hasSideEffects = 0, mayLoad = 1 in
- def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
- (ins SrcInfo.ScalarMemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set MaskInfo.RC:$dst,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (UnmaskedBcastOp addr:$src)))))],
- DestInfo.ExeDomain>, T8PD, EVEX,
- EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
-
- def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (SrcInfo.BroadcastLdFrag addr:$src)))),
- MaskInfo.ImmAllZerosV))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
- EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+ def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (UnmaskedBcastOp addr:$src)))))],
+ DestInfo.ExeDomain>, T8PD, EVEX,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (SrcInfo.BroadcastLdFrag addr:$src)))),
+ MaskInfo.ImmAllZerosV))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
let Constraints = "$src0 = $dst",
isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
- SrcInfo.ScalarMemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src}"),
- [(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (SrcInfo.BroadcastLdFrag addr:$src)))),
- MaskInfo.RC:$src0))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
- EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
+ SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (SrcInfo.BroadcastLdFrag addr:$src)))),
+ MaskInfo.RC:$src0))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
// Helper class to force mask and broadcast result to same type.
@@ -1267,35 +1310,38 @@ defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC> {
+ // Fold with a mask even if it has multiple uses since it is cheap.
let ExeDomain = _.ExeDomain in
- defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins SrcRC:$src),
- "vpbroadcast"##_.Suffix, "$src", "$src",
- (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
- Sched<[SchedRR]>;
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src),
+ "vpbroadcast"#_.Suffix, "$src", "$src",
+ (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
+ /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
+ T8PD, EVEX, Sched<[SchedRR]>;
}
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
let hasSideEffects = 0, ExeDomain = _.ExeDomain in
- defm r : AVX512_maskable_custom<opc, MRMSrcReg,
- (outs _.RC:$dst), (ins GR32:$src),
- !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
- !con((ins _.KRCWM:$mask), (ins GR32:$src)),
- "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
- "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
+ defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
+ (outs _.RC:$dst), (ins GR32:$src),
+ !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
+ !con((ins _.KRCWM:$mask), (ins GR32:$src)),
+ "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
+ "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
def : Pat <(_.VT (OpNode SrcRC:$src)),
- (!cast<Instruction>(Name#r)
+ (!cast<Instruction>(Name#rr)
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+ // Fold with a mask even if it has multiple uses since it is cheap.
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
- (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
+ (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
- (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
+ (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
@@ -1392,72 +1438,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
-let Predicates = [HasAVX512] in {
- // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQZm addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDZm addr:$src)>;
-}
-
-let Predicates = [HasVLX] in {
- // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQZ128m addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQZ256m addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDZ128m addr:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDZ256m addr:$src)>;
-}
-let Predicates = [HasVLX, HasBWI] in {
- // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
- // This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZ256m addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWZ256m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWZ256m addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWZ256m addr:$src)>;
-}
-let Predicates = [HasBWI] in {
- // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
- // This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZm addr:$src)>;
- def : Pat<(v32i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWZm addr:$src)>;
- def : Pat<(v32i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWZm addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWZm addr:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
@@ -1516,38 +1496,38 @@ def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (v16f32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (v16f32 immAllZerosV)),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- (v16i32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (v16i32 immAllZerosV)),
(VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
- (v8f64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ (v8f64 immAllZerosV)),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
- (v8i64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
+ (v8i64 immAllZerosV)),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1569,21 +1549,21 @@ def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (v8f32 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (v8f32 immAllZerosV)),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- (v8i32 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (v8i32 immAllZerosV)),
(VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
@@ -1618,21 +1598,21 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (v4f64 immAllZerosV)),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (v4f64 immAllZerosV)),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (v4i64 immAllZerosV)),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (v4i64 immAllZerosV)),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
@@ -1651,38 +1631,38 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
- (v16f32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ (v16f32 immAllZerosV)),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
- (v16i32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ (v16i32 immAllZerosV)),
(VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (v8f64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (v8f64 immAllZerosV)),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (v8i64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (v8i64 immAllZerosV)),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1836,24 +1816,27 @@ defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
X86VectorVTInfo IdxVT,
X86VectorVTInfo CastVT> {
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermt2 (_.VT _.RC:$src2),
- (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
- (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
+ (X86VPermt2 (_.VT _.RC:$src2),
+ (IdxVT.VT (bitconvert
+ (CastVT.VT _.RC:$src1))),
+ _.RC:$src3),
+ (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermt2 _.RC:$src2,
- (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (_.LdFrag addr:$src3)),
- (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert
+ (CastVT.VT _.RC:$src1))),
+ (_.LdFrag addr:$src3)),
+ (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermt2 _.RC:$src2,
- (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (_.BroadcastLdFrag addr:$src3)),
- (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+ (_.BroadcastLdFrag addr:$src3)),
+ (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
}
@@ -2085,9 +2068,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+ (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc),
- (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+ (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
@@ -2646,13 +2629,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1),
@@ -2660,18 +2643,18 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
- (X86Vfpclasss _.ScalarIntMemCPat:$src1,
- (i32 timm:$src2)))]>,
+ (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
+ (i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
- (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
+ (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -2686,13 +2669,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1),
@@ -2700,7 +2683,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"{"#mem#"}"#
+ OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
@@ -2708,7 +2691,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"{"#mem#"}"#
+ OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
@@ -2716,18 +2699,18 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst|$dst, ${src1}"
- ##_.BroadcastStr##", $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
+ _.BroadcastStr#", $dst|$dst, ${src1}"
+ #_.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
- _.BroadcastStr##", $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
+ _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
+ _.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2))))]>,
@@ -2979,6 +2962,8 @@ def : Pat<(vnot VK4:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
+def : Pat<(vnot VK1:$src), // Widen to VK16 for KNOTWrr, then copy the result back to VK1.
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
@@ -3008,8 +2993,6 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
-def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
-def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
@@ -3022,7 +3005,7 @@ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XM
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
-multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
+multiclass avx512_binop_pat<SDPatternOperator VOpNode,
Instruction Inst> {
// With AVX512F, 8-bit mask is promoted to 16-bit mask,
// for the DQI set, this type is legal and KxxxB instruction is used
@@ -3033,25 +3016,25 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
// All types smaller than 8 bits require conversion anyway
- def : Pat<(OpNode VK1:$src1, VK1:$src2),
+ def : Pat<(VOpNode VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
- (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
+ (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
- (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
+ (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
-defm : avx512_binop_pat<and, and, KANDWrr>;
-defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
-defm : avx512_binop_pat<or, or, KORWrr>;
-defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
-defm : avx512_binop_pat<xor, xor, KXORWrr>;
+defm : avx512_binop_pat<and, KANDWrr>;
+defm : avx512_binop_pat<vandn, KANDNWrr>;
+defm : avx512_binop_pat<or, KORWrr>;
+defm : avx512_binop_pat<vxnor, KXNORWrr>;
+defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
@@ -3065,7 +3048,7 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
VEX_4V, VEX_L, Sched<[sched]>;
def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
- (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
+ (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
@@ -3201,8 +3184,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3219,8 +3202,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
timm:$cc), Narrow.KRC)>;
// Broadcast load.
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3235,8 +3218,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
addr:$src2, timm:$cc), Narrow.KRC)>;
// Commuted with broadcast load.
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
- (Narrow.VT Narrow.RC:$src1), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+ (Narrow.VT Narrow.RC:$src1), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3301,7 +3284,7 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
SchedRW = [WriteZero] in
- def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
+ def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
@@ -3409,7 +3392,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
- (vselect _.KRCWM:$mask,
+ (vselect_mask _.KRCWM:$mask,
(_.VT (ld_frag addr:$src1)),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RM]>;
@@ -3418,18 +3401,18 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
(_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
- (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
+ (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
_.KRCWM:$mask, addr:$ptr)>;
}
@@ -4286,6 +4269,17 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0)))
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
+
+def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
+ (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
+ (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+
+def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
+ (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
+ (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
@@ -4439,8 +4433,6 @@ let Predicates = [HasAVX512] in {
(VMOV64toPQIZrr GR64:$src)>;
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload32 addr:$src)),
@@ -4624,8 +4616,8 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V, EVEX_B,
@@ -4750,8 +4742,8 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
OpcodeStr,
- "${src2}"##_Brdct.BroadcastStr##", $src1",
- "$src1, ${src2}"##_Brdct.BroadcastStr,
+ "${src2}"#_Brdct.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
@@ -4822,8 +4814,8 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
- "${src2}"##_Src.BroadcastStr##", $src1",
- "$src1, ${src2}"##_Src.BroadcastStr,
+ "${src2}"#_Src.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
@@ -5159,26 +5151,26 @@ multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Masked register-register logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2)>;
// Masked register-memory logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.ImmAllZerosV)),
@@ -5190,14 +5182,14 @@ multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Register-broadcast logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
(IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
(IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
@@ -5304,7 +5296,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- _.ScalarIntMemCPat:$src2))>,
+ (_.ScalarIntMemFrags addr:$src2)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5350,7 +5342,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- _.ScalarIntMemCPat:$src2))>,
+ (_.ScalarIntMemFrags addr:$src2)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, Predicates = [HasAVX512],
@@ -5463,28 +5455,32 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
bit IsKCommutable = IsCommutable> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
IsKCommutable, IsKCommutable>,
EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in {
- defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
+ (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
+ defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
+ (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5496,7 +5492,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
@@ -5507,38 +5503,39 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
EVEX_4V, EVEX_B, Sched<[sched]>;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode,
Predicate prd, X86SchedWriteSizes sched,
bit IsCommutable = 0,
bit IsPD128Commutable = IsCommutable> {
let Predicates = [prd] in {
- defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
+ defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
+ defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
- defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
+ defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
+ defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
+ defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
sched.PD.XMM, IsPD128Commutable,
IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
+ defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
@@ -5566,38 +5563,38 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
-defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
+defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
SchedWriteFMulSizes, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
SchedWriteFAddSizes>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
SchedWriteFDivSizes>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
- defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
+ defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
SchedWriteFCmpSizes, 1>;
- defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
+ defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
}
@@ -5605,19 +5602,19 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
EVEX_4V, Sched<[sched]>;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5627,14 +5624,14 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>;
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
+ (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5648,11 +5645,11 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
- avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
- avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
@@ -5679,7 +5676,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
// NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
- // There are just too many permuations due to commutability and bitcasts.
+ // There are just too many permutations due to commutability and bitcasts.
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -5701,8 +5698,8 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(null_frag), (null_frag)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -5790,7 +5787,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
- "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
+ "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
(_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
EVEX_B, Sched<[sched.Folded]>;
}
@@ -5973,8 +5970,8 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -6245,8 +6242,8 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
@@ -6370,9 +6367,6 @@ defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
- def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
- (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
- (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
@@ -6419,29 +6413,33 @@ let Predicates = [HasAVX512] in {
//
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched,
+ SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
+ (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
AVX512FMA3Base, Sched<[sched]>;
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
+ (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
+ _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
+ (MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6450,74 +6448,88 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _, string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, Suff>,
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512, Suff>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512, Suff>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, Suff>,
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, Suff>,
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd> {
- defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f32_info, "PS">;
- defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f64_info, "PD">,
- VEX_W;
-}
-
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
+ SDNode MaskOpNode, SDNode OpNodeRnd> {
+ defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f32_info, "PS">;
+ defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f64_info, "PD">, VEX_W;
+}
+
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
+ X86Fmadd, X86FmaddRnd>;
+defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
+ X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
+ X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
+ X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
+ X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
+ X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched,
+ SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
- vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
+ (null_frag),
+ (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
+ AVX512FMA3Base, Sched<[sched]>;
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
+ (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr,
+ OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
+ _.RC:$src1)),
+ (_.VT (MaskOpNode _.RC:$src2,
+ (_.VT (_.BroadcastLdFrag addr:$src3)),
+ _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6527,77 +6539,89 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
- 1, 1, vselect, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ (null_frag),
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
+ 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _, string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, Suff>,
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512, Suff>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512, Suff>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, Suff>,
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, Suff>,
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
- defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f32_info, "PS">;
- defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f64_info, "PD">,
- VEX_W;
-}
-
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
+ SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f32_info, "PS">;
+ defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f64_info, "PD">, VEX_W;
+}
+
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
+ X86Fmadd, X86FmaddRnd>;
+defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
+ X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
+ X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
+ X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
+ X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
+ X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched,
+ SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
+ (null_frag),
+ (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
AVX512FMA3Base, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
+ (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr,
+ OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr,
(_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1, _.RC:$src2)), 1, 0>,
+ _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2)), 1, 0>,
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6607,49 +6631,57 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
- 1, 1, vselect, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ (null_frag),
+ (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
+ 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _, string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, Suff>,
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512, Suff>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512, Suff>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, Suff>,
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, Suff>,
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
- defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f32_info, "PS">;
- defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f64_info, "PD">,
- VEX_W;
-}
-
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
+ SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f32_info, "PS">;
+ defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f64_info, "PD">, VEX_W;
+}
+
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
+ X86Fmadd, X86FmaddRnd>;
+defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
+ X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
+ X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
+ X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
+ X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
+ X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -6742,11 +6774,12 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
-multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
+multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
+ SDNode RndOp, string Prefix,
string Suffix, SDNode Move,
X86VectorVTInfo _, PatLeaf ZeroFP> {
let Predicates = [HasAVX512] in {
@@ -6788,8 +6821,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6799,8 +6832,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6809,18 +6842,18 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, _.FRC:$src3,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, _.FRC:$src3,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
VR128X:$src1, VK1WM:$mask,
@@ -6828,19 +6861,19 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src3),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src3),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
VR128X:$src1, VK1WM:$mask,
@@ -6848,9 +6881,9 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, _.FRC:$src3,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, _.FRC:$src3,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
VR128X:$src1, VK1WM:$mask,
@@ -6858,28 +6891,28 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src3)),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
@@ -6903,7 +6936,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
@@ -6914,7 +6947,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
@@ -6925,7 +6958,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
@@ -6936,7 +6969,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
@@ -6948,23 +6981,23 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
}
}
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
-defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
@@ -7194,7 +7227,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
+ (SrcVT.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [HasAVX512]
@@ -7233,6 +7266,45 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
+ X86VectorVTInfo DstVT, SDNode OpNode,
+ X86FoldableSchedWrite sched,
+ string aliasStr> {
+ let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ } // Predicates = [HasAVX512]
+}
+
+defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
+ lrint, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
+ llrint, WriteCvtSS2I,
+ "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
+ lrint, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
+ llrint, WriteCvtSD2I,
+ "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
+
+ def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
+}
+
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
@@ -7347,7 +7419,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst,
- (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
+ (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} //HasAVX512
@@ -7404,7 +7476,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
+ (_Src.ScalarIntMemFrags addr:$src2)))>,
EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -7421,7 +7493,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
}
}
-// Scalar Coversion with SAE - suppress all exceptions
+// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
@@ -7506,55 +7578,63 @@ def : Pat<(v2f64 (X86Movsd
//===----------------------------------------------------------------------===//
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86VectorVTInfo _Src, SDNode OpNode,
+ X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM,
- dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
+ dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
+ dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
(ins MaskRC:$mask, _Src.RC:$src),
OpcodeStr, "$src", "$src",
(_.VT (OpNode (_Src.VT _Src.RC:$src))),
- (vselect MaskRC:$mask,
- (_.VT (OpNode (_Src.VT _Src.RC:$src))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
+ (vselect_mask MaskRC:$mask,
+ (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
+ _.RC:$src0),
+ (vselect_mask MaskRC:$mask,
+ (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
+ _.ImmAllZerosV)>,
EVEX, Sched<[sched]>;
- defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins MemOp:$src),
(ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
(ins MaskRC:$mask, MemOp:$src),
OpcodeStr#Alias, "$src", "$src",
LdDAG,
- (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
- vselect, "$src0 = $dst">,
+ (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
+ (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
EVEX, Sched<[sched.Folded]>;
- defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.ScalarMemOp:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
(ins MaskRC:$mask, _Src.ScalarMemOp:$src),
OpcodeStr,
- "${src}"##Broadcast, "${src}"##Broadcast,
+ "${src}"#Broadcast, "${src}"#Broadcast,
(_.VT (OpNode (_Src.VT
(_Src.BroadcastLdFrag addr:$src))
)),
- (vselect MaskRC:$mask,
- (_.VT
- (OpNode
- (_Src.VT
- (_Src.BroadcastLdFrag addr:$src)))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
+ (vselect_mask MaskRC:$mask,
+ (_.VT
+ (MaskOpNode
+ (_Src.VT
+ (_Src.BroadcastLdFrag addr:$src)))),
+ _.RC:$src0),
+ (vselect_mask MaskRC:$mask,
+ (_.VT
+ (MaskOpNode
+ (_Src.VT
+ (_Src.BroadcastLdFrag addr:$src)))),
+ _.ImmAllZerosV)>,
EVEX, EVEX_B, Sched<[sched.Folded]>;
}
}
-// Coversion with SAE - suppress all exceptions
+// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
@@ -7581,12 +7661,14 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
+ SDNode MaskOpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM>
- : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
- MemOp, MaskRC,
+ : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
+ Alias, MemOp, MaskRC,
+ (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
// Extend Float to Double
@@ -7594,69 +7676,72 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
- any_fpextend, sched.ZMM>,
+ any_fpextend, fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
- sched.YMM>, EVEX_V256;
+ X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
+ "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
+ any_fpextend, fpextend, sched.YMM>, EVEX_V256;
}
}
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ X86any_vfpround, X86vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
- EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ f128mem, VK2WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
+ X86any_vfpround, X86vfpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
@@ -7701,81 +7786,91 @@ let Predicates = [HasVLX] in {
// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNode128, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNode128,
+ SDNode MaskOpNode128,
+ X86SchedWriteWidths sched> {
// No rounding in this op
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
- sched.ZMM>, EVEX_V512;
+ MaskOpNode, sched.ZMM>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
- OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+ OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
+ "", i64mem, VK2WM,
(v2f64 (OpNode128 (bc_v4i32
(v2i64
+ (scalar_to_vector (loadi64 addr:$src)))))),
+ (v2f64 (MaskOpNode128 (bc_v4i32
+ (v2i64
(scalar_to_vector (loadi64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode MaskOpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeSAE, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeSAE,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeSAE, sched.ZMM>, EVEX_V512;
}
@@ -7785,50 +7880,50 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
@@ -7836,10 +7931,11 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -7849,48 +7945,48 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
@@ -7898,61 +7994,65 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
- sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
+ MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
- sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
+ MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
}
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -7960,21 +8060,26 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
(v2i64 (OpNode (bc_v4f32
(v2f64
+ (scalar_to_vector (loadf64 addr:$src)))))),
+ (v2i64 (MaskOpNode (bc_v4f32
+ (v2f64
(scalar_to_vector (loadf64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -7982,22 +8087,26 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
(v2i64 (OpNode (bc_v4f32
(v2f64
+ (scalar_to_vector (loadf64 addr:$src)))))),
+ (v2i64 (MaskOpNode (bc_v4f32
+ (v2f64
(scalar_to_vector (loadf64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -8007,152 +8116,159 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
- sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+ null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256,
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
}
-defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VSintToFP,
SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
-defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
+defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
- XS, EVEX_CD8<32, CD8VF>;
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPD2DQ>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
- EVEX_CD8<32, CD8VF>;
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
- X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
- EVEX_CD8<32, CD8VH>;
+ uint_to_fp, X86any_VUintToFP, X86VUintToFP,
+ SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
- EVEX_CD8<32, CD8VF>;
+ uint_to_fp, X86VUintToFpRnd,
+ SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
-defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
+defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VF>;
-defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
+defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
VEX_W, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
+defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
PS, EVEX_CD8<32, CD8VF>;
-defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
+defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PS, EVEX_CD8<64, CD8VF>;
-defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
+defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
+defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
+defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
+defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
- X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
- EVEX_CD8<64, CD8VF>;
+ sint_to_fp, X86VSintToFpRnd,
+ SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
- EVEX_CD8<64, CD8VF>;
+ uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
+ VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
- X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
- EVEX_CD8<64, CD8VF>;
+ sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
+ VEX_W, PS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
- EVEX_CD8<64, CD8VF>;
+ uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
+ VEX_W, XD, EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
@@ -8275,70 +8391,70 @@ let Predicates = [HasVLX] in {
let Predicates = [HasDQI, HasVLX] in {
def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2QQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2UQQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2QQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2UQQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- v2f64x_info.ImmAllZerosV)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ v2f64x_info.ImmAllZerosV)),
(VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTUDQ2PDZ128rm addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- v2f64x_info.ImmAllZerosV)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ v2f64x_info.ImmAllZerosV)),
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
@@ -8408,16 +8524,17 @@ let Predicates = [HasDQI, HasVLX] in {
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
- X86MemOperand x86memop, PatFrag ld_frag,
+ X86MemOperand x86memop, dag ld_dag,
X86FoldableSchedWrite sched> {
- defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
+ defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
+ (X86any_cvtph2ps (_src.VT _src.RC:$src)),
(X86cvtph2ps (_src.VT _src.RC:$src))>,
T8PD, Sched<[sched]>;
- defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
+ defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
- (X86cvtph2ps (_src.VT
- (ld_frag addr:$src)))>,
+ (X86any_cvtph2ps (_src.VT ld_dag)),
+ (X86cvtph2ps (_src.VT ld_dag))>,
T8PD, Sched<[sched.Folded]>;
}
@@ -8432,23 +8549,22 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
let Predicates = [HasAVX512] in
- defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
- WriteCvtPH2PSZ>,
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
+ (load addr:$src), WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
+ (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- load, WriteCvtPH2PS>, EVEX, EVEX_V128,
+ (bitconvert (v2i64 (X86vzload64 addr:$src))),
+ WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
- (VCVTPH2PSZ128rm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
+ def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSZ128rm addr:$src)>;
}
@@ -8460,7 +8576,7 @@ let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _dest.RC:$dst,
- (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
+ (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
Sched<[RR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
@@ -8505,54 +8621,35 @@ let Predicates = [HasAVX512] in {
WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
- let Predicates = [HasVLX] in {
- defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
- WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
- EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
- WriteCvtPS2PH, WriteCvtPS2PHSt>,
- EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
- }
+
+ def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
+ (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
+}
+
+let Predicates = [HasVLX] in {
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
+ WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
+ EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
+ WriteCvtPS2PH, WriteCvtPS2PHSt>,
+ EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
def : Pat<(store (f64 (extractelt
- (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
+ (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
def : Pat<(store (i64 (extractelt
- (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
+ (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
- def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
+ def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
(VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
- def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
- (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
-}
-
-// Patterns for matching conversions from float to half-float and vice versa.
-let Predicates = [HasVLX] in {
- // Use MXCSR.RC for rounding instead of explicitly specifying the default
- // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
- // configurations we support (the default). However, falling back to MXCSR is
- // more consistent with other instructions, which are always controlled by it.
- // It's encoded as 0b100.
- def : Pat<(fp_to_f16 FR32X:$src),
- (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
- (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
-
- def : Pat<(f16_to_fp GR16:$src),
- (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
- (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
-
- def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
- (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
- (v8i16 (VCVTPS2PHZ128rr
- (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, Domain d,
- X86FoldableSchedWrite sched = WriteFCom> {
+ X86FoldableSchedWrite sched = WriteFComX> {
let hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
@@ -8613,7 +8710,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
+ (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8646,7 +8743,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
- "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -8701,7 +8798,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
+ (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -8741,7 +8838,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
- "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -8811,20 +8908,21 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (any_fsqrt _.RC:$src))>, EVEX,
+ (_.VT (any_fsqrt _.RC:$src)),
+ (_.VT (fsqrt _.RC:$src))>, EVEX,
Sched<[sched]>;
- defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
- (any_fsqrt (_.VT
- (bitconvert (_.LdFrag addr:$src))))>, EVEX,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (any_fsqrt (_.VT (_.LdFrag addr:$src))),
+ (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
+ defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
- "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
- (any_fsqrt (_.VT
- (_.BroadcastLdFrag addr:$src)))>,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
+ (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
+ (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8879,7 +8977,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2)>,
+ (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -8952,7 +9050,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
- _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
+ (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
@@ -8971,13 +9069,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX512] in {
def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
- (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
+ (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
- (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
+ (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, timm:$src2))>;
}
}
@@ -8996,13 +9094,13 @@ multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
dag OutMask, Predicate BasePredicate> {
let Predicates = [BasePredicate] in {
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
+ def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
(extractelt _.VT:$dst, (iPTR 0))))),
(!cast<Instruction>("V"#OpcPrefix#r_Intk)
_.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
+ def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
ZeroFP))),
(!cast<Instruction>("V"#OpcPrefix#r_Intkz)
@@ -9026,14 +9124,14 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect node:$mask,
- (trunc node:$src), node:$src0)>;
+ (vselect_mask node:$mask,
+ (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect node:$mask,
- (X86vtruncs node:$src), node:$src0)>;
+ (vselect_mask node:$mask,
+ (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect node:$mask,
- (X86vtruncus node:$src), node:$src0)>;
+ (vselect_mask node:$mask,
+ (X86vtruncus node:$src), node:$src0)>;
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
@@ -9083,12 +9181,12 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
string Name> {
def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
- (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
+ (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
addr:$dst, SrcInfo.RC:$src)>;
def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
SrcInfo.KRCWM:$mask),
- (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
+ (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
@@ -9548,6 +9646,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
@@ -9558,6 +9658,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
@@ -9565,6 +9667,10 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
@@ -9586,54 +9692,49 @@ def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag GatherNode,
- RegisterClass MaskRC = _.KRCWM> {
+ X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
- ExeDomain = _.ExeDomain in
+ ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
(ins _.RC:$src1, MaskRC:$mask, memop:$src2),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- [(set _.RC:$dst, MaskRC:$mask_wb,
- (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
- vectoraddr:$src2))]>, EVEX, EVEX_K,
- EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
+ []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
- vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
- defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
- vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
+ defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
+ vy512xmem>, EVEX_V512, VEX_W;
+ defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
+ vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
- vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
- defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
- vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
- defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
- defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
+ defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
+ vx256xmem>, EVEX_V256, VEX_W;
+ defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
+ vy256xmem>, EVEX_V256, VEX_W;
+ defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
+ defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
}
}
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
- mgatherv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
- mgatherv8i64>, EVEX_V512;
+ defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
+ EVEX_V512;
+ defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
+ EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
- vy256xmem, mgatherv8i32>, EVEX_V256;
- defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vy128xmem, mgatherv4i64>, EVEX_V256;
- defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mgatherv4i32>, EVEX_V128;
- defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vx64xmem, mgatherv2i64, VK2WM>,
- EVEX_V128;
+ defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
+ vy256xmem>, EVEX_V256;
+ defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
+ vy128xmem>, EVEX_V256;
+ defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128;
+ defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
+ vx64xmem, VK2WM>, EVEX_V128;
}
}
@@ -9645,55 +9746,52 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag ScatterNode,
- RegisterClass MaskRC = _.KRCWM> {
+ X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
-let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
+let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
+ hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
(ins memop:$dst, MaskRC:$mask, _.RC:$src),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
- MaskRC:$mask, vectoraddr:$dst))]>,
- EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
+ []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[WriteStore]>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
- vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
- defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
- vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
+ defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
+ vy512xmem>, EVEX_V512, VEX_W;
+ defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
+ vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
- vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
- defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
- vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
- defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
- defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
- vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
+ defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
+ vx256xmem>, EVEX_V256, VEX_W;
+ defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
+ vy256xmem>, EVEX_V256, VEX_W;
+ defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
+ defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
}
}
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
- mscatterv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
- mscatterv8i64>, EVEX_V512;
+ defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
+ EVEX_V512;
+ defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
+ EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
- vy256xmem, mscatterv8i32>, EVEX_V256;
- defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
- vy128xmem, mscatterv4i64>, EVEX_V256;
- defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mscatterv4i32>, EVEX_V128;
- defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
- vx64xmem, mscatterv2i64, VK2WM>,
- EVEX_V128;
+ defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
+ vy256xmem>, EVEX_V256;
+ defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
+ vy128xmem>, EVEX_V256;
+ defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128;
+ defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
+ vx64xmem, VK2WM>, EVEX_V128;
}
}
@@ -9762,13 +9860,9 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
- !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
-
-// Also need a pattern for anyextend.
-def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
- (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
@@ -9842,19 +9936,11 @@ let Predicates = [HasDQI, NoBWI] in {
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
-
- def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
- (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
- def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
- (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
let Predicates = [HasDQI, NoBWI, HasVLX] in {
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
(VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
-
- def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
- (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
@@ -9885,14 +9971,14 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##mrk)
+ (!cast<Instruction>(Name#_.ZSuffix#mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ (!cast<Instruction>(Name#_.ZSuffix#rrk)
_.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rrkz)
_.KRCWM:$mask, _.RC:$src)>;
}
@@ -9940,23 +10026,23 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
(_.VT _.RC:$src0))),
- (!cast<Instruction>(Name#_.ZSuffix##rmk)
+ (!cast<Instruction>(Name#_.ZSuffix#rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ (!cast<Instruction>(Name#_.ZSuffix#rrk)
_.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rrkz)
_.KRCWM:$mask, _.RC:$src)>;
}
@@ -9990,26 +10076,33 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
// op(mem_vec,imm)
// op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
-multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode MaskOpNode,
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
- (i32 timm:$src2))>, Sched<[sched]>;
- defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
+ (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
+ Sched<[sched]>;
+ defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+ OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 timm:$src2))>,
+ (i32 timm:$src2)),
+ (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 timm:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
- "${src1}"##_.BroadcastStr##", $src2",
+ OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
+ "${src1}"#_.BroadcastStr#", $src2",
(OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
- (i32 timm:$src2))>, EVEX_B,
+ (i32 timm:$src2)),
+ (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
+ (i32 timm:$src2))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10021,7 +10114,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
+ OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 timm:$src2))>,
@@ -10030,18 +10123,19 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
- SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
+ Predicate prd>{
let Predicates = [prd] in {
- defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512>,
+ defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
- defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128>, EVEX_V128;
- defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256>, EVEX_V256;
+ defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256>, EVEX_V256;
}
}
@@ -10068,8 +10162,8 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i32 timm:$src3))>, EVEX_B,
@@ -10111,8 +10205,8 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i8 timm:$src3))>, EVEX_B,
@@ -10135,7 +10229,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
- (_.VT _.ScalarIntMemCPat:$src2),
+ (_.ScalarIntMemFrags addr:$src2),
(i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -10228,24 +10322,26 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
- SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode MaskOpNode, SDNode OpNodeSAE,
+ X86SchedWriteWidths sched, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
- opcPs, OpNode, OpNodeSAE, sched, prd>,
+ opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
- opcPd, OpNode, OpNodeSAE, sched, prd>,
+ opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
- X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
- AVX512AIi8Base, EVEX;
+ X86VReduce, X86VReduce, X86VReduceSAE,
+ SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
- X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
+ X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
+ SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
- X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, EVEX;
+ X86VGetMant, X86VGetMant, X86VGetMantSAE,
+ SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
@@ -10302,8 +10398,8 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(_.VT
(bitconvert
(CastInfo.VT
@@ -10391,8 +10487,8 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(X86VAlign _.RC:$src1,
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i8 timm:$src3))>, EVEX_B,
@@ -10441,40 +10537,40 @@ def ValigndImm8XForm : SDNodeXForm<timm, [{
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
X86VectorVTInfo From, X86VectorVTInfo To,
SDNodeXForm ImmXForm> {
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1, From.RC:$src2,
- timm:$src3))),
- To.RC:$src0)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ timm:$src3))),
+ To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1, From.RC:$src2,
- timm:$src3))),
- To.ImmAllZerosV)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ timm:$src3))),
+ To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (From.LdFrag addr:$src2),
- timm:$src3))),
- To.RC:$src0)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (From.LdFrag addr:$src2),
+ timm:$src3))),
+ To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (From.LdFrag addr:$src2),
- timm:$src3))),
- To.ImmAllZerosV)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (From.LdFrag addr:$src2),
+ timm:$src3))),
+ To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
@@ -10491,24 +10587,24 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (bitconvert
- (To.VT (To.BroadcastLdFrag addr:$src2))),
- timm:$src3))),
- To.RC:$src0)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3))),
+ To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (bitconvert
- (To.VT (To.BroadcastLdFrag addr:$src2))),
- timm:$src3))),
- To.ImmAllZerosV)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3))),
+ To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
@@ -10567,8 +10663,8 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1), OpcodeStr,
- "${src1}"##_.BroadcastStr,
- "${src1}"##_.BroadcastStr,
+ "${src1}"#_.BroadcastStr,
+ "${src1}"#_.BroadcastStr,
(_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
@@ -10751,32 +10847,14 @@ defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
- (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
- (v2f64 VR128X:$src0)),
+def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+ (v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
- immAllZerosV),
+def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+ immAllZerosV),
(VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
- (v2f64 VR128X:$src0)),
- (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
- immAllZerosV),
- (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
-
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (v2f64 VR128X:$src0)),
- (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- immAllZerosV),
- (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -10784,9 +10862,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load
//===----------------------------------------------------------------------===//
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
+defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
SchedWriteFShuffleSizes, 0, 1>;
-defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
+defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
SchedWriteFShuffleSizes>;
}
@@ -10945,16 +11023,15 @@ defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD,
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
-// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
- def rr : AVX512<opc, MRMr,
+ def ri : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
- def rm : AVX512<opc, MRMm,
+ def mi : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
@@ -11106,8 +11183,8 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
- OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr#", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (_.BroadcastLdFrag addr:$src3)),
@@ -11117,12 +11194,12 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
@@ -11141,13 +11218,13 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching zero masking with loads in other
// positions.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
@@ -11156,31 +11233,31 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching masked loads with different
// operand orders.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
@@ -11200,14 +11277,14 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching zero masking with broadcasts in other
// positions.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src3),
_.RC:$src2, (i8 timm:$src4)),
@@ -11218,32 +11295,32 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching masked broadcasts with different
// operand orders.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(_.BroadcastLdFrag addr:$src3),
(i8 timm:$src4)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2,
(_.BroadcastLdFrag addr:$src3),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
@@ -11288,6 +11365,36 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
@@ -11305,6 +11412,66 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@@ -11322,6 +11489,36 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@@ -11338,6 +11535,66 @@ let Predicates = [HasVLX] in {
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
}
let Predicates = [HasAVX512] in {
@@ -11358,6 +11615,36 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
@@ -11371,9 +11658,69 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
- VR512:$src2, (i8 timm:$src4))),
+ VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i32 (X86vpternlog VR512:$src1,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i64 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
}
// Patterns to implement vnot using vpternlog instead of creating all ones
@@ -11484,14 +11846,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
(i32 timm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
@@ -11499,8 +11861,8 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr#", $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
@@ -11516,7 +11878,7 @@ multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
+ OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(X86VFixupimmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
@@ -11533,7 +11895,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
@@ -11541,7 +11903,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR] in
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
+ OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(X86VFixupimmSAEs (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
@@ -11550,7 +11912,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
@@ -11630,8 +11992,9 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
-multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
- X86VectorVTInfo _, PatLeaf ZeroFP> {
+multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
+ string OpcPrefix, SDNode MoveNode,
+ X86VectorVTInfo _, PatLeaf ZeroFP> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movss
def : Pat<(MoveNode
@@ -11639,79 +12002,79 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
(_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
_.FRC:$src)))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
+ (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
(_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
def : Pat<(MoveNode
(_.VT VR128X:$dst),
(_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
(_.ScalarLdFrag addr:$src))))),
- (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
+ (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src2),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src2),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
+ (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src2)),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src2)),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
+ (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src2), (_.EltVT ZeroFP)))),
- (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src2), (_.EltVT ZeroFP)))),
+ (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
- (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
+ (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
}
}
-defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
-defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
-defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
-defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
SDNode Move, X86VectorVTInfo _> {
let Predicates = [HasAVX512] in {
def : Pat<(_.VT (Move _.VT:$dst,
(scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
+ (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
}
}
-defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
-defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
+defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
+defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
//===----------------------------------------------------------------------===//
// AES instructions
@@ -11724,13 +12087,13 @@ multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
loadv2i64, 0, VR128X, i128mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
defm Z256 : AESI_binop_rm_int<Op, OpStr,
- !cast<Intrinsic>(IntPrefix##"_256"),
+ !cast<Intrinsic>(IntPrefix#"_256"),
loadv4i64, 0, VR256X, i256mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512, HasVAES] in
defm Z : AESI_binop_rm_int<Op, OpStr,
- !cast<Intrinsic>(IntPrefix##"_512"),
+ !cast<Intrinsic>(IntPrefix#"_512"),
loadv8i64, 0, VR512, i512mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
@@ -11792,8 +12155,8 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
- "${src3}"##VTI.BroadcastStr##", $src2",
- "$src2, ${src3}"##VTI.BroadcastStr,
+ "${src3}"#VTI.BroadcastStr#", $src2",
+ "$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
AVX512FMA3Base, EVEX_B,
@@ -11827,22 +12190,22 @@ multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
- defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
- defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
+ defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
VEX_W, EVEX_CD8<16, CD8VF>;
- defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
+ defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
- defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
+ defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
@@ -11890,8 +12253,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
- OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
- "$src2, ${src3}"##VTI.BroadcastStr,
+ OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
+ "$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
@@ -12027,8 +12390,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
let ExeDomain = VTI.ExeDomain in
defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
- OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
- "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
+ OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
+ "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
(i8 timm:$src3))>, EVEX_B,
@@ -12184,41 +12547,44 @@ multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
}
}
+let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
- SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF
+ SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
avx512vl_f32_info, avx512vl_i16_info,
X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
+ let ExeDomain = SSEPackedSingle in {
let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
- X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
+ X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasBF16, HasVLX] in {
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
- null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
+ null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
VK4WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
- X86cvtneps2bf16,
+ X86cvtneps2bf16, X86cvtneps2bf16,
sched.YMM, "{1to8}", "{y}">, EVEX_V256;
}
+ } // Predicates = [HasBF16, HasVLX]
+ } // ExeDomain = SSEPackedSingle
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
- VR128X:$src), 0>;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
- f128mem:$src), 0, "intel">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
- VR256X:$src), 0>;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
- f256mem:$src), 0, "intel">;
- }
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
+ f128mem:$src), 0, "intel">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
+ f256mem:$src), 0, "intel">;
}
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
@@ -12262,25 +12628,24 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo src_v> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.RC:$src3),
+ (ins src_v.RC:$src2, src_v.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.MemOp:$src3),
+ (ins src_v.RC:$src2, src_v.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2,
- (src_v.VT (bitconvert
- (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
+ (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
+ (src_v.LdFrag addr:$src3)))>, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
OpcodeStr,
!strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr),
- (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
(src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -12302,6 +12667,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
+let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_i32_info,
HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;