diff options
Diffstat (limited to 'lib/Target/Hexagon/HexagonPatterns.td')
-rw-r--r-- | lib/Target/Hexagon/HexagonPatterns.td | 799 |
1 files changed, 375 insertions, 424 deletions
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index cdc2085986a5..384fda4ce39a 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -100,6 +100,17 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def SDTVecVecIntOp: + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, + SDTCisVT<3,i32>]>; + +def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>; +def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; + +def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), + (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; +def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; + // Pattern fragments to extract the low and high subregisters from a // 64-bit value. def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; @@ -109,16 +120,6 @@ def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ return isOrEquivalentToAdd(N); }]>; -def IsVecOff : PatLeaf<(i32 imm), [{ - int32_t V = N->getSExtValue(); - int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass); - assert(isPowerOf2_32(VecSize)); - if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0) - return false; - int32_t L = Log2_32(VecSize); - return isInt<4>(V >> L); -}]>; - def IsPow2_32: PatLeaf<(i32 imm), [{ uint32_t V = N->getZExtValue(); return isPowerOf2_32(V); @@ -214,7 +215,7 @@ def NegImm32: SDNodeXForm<imm, [{ // Helpers for type promotions/contractions. 
def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; -def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>; +def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>; def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; @@ -249,23 +250,6 @@ def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), (PS_fi (i32 AddrFI:$Rs), imm:$off)>; -def alignedload: PatFrag<(ops node:$a), (load $a), [{ - return isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def unalignedload: PatFrag<(ops node:$a), (load $a), [{ - return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ - return isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ - return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - - // Converters from unary/binary SDNode to PatFrag. class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>; class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; @@ -274,7 +258,7 @@ class Not2<PatFrag P> : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; class Su<PatFrag Op> - : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }], + : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }], Op.OperandTransform>; // Main selection macros. 
@@ -298,9 +282,9 @@ class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, - PatFrag RsPred, PatFrag RtPred> - : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), - (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; + PatFrag RxPred, PatFrag RsPred, PatFrag RtPred> + : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), + (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val, InstHexagon InstA, InstHexagon InstB> { @@ -316,6 +300,7 @@ def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>; def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>; def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>; +def Rol: pf2<rotl>; // --(1) Immediate ------------------------------------------------------- // @@ -363,7 +348,7 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; // --(2) Type cast ------------------------------------------------------- // -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>; def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>; @@ -389,7 +374,7 @@ let Predicates = [HasV5T] in { } // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 
-let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; @@ -422,9 +407,14 @@ def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0), (C2_muxii PredRegs:$Pu, -1, 0))>; -def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; -def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; -def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; +def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; +def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; +def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; +def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>; +def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; +def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>; +def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; +def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; @@ -441,6 +431,20 @@ let AddedComplexity = 20 in { def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; +def Vsplatpi: OutPatFrag<(ops node:$V), + (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>; +def: Pat<(v8i8 (zext V8I1:$Pu)), + (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>; +def: Pat<(v4i16 (zext V4I1:$Pu)), + (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>; +def: Pat<(v2i32 (zext V2I1:$Pu)), + (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>; + +def: Pat<(v4i8 (zext V4I1:$Pu)), + (A2_andir (S2_vtrunehb (C2_mask V4I1:$Pu)), (i32 0x01010101))>; +def: Pat<(v2i16 (zext V2I1:$Pu)), + (A2_andir (S2_vtrunehb (C2_mask V2I1:$Pu)), (i32 0x00010001))>; + def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; def: Pat<(v4i16 (anyext V4I8:$Rs)), 
(S2_vzxtbh V4I8:$Rs)>; @@ -475,25 +479,40 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)), // def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; +def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>; def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; -def: OpR_RR_pat<C2_and, And, i1, I1>; -def: OpR_RR_pat<C2_or, Or, i1, I1>; -def: OpR_RR_pat<C2_xor, Xor, i1, I1>; -def: OpR_RR_pat<C2_andn, Not2<And>, i1, I1>; -def: OpR_RR_pat<C2_orn, Not2<Or>, i1, I1>; +multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> { + def: OpR_RR_pat<MI, Op, i1, I1>; + def: OpR_RR_pat<MI, Op, v2i1, V2I1>; + def: OpR_RR_pat<MI, Op, v4i1, V4I1>; + def: OpR_RR_pat<MI, Op, v8i1, V8I1>; +} + +multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> { + def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>; + def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>; + def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>; + def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>; +} + +defm: BoolOpR_RR_pat<C2_and, And>; +defm: BoolOpR_RR_pat<C2_or, Or>; +defm: BoolOpR_RR_pat<C2_xor, Xor>; +defm: BoolOpR_RR_pat<C2_andn, Not2<And>>; +defm: BoolOpR_RR_pat<C2_orn, Not2<Or>>; // op(Ps, op(Pt, Pu)) -def: AccRRR_pat<C4_and_and, And, Su<And>, I1, I1>; -def: AccRRR_pat<C4_and_or, And, Su<Or>, I1, I1>; -def: AccRRR_pat<C4_or_and, Or, Su<And>, I1, I1>; -def: AccRRR_pat<C4_or_or, Or, Su<Or>, I1, I1>; +defm: BoolAccRRR_pat<C4_and_and, And, Su<And>>; +defm: BoolAccRRR_pat<C4_and_or, And, Su<Or>>; +defm: BoolAccRRR_pat<C4_or_and, Or, Su<And>>; +defm: BoolAccRRR_pat<C4_or_or, Or, Su<Or>>; // op(Ps, op(Pt, ~Pu)) -def: AccRRR_pat<C4_and_andn, And, Su<Not2<And>>, I1, I1>; -def: AccRRR_pat<C4_and_orn, And, Su<Not2<Or>>, I1, I1>; -def: AccRRR_pat<C4_or_andn, Or, Su<Not2<And>>, I1, I1>; -def: AccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>, I1, I1>; +defm: BoolAccRRR_pat<C4_and_andn, And, Su<Not2<And>>>; +defm: BoolAccRRR_pat<C4_and_orn, And, Su<Not2<Or>>>; +defm: BoolAccRRR_pat<C4_or_andn, Or, Su<Not2<And>>>; +defm: BoolAccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>>; // --(5) Compare 
--------------------------------------------------------- @@ -519,7 +538,7 @@ def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)), // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones // that reverse the order of the operands. class RevCmp<PatFrag F> - : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode, + : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode, F.OperandTransform>; def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>; @@ -563,7 +582,7 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; @@ -598,27 +617,40 @@ def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; -def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; -def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; - -def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; -def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; -def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; -def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; -def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; +class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, + PatFrag RsPred, PatFrag RtPred = RsPred> + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + 
(Output RsPred:$Rs, RtPred:$Rt)>; + +class Outn<InstHexagon MI> + : OutPatFrag<(ops node:$Rs, node:$Rt), + (C2_not (MI $Rs, $Rt))>; + +def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>; +def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>; let AddedComplexity = 100 in { def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), @@ -680,25 +712,10 @@ def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), (A4_rcmpneqi I32:$Rs, imm:$s8)>; -def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), - (C2_xor I1:$Ps, I1:$Pt)>; - -def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbeq (ToZext64 $Rs), 
(ToZext64 $Rt))>; -def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), - (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), - (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; +def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>; +def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>; +def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>; +def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; // Floating-point comparisons with checks for ordered/unordered status. @@ -706,18 +723,13 @@ class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3> : OutPatFrag<(ops node:$Rs, node:$Rt), (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; -class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, - PatFrag RsPred, PatFrag RtPred = RsPred> - : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), - (Output RsPred:$Rs, RtPred:$Rt)>; - class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>; class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>; class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>; class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>; def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>; def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>; @@ -733,11 +745,7 @@ let Predicates = [HasV5T] in { def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; } -class Outn<InstHexagon MI> - : OutPatFrag<(ops node:$Rs, node:$Rt), - (C2_not (MI $Rs, $Rt))>; - -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: 
OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; @@ -776,7 +784,7 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), @@ -813,20 +821,6 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; - -class HvxSel_pat<InstHexagon MI, PatFrag RegPred> - : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), - (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; - -let Predicates = [HasV60T,UseHVX] in { - def: HvxSel_pat<PS_vselect, HVI8>; - def: HvxSel_pat<PS_vselect, HVI16>; - def: HvxSel_pat<PS_vselect, HVI32>; - def: HvxSel_pat<PS_wselect, HWI8>; - def: HvxSel_pat<PS_wselect, HWI16>; - def: HvxSel_pat<PS_wselect, HWI32>; -} - // From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). 
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), (C2_or (C2_and I1:$Pu, I1:$Pv), @@ -878,7 +872,7 @@ let AddedComplexity = 200 in { defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>; } -let AddedComplexity = 100, Predicates = [HasV5T] in { +let AddedComplexity = 100, Predicates = [HasV5] in { defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>; defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>; defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>; @@ -892,40 +886,34 @@ let AddedComplexity = 100, Predicates = [HasV5T] in { def SDTHexagonINSERT: SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; - def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; +let AddedComplexity = 10 in { + def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; + def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; +} +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off), + (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off), + (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>; def SDTHexagonEXTRACTU : SDTypeProfile<1, 3, 
[SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP - : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; -def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), - (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; -def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), - (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; -def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt), - (S2_extractu_rp I32:$Rs, I64:$Rt)>; -def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt), - (S2_extractup_rp I64:$Rs, I64:$Rt)>; +let AddedComplexity = 10 in { + def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), + (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; + def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), + (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; +} +def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off), + (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>; +def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off), + (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>; def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; @@ -938,20 +926,20 @@ def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), (A2_combineii imm:$s8, imm:$s8)>; def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; +let AddedComplexity = 10 in +def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, + Requires<[HasV62]>; +def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), + (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; + // --(8) Shift/permute --------------------------------------------------- // def SDTHexagonI64I32I32: SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, - 
SDTCisSubVecOfVec<1, 0>]>; -def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; -def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; -def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; -def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>; @@ -1001,11 +989,15 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>; def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>; def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>; +let Predicates = [HasV60] in { + def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>; + def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>; +} def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), - (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>; + (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>; // Prefer S2_addasl_rrri over S2_asl_i_r_acc. 
let AddedComplexity = 120 in @@ -1046,41 +1038,55 @@ let AddedComplexity = 100 in { def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>; def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>; def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>; + + let Predicates = [HasV60] in { + def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>; + + def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>; + } } let AddedComplexity = 100 in { - def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>; - def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>; - def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>; - def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>; - def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>; + def: 
AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>; - def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>; - def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>; - def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>; - def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>; - def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>; - def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>; - def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>; - def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>; - def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>; - def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>; 
+ def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>; } @@ -1170,11 +1176,13 @@ def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), // --(9) Arithmetic/bitwise ---------------------------------------------- // -def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; -def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; -def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; +def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; +def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>; +def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; +def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; +def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; @@ -1186,13 +1194,6 @@ let Predicates = [HasV5T] in { (i32 (LoReg $Rs)))>; } -let AddedComplexity = 50 in -def: Pat<(xor (add (sra I32:$Rs, (i32 31)), - I32:$Rs), - (sra I32:$Rs, (i32 31))), - (A2_abs I32:$Rs)>; - - def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; @@ -1221,18 +1222,20 @@ def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>; def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>; def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>; +def: OpR_RR_pat<A2_and, And, v4i8, V4I8>; +def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>; +def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>; def: OpR_RR_pat<A2_and, And, v2i16, V2I16>; def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>; def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>; - def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>; -def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; -def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; 
def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>; -def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; -def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>; +def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; +def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>; +def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; +def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>; def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>; @@ -1255,7 +1258,7 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>; def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>; def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>; def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>; def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>; @@ -1268,12 +1271,62 @@ let Predicates = [HasV5T] in { let AddedComplexity = 10 in { def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>; def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>; - def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>; + def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>; } def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>; def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>; -def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>; +def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>; + +// Mulh for vectors +// +def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), + (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), + (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>; + +def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)), + (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)), + (M2_mpy_up (LoReg $Rs), (LoReg $Rt)))>; + +def Mulhub: + OutPatFrag<(ops node:$Rss, node:$Rtt), + (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))), + (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>; + +// Equivalent of byte-wise arithmetic shift right by 7 in v8i8. 
+def Asr7: + OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, -1)))>; + +def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)), + (Mulhub $Rss, $Rtt)>; + +def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)), + (A2_vsubub + (Mulhub $Rss, $Rtt), + (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)), + (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>; + +def Mpysh: + OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>; +def Mpyshh: + OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>; +def Mpyshl: + OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>; + +def Mulhsh: + OutPatFrag<(ops node:$Rss, node:$Rtt), + (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)), + (LoReg (Mpyshh $Rss, $Rtt))), + (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)), + (LoReg (Mpyshl $Rss, $Rtt))))>; + +def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>; + +def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)), + (A2_vaddh + (Mulhsh $Rss, $Rtt), + (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)), + (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>; def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)), @@ -1291,24 +1344,24 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8), def: Pat<(add Sext64:$Rs, I64:$Rt), (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; -def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>; -def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>; -def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>; -def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>; -def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>; -def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>; -def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>; -def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>; -def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>; -def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>; +def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>; +def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>; +def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>; +def: 
AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>; +def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>; +def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>; +def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>; +def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>; +def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>; +def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>; // For dags like (or (and (not _), _), (shl _, _)) where the "or" with // one argument matches the patterns below, and with the other argument // matches S2_asl_r_r_or, etc, prefer the patterns below. let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. - def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>; - def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>; - def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>; + def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>; + def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>; + def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>; } // S4_addaddi and S4_subaddi don't have tied operands, so give them @@ -1444,7 +1497,7 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)), (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), @@ -1479,13 +1532,13 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), // Multiplies two v4i8 vectors. def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>, - Requires<[HasV5T]>; + Requires<[HasV5]>; // Multiplies two v8i8 vectors. 
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>, - Requires<[HasV5T]>; + Requires<[HasV5]>; // --(10) Bit ------------------------------------------------------------ @@ -1519,7 +1572,6 @@ def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; - let AddedComplexity = 20 in { // Complexity greater than and/or/xor def: Pat<(and I32:$Rs, IsNPow2_32:$V), (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>; @@ -1582,6 +1634,15 @@ let AddedComplexity = 10 in // Complexity greater than compare reg-reg. def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)), (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; +def SDTTestBit: + SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>; + +def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5), + (S2_tstbit_i I32:$Rs, imm:$u5)>; +def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt), + (S2_tstbit_r I32:$Rs, I32:$Rt)>; + let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), (S4_ntstbit_i I32:$Rs, imm:$u5)>; @@ -1790,7 +1851,12 @@ let AddedComplexity = 20 in { defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>; + defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>; + defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>; defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>; + defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>; + defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>; + defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>; defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>; defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>; // No sextloadi1. @@ -1828,10 +1894,15 @@ let AddedComplexity = 60 in { def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>; def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; - def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; - def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>; + def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>; + def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>; def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; + def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>; def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; @@ -1845,29 +1916,39 @@ let AddedComplexity = 60 in { } let AddedComplexity = 40 in { - def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; - def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; - def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; - 
def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; - def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; - def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; - def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; - def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; - def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; - def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; + def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; + def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>; + def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>; + def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; + def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; } let AddedComplexity = 20 in { - def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; - def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; - def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; - def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; - def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; - def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; - def: Loadxr_add_pat<load, i32, L4_loadri_rr>; - def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; - def: Loadxr_add_pat<load, f32, L4_loadri_rr>; - def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; + def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxr_add_pat<sextloadi16, i32, 
L4_loadrh_rr>; + def: Loadxr_add_pat<load, i32, L4_loadri_rr>; + def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>; + def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>; + def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; + def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>; + def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>; + def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>; + def: Loadxr_add_pat<load, f32, L4_loadri_rr>; + def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; } let AddedComplexity = 40 in { @@ -1897,17 +1978,22 @@ let AddedComplexity = 20 in { // Absolute address let AddedComplexity = 60 in { - def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; - def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; - def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; - def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; - def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; - def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; - def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; - def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; - def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; - def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; - def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; + def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; + def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; + def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; + def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; + def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; + def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; + def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; + def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; + def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>; + def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>; + def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>; + 
def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; + def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>; def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>; @@ -1933,18 +2019,23 @@ let AddedComplexity = 30 in { // GP-relative address let AddedComplexity = 100 in { - def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; - def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; - def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; - def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; - def: Loada_pat<load, i32, addrgp, L2_loadrigp>; - def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; - def: Loada_pat<load, f32, addrgp, L2_loadrigp>; - def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; + def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; + def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; + def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; + def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; + def: Loada_pat<load, i32, addrgp, L2_loadrigp>; + def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>; + def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>; + def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; + def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>; + def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>; + def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>; + def: Loada_pat<load, f32, addrgp, L2_loadrigp>; + def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; def: Loada_pat<atomic_load_16, i32, addrgp, 
L2_loadruhgp>; @@ -1983,46 +2074,10 @@ def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))), def: Pat<(i1 (load I32:$Rs)), (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; -// HVX loads - -multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType VT, - PatFrag ImmPred> { - def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>; - def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>; - // The HVX selection code for shuffles can generate vector constants. - // Calling "Select" on the resulting loads from CP fails without these - // patterns. - def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>; - def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))), - (MI (C4_addipc imm:$A), 0)>; -} - - -let Predicates = [UseHVX] in { - multiclass HvxLdVs_pat<InstHexagon MI, PatFrag Load> { - defm: HvxLd_pat<MI, Load, VecI8, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecI16, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecI32, IsVecOff>; - } - defm: HvxLdVs_pat<V6_vL32b_nt_ai, alignednontemporalload>; - defm: HvxLdVs_pat<V6_vL32b_ai, alignedload>; - defm: HvxLdVs_pat<V6_vL32Ub_ai, unalignedload>; - - multiclass HvxLdWs_pat<InstHexagon MI, PatFrag Load> { - defm: HvxLd_pat<MI, Load, VecPI8, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecPI16, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecPI32, IsVecOff>; - } - defm: HvxLdWs_pat<PS_vloadrw_nt_ai, alignednontemporalload>; - defm: HvxLdWs_pat<PS_vloadrw_ai, alignedload>; - defm: HvxLdWs_pat<PS_vloadrwu_ai, unalignedload>; -} - // --(13) Store ---------------------------------------------------------- // - class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI> : Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4), (MI I32:$Rx, imm:$s4, Value:$Rt)>; @@ -2135,7 +2190,7 @@ class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, // swapped. This relies on the knowledge that the F.Fragment uses names // "ptr" and "val". 
class AtomSt<PatFrag F> - : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode, F.OperandTransform> { let IsAtomic = F.IsAtomic; let MemoryVT = F.MemoryVT; @@ -2459,36 +2514,6 @@ let AddedComplexity = 10 in { def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>; } -// HVX stores - -multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred, - PatFrag Value> { - def: Pat<(Store Value:$Vs, I32:$Rt), - (MI I32:$Rt, 0, Value:$Vs)>; - def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)), - (MI I32:$Rt, imm:$s, Value:$Vs)>; -} - -let Predicates = [UseHVX] in { - multiclass HvxStVs_pat<InstHexagon MI, PatFrag Store> { - defm: HvxSt_pat<MI, Store, IsVecOff, HVI8>; - defm: HvxSt_pat<MI, Store, IsVecOff, HVI16>; - defm: HvxSt_pat<MI, Store, IsVecOff, HVI32>; - } - defm: HvxStVs_pat<V6_vS32b_nt_ai, alignednontemporalstore>; - defm: HvxStVs_pat<V6_vS32b_ai, alignedstore>; - defm: HvxStVs_pat<V6_vS32Ub_ai, unalignedstore>; - - multiclass HvxStWs_pat<InstHexagon MI, PatFrag Store> { - defm: HvxSt_pat<MI, Store, IsVecOff, HWI8>; - defm: HvxSt_pat<MI, Store, IsVecOff, HWI16>; - defm: HvxSt_pat<MI, Store, IsVecOff, HWI32>; - } - defm: HvxStWs_pat<PS_vstorerw_nt_ai, alignednontemporalstore>; - defm: HvxStWs_pat<PS_vstorerw_ai, alignedstore>; - defm: HvxStWs_pat<PS_vstorerwu_ai, unalignedstore>; -} - // --(14) Memop ---------------------------------------------------------- // @@ -2570,8 +2595,10 @@ multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, SDNode Oper, InstHexagon MI> { - defm: Memopxr_base_pat <Load, Store, Oper, MI>; - defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; + let Predicates = [UseMEMOPS] in { + defm: Memopxr_base_pat <Load, Store, Oper, MI>; + defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; + } } let AddedComplexity = 200 in { @@ -2669,8 +2696,10 
@@ multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, InstHexagon MI> { - defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; - defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; + let Predicates = [UseMEMOPS] in { + defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; + defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; + } } let AddedComplexity = 220 in { @@ -2829,6 +2858,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst), (J2_jumpf I1:$Pu, bb:$dst)>; def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), (J2_jumpt I1:$Pu, bb:$dst)>; @@ -2898,97 +2929,17 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; - -def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>; - -def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; -def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; - -def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; -def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; - -def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt), - (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, vsub_lo)>; - -def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>; -def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>; - -let Predicates = [UseHVX] in { - def: OpR_RR_pat<V6_vpackeb, pf2<HexagonVPACKE>, VecI8, HVI8>; - def: OpR_RR_pat<V6_vpackob, pf2<HexagonVPACKO>, VecI8, HVI8>; - def: OpR_RR_pat<V6_vpackeh, pf2<HexagonVPACKE>, VecI16, HVI16>; - def: OpR_RR_pat<V6_vpackoh, pf2<HexagonVPACKO>, VecI16, HVI16>; -} - 
-def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; -def vzero: PatFrag<(ops), (HexagonVZERO)>; - -let Predicates = [UseHVX] in { - def: Pat<(VecI8 vzero), (V6_vd0)>; - def: Pat<(VecI16 vzero), (V6_vd0)>; - def: Pat<(VecI32 vzero), (V6_vd0)>; - - def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), - (Combinev HvxVR:$Vt, HvxVR:$Vs)>; - def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), - (Combinev HvxVR:$Vt, HvxVR:$Vs)>; - def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), - (Combinev HvxVR:$Vt, HvxVR:$Vs)>; - - def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), - (V6_extractw HvxVR:$Vu, I32:$Rs)>; - def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), - (V6_extractw HvxVR:$Vu, I32:$Rs)>; - def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), - (V6_extractw HvxVR:$Vu, I32:$Rs)>; - - def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), - (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; - def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), - (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; - def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), - (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; - - def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(sub HVI8:$Vs, HVI8:$Vt), (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(sub HVI16:$Vs, HVI16:$Vt), (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(sub HVI32:$Vs, HVI32:$Vt), (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), - (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), - (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), - (V6_vmux HvxQR:$Qu, HvxVR:$Vs, 
HvxVR:$Vt)>; - - def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>; - def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>; - def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>; - def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>; - - def: Pat<(sext_inreg HVI32:$Vs, v16i16), - (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)), - (HiVec (V6_vsh HvxVR:$Vs)))>; - def: Pat<(sext_inreg HVI32:$Vs, v32i16), - (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)), - (HiVec (V6_vsh HvxVR:$Vs)))>; - - def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>; - def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>; - def: Pat<(VecI32 (sext_invec HVI8:$Vs)), - (LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>; - - def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>; - def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>; - def: Pat<(VecI32 (zext_invec HVI8:$Vs)), - (LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>; +// The declared return value of the store-locked intrinsics is i32, but +// the instructions actually define i1. To avoid register copies from +// IntRegs to PredRegs and back, fold the entire pattern checking the +// result against true/false. +let AddedComplexity = 100 in { + def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), + (S2_storew_locked I32:$Rs, I32:$Rt)>; + def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), + (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>; + def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), + (S4_stored_locked I32:$Rs, I64:$Rt)>; + def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), + (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>; } |