aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/Hexagon/HexagonPatterns.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/Hexagon/HexagonPatterns.td')
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td799
1 files changed, 375 insertions, 424 deletions
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index cdc2085986a5..384fda4ce39a 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -100,6 +100,17 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
+def SDTVecVecIntOp:  // Type profile with one result and three operands.
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
+ SDTCisVT<3,i32>]>;  // Result and operand 1 are vectors, operand 2 has operand 1's type, operand 3 is i32.
+
+def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;  // Two vectors plus an i32 operand.
+def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;  // Unary integer node.
+
+def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),  // Fragment that forwards its three operands to HexagonVALIGN.
+ (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
+def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;  // Fragment that forwards $Addr to HexagonVALIGNADDR.
+
// Pattern fragments to extract the low and high subregisters from a
// 64-bit value.
def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
@@ -109,16 +120,6 @@ def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
-def IsVecOff : PatLeaf<(i32 imm), [{
- int32_t V = N->getSExtValue();
- int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
- assert(isPowerOf2_32(VecSize));
- if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
- return false;
- int32_t L = Log2_32(VecSize);
- return isInt<4>(V >> L);
-}]>;
-
def IsPow2_32: PatLeaf<(i32 imm), [{
uint32_t V = N->getZExtValue();
return isPowerOf2_32(V);
@@ -214,7 +215,7 @@ def NegImm32: SDNodeXForm<imm, [{
// Helpers for type promotions/contractions.
def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
-def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>;
+def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
@@ -249,23 +250,6 @@ def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
(PS_fi (i32 AddrFI:$Rs), imm:$off)>;
-def alignedload: PatFrag<(ops node:$a), (load $a), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedload: PatFrag<(ops node:$a), (load $a), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-
// Converters from unary/binary SDNode to PatFrag.
class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>;
class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
@@ -274,7 +258,7 @@ class Not2<PatFrag P>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
class Su<PatFrag Op>
- : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }],
+ : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
Op.OperandTransform>;
// Main selection macros.
@@ -298,9 +282,9 @@ class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
(MI RegPred:$Rx, RegPred:$Rs, imm:$I)>;
class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
- PatFrag RsPred, PatFrag RtPred>
- : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
- (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
+ PatFrag RxPred, PatFrag RsPred, PatFrag RtPred>
+ : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
+ (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
InstHexagon InstA, InstHexagon InstB> {
@@ -316,6 +300,7 @@ def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
+def Rol: pf2<rotl>;
// --(1) Immediate -------------------------------------------------------
//
@@ -363,7 +348,7 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
@@ -389,7 +374,7 @@ let Predicates = [HasV5T] in {
}
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
@@ -422,9 +407,14 @@ def: Pat<(i64 (sext I1:$Pu)),
(Combinew (C2_muxii PredRegs:$Pu, -1, 0),
(C2_muxii PredRegs:$Pu, -1, 0))>;
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
@@ -441,6 +431,20 @@ let AddedComplexity = 20 in {
def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def Vsplatpi: OutPatFrag<(ops node:$V),
+ (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
+def: Pat<(v8i8 (zext V8I1:$Pu)),
+ (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
+def: Pat<(v4i16 (zext V4I1:$Pu)),
+ (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
+def: Pat<(v2i32 (zext V2I1:$Pu)),
+ (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
+
+// Zero-extend short predicate vectors into 32-bit vector registers.
+// C2_mask expands every predicate bit into a full byte of a 64-bit value,
+// so a v4i1 mask comes out as four halfwords and a v2i1 mask as two words.
+// Narrow that with S2_vtrunehb (even bytes), as the matching sext patterns
+// do, and then keep bit 0 of each lane. Taking LoReg instead would drop the
+// upper lanes and duplicate the lower ones.
+def: Pat<(v4i8 (zext V4I1:$Pu)),
+ (A2_andir (S2_vtrunehb (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
+def: Pat<(v2i16 (zext V2I1:$Pu)),
+ (A2_andir (S2_vtrunehb (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
+
def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
@@ -475,25 +479,40 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)),
//
def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>;
+def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>;
def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>;
-def: OpR_RR_pat<C2_and, And, i1, I1>;
-def: OpR_RR_pat<C2_or, Or, i1, I1>;
-def: OpR_RR_pat<C2_xor, Xor, i1, I1>;
-def: OpR_RR_pat<C2_andn, Not2<And>, i1, I1>;
-def: OpR_RR_pat<C2_orn, Not2<Or>, i1, I1>;
+multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> {  // Selects MI for Op on every predicate type.
+ def: OpR_RR_pat<MI, Op, i1, I1>;  // Scalar predicate.
+ def: OpR_RR_pat<MI, Op, v2i1, V2I1>;  // Predicate vectors of 2, 4 and 8 lanes follow.
+ def: OpR_RR_pat<MI, Op, v4i1, V4I1>;
+ def: OpR_RR_pat<MI, Op, v8i1, V8I1>;
+}
+
+multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> {  // Selects MI for AccOp(Rx, Op(Rs, Rt)) on every predicate type.
+ def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>;  // All three operands share the same predicate type.
+ def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>;
+ def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>;
+ def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>;
+}
+
+defm: BoolOpR_RR_pat<C2_and, And>;
+defm: BoolOpR_RR_pat<C2_or, Or>;
+defm: BoolOpR_RR_pat<C2_xor, Xor>;
+defm: BoolOpR_RR_pat<C2_andn, Not2<And>>;
+defm: BoolOpR_RR_pat<C2_orn, Not2<Or>>;
// op(Ps, op(Pt, Pu))
-def: AccRRR_pat<C4_and_and, And, Su<And>, I1, I1>;
-def: AccRRR_pat<C4_and_or, And, Su<Or>, I1, I1>;
-def: AccRRR_pat<C4_or_and, Or, Su<And>, I1, I1>;
-def: AccRRR_pat<C4_or_or, Or, Su<Or>, I1, I1>;
+defm: BoolAccRRR_pat<C4_and_and, And, Su<And>>;
+defm: BoolAccRRR_pat<C4_and_or, And, Su<Or>>;
+defm: BoolAccRRR_pat<C4_or_and, Or, Su<And>>;
+defm: BoolAccRRR_pat<C4_or_or, Or, Su<Or>>;
// op(Ps, op(Pt, ~Pu))
-def: AccRRR_pat<C4_and_andn, And, Su<Not2<And>>, I1, I1>;
-def: AccRRR_pat<C4_and_orn, And, Su<Not2<Or>>, I1, I1>;
-def: AccRRR_pat<C4_or_andn, Or, Su<Not2<And>>, I1, I1>;
-def: AccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>, I1, I1>;
+defm: BoolAccRRR_pat<C4_and_andn, And, Su<Not2<And>>>;
+defm: BoolAccRRR_pat<C4_and_orn, And, Su<Not2<Or>>>;
+defm: BoolAccRRR_pat<C4_or_andn, Or, Su<Not2<And>>>;
+defm: BoolAccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>>;
// --(5) Compare ---------------------------------------------------------
@@ -519,7 +538,7 @@ def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)),
// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
// that reverse the order of the operands.
class RevCmp<PatFrag F>
- : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode,
+ : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode,
F.OperandTransform>;
def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>;
@@ -563,7 +582,7 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
@@ -598,27 +617,40 @@ def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)),
def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)),
(C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>;
-def: Pat<(i1 (setne I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setle I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setule I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setge I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>;
-def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>;
-
-def: Pat<(i1 (setle I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>;
-def: Pat<(i1 (setne I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>;
-def: Pat<(i1 (setge I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>;
-def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>;
-def: Pat<(i1 (setule I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>;
+class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,  // Maps a binary Op of type ResType to an output fragment.
+ PatFrag RsPred, PatFrag RtPred = RsPred>  // RtPred defaults to RsPred when both operands share a predicate.
+ : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+ (Output RsPred:$Rs, RtPred:$Rt)>;  // Output receives the operands in the same order.
+
+class Outn<InstHexagon MI>  // Output fragment that negates the result of MI.
+ : OutPatFrag<(ops node:$Rs, node:$Rt),
+ (C2_not (MI $Rs, $Rt))>;  // C2_not is applied to MI's result.
+
+def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>;
+def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>;
let AddedComplexity = 100 in {
def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)),
@@ -680,25 +712,10 @@ def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))),
def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
(A4_rcmpneqi I32:$Rs, imm:$s8)>;
-def: Pat<(i1 (setne I1:$Ps, I1:$Pt)),
- (C2_xor I1:$Ps, I1:$Pt)>;
-
-def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
- (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
-
-def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
-
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
- (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
+def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
+def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
+def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
+def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
// Floating-point comparisons with checks for ordered/unordered status.
@@ -706,18 +723,13 @@ class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3>
: OutPatFrag<(ops node:$Rs, node:$Rt),
(MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>;
-class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
- PatFrag RsPred, PatFrag RtPred = RsPred>
- : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
- (Output RsPred:$Rs, RtPred:$Rt)>;
-
class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>;
class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
@@ -733,11 +745,7 @@ let Predicates = [HasV5T] in {
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
}
-class Outn<InstHexagon MI>
- : OutPatFrag<(ops node:$Rs, node:$Rt),
- (C2_not (MI $Rs, $Rt))>;
-
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
@@ -776,7 +784,7 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
@@ -813,20 +821,6 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
-
-class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
- : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
- (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;
-
-let Predicates = [HasV60T,UseHVX] in {
- def: HvxSel_pat<PS_vselect, HVI8>;
- def: HvxSel_pat<PS_vselect, HVI16>;
- def: HvxSel_pat<PS_vselect, HVI32>;
- def: HvxSel_pat<PS_wselect, HWI8>;
- def: HvxSel_pat<PS_wselect, HWI16>;
- def: HvxSel_pat<PS_wselect, HWI32>;
-}
-
// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
(C2_or (C2_and I1:$Pu, I1:$Pv),
@@ -878,7 +872,7 @@ let AddedComplexity = 200 in {
defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}
-let AddedComplexity = 100, Predicates = [HasV5T] in {
+let AddedComplexity = 100, Predicates = [HasV5] in {
defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@@ -892,40 +886,34 @@ let AddedComplexity = 100, Predicates = [HasV5T] in {
def SDTHexagonINSERT:
SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
-def SDTHexagonINSERTRP:
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<3, i64>]>;
-
def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
-def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
-def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
- (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>;
-def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
- (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>;
-def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
- (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
-def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
- (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+let AddedComplexity = 10 in {  // Give the immediate forms priority over the register forms below.
+ def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>;
+ def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>;
+}
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off),  // Register form: $Width and $Off are paired with Combinew.
+ (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off),  // 64-bit variant of the register form.
+ (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>;
def SDTHexagonEXTRACTU
: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDTHexagonEXTRACTURP
- : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i64>]>;
-
def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
-def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
-def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5),
- (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>;
-def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6),
- (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>;
-def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt),
- (S2_extractu_rp I32:$Rs, I64:$Rt)>;
-def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt),
- (S2_extractup_rp I64:$Rs, I64:$Rt)>;
+let AddedComplexity = 10 in {  // Give the immediate forms priority over the register forms below.
+ def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5),
+ (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>;
+ def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6),
+ (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>;
+}
+def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off),  // Register form: $Width and $Off are paired with Combinew.
+ (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>;
+def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off),  // 64-bit variant of the register form.
+ (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>;
def SDTHexagonVSPLAT:
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
@@ -938,20 +926,20 @@ def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)),
(A2_combineii imm:$s8, imm:$s8)>;
def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;
+let AddedComplexity = 10 in
+def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
+ Requires<[HasV62]>;
+def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
+ (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
+
// --(8) Shift/permute ---------------------------------------------------
//
def SDTHexagonI64I32I32: SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
- SDTCisSubVecOfVec<1, 0>]>;
-def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>;
def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
-def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
-def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>;
-def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>;
def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>;
@@ -1001,11 +989,15 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
+let Predicates = [HasV60] in {
+ def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
+ def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
+}
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
- (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>;
+ (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
@@ -1046,41 +1038,55 @@ let AddedComplexity = 100 in {
def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>;
+
+ let Predicates = [HasV60] in {
+ def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>;
+
+ def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>;
+ }
}
let AddedComplexity = 100 in {
- def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>;
- def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>;
- def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>;
- def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>;
- def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>;
- def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>;
- def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>;
- def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>;
- def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>;
- def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>;
}
@@ -1170,11 +1176,13 @@ def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
// --(9) Arithmetic/bitwise ----------------------------------------------
//
-def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
-def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
-def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
+def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
+def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>;
+def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
+def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
+def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
@@ -1186,13 +1194,6 @@ let Predicates = [HasV5T] in {
(i32 (LoReg $Rs)))>;
}
-let AddedComplexity = 50 in
-def: Pat<(xor (add (sra I32:$Rs, (i32 31)),
- I32:$Rs),
- (sra I32:$Rs, (i32 31))),
- (A2_abs I32:$Rs)>;
-
-
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
@@ -1221,18 +1222,20 @@ def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>;
def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>;
def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>;
+def: OpR_RR_pat<A2_and, And, v4i8, V4I8>;
+def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>;
+def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>;
def: OpR_RR_pat<A2_and, And, v2i16, V2I16>;
def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>;
def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>;
-
def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>;
-def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
-def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>;
-def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
-def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
+def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
+def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
+def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
+def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>;
def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>;
@@ -1255,7 +1258,7 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
@@ -1268,12 +1271,62 @@ let Predicates = [HasV5T] in {
let AddedComplexity = 10 in {
def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
- def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>;
+ def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
}
def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>;
-def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>;
+def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>;
+
+// Mulh for vectors
+//
+def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)),
+ (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)),
+ (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>;
+
+// Signed high multiply of v2i32: multiply the matching 32-bit lanes of $Rs
+// and $Rt with M2_mpy_up and recombine them. Each lane must take one operand
+// from $Rs and one from $Rt.
+def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)),
+ (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)),
+ (M2_mpy_up (LoReg $Rs), (LoReg $Rt)))>;
+
+def Mulhub:
+ OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))),
+ (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>;
+
+// Equivalent of byte-wise arithmetic shift right by 7 in v8i8: every byte
+// becomes 0xFF if it is negative and 0x00 otherwise. A4_vcmpbgti is a signed
+// compare, so not(byte > -1) holds exactly for negative bytes. Comparing
+// against 0 would also set the mask for zero bytes.
+def Asr7:
+ OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, -1)))>;
+
+def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)),
+ (Mulhub $Rss, $Rtt)>;
+
+def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)),
+ (A2_vsubub
+ (Mulhub $Rss, $Rtt),
+ (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)),
+ (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>;
+
+def Mpysh:
+ OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>;
+def Mpyshh:
+ OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>;
+def Mpyshl:
+ OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>;
+
+def Mulhsh:
+ OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)),
+ (LoReg (Mpyshh $Rss, $Rtt))),
+ (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)),
+ (LoReg (Mpyshl $Rss, $Rtt))))>;
+
+def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>;
+
+def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)),
+ (A2_vaddh
+ (Mulhsh $Rss, $Rtt),
+ (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)),
+ (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>;
def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)),
@@ -1291,24 +1344,24 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
def: Pat<(add Sext64:$Rs, I64:$Rt),
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
-def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>;
-def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>;
-def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>;
+def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
// one argument matches the patterns below, and with the other argument
// matches S2_asl_r_r_or, etc, prefer the patterns below.
let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor.
- def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>;
- def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>;
- def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>;
+ def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>;
+ def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>;
+ def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>;
}
// S4_addaddi and S4_subaddi don't have tied operands, so give them
@@ -1444,7 +1497,7 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
@@ -1479,13 +1532,13 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
(S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5T]>;
+ Requires<[HasV5]>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
(S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5T]>;
+ Requires<[HasV5]>;
// --(10) Bit ------------------------------------------------------------
@@ -1519,7 +1572,6 @@ def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
-
let AddedComplexity = 20 in { // Complexity greater than and/or/xor
def: Pat<(and I32:$Rs, IsNPow2_32:$V),
(S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
@@ -1582,6 +1634,15 @@ let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
(C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
+def SDTTestBit:
+ SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>;
+
+def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
+ (S2_tstbit_i I32:$Rs, imm:$u5)>;
+def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
+ (S2_tstbit_r I32:$Rs, I32:$Rt)>;
+
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
(S4_ntstbit_i I32:$Rs, imm:$u5)>;
@@ -1790,7 +1851,12 @@ let AddedComplexity = 20 in {
defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>;
+ defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>;
+ defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>;
+ defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>;
+ defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>;
+ defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>;
defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>;
// No sextloadi1.
@@ -1828,10 +1894,15 @@ let AddedComplexity = 60 in {
def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
- def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
- def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
+ def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>;
+ def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
+ def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
@@ -1845,29 +1916,39 @@ let AddedComplexity = 60 in {
}
let AddedComplexity = 40 in {
- def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
- def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
- def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
- def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
}
let AddedComplexity = 20 in {
- def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
- def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
- def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
- def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
+ def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
}
let AddedComplexity = 40 in {
@@ -1897,17 +1978,22 @@ let AddedComplexity = 20 in {
// Absolute address
let AddedComplexity = 60 in {
- def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
- def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
- def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
- def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
- def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
- def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
- def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
- def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
- def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
- def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
- def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
+ def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
+ def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
+ def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
+ def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
+ def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
+ def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
+ def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>;
@@ -1933,18 +2019,23 @@ let AddedComplexity = 30 in {
// GP-relative address
let AddedComplexity = 100 in {
- def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
- def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
- def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
- def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
- def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
- def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
- def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
- def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
+ def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
+ def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
+ def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
+ def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
+ def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
@@ -1983,46 +2074,10 @@ def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))),
def: Pat<(i1 (load I32:$Rs)),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
-// HVX loads
-
-multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType VT,
- PatFrag ImmPred> {
- def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>;
- def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>;
- // The HVX selection code for shuffles can generate vector constants.
- // Calling "Select" on the resulting loads from CP fails without these
- // patterns.
- def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>;
- def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))),
- (MI (C4_addipc imm:$A), 0)>;
-}
-
-
-let Predicates = [UseHVX] in {
- multiclass HvxLdVs_pat<InstHexagon MI, PatFrag Load> {
- defm: HvxLd_pat<MI, Load, VecI8, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecI16, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecI32, IsVecOff>;
- }
- defm: HvxLdVs_pat<V6_vL32b_nt_ai, alignednontemporalload>;
- defm: HvxLdVs_pat<V6_vL32b_ai, alignedload>;
- defm: HvxLdVs_pat<V6_vL32Ub_ai, unalignedload>;
-
- multiclass HvxLdWs_pat<InstHexagon MI, PatFrag Load> {
- defm: HvxLd_pat<MI, Load, VecPI8, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecPI16, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecPI32, IsVecOff>;
- }
- defm: HvxLdWs_pat<PS_vloadrw_nt_ai, alignednontemporalload>;
- defm: HvxLdWs_pat<PS_vloadrw_ai, alignedload>;
- defm: HvxLdWs_pat<PS_vloadrwu_ai, unalignedload>;
-}
-
// --(13) Store ----------------------------------------------------------
//
-
class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI>
: Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4),
(MI I32:$Rx, imm:$s4, Value:$Rt)>;
@@ -2135,7 +2190,7 @@ class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
// swapped. This relies on the knowledge that the F.Fragment uses names
// "ptr" and "val".
class AtomSt<PatFrag F>
- : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
+ : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
F.OperandTransform> {
let IsAtomic = F.IsAtomic;
let MemoryVT = F.MemoryVT;
@@ -2459,36 +2514,6 @@ let AddedComplexity = 10 in {
def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
}
-// HVX stores
-
-multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred,
- PatFrag Value> {
- def: Pat<(Store Value:$Vs, I32:$Rt),
- (MI I32:$Rt, 0, Value:$Vs)>;
- def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)),
- (MI I32:$Rt, imm:$s, Value:$Vs)>;
-}
-
-let Predicates = [UseHVX] in {
- multiclass HvxStVs_pat<InstHexagon MI, PatFrag Store> {
- defm: HvxSt_pat<MI, Store, IsVecOff, HVI8>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HVI16>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HVI32>;
- }
- defm: HvxStVs_pat<V6_vS32b_nt_ai, alignednontemporalstore>;
- defm: HvxStVs_pat<V6_vS32b_ai, alignedstore>;
- defm: HvxStVs_pat<V6_vS32Ub_ai, unalignedstore>;
-
- multiclass HvxStWs_pat<InstHexagon MI, PatFrag Store> {
- defm: HvxSt_pat<MI, Store, IsVecOff, HWI8>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HWI16>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HWI32>;
- }
- defm: HvxStWs_pat<PS_vstorerw_nt_ai, alignednontemporalstore>;
- defm: HvxStWs_pat<PS_vstorerw_ai, alignedstore>;
- defm: HvxStWs_pat<PS_vstorerwu_ai, unalignedstore>;
-}
-
// --(14) Memop ----------------------------------------------------------
//
@@ -2570,8 +2595,10 @@ multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, InstHexagon MI> {
- defm: Memopxr_base_pat <Load, Store, Oper, MI>;
- defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+ let Predicates = [UseMEMOPS] in {
+ defm: Memopxr_base_pat <Load, Store, Oper, MI>;
+ defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+ }
}
let AddedComplexity = 200 in {
@@ -2669,8 +2696,10 @@ multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
InstHexagon MI> {
- defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
- defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+ let Predicates = [UseMEMOPS] in {
+ defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
+ defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+ }
}
let AddedComplexity = 220 in {
@@ -2829,6 +2858,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
+def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
+ (J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
(J2_jumpt I1:$Pu, bb:$dst)>;
@@ -2898,97 +2929,17 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
-
-def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>;
-
-def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
- [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
-def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
-
-def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
-def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
-
-def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt),
- (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, vsub_lo)>;
-
-def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
-def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;
-
-let Predicates = [UseHVX] in {
- def: OpR_RR_pat<V6_vpackeb, pf2<HexagonVPACKE>, VecI8, HVI8>;
- def: OpR_RR_pat<V6_vpackob, pf2<HexagonVPACKO>, VecI8, HVI8>;
- def: OpR_RR_pat<V6_vpackeh, pf2<HexagonVPACKE>, VecI16, HVI16>;
- def: OpR_RR_pat<V6_vpackoh, pf2<HexagonVPACKO>, VecI16, HVI16>;
-}
-
-def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
-def vzero: PatFrag<(ops), (HexagonVZERO)>;
-
-let Predicates = [UseHVX] in {
- def: Pat<(VecI8 vzero), (V6_vd0)>;
- def: Pat<(VecI16 vzero), (V6_vd0)>;
- def: Pat<(VecI32 vzero), (V6_vd0)>;
-
- def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
- (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
- def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
- (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
- def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
- (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
-
- def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
- (V6_extractw HvxVR:$Vu, I32:$Rs)>;
- def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
- (V6_extractw HvxVR:$Vu, I32:$Rs)>;
- def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
- (V6_extractw HvxVR:$Vu, I32:$Rs)>;
-
- def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt),
- (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
- def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
- (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
- def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
- (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
-
- def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(sub HVI8:$Vs, HVI8:$Vt), (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(sub HVI16:$Vs, HVI16:$Vt), (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(sub HVI32:$Vs, HVI32:$Vt), (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
- (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
- (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
- (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>;
- def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>;
- def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>;
- def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>;
-
- def: Pat<(sext_inreg HVI32:$Vs, v16i16),
- (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
- (HiVec (V6_vsh HvxVR:$Vs)))>;
- def: Pat<(sext_inreg HVI32:$Vs, v32i16),
- (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
- (HiVec (V6_vsh HvxVR:$Vs)))>;
-
- def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>;
- def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>;
- def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
- (LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>;
-
- def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>;
- def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>;
- def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
- (LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>;
+// The declared return value of the store-locked intrinsics is i32, but
+// the instructions actually define i1. To avoid register copies from
+// IntRegs to PredRegs and back, fold the entire pattern checking the
+// result against true/false.
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
+ (S2_storew_locked I32:$Rs, I32:$Rt)>;
+ def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
+ (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>;
+ def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
+ (S4_stored_locked I32:$Rs, I64:$Rt)>;
+ def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
+ (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
}