diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-12-25 22:30:44 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-12-25 22:30:44 +0000 |
commit | 77fc4c146f0870ffb09c1afb823ccbe742c5e6ff (patch) | |
tree | 5c0eb39553003b9c75a901af6bc4ddabd6f2f28c /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | |
parent | f65dcba83ce5035ab88a85fe17628b447eb56e1b (diff) | |
download | src-77fc4c146f0870ffb09c1afb823ccbe742c5e6ff.tar.gz src-77fc4c146f0870ffb09c1afb823ccbe742c5e6ff.zip |
Vendor import of llvm-project main llvmorg-14-init-13186-g0c553cc1af2e.vendor/llvm-project/llvmorg-14-init-13186-g0c553cc1af2e
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 655 |
1 files changed, 403 insertions, 252 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index df5a041b87cd..067ad819e0d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -487,10 +487,7 @@ namespace { SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); - SDValue visitFMINNUM(SDNode *N); - SDValue visitFMAXNUM(SDNode *N); - SDValue visitFMINIMUM(SDNode *N); - SDValue visitFMAXIMUM(SDNode *N); + SDValue visitFMinMax(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1701,10 +1698,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); - case ISD::FMINNUM: return visitFMINNUM(N); - case ISD::FMAXNUM: return visitFMAXNUM(N); - case ISD::FMINIMUM: return visitFMINIMUM(N); - case ISD::FMAXIMUM: return visitFMAXIMUM(N); + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: return visitFMinMax(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); @@ -2260,6 +2257,21 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + // fold (add x, undef) -> undef + if (N0.isUndef()) + return N0; + if (N1.isUndef()) + return N1; + + // fold (add c1, c2) -> c1+c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) @@ -2268,23 +2280,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold (add x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; - } - - // fold (add x, undef) -> undef - if (N0.isUndef()) - return N0; - - if (N1.isUndef()) - return N1; - - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { - // canonicalize constant to RHS - if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, DL, VT, N1, N0); - // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}); } // fold (add x, 0) -> x @@ -2554,6 +2549,19 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + // fold (add_sat x, undef) -> -1 + if (N0.isUndef() || N1.isUndef()) + return DAG.getAllOnesConstant(DL, VT); + + // fold (add_sat c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(Opcode, DL, VT, N1, N0); + // fold vector ops if (VT.isVector()) { // TODO SimplifyVBinOp @@ -2561,20 +2569,6 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { // fold (add_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; - } - - // fold (add_sat x, undef) -> -1 - if (N0.isUndef() || N1.isUndef()) - return DAG.getAllOnesConstant(DL, VT); - - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { - // canonicalize constant to RHS - if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(Opcode, DL, VT, N1, N0); - // fold (add_sat c1, c2) -> c3 - return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}); } // fold (add_sat x, 0) -> x @@ -3260,6 +3254,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. + if (N0 == N1) + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); + + // fold (sub c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) @@ -3270,15 +3273,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0; } - // fold (sub x, x) -> 0 - // FIXME: Refactor this and xor and other similar operations together. - if (N0 == N1) - return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); - - // fold (sub c1, c2) -> c3 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -3611,15 +3605,6 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); - // fold vector ops - if (VT.isVector()) { - // TODO SimplifyVBinOp - - // fold (sub_sat x, 0) -> x, vector edition - if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) - return N0; - } - // fold (sub_sat x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -3632,6 +3617,15 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1})) return C; + // fold vector ops + if (VT.isVector()) { + // TODO SimplifyVBinOp + + // fold (sub_sat x, 0) -> x, vector edition + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) + return N0; + } + // fold (sub_sat x, 0) -> x if (isNullConstant(N1)) return N0; @@ -3781,6 +3775,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); + // fold (mul c1, c2) -> c1*c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) + return C; + + // canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + bool N1IsConst = false; bool N1IsOpaqueConst = false; APInt ConstValue1; @@ -3802,15 +3805,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } } - // fold (mul c1, c2) -> c1*c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) - return C; - - // canonicalize constant to RHS (vector doesn't have to splat) - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); - // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isZero()) return N1; @@ -4140,17 +4134,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { EVT CCVT = getSetCCResultType(VT); SDLoc DL(N); + // fold (sdiv c1, c2) -> c1/c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (sdiv c1, c2) -> c1/c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) - return C; - // fold (sdiv X, -1) -> 0-X + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); @@ -4284,17 +4278,17 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { EVT CCVT = getSetCCResultType(VT); SDLoc DL(N); + // fold (udiv c1, c2) -> c1/c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (udiv c1, c2) -> c1/c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) - return C; - // fold (udiv X, -1) -> select(X == -1, 1, 0) + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), @@ -4463,6 +4457,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // fold (mulhs c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); + if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; @@ -4474,15 +4477,6 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { return DAG.getConstant(0, DL, VT); } - // fold (mulhs c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) - return C; - - // canonicalize constant to RHS. - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); - // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4523,6 +4517,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // fold (mulhu c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); + if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; @@ -4534,15 +4537,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return DAG.getConstant(0, DL, VT); } - // fold (mulhu c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) - return C; - - // canonicalize constant to RHS. - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); - // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4786,12 +4780,14 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { } // Function to calculate whether the Min/Max pair of SDNodes (potentially -// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1) -// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp -// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The -// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3 +// swapped around) make a signed saturate pattern, clamping to between a signed +// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW. +// Returns the node being clamped and the bitwidth of the clamp in BW. Should +// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the +// same as SimplifySelectCC. N0<N1 ? N2 : N3. static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, - SDValue N3, ISD::CondCode CC, unsigned &BW) { + SDValue N3, ISD::CondCode CC, unsigned &BW, + bool &Unsigned) { auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { // The compare and select operand should be the same or the select operands @@ -4858,17 +4854,27 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, const APInt &MinC = MinCOp->getAPIntValue(); const APInt &MaxC = MaxCOp->getAPIntValue(); APInt MinCPlus1 = MinC + 1; - if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2()) - return SDValue(); - BW = MinCPlus1.exactLogBase2() + 1; - return N02; + if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2() + 1; + Unsigned = false; + return N02; + } + + if (MaxC == 0 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2(); + Unsigned = true; + return N02; + } + + return SDValue(); } static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { unsigned BW; - SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW); + bool Unsigned; + SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned); if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) return SDValue(); EVT FPVT = Fp.getOperand(0).getValueType(); @@ -4876,13 +4882,14 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, if (FPVT.isVector()) NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, FPVT.getVectorElementCount()); - if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat( - ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT)) + unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT; + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT)) return SDValue(); SDLoc DL(Fp); - SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0), + SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0), DAG.getValueType(NewVT.getScalarType())); - return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); + return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0)) + : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { @@ -4892,11 +4899,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { unsigned Opcode = N->getOpcode(); SDLoc DL(N); - // fold vector ops - if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - // fold operation with constant operands. if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; @@ -4904,7 +4906,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(N->getOpcode(), DL, VT, N1, N0); + return DAG.getNode(Opcode, DL, VT, N1, N0); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. @@ -5777,6 +5784,15 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0 == N1) return N0; + // fold (and c1, c2) -> c1&c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) @@ -5824,22 +5840,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - // fold (and c1, c2) -> c1&c2 - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) - return C; - - // canonicalize constant to RHS - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); - // fold (and x, -1) -> x if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); @@ -6546,21 +6553,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0 == N1) return N0; + // fold (or c1, c2) -> c1|c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) + return C; + + // canonicalize constant to RHS + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (or x, 0) -> x, vector edition - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) return N0; // fold (or x, -1) -> -1, vector edition - if (ISD::isConstantSplatVectorAllOnes(N0.getNode())) - // do not return N0, because undef node may exist in N0 - return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType()); if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); @@ -6629,16 +6640,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or c1, c2) -> c1|c2 - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) - return C; - - // canonicalize constant to RHS - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); - // fold (or x, 0) -> x if (isNullConstant(N1)) return N0; @@ -6651,6 +6652,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return NewSel; // fold (or x, c) -> c iff (x & ~c) == 0 + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; @@ -7941,18 +7943,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); - // fold vector ops - if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - - // fold (xor x, 0) -> x, vector edition - if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) - return N1; - if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) - return N0; - } - // fold (xor undef, undef) -> 0. This is a common idiom (misuse). if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -7969,9 +7959,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + + // fold (xor x, 0) -> x, vector edition + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) + return N0; + } + // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; @@ -8409,6 +8409,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // fold (shl c1, c2) -> c1<<c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) @@ -8434,12 +8438,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - ConstantSDNode *N1C = isConstOrConstSplat(N1); - - // fold (shl c1, c2) -> c1<<c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -8558,6 +8556,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 // TODO - support non-uniform vector shift amounts. + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && N0->getFlags().hasExact()) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { @@ -8758,6 +8757,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // fold (sra c1, c2) -> (sra c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) + return C; + // Arithmetic shifting an all-sign-bit value is a no-op. // fold (sra 0, x) -> 0 // fold (sra -1, x) -> -1 @@ -8769,17 +8772,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; - ConstantSDNode *N1C = isConstOrConstSplat(N1); - - // fold (sra c1, c2) -> (sra c1, c2) - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); @@ -8962,21 +8960,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // fold (srl c1, c2) -> c1 >>u c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; - ConstantSDNode *N1C = isConstOrConstSplat(N1); - - // fold (srl c1, c2) -> c1 >>u c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) - return C; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // if (srl x, c) is known to be zero, return 0 + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); @@ -10043,6 +10040,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); SDValue Chain = MST->getChain(); + SDValue Value = MST->getValue(); + SDValue Ptr = MST->getBasePtr(); SDLoc DL(N); // Zap masked stores with a zero mask. @@ -10054,12 +10053,50 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() && !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), - MST->getBasePtr(), MST->getMemOperand()); + MST->getBasePtr(), MST->getPointerInfo(), + MST->getOriginalAlign(), MachineMemOperand::MOStore, + MST->getAAInfo()); // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + if (MST->isTruncatingStore() && MST->isUnindexed() && + Value.getValueType().isInteger() && + (!isa<ConstantSDNode>(Value) || + !cast<ConstantSDNode>(Value)->isOpaque())) { + APInt TruncDemandedBits = + APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + MST->getMemoryVT().getScalarSizeInBits()); + + // See if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, TruncDemandedBits)) { + // Re-visit the store if anything changed and the store hasn't been merged + // with another node (N is deleted) SimplifyDemandedBits will add Value's + // node back to the worklist if necessary, but we also need to re-visit + // the Store node itself. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); + return SDValue(N, 0); + } + } + + // If this is a TRUNC followed by a masked store, fold this into a masked + // truncating store. We can do this even if this is already a masked + // truncstore. + if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() && + MST->isUnindexed() && + TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), + MST->getMemoryVT(), LegalOperations)) { + auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(), + Value.getOperand(0).getValueType()); + return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), + /*IsTruncating=*/true); + } + return SDValue(); } @@ -10109,8 +10146,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { // FIXME: Can we do this for indexed, expanding, or extending loads? if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() && !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) { - SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(), - MLD->getBasePtr(), MLD->getMemOperand()); + SDValue NewLd = DAG.getLoad( + N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), + MLD->getPointerInfo(), MLD->getOriginalAlign(), + MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges()); return CombineTo(N, NewLd, NewLd.getValue(1)); } @@ -13876,19 +13915,19 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; - // fold vector ops - if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - // fold (fadd c1, c2) -> c1 + c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1})) + return C; // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) @@ -14084,15 +14123,15 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; + // fold (fsub c1, c2) -> c1-c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (fsub c1, c2) -> c1-c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, DL, VT, N0, N1); - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14157,7 +14196,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); @@ -14168,22 +14206,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; - // fold vector ops - if (VT.isVector()) { - // This just handles C1 * C2 for vectors. Other vector folds are below. - if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) - return FoldedVOp; - } - // fold (fmul c1, c2) -> c1*c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1})) + return C; // canonicalize constant to RHS if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14495,8 +14531,6 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -14506,15 +14540,15 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) return R; + // fold (fdiv c1, c2) -> c1/c2 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1})) + return C; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - // fold (fdiv c1, c2) -> c1/c2 - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14523,7 +14557,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP) { + if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) { // Compute the reciprocal 1.0 / c2. const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 @@ -14639,8 +14673,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -14649,9 +14681,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { return R; // fold (frem c1, c2) -> fmod(c1,c2) - if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); - + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1})) + return C; + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -14712,12 +14744,12 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP) // Constant fold - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); + // fold (fcopysign c1, c2) -> fcopysign(c1,c2) + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1})) + return C; if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { const APFloat &V = N1C->getValueAPF(); @@ -14835,14 +14867,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { - // This optimization is guarded by a function attribute because it may produce - // unexpected results. Ie, programs may be relying on the platform-specific - // undefined behavior when the float-to-int conversion overflows. - const Function &F = DAG.getMachineFunction().getFunction(); - Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); - if (StrictOverflow.getValueAsString().equals("false")) - return SDValue(); - // We only do this if the target has legal ftrunc. Otherwise, we'd likely be // replacing casts with a libcall. We also must be allowed to ignore -0.0 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer @@ -15216,31 +15240,26 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } -static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, - APFloat (*Op)(const APFloat &, const APFloat &)) { +SDValue DAGCombiner::visitFMinMax(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); const SDNodeFlags Flags = N->getFlags(); unsigned Opc = N->getOpcode(); bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; SelectionDAG::FlagInserter FlagsInserter(DAG, N); - if (N0CFP && N1CFP) { - const APFloat &C0 = N0CFP->getValueAPF(); - const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT); - } + // Constant fold. + if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1})) + return C; // Canonicalize to constant on RHS. if (DAG.isConstantFPBuildVectorOrConstantFP(N0) && !DAG.isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); - if (N1CFP) { + if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) { const APFloat &AF = N1CFP->getValueAPF(); // minnum(X, nan) -> X @@ -15272,22 +15291,6 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, return SDValue(); } -SDValue DAGCombiner::visitFMINNUM(SDNode *N) { - return visitFMinMax(DAG, N, minnum); -} - -SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { - return visitFMinMax(DAG, N, maxnum); -} - -SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { - return visitFMinMax(DAG, N, minimum); -} - -SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { - return visitFMinMax(DAG, N, maximum); -} - SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -18392,8 +18395,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { - if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && - ST->getMemoryVT() == ST1->getMemoryVT() && + if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr && + ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the // same location, then the store is dead/noop. @@ -20727,6 +20730,156 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return NewLd; } +/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), +/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?), +/// EXTRACT_SUBVECTOR(Op?, ?), +/// Mask')) +/// iff it is legal and profitable to do so. Notably, the trimmed mask +/// (containing only the elements that are extracted) +/// must reference at most two subvectors. +static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && + "Must only be called on EXTRACT_SUBVECTOR's"); + + SDValue N0 = N->getOperand(0); + + // Only deal with non-scalable vectors. + EVT NarrowVT = N->getValueType(0); + EVT WideVT = N0.getValueType(); + if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector()) + return SDValue(); + + // The operand must be a shufflevector. + auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0); + if (!WideShuffleVector) + return SDValue(); + + // The old shuffleneeds to go away. + if (!WideShuffleVector->hasOneUse()) + return SDValue(); + + // And the narrow shufflevector that we'll form must be legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT)) + return SDValue(); + + uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1); + int NumEltsExtracted = NarrowVT.getVectorNumElements(); + assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 && + "Extract index is not a multiple of the output vector length."); + + int WideNumElts = WideVT.getVectorNumElements(); + + SmallVector<int, 16> NewMask; + NewMask.reserve(NumEltsExtracted); + SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2> + DemandedSubvectors; + + // Try to decode the wide mask into narrow mask from at most two subvectors. + for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx, + NumEltsExtracted)) { + assert((M >= -1) && (M < (2 * WideNumElts)) && + "Out-of-bounds shuffle mask?"); + + if (M < 0) { + // Does not depend on operands, does not require adjustment. + NewMask.emplace_back(M); + continue; + } + + // From which operand of the shuffle does this shuffle mask element pick? + int WideShufOpIdx = M / WideNumElts; + // Which element of that operand is picked? + int OpEltIdx = M % WideNumElts; + + assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M && + "Shuffle mask vector decomposition failure."); + + // And which NumEltsExtracted-sized subvector of that operand is that? + int OpSubvecIdx = OpEltIdx / NumEltsExtracted; + // And which element within that subvector of that operand is that? + int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted; + + assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx && + "Shuffle mask subvector decomposition failure."); + + assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted + + WideShufOpIdx * WideNumElts) == M && + "Shuffle mask full decomposition failure."); + + SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx); + + if (Op.isUndef()) { + // Picking from an undef operand. Let's adjust mask instead. + NewMask.emplace_back(-1); + continue; + } + + // Profitability check: only deal with extractions from the first subvector. + if (OpSubvecIdx != 0) + return SDValue(); + + const std::pair<SDValue, int> DemandedSubvector = + std::make_pair(Op, OpSubvecIdx); + + if (DemandedSubvectors.insert(DemandedSubvector)) { + if (DemandedSubvectors.size() > 2) + return SDValue(); // We can't handle more than two subvectors. + // How many elements into the WideVT does this subvector start? + int Index = NumEltsExtracted * OpSubvecIdx; + // Bail out if the extraction isn't going to be cheap. + if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index)) + return SDValue(); + } + + // Ok, but from which operand of the new shuffle will this element pick? + int NewOpIdx = + getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector); + assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index."); + + int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted; + NewMask.emplace_back(AdjM); + } + assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask."); + assert(DemandedSubvectors.size() <= 2 && + "Should have ended up demanding at most two subvectors."); + + // Did we discover that the shuffle does not actually depend on operands? + if (DemandedSubvectors.empty()) + return DAG.getUNDEF(NarrowVT); + + // We still perform the exact same EXTRACT_SUBVECTOR, just on different + // operand[s]/index[es], so there is no point in checking for it's legality. + + // Do not turn a legal shuffle into an illegal one. + if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) && + !TLI.isShuffleMaskLegal(NewMask, NarrowVT)) + return SDValue(); + + SDLoc DL(N); + + SmallVector<SDValue, 2> NewOps; + for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/> + &DemandedSubvector : DemandedSubvectors) { + // How many elements into the WideVT does this subvector start? + int Index = NumEltsExtracted * DemandedSubvector.second; + SDValue IndexC = DAG.getVectorIdxConstant(Index, DL); + NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, + DemandedSubvector.first, IndexC)); + } + assert((NewOps.size() == 1 || NewOps.size() == 2) && + "Should end up with either one or two ops"); + + // If we ended up with only one operand, pad with an undef. + if (NewOps.size() == 1) + NewOps.emplace_back(DAG.getUNDEF(NarrowVT)); + + return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask); +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -20840,6 +20993,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { } } + if (SDValue V = + foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations)) + return V; + V = peekThroughBitcasts(V); // If the input is a build vector. Try to make a smaller build vector. @@ -22424,15 +22581,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Ops[] = {LHS, RHS}; unsigned Opcode = N->getOpcode(); SDNodeFlags Flags = N->getFlags(); - // See if we can constant fold the vector operation. - if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), - LHS.getValueType(), Ops)) - return Fold; - // Move unary shuffles with identical masks after a vector binop: // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)) // --> shuffle (VBinOp A, B), Undef, Mask |