diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 587 |
1 files changed, 440 insertions, 147 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9ab1324533f1..24ab65171a17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { const Function &F = DAG.getMachineFunction().getFunction(); + // First, check if tail calls have been disabled in this function. + if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + // Conservatively require the attributes of the call to match those of // the return. Ignore NoAlias and NonNull because they don't affect the // call sequence. @@ -122,7 +126,11 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, MakeLibCallOptions CallOptions, - const SDLoc &dl) const { + const SDLoc &dl, + SDValue InChain) const { + if (!InChain) + InChain = DAG.getEntryNode(); + TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, } CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) + .setChain(InChain) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) @@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::CondCode &CCCode, const SDLoc &dl, const SDValue OldLHS, const SDValue OldRHS) const { + SDValue Chain; + return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS, + OldRHS, Chain); +} + +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS, + SDValue &Chain, + bool IsSignaling) const { + // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc + // not supporting it. We can update this code when libgcc provides such + // functions. + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, (VT == MVT::f64) ? RTLIB::OGT_F64 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; + case ISD::SETO: + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : - (VT == MVT::f64) ? RTLIB::O_F64 : - (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; - break; case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : - (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : - (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; - break; + // SETONE = O && UNE + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : @@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, EVT OpsVT[2] = { OldLHS.getValueType(), OldRHS.getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; + auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); - if (ShouldInvertCC) - CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); - - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; - NewLHS = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + if (ShouldInvertCC) { + assert(RetVT.isInteger()); + CCCode = getSetCCInverse(CCCode, RetVT); + } + + if (LC2 == RTLIB::UNKNOWN_LIBCALL) { + // Update Chain. + Chain = Call.second; + } else { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT); + SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode); + auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain); + CCCode = getCmpLibcallCC(LC2); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, RetVT); + NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second, + Call2.second); + NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl, + Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); } } @@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Op.getOperand(1); break; } + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // If (1) we only need the sign-bit, (2) the setcc operands are the same + // width as the setcc result, and (3) the result of a setcc conforms to 0 or + // -1, we may be able to bypass the setcc. + if (DemandedBits.isSignMask() && + Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() && + getBooleanContents(Op0.getValueType()) == + BooleanContent::ZeroOrNegativeOneBooleanContent) { + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return Op0; + } + break; + } case ISD::SIGN_EXTEND_INREG: { // If none of the extended bits are demanded, eliminate the sextinreg. EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits( // -1, we may be able to bypass the setcc. if (DemandedBits.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(VT) == + getBooleanContents(Op0.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! // FIXME: We're limiting to integer types here, but this should also work @@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.reverseBits(); break; } + case ISD::BSWAP: { + SDValue Src = Op.getOperand(0); + APInt DemandedSrcBits = DemandedBits.byteSwap(); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + Known.One = Known2.One.byteSwap(); + Known.Zero = Known2.Zero.byteSwap(); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits( // undesirable. break; - auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); - if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth)) + SDValue ShAmt = Src.getOperand(1); + auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) break; - - SDValue Shift = Src.getOperand(1); - uint64_t ShVal = ShAmt->getZExtValue(); - - if (TLO.LegalTypes()) - Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + uint64_t ShVal = ShAmtC->getZExtValue(); APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); @@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits( if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. + if (TLO.LegalTypes()) + ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); } break; } @@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedSrcBits.isAllOnesValue() || + !DemandedSrcElts.isAllOnesValue()) { + if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx); + return TLO.CombineTo(Op, NewOp); + } + } + Known = Known2; if (BitWidth > EltBitWidth) Known = Known.zext(BitWidth, false /* => any extend */); @@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Note that where Y is variable and is known to have at most one bit set // (for example, if it is Z & 1) we cannot do this; the expressions are not // equivalent when Y == 0. - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + assert(OpVT.isInteger()); + Cond = ISD::getSetCCInverse(Cond, OpVT); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) return DAG.getSetCC(DL, VT, N0, Zero, Cond); @@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( // What if we invert constants? (and the target predicate) I1.negate(); I01.negate(); - NewCond = getSetCCInverse(NewCond, /*isInteger=*/true); + assert(XVT.isInteger()); + NewCond = getSetCCInverse(NewCond, XVT); if (!checkConstants()) return SDValue(); // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 @@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + const DataLayout &Layout = DAG.getDataLayout(); EVT OpVT = N0.getValueType(); // Constant fold or commute setcc. @@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + assert(CTVT.isInteger()); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); @@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode InvCond = ISD::getSetCCInverse( cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), - TopSetCC.getOperand(0).getValueType().isInteger()); + TopSetCC.getOperand(0).getValueType()); return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); @@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if (Mask.isSubsetOf(newMask)) { - if (DAG.getDataLayout().isLittleEndian()) + if (Layout.isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); else bestOffset = (origWidth/width - offset - 1) * (width/8); @@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound() && shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { - EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, dl, PtrType)); + Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad( newVT, dl, Lod->getChain(), Ptr, @@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && isCondCodeLegal(Cond, newVT.getSimpleVT()))) { - EVT NewSetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); + EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), @@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && - isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && + (N0.getValueType() == MVT::i1 || + getBooleanContents(N0.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); @@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->isOne() && - (VT == MVT::i1 || - getBooleanContents(N0->getValueType(0)) == - ZeroOrOneBooleanContent)) { + } else if (N1C->isOne()) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Op0.getOpcode() == ISD::XOR) && Op0.getOperand(0).getOpcode() == ISD::SETCC && Op0.getOperand(1).getOpcode() == ISD::SETCC) { - // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) - Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; - return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), - Cond); + SDValue XorLHS = Op0.getOperand(0); + SDValue XorRHS = Op0.getOperand(1); + // Ensure that the input setccs return an i1 type or 0/1 value. + if (Op0.getValueType() == MVT::i1 || + (getBooleanContents(XorLHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent && + getBooleanContents(XorRHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); + } } if (Op0.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op0.getOperand(1)) && @@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { - auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); + EVT ShiftTy = + getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. unsigned ShCt = AndRHS->getAPIntValue().logBase2(); if (AndRHS->getAPIntValue().isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Perform the xform if C1 is a single bit. unsigned ShCt = C1.logBase2(); if (C1.isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.getMinSignedBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { + EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, - ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); - return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = + DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } } } } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || @@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && - isLegalICmpImmediate(NewC.getSExtValue())) { - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + isLegalICmpImmediate(NewC.getSExtValue()) && + !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); + // Indirect 'other' or 'immediate' constraints are not allowed. + if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || + CType == TargetLowering::C_Register || + CType == TargetLowering::C_RegisterClass)) + continue; + // If this is an 'other' or 'immediate' constraint, see if the operand is // valid for it. For example, on X86 we might have an 'rI' constraint. If // the operand is an integer in the range [0..31] we want to use I (saving a @@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { - SmallVector<SDNode *, 2> Built; + SmallVector<SDNode *, 5> Built; if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, DCI, DL, Built)) { for (SDNode *N : Built) @@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Could support comparing with non-zero too. - ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) - return SDValue(); - - bool HadOneDivisor = false; - bool AllDivisorsAreOnes = true; + bool ComparingWithAllZeros = true; + bool AllComparisonsWithNonZerosAreTautological = true; + bool HadTautologicalLanes = false; + bool AllLanesAreTautological = true; bool HadEvenDivisor = false; bool AllDivisorsArePowerOfTwo = true; - SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + bool HadTautologicalInvertedLanes = false; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts; - auto BuildUREMPattern = [&](ConstantSDNode *C) { + auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (CDiv->isNullValue()) return false; - const APInt &D = C->getAPIntValue(); - // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + const APInt &D = CDiv->getAPIntValue(); + const APInt &Cmp = CCmp->getAPIntValue(); + + ComparingWithAllZeros &= Cmp.isNullValue(); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we will only be able to produce the comparison that will give the + // opposive tautological answer. So this lane would need to be fixed up. + bool TautologicalInvertedLane = D.ule(Cmp); + HadTautologicalInvertedLanes |= TautologicalInvertedLane; + + // If all lanes are tautological (either all divisors are ones, or divisor + // is not greater than the constant we are comparing with), + // we will prefer to avoid the fold. + bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + HadTautologicalLanes |= TautologicalLane; + AllLanesAreTautological &= TautologicalLane; + + // If we are comparing with non-zero, we need'll need to subtract said + // comparison value from the LHS. But there is no point in doing that if + // every lane where we are comparing with non-zero is tautological.. + if (!Cmp.isNullValue()) + AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); @@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + // Q = floor((2^W - 1) u/ D) + // R = ((2^W - 1) u% D) + APInt Q, R; + APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + + // If we are comparing with zero, then that comparison constant is okay, + // else it may need to be one less than that. + if (Cmp.ugt(R)) + Q -= 1; assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); - // If the divisor is 1 the result can be constant-folded. - if (D.isOneValue()) { + // If the lane is tautological the result can be constant-folded. + if (TautologicalLane) { // Set P and K amount to a bogus values so we can try to splat them. P = 0; K = -1; - assert(Q.isAllOnesValue() && - "Expecting all-ones comparison for one divisor"); + // And ensure that comparison constant is tautological, + // it will always compare true/false. + Q = -1; } PAmts.push_back(DAG.getConstant(P, DL, SVT)); @@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue D = REMNode.getOperand(1); // Collect the values from each element. - if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern)) return SDValue(); - // If this is a urem by a one, avoid the fold since it can be constant-folded. - if (AllDivisorsAreOnes) + // If all lanes are tautological, the result can be constant-folded. + if (AllLanesAreTautological) return SDValue(); // If this is a urem by a powers-of-two, avoid the fold since it can be @@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue PVal, KVal, QVal; if (VT.isVector()) { - if (HadOneDivisor) { + if (HadTautologicalLanes) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. turnVectorIntoSplatVector(PAmts, isNullConstant); @@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, QVal = QAmts[0]; } + if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) { + if (!isOperationLegalOrCustom(ISD::SUB, VT)) + return SDValue(); // FIXME: Could/should use `ISD::ADD`? + assert(CompTargetNode.getValueType() == N.getValueType() && + "Expecting that the types on LHS and RHS of comparisons match."); + N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode); + } + // (mul N, P) SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); Created.push_back(Op0.getNode()); @@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op0, QVal, - ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + SDValue NewCC = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + if (!HadTautologicalInvertedLanes) + return NewCC; + + // If any lanes previously compared always-false, the NewCC will give + // always-true result for them, so we need to fixup those lanes. + // Or the other way around for inequality predicate. + assert(VT.isVector() && "Can/should only get here for vectors."); + Created.push_back(NewCC.getNode()); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we have produced the comparison that will give the + // opposive tautological answer. So these lanes would need to be fixed up. + SDValue TautologicalInvertedChannels = + DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); + Created.push_back(TautologicalInvertedChannels.getNode()); + + if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { + // If we have a vector select, let's replace the comparison results in the + // affected lanes with the correct tautological result. + SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true, + DL, SETCCVT, SETCCVT); + return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels, + Replacement, NewCC); + } + + // Else, we can just invert the comparison result in the appropriate lanes. + if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) + return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, + TautologicalInvertedChannels); + + return SDValue(); // Don't know how to lower. } /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE @@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, ForCodeSize, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, Depth + 1); - if (V0 >= V1) { + // TODO: This is a hack. It is possible that costs have changed between now + // and the initial calls to isNegatibleForFree(). That is because we + // are rewriting the expression, and that may change the number of + // uses (and therefore the cost) of values. If the negation costs are + // equal, only negate this value if it is a constant. Otherwise, try + // operand 1. A better fix would eliminate uses as a cost factor or + // track the change in uses as we rewrite the expression. + if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) { // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) SDValue Neg0 = getNegatedExpression( Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); @@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, EVT DstVT = Node->getValueType(0); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); // Only expand vector types if we have the appropriate vector bit operations. unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : @@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); - SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + SDValue Sel; + + if (Node->isStrictFPOpcode()) { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Node->getOperand(0), /*IsSignaling*/ true); + Chain = Sel.getValue(1); + } else { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + } bool Strict = Node->isStrictFPOpcode() || shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); @@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). // Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Chain, Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); False = DAG.getNode(ISD::XOR, dl, DstVT, False, DAG.getConstant(SignMask, dl, DstVT)); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); Result = DAG.getSelect(dl, DstVT, Sel, True, False); } return true; } bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return false; // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundidf in compiler_rt. - SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); - - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); - SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, SrcVT); - SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); - - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); - SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + // algorithm from the x86_64 __floatundisf in compiler_rt. // TODO: This really should be implemented using a branch rather than a // select. We happen to get lucky and machinesink does the right @@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue SignBitTest = DAG.getSetCC( dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue Slow, Fast; + if (Node->isStrictFPOpcode()) { + // In strict mode, we must avoid spurious exceptions, and therefore + // must make sure to only emit a single STRICT_SINT_TO_FP. + SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src); + Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), InCvt }); + Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other }, + { Fast.getValue(1), Fast, Fast }); + Chain = Slow.getValue(1); + // The STRICT_SINT_TO_FP inherits the exception mode from the + // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can + // never raise any exception. + SDNodeFlags Flags; + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); + Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); + } else { + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); + Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); + } + Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); return true; } @@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + if (Node->isStrictFPOpcode()) { + SDValue HiSub = + DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, + {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); + Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, + {HiSub.getValue(1), LoFlt, HiSub}); + Chain = Result.getValue(1); + } else { + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + } return true; } @@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } + // If none of the above worked, but there are no NaNs, then expand to + // a compare/select sequence. This is required for correctness since + // InstCombine might have canonicalized a fcmp+select sequence to a + // FMINNUM/FMAXNUM node. If we were to fall through to the default + // expansion to libcall, we might introduce a link-time dependency + // on libm into a file that originally did not have one. + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = + Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + return SDValue(); } @@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, return true; } -SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, - SelectionDAG &DAG) const { +std::pair<SDValue, SDValue> +TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, + SelectionDAG &DAG) const { SDLoc SL(LD); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); - return DAG.getMergeValues({Value, NewChain}, SL); + return std::make_pair(Value, NewChain); } SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, @@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT) && LoadedVT.isVector()) { // Scalarize the load and let the individual components be handled. - SDValue Scalarized = scalarizeVectorLoad(LD, DAG); - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) - return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); - return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + return scalarizeVectorLoad(LD, DAG); } // Expand to a (misaligned) integer load of the same size, @@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); - return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index); + return DAG.getMemBasePlusOffset(VecPtr, Index, dl); } //===----------------------------------------------------------------------===// @@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { return Result; } +SDValue +TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, + SDValue LHS, SDValue RHS, + unsigned Scale, SelectionDAG &DAG) const { + assert((Opcode == ISD::SDIVFIX || + Opcode == ISD::UDIVFIX) && + "Expected a fixed point division opcode"); + + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + // If there is enough room in the type to upscale the LHS or downscale the + // RHS before the division, we can perform it in this type without having to + // resize. For signed operations, the LHS headroom is the number of + // redundant sign bits, and for unsigned ones it is the number of zeroes. + // The headroom for the RHS is the number of trailing zeroes. + unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1 + : DAG.computeKnownBits(LHS).countMinLeadingZeros(); + unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros(); + + if (LHSLead + RHSTrail < Scale) + return SDValue(); + + unsigned LHSShift = std::min(LHSLead, Scale); + unsigned RHSShift = Scale - LHSShift; + + // At this point, we know that if we shift the LHS up by LHSShift and the + // RHS down by RHSShift, we can emit a regular division with a final scaling + // factor of Scale. + + EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (LHSShift) + LHS = DAG.getNode(ISD::SHL, dl, VT, LHS, + DAG.getConstant(LHSShift, dl, ShiftTy)); + if (RHSShift) + RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS, + DAG.getConstant(RHSShift, dl, ShiftTy)); + + SDValue Quot; + if (Signed) { + // For signed operations, if the resulting quotient is negative and the + // remainder is nonzero, subtract 1 from the quotient to round towards + // negative infinity. + SDValue Rem; + // FIXME: Ideally we would always produce an SDIVREM here, but if the + // type isn't legal, SDIVREM cannot be expanded. There is no reason why + // we couldn't just form a libcall, but the type legalizer doesn't do it. + if (isTypeLegal(VT) && + isOperationLegalOrCustom(ISD::SDIVREM, VT)) { + Quot = DAG.getNode(ISD::SDIVREM, dl, + DAG.getVTList(VT, VT), + LHS, RHS); + Rem = Quot.getValue(1); + Quot = Quot.getValue(0); + } else { + Quot = DAG.getNode(ISD::SDIV, dl, VT, + LHS, RHS); + Rem = DAG.getNode(ISD::SREM, dl, VT, + LHS, RHS); + } + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE); + SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT); + SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT); + SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg); + SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot, + DAG.getConstant(1, dl, VT)); + Quot = DAG.getSelect(dl, VT, + DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg), + Sub1, Quot); + } else + Quot = DAG.getNode(ISD::UDIV, dl, VT, + LHS, RHS); + + // TODO: Saturation. + + return Quot; +} + void TargetLowering::expandUADDSUBO( SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const { SDLoc dl(Node); |