Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  587
1 file changed, 440 insertions, 147 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9ab1324533f1..24ab65171a17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
+ // First, check if tail calls have been disabled in this function.
+ if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
// Conservatively require the attributes of the call to match those of
// the return. Ignore NoAlias and NonNull because they don't affect the
// call sequence.
@@ -122,7 +126,11 @@ std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
- const SDLoc &dl) const {
+ const SDLoc &dl,
+ SDValue InChain) const {
+ if (!InChain)
+ InChain = DAG.getEntryNode();
+
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
@@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
}
CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
+ .setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
@@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
+ SDValue Chain;
+ return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
+ OldRHS, Chain);
+}
+
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS,
+ SDValue &Chain,
+ bool IsSignaling) const {
+ // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
+ // not supporting it. We can update this code when libgcc provides such
+ // functions.
+
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
@@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
+ case ISD::SETO:
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
- case ISD::SETO:
- LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
- (VT == MVT::f64) ? RTLIB::O_F64 :
- (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
- break;
case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 :
- (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 :
- (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
- break;
+ // SETONE = O && UNE
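+ // (SETONE is the inverse of SETUEQ, i.e. !(UO || OEQ), so reuse the SETUEQ
+ // lowering below and invert the final predicate.)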
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
+ auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
+ NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
- if (ShouldInvertCC)
- CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
-
- if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
- SDValue Tmp = DAG.getNode(
- ISD::SETCC, dl,
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
- NewLHS = DAG.getNode(
- ISD::SETCC, dl,
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
- NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ if (ShouldInvertCC) {
+ assert(RetVT.isInteger());
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ }
+
+ if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
+ // Update Chain.
+ Chain = Call.second;
+ } else {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
+ SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
+ auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
+ CCCode = getCmpLibcallCC(LC2);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
+ Call2.second);
+ NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
+ Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
@@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Op.getOperand(1);
break;
}
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
+ getBooleanContents(Op0.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
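+ // E.g. if only the sign bit of (setcc X, 0, setlt) is demanded and booleans
+ // are 0/-1, X already carries that sign bit, so X can be used directly.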
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return Op0;
+ }
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits(
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
- getBooleanContents(VT) ==
+ getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
@@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero = Known2.Zero.reverseBits();
break;
}
+ case ISD::BSWAP: {
+ SDValue Src = Op.getOperand(0);
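+ // Only the byte-swapped image of the demanded bits is needed from the
+ // source; e.g. for i32, demanding bits 0-7 of the result demands bits 24-31
+ // of Src.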
+ APInt DemandedSrcBits = DemandedBits.byteSwap();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.byteSwap();
+ Known.Zero = Known2.Zero.byteSwap();
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits(
// undesirable.
break;
- auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
- if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
+ SDValue ShAmt = Src.getOperand(1);
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
+ if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
break;
-
- SDValue Shift = Src.getOperand(1);
- uint64_t ShVal = ShAmt->getZExtValue();
-
- if (TLO.LegalTypes())
- Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+ uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
@@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
+ if (TLO.LegalTypes())
+ ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
}
break;
}
@@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnesValue() ||
+ !DemandedSrcElts.isAllOnesValue()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth, false /* => any extend */);
@@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
// equivalent when Y == 0.
- Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ assert(OpVT.isInteger());
+ Cond = ISD::getSetCCInverse(Cond, OpVT);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N0.getSimpleValueType()))
return DAG.getSetCC(DL, VT, N0, Zero, Cond);
@@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
// What if we invert constants? (and the target predicate)
I1.negate();
I01.negate();
- NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ assert(XVT.isInteger());
+ NewCond = getSetCCInverse(NewCond, XVT);
if (!checkConstants())
return SDValue();
// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
@@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
+ const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
// Constant fold or commute setcc.
@@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+ assert(CTVT.isInteger());
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
@@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode InvCond = ISD::getSetCCInverse(
cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
- TopSetCC.getOperand(0).getValueType().isInteger());
+ TopSetCC.getOperand(0).getValueType());
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
@@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if (Mask.isSubsetOf(newMask)) {
- if (DAG.getDataLayout().isLittleEndian())
+ if (Layout.isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
bestOffset = (origWidth/width - offset - 1) * (width/8);
@@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound() &&
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
- EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
- DAG.getConstant(bestOffset, dl, PtrType));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(
newVT, dl, Lod->getChain(), Ptr,
@@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
- EVT NewSetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
+ EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
@@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
- isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
+ (N0.getValueType() == MVT::i1 ||
+ getBooleanContents(N0.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
- N0.getOperand(0).getValueType().isInteger());
+ CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
@@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
- } else if (N1C->isOne() &&
- (VT == MVT::i1 ||
- getBooleanContents(N0->getValueType(0)) ==
- ZeroOrOneBooleanContent)) {
+ } else if (N1C->isOne()) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Op0.getOpcode() == ISD::XOR) &&
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
- // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
- Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
- return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
- Cond);
+ SDValue XorLHS = Op0.getOperand(0);
+ SDValue XorRHS = Op0.getOperand(1);
+ // Ensure that the input setccs return an i1 type or 0/1 value.
+ if (Op0.getValueType() == MVT::i1 ||
+ (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent &&
+ getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
+ }
}
if (Op0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
@@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
- auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
+ EVT ShiftTy =
+ getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
unsigned ShCt = AndRHS->getAPIntValue().logBase2();
if (AndRHS->getAPIntValue().isPowerOf2() &&
- ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
@@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Perform the xform if C1 is a single bit.
unsigned ShCt = C1.logBase2();
if (C1.isPowerOf2() &&
- ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
@@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (C1.getMinSignedBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
+ EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
@@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- auto &DL = DAG.getDataLayout();
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
- EVT CmpTy = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl,
- ShiftTy));
- SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
- return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift =
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
}
}
} else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
@@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
- isLegalICmpImmediate(NewC.getSExtValue())) {
- auto &DL = DAG.getDataLayout();
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
- EVT CmpTy = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ isLegalICmpImmediate(NewC.getSExtValue()) &&
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
- SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
}
@@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
+ // Indirect 'other' or 'immediate' constraints are not allowed.
+ if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
+ CType == TargetLowering::C_Register ||
+ CType == TargetLowering::C_RegisterClass))
+ continue;
+
// If this is an 'other' or 'immediate' constraint, see if the operand is
// valid for it. For example, on X86 we might have an 'rI' constraint. If
// the operand is an integer in the range [0..31] we want to use I (saving a
@@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
- SmallVector<SDNode *, 2> Built;
+ SmallVector<SDNode *, 5> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
@@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Could support comparing with non-zero too.
- ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
- return SDValue();
-
- bool HadOneDivisor = false;
- bool AllDivisorsAreOnes = true;
+ bool ComparingWithAllZeros = true;
+ bool AllComparisonsWithNonZerosAreTautological = true;
+ bool HadTautologicalLanes = false;
+ bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
bool AllDivisorsArePowerOfTwo = true;
- SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+ bool HadTautologicalInvertedLanes = false;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
- auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (CDiv->isNullValue())
return false;
- const APInt &D = C->getAPIntValue();
- // If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ const APInt &D = CDiv->getAPIntValue();
+ const APInt &Cmp = CCmp->getAPIntValue();
+
+ ComparingWithAllZeros &= Cmp.isNullValue();
+
+ // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we will only be able to produce the comparison that will give the
+ // opposite tautological answer. So this lane would need to be fixed up.
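+ // (E.g. `x u% 4 == 5` is always false, yet the fold below would yield an
+ // always-true result for that lane.)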
+ bool TautologicalInvertedLane = D.ule(Cmp);
+ HadTautologicalInvertedLanes |= TautologicalInvertedLane;
+
+ // If all lanes are tautological (either all divisors are ones, or divisor
+ // is not greater than the constant we are comparing with),
+ // we will prefer to avoid the fold.
+ bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ HadTautologicalLanes |= TautologicalLane;
+ AllLanesAreTautological &= TautologicalLane;
+
+ // If we are comparing with non-zero, we'll need to subtract said
+ // comparison value from the LHS. But there is no point in doing that if
+ // every lane where we are comparing with non-zero is tautological.
+ if (!Cmp.isNullValue())
+ AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
@@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
- // Q = floor((2^W - 1) / D)
- APInt Q = APInt::getAllOnesValue(W).udiv(D);
+ // Q = floor((2^W - 1) u/ D)
+ // R = ((2^W - 1) u% D)
+ APInt Q, R;
+ APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+
+ // If we are comparing with zero, then that comparison constant is okay,
+ // else it may need to be one less than that.
+ if (Cmp.ugt(R))
+ Q -= 1;
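+ // (E.g. for W == 8, D == 6: Q = 42, R = 3; a comparison constant of 4 > R
+ // lowers Q to 41.)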
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
- // If the divisor is 1 the result can be constant-folded.
- if (D.isOneValue()) {
+ // If the lane is tautological the result can be constant-folded.
+ if (TautologicalLane) {
// Set P and K amount to bogus values so we can try to splat them.
P = 0;
K = -1;
- assert(Q.isAllOnesValue() &&
- "Expecting all-ones comparison for one divisor");
+ // And ensure that the comparison constant is tautological;
+ // it will always compare true/false.
+ Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
@@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
- if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
return SDValue();
- // If this is a urem by a one, avoid the fold since it can be constant-folded.
- if (AllDivisorsAreOnes)
+ // If all lanes are tautological, the result can be constant-folded.
+ if (AllLanesAreTautological)
return SDValue();
// If this is a urem by a powers-of-two, avoid the fold since it can be
@@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue PVal, KVal, QVal;
if (VT.isVector()) {
- if (HadOneDivisor) {
+ if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
@@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
QVal = QAmts[0];
}
+ if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
+ if (!isOperationLegalOrCustom(ISD::SUB, VT))
+ return SDValue(); // FIXME: Could/should use `ISD::ADD`?
+ assert(CompTargetNode.getValueType() == N.getValueType() &&
+ "Expecting that the types on LHS and RHS of comparisons match.");
+ N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
+ }
+
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
@@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
- ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ SDValue NewCC =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
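+ // (Illustrative: for i32 `x u% 6 == 0` this yields
+ // `(rotr (mul x, 0xAAAAAAAB), 1) u<= 0x2AAAAAAA`.)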
+ if (!HadTautologicalInvertedLanes)
+ return NewCC;
+
+ // If any lanes previously compared always-false, the NewCC will give
+ // always-true result for them, so we need to fixup those lanes.
+ // Or the other way around for inequality predicate.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+ Created.push_back(NewCC.getNode());
+
+ // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we have produced the comparison that will give the
+ // opposite tautological answer. So these lanes would need to be fixed up.
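+ // The setcc below is true exactly in the lanes where D u<= Cmp, i.e. the
+ // lanes whose result must be forced to the tautological value.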
+ SDValue TautologicalInvertedChannels =
+ DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
+ Created.push_back(TautologicalInvertedChannels.getNode());
+
+ if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
+ // If we have a vector select, let's replace the comparison results in the
+ // affected lanes with the correct tautological result.
+ SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
+ DL, SETCCVT, SETCCVT);
+ return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
+ Replacement, NewCC);
+ }
+
+ // Else, we can just invert the comparison result in the appropriate lanes.
+ if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
+ return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
+ TautologicalInvertedChannels);
+
+ return SDValue(); // Don't know how to lower.
}
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
@@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
ForCodeSize, Depth + 1);
char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
ForCodeSize, Depth + 1);
- if (V0 >= V1) {
+ // TODO: This is a hack. It is possible that costs have changed between now
+ // and the initial calls to isNegatibleForFree(). That is because we
+ // are rewriting the expression, and that may change the number of
+ // uses (and therefore the cost) of values. If the negation costs are
+ // equal, only negate this value if it is a constant. Otherwise, try
+ // operand 1. A better fix would eliminate uses as a cost factor or
+ // track the change in uses as we rewrite the expression.
+ if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
SDValue Neg0 = getNegatedExpression(
Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
@@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
EVT DstVT = Node->getValueType(0);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
@@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
- SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ SDValue Sel;
+
+ if (Node->isStrictFPOpcode()) {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Node->getOperand(0), /*IsSignaling*/ true);
+ Chain = Sel.getValue(1);
+ } else {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ }
bool Strict = Node->isStrictFPOpcode() ||
shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
@@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
// signmask then offset (the result of which should be fully representable).
// Sel = Src < 0x8000000000000000
- // Val = select Sel, Src, Src - 0x8000000000000000
- // Ofs = select Sel, 0, 0x8000000000000000
- // Result = fp_to_sint(Val) ^ Ofs
+ // FltOfs = select Sel, 0, 0x8000000000000000
+ // IntOfs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
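+ // (For Src >= 2^(BW-1) the FSUB brings the value into signed range and the
+ // XOR then restores the high bit of the integer result; for smaller Src both
+ // offsets are zero.)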
// TODO: Should any fast-math-flags be set for the FSUB?
- SDValue SrcBiased;
- if (Node->isStrictFPOpcode())
- SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
- { Node->getOperand(0), Src, Cst });
- else
- SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
- SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
- DAG.getConstant(SignMask, dl, DstVT));
+ SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
+ DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+ SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
+ DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Chain, Src, FltOfs });
SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
- { SrcBiased.getValue(1), Val });
+ { Val.getValue(1), Val });
Chain = SInt.getValue(1);
- } else
+ } else {
+ SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
- Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
+ }
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
@@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
}
return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return false;
// For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundidf in compiler_rt.
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
-
- SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
- SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
- SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
-
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ // algorithm from the x86_64 __floatundisf in compiler_rt.
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
@@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue SignBitTest = DAG.getSetCC(
dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
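+ // Halving Src (with the low bit ORed back in so rounding is unaffected)
+ // brings it into signed range; the FADD below doubles the converted value.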
+
+ SDValue Slow, Fast;
+ if (Node->isStrictFPOpcode()) {
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
+ { Fast.getValue(1), Fast, Fast });
+ Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
+ Fast->setFlags(Flags);
+ Flags.setNoFPExcept(true);
+ Slow->setFlags(Flags);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+ }
+
Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
return true;
}
@@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ if (Node->isStrictFPOpcode()) {
+ SDValue HiSub =
+ DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+ {HiSub.getValue(1), LoFlt, HiSub});
+ Chain = Result.getValue(1);
+ } else {
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ }
return true;
}
@@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
}
+ // If none of the above worked, but there are no NaNs, then expand to
+ // a compare/select sequence. This is required for correctness since
+ // InstCombine might have canonicalized a fcmp+select sequence to a
+ // FMINNUM/FMAXNUM node. If we were to fall through to the default
+ // expansion to libcall, we might introduce a link-time dependency
+ // on libm into a file that originally did not have one.
+ if (Node->getFlags().hasNoNaNs()) {
+ ISD::CondCode Pred =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+ SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
+ // Copy FMF flags, but always set the no-signed-zeros flag
+ // as this is implied by the FMINNUM/FMAXNUM semantics.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoSignedZeros(true);
+ SelCC->setFlags(Flags);
+ return SelCC;
+ }
+
return SDValue();
}
@@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
return true;
}
-SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
- SelectionDAG &DAG) const {
+std::pair<SDValue, SDValue>
+TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
SDLoc SL(LD);
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
@@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
- return DAG.getMergeValues({Value, NewChain}, SL);
+ return std::make_pair(Value, NewChain);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
@@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
- SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
- if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
- return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
- return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
+ return scalarizeVectorLoad(LD, DAG);
}
// Expand to a (misaligned) integer load of the same size,
@@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
- return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
+ return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
//===----------------------------------------------------------------------===//
@@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
return Result;
}
+SDValue
+TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ unsigned Scale, SelectionDAG &DAG) const {
+ assert((Opcode == ISD::SDIVFIX ||
+ Opcode == ISD::UDIVFIX) &&
+ "Expected a fixed point division opcode");
+
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // If there is enough room in the type to upscale the LHS or downscale the
+ // RHS before the division, we can perform it in this type without having to
+ // resize. For signed operations, the LHS headroom is the number of
+ // redundant sign bits, and for unsigned ones it is the number of zeroes.
+ // The headroom for the RHS is the number of trailing zeroes.
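+ // E.g. for a signed i32 division with Scale == 16: an LHS with 12 sign bits
+ // (LHSLead == 11) and an RHS with 5 trailing zeros gives 11 + 5 == 16, so
+ // the LHS is shifted up by 11 and the RHS down by 5.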
+ unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
+ : DAG.computeKnownBits(LHS).countMinLeadingZeros();
+ unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+
+ if (LHSLead + RHSTrail < Scale)
+ return SDValue();
+
+ unsigned LHSShift = std::min(LHSLead, Scale);
+ unsigned RHSShift = Scale - LHSShift;
+
+ // At this point, we know that if we shift the LHS up by LHSShift and the
+ // RHS down by RHSShift, we can emit a regular division with a final scaling
+ // factor of Scale.
+
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (LHSShift)
+ LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
+ DAG.getConstant(LHSShift, dl, ShiftTy));
+ if (RHSShift)
+ RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
+ DAG.getConstant(RHSShift, dl, ShiftTy));
+
+ SDValue Quot;
+ if (Signed) {
+ // For signed operations, if the resulting quotient is negative and the
+ // remainder is nonzero, subtract 1 from the quotient to round towards
+ // negative infinity.
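+ // (E.g. SDIV of -7 by 2 truncates to -3 with remainder -1; the quotient is
+ // negative and the remainder nonzero, so it is adjusted to -4.)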
+ SDValue Rem;
+ // FIXME: Ideally we would always produce an SDIVREM here, but if the
+ // type isn't legal, SDIVREM cannot be expanded. There is no reason why
+ // we couldn't just form a libcall, but the type legalizer doesn't do it.
+ if (isTypeLegal(VT) &&
+ isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
+ Quot = DAG.getNode(ISD::SDIVREM, dl,
+ DAG.getVTList(VT, VT),
+ LHS, RHS);
+ Rem = Quot.getValue(1);
+ Quot = Quot.getValue(0);
+ } else {
+ Quot = DAG.getNode(ISD::SDIV, dl, VT,
+ LHS, RHS);
+ Rem = DAG.getNode(ISD::SREM, dl, VT,
+ LHS, RHS);
+ }
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
+ SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
+ SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
+ SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
+ SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
+ DAG.getConstant(1, dl, VT));
+ Quot = DAG.getSelect(dl, VT,
+ DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
+ Sub1, Quot);
+ } else
+ Quot = DAG.getNode(ISD::UDIV, dl, VT,
+ LHS, RHS);
+
+ // TODO: Saturation.
+
+ return Quot;
+}
+
void TargetLowering::expandUADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);