diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 844 |
1 files changed, 658 insertions, 186 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4c8b63d2f239..48e03c6da68f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -58,6 +57,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" @@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} #define DEBUG_TYPE "selectiondag" +static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", + cl::Hidden, cl::init(true), + cl::desc("Gang up loads and stores generated by inlining of memcpy")); + +static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", + cl::desc("Number limit for gluing ld/st of memcpy."), + cl::Hidden, cl::init(0)); + static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { - DEBUG( - dbgs() << Msg; - V.getNode()->dump(G); - ); + LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); } //===----------------------------------------------------------------------===// @@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +bool ISD::matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match) { + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) + return Match(Cst); + + if (ISD::BUILD_VECTOR != Op.getOpcode()) + return false; + + EVT SVT = Op.getValueType().getScalarType(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + return false; + } + return true; +} + +bool ISD::matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { + if (LHS.getValueType() != RHS.getValueType()) + return false; + + if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) + if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) + return Match(LHSCst, RHSCst); + + if (ISD::BUILD_VECTOR != LHS.getOpcode() || + ISD::BUILD_VECTOR != RHS.getOpcode()) + return false; + + EVT SVT = LHS.getValueType().getScalarType(); + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); + if (!LHSCst || !RHSCst) + return false; + if (LHSCst->getValueType(0) != SVT || + LHSCst->getValueType(0) != RHSCst->getValueType(0)) + return false; + if (!Match(LHSCst, RHSCst)) + return false; + } + return true; +} + ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: @@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::MLOAD: { + const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); + ID.AddInteger(MLD->getMemoryVT().getRawBits()); + ID.AddInteger(MLD->getRawSubclassData()); + ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSTORE: { + const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + ID.AddInteger(MST->getMemoryVT().getRawBits()); + ID.AddInteger(MST->getRawSubclassData()); + ID.AddInteger(MST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MGATHER: { + const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); + ID.AddInteger(MG->getMemoryVT().getRawBits()); + ID.AddInteger(MG->getRawSubclassData()); + ID.AddInteger(MG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSCATTER: { + const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); + ID.AddInteger(MS->getMemoryVT().getRawBits()); + ID.AddInteger(MS->getRawSubclassData()); + ID.AddInteger(MS->getPointerInfo().getAddrSpace()); + break; + } case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) { } #endif // NDEBUG -/// \brief Insert a newly allocated node into the DAG. +/// Insert a newly allocated node into the DAG. /// /// Handles insertion into the all nodes list and CSE map, as well as /// verification and other common operations when a new node is allocated. @@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr) { + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + DivergenceAnalysis * Divergence) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); + LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); + DA = Divergence; } SelectionDAG::~SelectionDAG() { @@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { } SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { - EVT EltVT = VT.getScalarType(); - SDValue TrueValue; - switch (TLI->getBooleanContents(VT)) { - case TargetLowering::ZeroOrOneBooleanContent: - case TargetLowering::UndefinedBooleanContent: - TrueValue = getConstant(1, DL, VT); - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, - VT); - break; - } + SDValue TrueValue = getBoolConstant(true, DL, VT, VT); return getNode(ISD::XOR, DL, VT, Val, TrueValue); } +SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, + EVT OpVT) { + if (!V) + return getConstant(0, DL, VT); + + switch (TLI->getBooleanContents(OpVT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + return getConstant(1, DL, VT); + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return getAllOnesConstant(DL, VT); + } + llvm_unreachable("Unexpected boolean content enum!"); +} + SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); @@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); + N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); @@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); + N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, if (N1.isUndef()) commuteShuffle(N1, N2, MaskVec); - // If shuffling a splat, try to blend the splat instead. We do this here so - // that even when this arises during lowering we don't have to re-handle it. - auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { - BitVector UndefElements; - SDValue Splat = BV->getSplatValue(&UndefElements); - if (!Splat) - return; + if (TLI->hasVectorBlend()) { + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; - for (int i = 0; i < NElts; ++i) { - if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) - continue; + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) + continue; - // If this input comes from undef, mark it as such. - if (UndefElements[MaskVec[i] - Offset]) { - MaskVec[i] = -1; - continue; - } + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } - // If we can blend a non-undef lane, use that instead. - if (!UndefElements[i]) - MaskVec[i] = i + Offset; - } - }; - if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) - BlendSplat(N1BV, 0); - if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) - BlendSplat(N2BV, NElts); + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + } // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef @@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, } SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { - MVT VT = SV.getSimpleValueType(0); + EVT VT = SV.getValueType(0); SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); ShuffleVectorSDNode::commuteMask(MaskVec); @@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(E, 0); auto *N = newSDNode<RegisterSDNode>(RegNo, VT); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl) { + EVT OpVT = N1.getValueType(); + // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return getConstant(0, dl, VT); + case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); case ISD::SETTRUE: - case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = - TLI->getBooleanContents(N1->getValueType(0)); - return getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, - VT); - } + case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); case ISD::SETOEQ: case ISD::SETOGT: @@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); - case ISD::SETNE: return getConstant(C1 != C2, dl, VT); - case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); - case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); - case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); - case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); - case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); - case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); - case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); - case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); + case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT); + case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT); + case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); + case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); + case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); + case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); + case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT); + case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); + case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT); + case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT); } } } @@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); + case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, + OpVT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, dl, VT); + case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT, + OpVT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); + case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, + OpVT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); + case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, + VT, OpVT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, dl, VT); + case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT, + OpVT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual, dl, VT); - case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); - case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); - case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, dl, VT); - case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); - case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, dl, VT); - case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, dl, VT); - case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); - case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); + case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT, OpVT); + case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, + OpVT); } } else { // Ensure that the constant occurs on the RHS. @@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } - // Support big-endian targets when it becomes useful. bool IsLE = getDataLayout().isLittleEndian(); - if (!IsLE) - break; // Bitcast 'small element' vector to 'large element' scalar/vector. if ((BitWidth % SubBitWidth) == 0) { @@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, for (unsigned i = 0; i != SubScale; ++i) { computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); - Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); + unsigned Shifts = IsLE ? i : SubScale - 1 - i; + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); } } @@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { - unsigned Offset = (i % SubScale) * BitWidth; + unsigned Shifts = IsLE ? i : NumElts - 1 - i; + unsigned Offset = (Shifts % SubScale) * BitWidth; Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. @@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SMULO: case ISD::UMULO: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. @@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::SMIN: case ISD::SMAX: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, - Depth + 1); - // If we don't know any bits, early out. - if (Known.isUnknown()) - break; + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), + DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + + const APInt &ValueLow = CstLow->getAPIntValue(); + const APInt &ValueHigh = CstHigh->getAPIntValue(); + if (ValueLow.sle(ValueHigh)) { + unsigned LowSignBits = ValueLow.getNumSignBits(); + unsigned HighSignBits = ValueHigh.getNumSignBits(); + unsigned MinSignBits = std::min(LowSignBits, HighSignBits); + if (ValueLow.isNegative() && ValueHigh.isNegative()) { + Known.One.setHighBits(MinSignBits); + break; + } + if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { + Known.Zero.setHighBits(MinSignBits); + break; + } + } + } + + // Fallback - just get the shared known bits of the operands. + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + if (Known.isUnknown()) break; // Early-out computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; Known.One &= Known2.One; @@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (!DemandedElts) return 1; // No demanded elts, better to assume we don't know anything. - switch (Op.getOpcode()) { + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { default: break; case ISD::AssertSext: Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); @@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::min(Tmp, Tmp2); case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: { + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), + DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { + Tmp = CstLow->getAPIntValue().getNumSignBits(); + Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); + return std::min(Tmp, Tmp2); + } + } + + // Fallback - just get the minimum number of sign bits of the operands. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); + } case ISD::UMIN: case ISD::UMAX: Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. - if (Op.getOpcode() == ISD::ROTR) + if (Opcode == ISD::ROTR) RotAmt = (VTBits - RotAmt) % VTBits; // If we aren't rotating out all of the known-in sign bits, return the @@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END || - Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_VOID) { + if (Opcode >= ISD::BUILTIN_OP_END || + Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::INTRINSIC_VOID) { unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); if (NumBits > 1) @@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { return false; } -bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { + assert(Op.getValueType().isFloatingPoint() && + "Floating point type expected"); + // If the value is a constant, we can obviously see if it is a zero or not. + // TODO: Add BuildVector support. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) return !C->isZero(); + return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + assert(!Op.getValueType().isFloatingPoint() && + "Floating point types unsupported - use isKnownNeverZeroFloat"); + + // If the value is a constant, we can obviously see if it is a zero or not. + if (ISD::matchUnaryPredicate( + Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) + return true; // TODO: Recognize more cases here. switch (Op.getOpcode()) { default: break; case ISD::OR: - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) - return !C->isNullValue(); + if (isKnownNeverZero(Op.getOperand(1)) || + isKnownNeverZero(Op.getOperand(0))) + return true; break; } @@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +// FIXME: unify with llvm::haveNoCommonBitsSet. +// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); @@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, else if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // (ext (trunx x)) -> x + // (ext (trunc x)) -> x if (OpOpcode == ISD::TRUNCATE) { SDValue OpOp = Operand.getOperand(0); - if (OpOp.getValueType() == VT) + if (OpOp.getValueType() == VT) { + transferDbgValues(Operand, OpOp); return OpOp; + } } break; case ISD::TRUNCATE: @@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) - // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? + if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Operand.getNode()->getFlags()); + Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; @@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) { - if (Opcode == ISD::FADD) { - // x+0 --> x - if (N2CFP && N2CFP->getValueAPF().isZero()) - return N1; - } else if (Opcode == ISD::FSUB) { - // x-0 --> x - if (N2CFP && N2CFP->getValueAPF().isZero()) - return N1; - } else if (Opcode == ISD::FMUL) { - // x*0 --> 0 - if (N2CFP && N2CFP->isZero()) - return N2; - // x*1 --> x - if (N2CFP && N2CFP->isExactlyValue(1.0)) - return N1; - } - } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::EXTRACT_VECTOR_ELT: + assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && + "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ + element type of the vector."); + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. if (N1.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is @@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } + // Any FP binop with an undef operand is folded to NaN. This matches the + // behavior of the IR optimizer. + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (N1.isUndef() || N2.isUndef()) + return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); + } + // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { if (TLI->isCommutativeBinOp(Opcode)) { @@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: - case ISD::FSUB: - case ISD::FDIV: - case ISD::FREM: - case ISD::SRA: - return N1; // fold op(undef, arg2) -> undef + return getUNDEF(VT); // fold op(undef, arg2) -> undef case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: + case ISD::SRA: case ISD::SRL: case ISD::SHL: - if (!VT.isVector()) - return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 - // For vectors, we can't easily build an all zero vector, just return - // the LHS. - return N2; + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 } } } @@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SDIV: case ISD::UREM: case ISD::SREM: - return N2; // fold op(arg1, undef) -> undef - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) - return N2; - break; - case ISD::MUL: - case ISD::AND: + case ISD::SRA: case ISD::SRL: case ISD::SHL: - if (!VT.isVector()) - return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 - // For vectors, we can't easily build an all zero vector, just return - // the LHS. - return N1; + return getUNDEF(VT); // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 case ISD::OR: - if (!VT.isVector()) - return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); - // For vectors, we can't easily build an all one vector, just return - // the LHS. - return N1; - case ISD::SRA: - return N1; + return getAllOnesConstant(DL, VT); } } @@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3) { + SDValue N1, SDValue N2, SDValue N3, + const SDNodeFlags Flags) { // Perform various simplifications. switch (Opcode) { case ISD::FMA: { + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(N1.getValueType() == VT && N2.getValueType() == VT && + N3.getValueType() == VT && "FMA types must match!"); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); @@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { @@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { return MF.getFunction().optForSize(); } +static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SmallVector<SDValue, 32> &OutChains, unsigned From, + unsigned To, SmallVector<SDValue, 16> &OutLoadChains, + SmallVector<SDValue, 16> &OutStoreChains) { + assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); + assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); + SmallVector<SDValue, 16> GluedLoadChains; + for (unsigned i = From; i < To; ++i) { + OutChains.push_back(OutLoadChains[i]); + GluedLoadChains.push_back(OutLoadChains[i]); + } + + // Chain for all loads. + SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + GluedLoadChains); + + for (unsigned i = From; i < To; ++i) { + StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); + SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + OutChains.push_back(NewStore); + } +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, @@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachineMemOperand::Flags MMOFlags = isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - SmallVector<SDValue, 8> OutChains; + SmallVector<SDValue, 16> OutLoadChains; + SmallVector<SDValue, 16> OutStoreChains; + SmallVector<SDValue, 32> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; for (unsigned i = 0; i != NumMemOps; ++i) { @@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SubSlice.Length = VTSize; } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); - if (Value.getNode()) + if (Value.getNode()) { Store = DAG.getStore(Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + OutChains.push_back(Store); + } } if (!Store.getNode()) { @@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, MinAlign(SrcAlign, SrcOff), SrcMMOFlags); - OutChains.push_back(Value.getValue(1)); + OutLoadChains.push_back(Value.getValue(1)); + Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + OutStoreChains.push_back(Store); } - OutChains.push_back(Store); SrcOff += VTSize; DstOff += VTSize; Size -= VTSize; } + unsigned GluedLdStLimit = MaxLdStGlue == 0 ? + TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; + unsigned NumLdStInMemcpy = OutStoreChains.size(); + + if (NumLdStInMemcpy) { + // It may be that memcpy might be converted to memset if it's memcpy + // of constants. In such a case, we won't have loads and stores, but + // just stores. In the absence of loads, there is nothing to gang up. + if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { + // If target does not care, just leave as it. + for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { + OutChains.push_back(OutLoadChains[i]); + OutChains.push_back(OutStoreChains[i]); + } + } else { + // Ld/St less than/equal limit set by target. + if (NumLdStInMemcpy <= GluedLdStLimit) { + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, + NumLdStInMemcpy, OutLoadChains, + OutStoreChains); + } else { + unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; + unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; + unsigned GlueIter = 0; + + for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { + unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; + unsigned IndexTo = NumLdStInMemcpy - GlueIter; + + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, + OutLoadChains, OutStoreChains); + GlueIter += GluedLdStLimit; + } + + // Residual ld/st. + if (RemainingLdStInMemcpy) { + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, + RemainingLdStInMemcpy, OutLoadChains, + OutStoreChains); + } + } + } + } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } @@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } -/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// Lower the call to 'memset' intrinsic function into a series of store /// operations. /// /// \param DAG Selection DAG where lowered code is placed. @@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, @@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, @@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Value, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*getContext()); + Entry.Node = Value; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { @@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_CLR || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || Opcode == ISD::ATOMIC_LOAD_NAND || @@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValueType(0).getVectorNumElements() && "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); @@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValue().getValueType().getVectorNumElements() && "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { // Now we update the operands. N->OperandList[0].set(Op); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { if (N->OperandList[1] != Op2) N->OperandList[1].set(Op2); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { if (N->OperandList[i] != Ops[i]) N->OperandList[i].set(Ops[i]); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, + bool IsIndirect, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); +} + +/// VReg +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, + DIExpression *Expr, + unsigned VReg, bool IsIndirect, + const DebugLoc &DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG); } void SelectionDAG::transferDbgValues(SDValue From, SDValue To, @@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); ClonedDVs.push_back(Clone); DV->setIsInvalidated(); - DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); - dbgs() << " into " << *DIExpr << '\n'); + LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; + N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); } } } @@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { AddDbgValue(Dbg, Dbg->getSDNode(), false); } +/// Creates a SDDbgLabel node. +SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, + const DebugLoc &DL, unsigned O) { + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); +} + namespace { /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDUse &Use = UI.getUse(); ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { SDUse &Use = UI.getUse(); ++UI; Use.setNode(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); // Now that we have modified User, add it back to the CSE maps. If it @@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { const SDValue &ToOp = To[Use.getResNo()]; ++UI; Use.set(ToOp); + if (To->getNode()->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // We are iterating over all uses of the From node, so if a use // doesn't use the specific value, no changes are made. if (!UserRemovedFromCSEMaps) @@ -7419,6 +7804,72 @@ namespace { } // end anonymous namespace +void SelectionDAG::updateDivergence(SDNode * N) +{ + if (TLI->isSDNodeAlwaysUniform(N)) + return; + bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsDivergent |= Op.getNode()->isDivergent(); + } + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + for (auto U : N->uses()) { + updateDivergence(U); + } + } +} + + +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { + DenseMap<SDNode *, unsigned> Degree; + Order.reserve(AllNodes.size()); + for (auto & N : allnodes()) { + unsigned NOps = N.getNumOperands(); + Degree[&N] = NOps; + if (0 == NOps) + Order.push_back(&N); + } + for (std::vector<SDNode *>::iterator I = Order.begin(); + I!=Order.end();++I) { + SDNode * N = *I; + for (auto U : N->uses()) { + unsigned &UnsortedOps = Degree[U]; + if (0 == --UnsortedOps) + Order.push_back(U); + } + } +} + +void SelectionDAG::VerifyDAGDiverence() +{ + std::vector<SDNode*> TopoOrder; + CreateTopologicalOrder(TopoOrder); + const TargetLowering &TLI = getTargetLoweringInfo(); + DenseMap<const SDNode *, bool> DivergenceMap; + for (auto &N : allnodes()) { + DivergenceMap[&N] = false; + } + for (auto N : TopoOrder) { + bool IsDivergent = DivergenceMap[N]; + bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; + } + if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { + DivergenceMap[N] = true; + } + } + for (auto &N : allnodes()) { + (void)N; + assert(DivergenceMap[&N] == N.isDivergent() && + "Divergence bit inconsistency detected\n"); + } +} + + /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value /// may appear in both the From and To list. The Deleted vector is @@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, } // Sort the uses, so that all the uses from a given User are together. - std::sort(Uses.begin(), Uses.end()); + llvm::sort(Uses.begin(), Uses.end()); for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); UseIndex != UseIndexEnd; ) { @@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } +void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { + DbgInfo->add(DB); +} + SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); @@ -7947,11 +8402,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, if (VT.getSizeInBits() / 8 != Bytes) return false; - SDValue Loc = LD->getOperand(1); - SDValue BaseLoc = Base->getOperand(1); - - auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); - auto LocDecomp = BaseIndexOffset::match(Loc, *this); + auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); + auto LocDecomp = BaseIndexOffset::match(LD, *this); int64_t Offset = 0; if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) @@ -7966,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - KnownBits Known(PtrWidth); + unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); + KnownBits Known(IdxWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; @@ -8201,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -// \brief Returns the SDNode if it is a constant integer BuildVector +// Returns the SDNode if it is a constant integer BuildVector // or constant integer. SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) @@ -8227,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { + assert(!Node->OperandList && "Node already has operands"); + SDUse *Ops = OperandRecycler.allocate( + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + + bool IsDivergent = false; + for (unsigned I = 0; I != Vals.size(); ++I) { + Ops[I].setUser(Node); + Ops[I].setInitial(Vals[I]); + if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. + IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + } + Node->NumOperands = Vals.size(); + Node->OperandList = Ops; + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); + if (!TLI->isSDNodeAlwaysUniform(Node)) + Node->SDNodeBits.IsDivergent = IsDivergent; + checkForCycles(Node); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, |