aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp844
1 files changed, 658 insertions, 186 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4c8b63d2f239..48e03c6da68f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -58,6 +57,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
@@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
#define DEBUG_TYPE "selectiondag"
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+ cl::Hidden, cl::init(true),
+ cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+ cl::desc("Number limit for gluing ld/st of memcpy."),
+ cl::Hidden, cl::init(0));
+
static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
- DEBUG(
- dbgs() << Msg;
- V.getNode()->dump(G);
- );
+ LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
}
//===----------------------------------------------------------------------===//
@@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
+bool ISD::matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match) {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ if (ISD::BUILD_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ if (LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+ ISD::BUILD_VECTOR != RHS.getOpcode())
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+ if (!LHSCst || !RHSCst)
+ return false;
+ if (LHSCst->getValueType(0) != SVT ||
+ LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
+
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::MLOAD: {
+ const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ ID.AddInteger(MLD->getMemoryVT().getRawBits());
+ ID.AddInteger(MLD->getRawSubclassData());
+ ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSTORE: {
+ const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ ID.AddInteger(MST->getMemoryVT().getRawBits());
+ ID.AddInteger(MST->getRawSubclassData());
+ ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MGATHER: {
+ const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
+ ID.AddInteger(MG->getMemoryVT().getRawBits());
+ ID.AddInteger(MG->getRawSubclassData());
+ ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSCATTER: {
+ const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
+ ID.AddInteger(MS->getMemoryVT().getRawBits());
+ ID.AddInteger(MS->getRawSubclassData());
+ ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) {
}
#endif // NDEBUG
-/// \brief Insert a newly allocated node into the DAG.
+/// Insert a newly allocated node into the DAG.
///
/// Handles insertion into the all nodes list and CSE map, as well as
/// verification and other common operations when a new node is allocated.
@@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr) {
+ Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+ DivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
+ LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
+ DA = Divergence;
}
SelectionDAG::~SelectionDAG() {
@@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue TrueValue;
- switch (TLI->getBooleanContents(VT)) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- TrueValue = getConstant(1, DL, VT);
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
- VT);
- break;
- }
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
+SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
+ EVT OpVT) {
+ if (!V)
+ return getConstant(0, DL, VT);
+
+ switch (TLI->getBooleanContents(OpVT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ return getConstant(1, DL, VT);
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return getAllOnesConstant(DL, VT);
+ }
+ llvm_unreachable("Unexpected boolean content enum!");
+}
+
SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
@@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
@@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
@@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (N1.isUndef())
commuteShuffle(N1, N2, MaskVec);
- // If shuffling a splat, try to blend the splat instead. We do this here so
- // that even when this arises during lowering we don't have to re-handle it.
- auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
- BitVector UndefElements;
- SDValue Splat = BV->getSplatValue(&UndefElements);
- if (!Splat)
- return;
+ if (TLI->hasVectorBlend()) {
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
- for (int i = 0; i < NElts; ++i) {
- if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
- continue;
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+ continue;
- // If this input comes from undef, mark it as such.
- if (UndefElements[MaskVec[i] - Offset]) {
- MaskVec[i] = -1;
- continue;
- }
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
- // If we can blend a non-undef lane, use that instead.
- if (!UndefElements[i])
- MaskVec[i] = i + Offset;
- }
- };
- if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
- BlendSplat(N1BV, 0);
- if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
- BlendSplat(N2BV, NElts);
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+ }
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
@@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
}
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
- MVT VT = SV.getSimpleValueType(0);
+ EVT VT = SV.getValueType(0);
SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
ShuffleVectorSDNode::commuteMask(MaskVec);
@@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
+ EVT OpVT = N1.getValueType();
+
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return getConstant(0, dl, VT);
+ case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt =
- TLI->getBooleanContents(N1->getValueType(0));
- return getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
- VT);
- }
+ case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);
case ISD::SETOEQ:
case ISD::SETOGT:
@@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
- case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
- case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
- case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
- case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
- case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
- case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
- case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
- case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
- case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
+ case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
+ case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
+ case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
+ case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
+ case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
+ case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
+ case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
+ case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
+ case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
}
}
}
@@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual, dl, VT);
- case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
- case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
- case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual, dl, VT);
- case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
- case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan, dl, VT);
- case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpUnordered, dl, VT);
- case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
- case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
}
} else {
// Ensure that the constant occurs on the RHS.
@@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
- // Support big-endian targets when it becomes useful.
bool IsLE = getDataLayout().isLittleEndian();
- if (!IsLE)
- break;
// Bitcast 'small element' vector to 'large element' scalar/vector.
if ((BitWidth % SubBitWidth) == 0) {
@@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
for (unsigned i = 0; i != SubScale; ++i) {
computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
Depth + 1);
- Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
- Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
+ unsigned Shifts = IsLE ? i : SubScale - 1 - i;
+ Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
+ Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);
}
}
@@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
- unsigned Offset = (i % SubScale) * BitWidth;
+ unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+ unsigned Offset = (Shifts % SubScale) * BitWidth;
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
@@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SMULO:
case ISD::UMULO:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
@@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::SMIN:
case ISD::SMAX: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts,
- Depth + 1);
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- break;
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+ DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ const APInt &ValueHigh = CstHigh->getAPIntValue();
+ if (ValueLow.sle(ValueHigh)) {
+ unsigned LowSignBits = ValueLow.getNumSignBits();
+ unsigned HighSignBits = ValueHigh.getNumSignBits();
+ unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
+ if (ValueLow.isNegative() && ValueHigh.isNegative()) {
+ Known.One.setHighBits(MinSignBits);
+ break;
+ }
+ if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
+ Known.Zero.setHighBits(MinSignBits);
+ break;
+ }
+ }
+ }
+
+ // Fallback - just get the shared known bits of the operands.
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ if (Known.isUnknown()) break; // Early-out
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
@@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
- switch (Op.getOpcode()) {
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
default: break;
case ISD::AssertSext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
@@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::min(Tmp, Tmp2);
case ISD::SMIN:
- case ISD::SMAX:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+ DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+ if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
+ Tmp = CstLow->getAPIntValue().getNumSignBits();
+ Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
+ return std::min(Tmp, Tmp2);
+ }
+ }
+
+ // Fallback - just get the minimum number of sign bits of the operands.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
case ISD::UMIN:
case ISD::UMAX:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
- if (Op.getOpcode() == ISD::ROTR)
+ if (Opcode == ISD::ROTR)
RotAmt = (VTBits - RotAmt) % VTBits;
// If we aren't rotating out all of the known-in sign bits, return the
@@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
// Allow the target to implement this method for its nodes.
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
- Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
- Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
- Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
if (NumBits > 1)
@@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
return false;
}
-bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() &&
+ "Floating point type expected");
+
// If the value is a constant, we can obviously see if it is a zero or not.
+ // TODO: Add BuildVector support.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ assert(!Op.getValueType().isFloatingPoint() &&
+ "Floating point types unsupported - use isKnownNeverZeroFloat");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (ISD::matchUnaryPredicate(
+ Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ return true;
// TODO: Recognize more cases here.
switch (Op.getOpcode()) {
default: break;
case ISD::OR:
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
- return !C->isNullValue();
+ if (isKnownNeverZero(Op.getOperand(1)) ||
+ isKnownNeverZero(Op.getOperand(0)))
+ return true;
break;
}
@@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+// FIXME: unify with llvm::haveNoCommonBitsSet.
+// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
@@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- // (ext (trunx x)) -> x
+ // (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = Operand.getOperand(0);
- if (OpOp.getValueType() == VT)
+ if (OpOp.getValueType() == VT) {
+ transferDbgValues(Operand, OpOp);
return OpOp;
+ }
}
break;
case ISD::TRUNCATE:
@@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
- // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
+ if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+ OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
- Operand.getOperand(0), Operand.getNode()->getFlags());
+ Operand.getOperand(0), Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
@@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath) {
- if (Opcode == ISD::FADD) {
- // x+0 --> x
- if (N2CFP && N2CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FSUB) {
- // x-0 --> x
- if (N2CFP && N2CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FMUL) {
- // x*0 --> 0
- if (N2CFP && N2CFP->isZero())
- return N2;
- // x*1 --> x
- if (N2CFP && N2CFP->isExactlyValue(1.0))
- return N1;
- }
- }
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::EXTRACT_VECTOR_ELT:
+ assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
+ "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
+ element type of the vector.");
+
// EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
- if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
@@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
+ // Any FP binop with an undef operand is folded to NaN. This matches the
+ // behavior of the IR optimizer.
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
@@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
- case ISD::FSUB:
- case ISD::FDIV:
- case ISD::FREM:
- case ISD::SRA:
- return N1; // fold op(undef, arg2) -> undef
+ return getUNDEF(VT); // fold op(undef, arg2) -> undef
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
- if (!VT.isVector())
- return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
- // For vectors, we can't easily build an all zero vector, just return
- // the LHS.
- return N2;
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
}
}
}
@@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- return N2; // fold op(arg1, undef) -> undef
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath)
- return N2;
- break;
- case ISD::MUL:
- case ISD::AND:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
- if (!VT.isVector())
- return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
- // For vectors, we can't easily build an all zero vector, just return
- // the LHS.
- return N1;
+ return getUNDEF(VT); // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:
- if (!VT.isVector())
- return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
- // For vectors, we can't easily build an all one vector, just return
- // the LHS.
- return N1;
- case ISD::SRA:
- return N1;
+ return getAllOnesConstant(DL, VT);
}
}
@@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue N1, SDValue N2, SDValue N3) {
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags Flags) {
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == VT && N2.getValueType() == VT &&
+ N3.getValueType() == VT && "FMA types must match!");
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
@@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
return SDValue(E, 0);
+ }
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
@@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
return MF.getFunction().optForSize();
}
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SmallVector<SDValue, 32> &OutChains, unsigned From,
+ unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+ SmallVector<SDValue, 16> &OutStoreChains) {
+ assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+ assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+ SmallVector<SDValue, 16> GluedLoadChains;
+ for (unsigned i = From; i < To; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ GluedLoadChains.push_back(OutLoadChains[i]);
+ }
+
+ // Chain for all loads.
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ GluedLoadChains);
+
+ for (unsigned i = From; i < To; ++i) {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+ SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ OutChains.push_back(NewStore);
+ }
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
@@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
- SmallVector<SDValue, 8> OutChains;
+ SmallVector<SDValue, 16> OutLoadChains;
+ SmallVector<SDValue, 16> OutStoreChains;
+ SmallVector<SDValue, 32> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
for (unsigned i = 0; i != NumMemOps; ++i) {
@@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SubSlice.Length = VTSize;
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
- if (Value.getNode())
+ if (Value.getNode()) {
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
MMOFlags);
+ OutChains.push_back(Store);
+ }
}
if (!Store.getNode()) {
@@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
- OutChains.push_back(Value.getValue(1));
+ OutLoadChains.push_back(Value.getValue(1));
+
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ OutStoreChains.push_back(Store);
}
- OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
Size -= VTSize;
}
+ unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+ TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+ unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+ if (NumLdStInMemcpy) {
+ // It may be that memcpy might be converted to memset if it's memcpy
+ // of constants. In such a case, we won't have loads and stores, but
+ // just stores. In the absence of loads, there is nothing to gang up.
+ if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+ // If target does not care, just leave as it.
+ for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ OutChains.push_back(OutStoreChains[i]);
+ }
+ } else {
+ // Ld/St less than/equal limit set by target.
+ if (NumLdStInMemcpy <= GluedLdStLimit) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ NumLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ } else {
+ unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+ unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+ unsigned GlueIter = 0;
+
+ for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+ unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+ unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+ OutLoadChains, OutStoreChains);
+ GlueIter += GluedLdStLimit;
+ }
+
+ // Residual ld/st.
+ if (RemainingLdStInMemcpy) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ RemainingLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ }
+ }
+ }
+ }
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
@@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
-/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// Lower the call to 'memset' intrinsic function into a series of store
/// operations.
///
/// \param DAG Selection DAG where lowered code is placed.
@@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
@@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
@@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Value, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*getContext());
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
@@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_CLR ||
Opcode == ISD::ATOMIC_LOAD_OR ||
Opcode == ISD::ATOMIC_LOAD_XOR ||
Opcode == ISD::ATOMIC_LOAD_NAND ||
@@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
- assert(Ops.size() == 5 && "Incompatible number of operands");
+ assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
@@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
- assert(Ops.size() == 5 && "Incompatible number of operands");
+ assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
@@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
// Now we update the operands.
N->OperandList[0].set(Op);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
if (N->OperandList[1] != Op2)
N->OperandList[1].set(Op2);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
if (N->OperandList[i] != Ops[i])
N->OperandList[i].set(Ops[i]);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
DIExpression *Expr, unsigned FI,
+ bool IsIndirect,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX);
+}
+
+/// VReg
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ unsigned VReg, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);
}
void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
@@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
- DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
- dbgs() << " into " << *DIExpr << '\n');
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
+ N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
}
}
}
@@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
AddDbgValue(Dbg, Dbg->getSDNode(), false);
}
+/// Creates a SDDbgLabel node.
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
+}
+
namespace {
/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
@@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDUse &Use = UI.getUse();
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
SDUse &Use = UI.getUse();
++UI;
Use.setNode(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
@@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
+ if (To->getNode()->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// We are iterating over all uses of the From node, so if a use
// doesn't use the specific value, no changes are made.
if (!UserRemovedFromCSEMaps)
@@ -7419,6 +7804,72 @@ namespace {
} // end anonymous namespace
+void SelectionDAG::updateDivergence(SDNode * N)
+{
+ if (TLI->isSDNodeAlwaysUniform(N))
+ return;
+ bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsDivergent |= Op.getNode()->isDivergent();
+ }
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ for (auto U : N->uses()) {
+ updateDivergence(U);
+ }
+ }
+}
+
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+ DenseMap<SDNode *, unsigned> Degree;
+ Order.reserve(AllNodes.size());
+ for (auto & N : allnodes()) {
+ unsigned NOps = N.getNumOperands();
+ Degree[&N] = NOps;
+ if (0 == NOps)
+ Order.push_back(&N);
+ }
+ for (std::vector<SDNode *>::iterator I = Order.begin();
+ I!=Order.end();++I) {
+ SDNode * N = *I;
+ for (auto U : N->uses()) {
+ unsigned &UnsortedOps = Degree[U];
+ if (0 == --UnsortedOps)
+ Order.push_back(U);
+ }
+ }
+}
+
+void SelectionDAG::VerifyDAGDiverence()
+{
+ std::vector<SDNode*> TopoOrder;
+ CreateTopologicalOrder(TopoOrder);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ DenseMap<const SDNode *, bool> DivergenceMap;
+ for (auto &N : allnodes()) {
+ DivergenceMap[&N] = false;
+ }
+ for (auto N : TopoOrder) {
+ bool IsDivergent = DivergenceMap[N];
+ bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+ }
+ if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+ DivergenceMap[N] = true;
+ }
+ }
+ for (auto &N : allnodes()) {
+ (void)N;
+ assert(DivergenceMap[&N] == N.isDivergent() &&
+ "Divergence bit inconsistency detected\n");
+ }
+}
+
+
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list. The Deleted vector is
@@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
}
// Sort the uses, so that all the uses from a given User are together.
- std::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses.begin(), Uses.end());
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
@@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
DbgInfo->add(DB, SD, isParameter);
}
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
+ DbgInfo->add(DB);
+}
+
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
@@ -7947,11 +8402,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
if (VT.getSizeInBits() / 8 != Bytes)
return false;
- SDValue Loc = LD->getOperand(1);
- SDValue BaseLoc = Base->getOperand(1);
-
- auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
- auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+ auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+ auto LocDecomp = BaseIndexOffset::match(LD, *this);
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
@@ -7966,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
- KnownBits Known(PtrWidth);
+ unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
+ KnownBits Known(IdxWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
@@ -8201,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
-// \brief Returns the SDNode if it is a constant integer BuildVector
+// Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
@@ -8227,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
return nullptr;
}
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+ assert(!Node->OperandList && "Node already has operands");
+ SDUse *Ops = OperandRecycler.allocate(
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+ bool IsDivergent = false;
+ for (unsigned I = 0; I != Vals.size(); ++I) {
+ Ops[I].setUser(Node);
+ Ops[I].setInitial(Vals[I]);
+ if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
+ IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+ }
+ Node->NumOperands = Vals.size();
+ Node->OperandList = Ops;
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+ if (!TLI->isSDNodeAlwaysUniform(Node))
+ Node->SDNodeBits.IsDivergent = IsDivergent;
+ checkForCycles(Node);
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,