src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2013-04-08 18:41:23 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2013-04-08 18:41:23 +0000
commit	4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch)
tree	06099edc18d30894081a822b756f117cbe0b8207 /lib/CodeGen/SelectionDAG/SelectionDAG.cpp
parent	482e7bddf617ae804dc47133cb07eb4aa81e45de (diff)
download	src-4a16efa3e43e35f0cc9efe3a67f620f0017c3d36.tar.gz src-4a16efa3e43e35f0cc9efe3a67f620f0017c3d36.zip

Vendor import of llvm trunk r178860 (tag: vendor/llvm/llvm-trunk-r178860)

http://llvm.org/svn/llvm-project/llvm/trunk@178860

Notes

Notes: svn path=/vendor/llvm/dist/; revision=249259
Notes: svn path=/vendor/llvm/llvm-trunk-r178860/; revision=249260; tag=vendor/llvm/llvm-trunk-r178860

Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')

-rw-r--r--

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

425

1 files changed, 278 insertions, 147 deletions

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f000ce38d367..64244313a326 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -12,42 +12,43 @@

//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"

-#include "SDNodeOrdering.h"

#include "SDNodeDbgValue.h"

-#include "llvm/CallingConv.h"

-#include "llvm/Constants.h"

-#include "llvm/DebugInfo.h"

-#include "llvm/DerivedTypes.h"

-#include "llvm/Function.h"

-#include "llvm/GlobalAlias.h"

-#include "llvm/GlobalVariable.h"

-#include "llvm/Intrinsics.h"

+#include "SDNodeOrdering.h"

+#include "llvm/ADT/SetVector.h"

+#include "llvm/ADT/SmallPtrSet.h"

+#include "llvm/ADT/SmallSet.h"

+#include "llvm/ADT/SmallVector.h"

+#include "llvm/ADT/StringExtras.h"

+#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/Assembly/Writer.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineConstantPool.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineModuleInfo.h"

-#include "llvm/Target/TargetRegisterInfo.h"

-#include "llvm/DataLayout.h"

-#include "llvm/Target/TargetLowering.h"

-#include "llvm/Target/TargetSelectionDAGInfo.h"

-#include "llvm/Target/TargetOptions.h"

-#include "llvm/Target/TargetInstrInfo.h"

-#include "llvm/Target/TargetIntrinsicInfo.h"

-#include "llvm/Target/TargetMachine.h"

+#include "llvm/DebugInfo.h"

+#include "llvm/IR/CallingConv.h"

+#include "llvm/IR/Constants.h"

+#include "llvm/IR/DataLayout.h"

+#include "llvm/IR/DerivedTypes.h"

+#include "llvm/IR/Function.h"

+#include "llvm/IR/GlobalAlias.h"

+#include "llvm/IR/GlobalVariable.h"

+#include "llvm/IR/Intrinsics.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/ManagedStatic.h"

#include "llvm/Support/MathExtras.h"

-#include "llvm/Support/raw_ostream.h"

#include "llvm/Support/Mutex.h"

-#include "llvm/ADT/SetVector.h"

-#include "llvm/ADT/SmallPtrSet.h"

-#include "llvm/ADT/SmallSet.h"

-#include "llvm/ADT/SmallVector.h"

-#include "llvm/ADT/StringExtras.h"

+#include "llvm/Support/raw_ostream.h"

+#include "llvm/Target/TargetInstrInfo.h"

+#include "llvm/Target/TargetIntrinsicInfo.h"

+#include "llvm/Target/TargetLowering.h"

+#include "llvm/Target/TargetMachine.h"

+#include "llvm/Target/TargetOptions.h"

+#include "llvm/Target/TargetRegisterInfo.h"

+#include "llvm/Target/TargetSelectionDAGInfo.h"

#include <algorithm>

#include <cmath>

using namespace llvm;

@@ -59,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {

return Res;

}

-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {

- switch (VT.getSimpleVT().SimpleTy) {

- default: llvm_unreachable("Unknown FP format");

- case MVT::f16: return &APFloat::IEEEhalf;

- case MVT::f32: return &APFloat::IEEEsingle;

- case MVT::f64: return &APFloat::IEEEdouble;

- case MVT::f80: return &APFloat::x87DoubleExtended;

- case MVT::f128: return &APFloat::IEEEquad;

- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;

- }

// Default null implementations of the callbacks.

void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}

void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}

@@ -94,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,

// convert modifies in place, so make a copy.

APFloat Val2 = APFloat(Val);

bool losesInfo;

- (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,

+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),

+ APFloat::rmNearestTiesToEven,

&losesInfo);

return !losesInfo;

}

@@ -884,15 +874,17 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {

// EntryNode could meaningfully have debug info if we can find it...

SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)

: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),

- OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),

+ TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(),

+ getVTList(MVT::Other)),

Root(getEntryNode()), Ordering(0), UpdateListeners(0) {

AllNodes.push_back(&EntryNode);

Ordering = new SDNodeOrdering();

DbgInfo = new SDDbgInfo();

}

-void SelectionDAG::init(MachineFunction &mf) {

+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {

MF = &mf;

+ TTI = tti;

Context = &mf.getFunction()->getContext();

}

@@ -1074,10 +1066,11 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {

return getConstantFP(APFloat((float)Val), VT, isTarget);

else if (EltVT==MVT::f64)

return getConstantFP(APFloat(Val), VT, isTarget);

- else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {

+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||

+ EltVT==MVT::f16) {

bool ignored;

APFloat apf = APFloat(Val);

- apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,

+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,

&ignored);

return getConstantFP(apf, VT, isTarget);

} else

@@ -1525,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {

/// the target's desired shift amount type.

SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {

EVT OpTy = Op.getValueType();

- MVT ShTy = TLI.getShiftAmountTy(LHSTy);

+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);

if (OpTy == ShTy || OpTy.isVector()) return Op;

ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;

@@ -1924,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,

}

case ISD::LOAD: {

LoadSDNode *LD = cast<LoadSDNode>(Op);

- if (ISD::isZEXTLoad(Op.getNode())) {

+ // If this is a ZEXTLoad and we are looking at the loaded value.

+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {

EVT VT = LD->getMemoryVT();

unsigned MemBits = VT.getScalarType().getSizeInBits();

KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);

@@ -2294,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{

break;

}

- // Handle LOADX separately here. EXTLOAD case will fallthrough.

- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {

- unsigned ExtType = LD->getExtensionType();

- switch (ExtType) {

- default: break;

- case ISD::SEXTLOAD: // '17' bits known

- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();

- return VTBits-Tmp+1;

- case ISD::ZEXTLOAD: // '16' bits known

- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();

- return VTBits-Tmp;

+ // If we are looking at the loaded value of the SDNode.

+ if (Op.getResNo() == 0) {

+ // Handle LOADX separately here. EXTLOAD case will fallthrough.

+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {

+ unsigned ExtType = LD->getExtensionType();

+ switch (ExtType) {

+ default: break;

+ case ISD::SEXTLOAD: // '17' bits known

+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();

+ return VTBits-Tmp+1;

+ case ISD::ZEXTLOAD: // '16' bits known

+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();

+ return VTBits-Tmp;

+ }

}

@@ -2438,7 +2435,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,

return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);

case ISD::UINT_TO_FP:

case ISD::SINT_TO_FP: {

- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));

+ APFloat apf(EVTToAPFloatSemantics(VT),

+ APInt::getNullValue(VT.getSizeInBits()));

(void)apf.convertFromAPInt(Val,

Opcode==ISD::SINT_TO_FP,

APFloat::rmNearestTiesToEven);

@@ -2446,9 +2444,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,

}

case ISD::BITCAST:

if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)

- return getConstantFP(APFloat(Val), VT);

+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);

else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)

- return getConstantFP(APFloat(Val), VT);

+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);

break;

case ISD::BSWAP:

return getConstant(Val.byteSwap(), VT);

@@ -2495,7 +2493,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,

bool ignored;

// This can return overflow, underflow, or inexact; we don't care.

// FIXME need to be more flexible about rounding mode.

- (void)V.convert(*EVTToAPFloatSemantics(VT),

+ (void)V.convert(EVTToAPFloatSemantics(VT),

APFloat::rmNearestTiesToEven, &ignored);

return getConstantFP(V, VT);

}

@@ -2686,44 +2684,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,

return SDValue(N, 0);

}

-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,

- EVT VT,

- ConstantSDNode *Cst1,

- ConstantSDNode *Cst2) {

- const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();

+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,

+ SDNode *Cst1, SDNode *Cst2) {

+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;

+ SmallVector<SDValue, 4> Outputs;

+ EVT SVT = VT.getScalarType();

- switch (Opcode) {

- case ISD::ADD: return getConstant(C1 + C2, VT);

- case ISD::SUB: return getConstant(C1 - C2, VT);

- case ISD::MUL: return getConstant(C1 * C2, VT);

- case ISD::UDIV:

- if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);

- break;

- case ISD::UREM:

- if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);

- break;

- case ISD::SDIV:

- if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);

- break;

- case ISD::SREM:

- if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);

- break;

- case ISD::AND: return getConstant(C1 & C2, VT);

- case ISD::OR: return getConstant(C1 | C2, VT);

- case ISD::XOR: return getConstant(C1 ^ C2, VT);

- case ISD::SHL: return getConstant(C1 << C2, VT);

- case ISD::SRL: return getConstant(C1.lshr(C2), VT);

- case ISD::SRA: return getConstant(C1.ashr(C2), VT);

- case ISD::ROTL: return getConstant(C1.rotl(C2), VT);

- case ISD::ROTR: return getConstant(C1.rotr(C2), VT);

- default: break;

+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);

+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);

+ if (Scalar1 && Scalar2) {

+ // Scalar instruction.

+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));

+ } else {

+ // For vectors extract each constant element into Inputs so we can constant

+ // fold them individually.

+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);

+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);

+ if (!BV1 || !BV2)

+ return SDValue();

+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");

+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {

+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));

+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));

+ if (!V1 || !V2) // Not a constant, bail.

+ return SDValue();

+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.

+ // FIXME: This is valid and could be handled by truncating the APInts.

+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)

+ return SDValue();

+ Inputs.push_back(std::make_pair(V1, V2));

+ }

}

- return SDValue();

+ // We have a number of constant values, constant fold them element by element.

+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {

+ const APInt &C1 = Inputs[I].first->getAPIntValue();

+ const APInt &C2 = Inputs[I].second->getAPIntValue();

+ switch (Opcode) {

+ case ISD::ADD:

+ Outputs.push_back(getConstant(C1 + C2, SVT));

+ break;

+ case ISD::SUB:

+ Outputs.push_back(getConstant(C1 - C2, SVT));

+ break;

+ case ISD::MUL:

+ Outputs.push_back(getConstant(C1 * C2, SVT));

+ break;

+ case ISD::UDIV:

+ if (!C2.getBoolValue())

+ return SDValue();

+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));

+ break;

+ case ISD::UREM:

+ if (!C2.getBoolValue())

+ return SDValue();

+ Outputs.push_back(getConstant(C1.urem(C2), SVT));

+ break;

+ case ISD::SDIV:

+ if (!C2.getBoolValue())

+ return SDValue();

+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));

+ break;

+ case ISD::SREM:

+ if (!C2.getBoolValue())

+ return SDValue();

+ Outputs.push_back(getConstant(C1.srem(C2), SVT));

+ break;

+ case ISD::AND:

+ Outputs.push_back(getConstant(C1 & C2, SVT));

+ break;

+ case ISD::OR:

+ Outputs.push_back(getConstant(C1 | C2, SVT));

+ break;

+ case ISD::XOR:

+ Outputs.push_back(getConstant(C1 ^ C2, SVT));

+ break;

+ case ISD::SHL:

+ Outputs.push_back(getConstant(C1 << C2, SVT));

+ break;

+ case ISD::SRL:

+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));

+ break;

+ case ISD::SRA:

+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));

+ break;

+ case ISD::ROTL:

+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));

+ break;

+ case ISD::ROTR:

+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));

+ break;

+ default:

+ return SDValue();

+ }

+ // Handle the scalar case first.

+ if (Outputs.size() == 1)

+ return Outputs.back();

+ // Otherwise build a big vector out of the scalar elements we generated.

+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),

+ Outputs.size());

}

-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,

- SDValue N1, SDValue N2) {

+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,

+ SDValue N2) {

ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());

ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

switch (Opcode) {

@@ -2845,6 +2916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,

"Shift operators return type must be the same as their first arg");

assert(VT.isInteger() && N2.getValueType().isInteger() &&

"Shifts only work on integers");

+ assert((!VT.isVector() || VT == N2.getValueType()) &&

+ "Vector shift amounts must be in the same as their first arg");

// Verify that the shift amount VT is big enough to hold valid shift

// amounts. This catches things like trying to shift an i1024 value by an

// i8, which is easy to fall into in generic code that uses

@@ -3019,16 +3092,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,

}

- if (N1C) {

- if (N2C) {

- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);

- if (SV.getNode()) return SV;

- } else { // Cannonicalize constant to RHS if commutative

- if (isCommutativeBinOp(Opcode)) {

- std::swap(N1C, N2C);

- std::swap(N1, N2);

- }

+ // Perform trivial constant folding.

+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());

+ if (SV.getNode()) return SV;

+ // Canonicalize constant to RHS if commutative.

+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {

+ std::swap(N1C, N2C);

+ std::swap(N1, N2);

}

// Constant fold FP operations.

@@ -3036,7 +3107,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,

ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());

if (N1CFP) {

if (!N2CFP && isCommutativeBinOp(Opcode)) {

- // Cannonicalize constant to RHS if commutative

+ // Canonicalize constant to RHS if commutative.

std::swap(N1CFP, N2CFP);

std::swap(N1, N2);

} else if (N2CFP) {

@@ -3080,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,

bool ignored;

// This can return overflow, underflow, or inexact; we don't care.

// FIXME need to be more flexible about rounding mode.

- (void)V.convert(*EVTToAPFloatSemantics(VT),

+ (void)V.convert(EVTToAPFloatSemantics(VT),

APFloat::rmNearestTiesToEven, &ignored);

return getConstantFP(V, VT);

}

@@ -3312,17 +3383,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {

&ArgChains[0], ArgChains.size());

}

-/// SplatByte - Distribute ByteVal over NumBits bits.

-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {

- APInt Val = APInt(NumBits, ByteVal);

- unsigned Shift = 8;

- for (unsigned i = NumBits; i > 8; i >>= 1) {

- Val = (Val << Shift) | Val;

- Shift <<= 1;

- }

- return Val;

/// getMemsetValue - Vectorized representation of the memset value

/// operand.

static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,

@@ -3331,17 +3391,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,

unsigned NumBits = VT.getScalarType().getSizeInBits();

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {

- APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);

+ assert(C->getAPIntValue().getBitWidth() == 8);

+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());

if (VT.isInteger())

return DAG.getConstant(Val, VT);

- return DAG.getConstantFP(APFloat(Val), VT);

+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);

}

Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);

if (NumBits > 8) {

// Use a multiplication with 0x010101... to extend the input to the

// required length.

- APInt Magic = SplatByte(NumBits, 0x01);

+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));

Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));

}

@@ -3370,10 +3431,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,

}

assert(!VT.isVector() && "Can't handle vector type here!");

- unsigned NumVTBytes = VT.getSizeInBits() / 8;

+ unsigned NumVTBits = VT.getSizeInBits();

+ unsigned NumVTBytes = NumVTBits / 8;

unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));

- uint64_t Val = 0;

+ APInt Val(NumVTBits, 0);

if (TLI.isLittleEndian()) {

for (unsigned i = 0; i != NumBytes; ++i)

Val |= (uint64_t)(unsigned char)Str[i] << i*8;

@@ -3382,7 +3444,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,

Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;

}

- return DAG.getConstant(Val, VT);

+ // If the "cost" of materializing the integer immediate is 1 or free, then

+ // it is cost effective to turn the load into the immediate.

+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();

+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)

+ return DAG.getConstant(Val, VT);

+ return SDValue(0, 0);

}

/// getMemBasePlusOffset - Returns base and offset node for the

@@ -3420,8 +3487,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {

static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,

unsigned Limit, uint64_t Size,

unsigned DstAlign, unsigned SrcAlign,

- bool IsZeroVal,

+ bool IsMemset,

+ bool ZeroMemset,

bool MemcpyStrSrc,

+ bool AllowOverlap,

SelectionDAG &DAG,

const TargetLowering &TLI) {

assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&

@@ -3434,7 +3503,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,

// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does

// not need to be loaded.

EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,

- IsZeroVal, MemcpyStrSrc,

+ IsMemset, ZeroMemset, MemcpyStrSrc,

DAG.getMachineFunction());

if (VT == MVT::Other) {

@@ -3464,21 +3533,51 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,

unsigned VTSize = VT.getSizeInBits() / 8;

while (VTSize > Size) {

// For now, only use non-vector load / store's for the left-over pieces.

+ EVT NewVT = VT;

+ unsigned NewVTSize;

+ bool Found = false;

if (VT.isVector() || VT.isFloatingPoint()) {

- VT = MVT::i64;

- while (!TLI.isTypeLegal(VT))

- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);

- VTSize = VT.getSizeInBits() / 8;

- } else {

- // This can result in a type that is not legal on the target, e.g.

- // 1 or 2 bytes on PPC.

- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);

- VTSize >>= 1;

+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;

+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&

+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))

+ Found = true;

+ else if (NewVT == MVT::i64 &&

+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&

+ TLI.isSafeMemOpType(MVT::f64)) {

+ // i64 is usually not legal on 32-bit targets, but f64 may be.

+ NewVT = MVT::f64;

+ Found = true;

+ }

+ if (!Found) {

+ do {

+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);

+ if (NewVT == MVT::i8)

+ break;

+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));

+ }

+ NewVTSize = NewVT.getSizeInBits() / 8;

+ // If the new VT cannot cover all of the remaining bits, then consider

+ // issuing a (or a pair of) unaligned and overlapping load / store.

+ // FIXME: This is only done for 64-bit or wider types since we don't have a

+ // proper cost model for unaligned load / store.

+ bool Fast;

+ if (NumMemOps && AllowOverlap &&

+ VTSize >= 8 && NewVTSize < Size &&

+ TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)

+ VTSize = Size;

+ else {

+ VT = NewVT;

+ VTSize = NewVTSize;

}

if (++NumMemOps > Limit)

return false;

MemOps.push_back(VT);

Size -= VTSize;

}

@@ -3507,8 +3606,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

MachineFunction &MF = DAG.getMachineFunction();

MachineFrameInfo *MFI = MF.getFrameInfo();

bool OptSize =

- MF.getFunction()->getFnAttributes().

- hasAttribute(Attributes::OptimizeForSize);

+ MF.getFunction()->getAttributes().

+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);

if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))

DstAlignCanChange = true;

@@ -3523,12 +3622,21 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

if (!FindOptimalMemOpLowering(MemOps, Limit, Size,

(DstAlignCanChange ? 0 : Align),

(isZeroStr ? 0 : SrcAlign),

- true, CopyFromStr, DAG, TLI))

+ false, false, CopyFromStr, true, DAG, TLI))

return SDValue();

if (DstAlignCanChange) {

Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());

unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);

+ // Don't promote to an alignment that would require dynamic stack

+ // realignment.

+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();

+ if (!TRI->needsStackRealignment(MF))

+ while (NewAlign > Align &&

+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))

+ NewAlign /= 2;

if (NewAlign > Align) {

// Give the stack frame object a larger alignment if needed.

if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)

@@ -3545,6 +3653,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

unsigned VTSize = VT.getSizeInBits() / 8;

SDValue Value, Store;

+ if (VTSize > Size) {

+ // Issuing an unaligned load / store pair that overlaps with the previous

+ // pair. Adjust the offset accordingly.

+ assert(i == NumMemOps-1 && i != 0);

+ SrcOff -= VTSize - Size;

+ DstOff -= VTSize - Size;

+ }

if (CopyFromStr &&

(isZeroStr || (VT.isInteger() && !VT.isVector()))) {

// It's unlikely a store of a vector immediate can be done in a single

@@ -3553,11 +3669,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

// FIXME: Handle other cases where store of vector immediate is done in

// a single instruction.

Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));

- Store = DAG.getStore(Chain, dl, Value,

- getMemBasePlusOffset(Dst, DstOff, DAG),

- DstPtrInfo.getWithOffset(DstOff), isVol,

- false, Align);

- } else {

+ if (Value.getNode())

+ Store = DAG.getStore(Chain, dl, Value,

+ getMemBasePlusOffset(Dst, DstOff, DAG),

+ DstPtrInfo.getWithOffset(DstOff), isVol,

+ false, Align);

+ }

+ if (!Store.getNode()) {

// The type might not be legal for the target. This should only happen

// if the type is smaller than a legal type, as on PPC, so the right

// thing to do is generate a LoadExt/StoreTrunc pair. These simplify

@@ -3577,6 +3696,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

OutChains.push_back(Store);

SrcOff += VTSize;

DstOff += VTSize;

+ Size -= VTSize;

}

return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

@@ -3601,8 +3721,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

bool DstAlignCanChange = false;

MachineFunction &MF = DAG.getMachineFunction();

MachineFrameInfo *MFI = MF.getFrameInfo();

- bool OptSize = MF.getFunction()->getFnAttributes().

- hasAttribute(Attributes::OptimizeForSize);

+ bool OptSize = MF.getFunction()->getAttributes().

+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);

if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))

DstAlignCanChange = true;

@@ -3612,8 +3732,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);

if (!FindOptimalMemOpLowering(MemOps, Limit, Size,

- (DstAlignCanChange ? 0 : Align),

- SrcAlign, true, false, DAG, TLI))

+ (DstAlignCanChange ? 0 : Align), SrcAlign,

+ false, false, false, false, DAG, TLI))

return SDValue();

if (DstAlignCanChange) {

@@ -3680,8 +3800,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,

bool DstAlignCanChange = false;

MachineFunction &MF = DAG.getMachineFunction();

MachineFrameInfo *MFI = MF.getFrameInfo();

- bool OptSize = MF.getFunction()->getFnAttributes().

- hasAttribute(Attributes::OptimizeForSize);

+ bool OptSize = MF.getFunction()->getAttributes().

+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);

FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);

if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))

DstAlignCanChange = true;

@@ -3689,7 +3809,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,

isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();

if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),

Size, (DstAlignCanChange ? 0 : Align), 0,

- IsZeroVal, false, DAG, TLI))

+ true, IsZeroVal, false, true, DAG, TLI))

return SDValue();

if (DstAlignCanChange) {

@@ -3716,6 +3836,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,

for (unsigned i = 0; i < NumMemOps; i++) {

EVT VT = MemOps[i];

+ unsigned VTSize = VT.getSizeInBits() / 8;

+ if (VTSize > Size) {

+ // Issuing an unaligned store that overlaps with the previous store.

+ // Adjust the offset accordingly.

+ assert(i == NumMemOps-1 && i != 0);

+ DstOff -= VTSize - Size;

+ }

// If this store is smaller than the largest store see whether we can get

// the smaller value for free with a truncate.

@@ -3734,6 +3861,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,

isVol, false, Align);

OutChains.push_back(Store);

DstOff += VT.getSizeInBits() / 8;

+ Size -= VTSize;

}

return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

@@ -3745,6 +3873,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,

unsigned Align, bool isVol, bool AlwaysInline,

MachinePointerInfo DstPtrInfo,

MachinePointerInfo SrcPtrInfo) {

+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");

// Check to see if we should lower the memcpy to loads and stores first.

// For cases within the target-specified limits, this is the best choice.

@@ -3812,6 +3941,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,

unsigned Align, bool isVol,

MachinePointerInfo DstPtrInfo,

MachinePointerInfo SrcPtrInfo) {

+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");

// Check to see if we should lower the memmove to loads and stores first.

// For cases within the target-specified limits, this is the best choice.

@@ -3866,6 +3996,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,

SDValue Src, SDValue Size,

unsigned Align, bool isVol,

MachinePointerInfo DstPtrInfo) {

+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");

// Check to see if we should lower the memset to stores first.

// For cases within the target-specified limits, this is the best choice.

@@ -4577,7 +4708,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,

}

SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,

- const std::vector<EVT> &ResultTys,

+ ArrayRef<EVT> ResultTys,

const SDValue *Ops, unsigned NumOps) {

return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),

Ops, NumOps);

@@ -5229,7 +5360,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,

MachineSDNode *

SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,

- const std::vector<EVT> &ResultTys,

+ ArrayRef<EVT> ResultTys,

const SDValue *Ops, unsigned NumOps) {

SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());

return getMachineNode(Opcode, dl, VTs, Ops, NumOps);