Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp |  12
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp                |  14
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp              | 202
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h                |  17
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp           |  32
-rw-r--r--  lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp       |   4
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp                 |   2
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp             |   5
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp       |  14
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetTransformInfo.h         |  19
10 files changed, 162 insertions, 159 deletions
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 221d2f093aeb..ad7302037cad 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -37,7 +37,7 @@ static MCInstrInfo *createNVPTXMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
// PTX does not have a return address register.
InitNVPTXMCRegisterInfo(X, 0);
@@ -46,13 +46,13 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createNVPTXMCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
- StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// The default relocation model is used regardless of what the client has
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index cadd7a46cd9d..ecb0f0a1d0a1 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -340,7 +340,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Type *Ty = F->getReturnType();
@@ -366,20 +366,20 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
O << ".param .b" << size << " func_retval0";
} else if (isa<PointerType>(Ty)) {
- O << ".param .b" << TLI->getPointerTy().getSizeInBits()
+ O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
<< " func_retval0";
} else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- unsigned totalsz = TD->getTypeAllocSize(Ty);
+ unsigned totalsz = DL.getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
- retAlignment = TD->getABITypeAlignment(Ty);
+ retAlignment = DL.getABITypeAlignment(Ty);
O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
<< "]";
} else
llvm_unreachable("Unknown return type");
} else {
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, Ty, vtparts);
+ ComputeValueVTs(*TLI, DL, Ty, vtparts);
unsigned idx = 0;
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
@@ -1433,7 +1433,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
- MVT thePointerTy = TLI->getPointerTy();
+ MVT thePointerTy = TLI->getPointerTy(*TD);
O << "(\n";
@@ -1579,7 +1579,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Further, if a part is vector, print the above for
// each vector element.
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, ETy, vtparts);
+ ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts);
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 09e0bd5d3d88..b75cf4040312 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -80,14 +80,14 @@ static bool IsPTXVectorType(MVT VT) {
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
-static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
+static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets = nullptr,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
- ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
+ ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
uint64_t Off = TempOffsets[i];
@@ -885,15 +885,16 @@ SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
- return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
-std::string
-NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- unsigned retAlignment,
- const ImmutableCallSite *CS) const {
+std::string NVPTXTargetLowering::getPrototype(
+ const DataLayout &DL, Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
+ const ImmutableCallSite *CS) const {
+ auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
@@ -921,13 +922,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
- O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ O << ".param .b" << PtrVT.getSizeInBits() << " _";
} else if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
+ auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
+ O << ".param .align " << retAlignment << " .b8 _["
+ << DL.getTypeAllocSize(retTy) << "]";
} else {
llvm_unreachable("Unknown return type");
}
@@ -936,7 +936,6 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << "_ (";
bool first = true;
- MVT thePointerTy = getPointerTy();
unsigned OIdx = 0;
for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
@@ -950,24 +949,23 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
- const DataLayout *TD = getDataLayout();
// +1 because index 0 is reserved for return type alignment
if (!llvm::getAlign(*CallI, i + 1, align))
- align = TD->getABITypeAlignment(Ty);
- unsigned sz = TD->getTypeAllocSize(Ty);
+ align = DL.getABITypeAlignment(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
// update the index for Outs
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, Ty, vtparts);
+ ComputeValueVTs(*this, DL, Ty, vtparts);
if (unsigned len = vtparts.size())
OIdx += len - 1;
continue;
}
// i8 types in IR will be i16 types in SDAG
- assert((getValueType(Ty) == Outs[OIdx].VT ||
- (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
+ assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
+ (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
"type mismatch between callee prototype and arguments");
// scalar type
unsigned sz = 0;
@@ -976,7 +974,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
if (sz < 32)
sz = 32;
} else if (isa<PointerType>(Ty))
- sz = thePointerTy.getSizeInBits();
+ sz = PtrVT.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
O << ".param .b" << sz << " ";
@@ -988,7 +986,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
Type *ETy = PTy->getElementType();
unsigned align = Outs[OIdx].Flags.getByValAlign();
- unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
+ unsigned sz = DL.getTypeAllocSize(ETy);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
@@ -1002,7 +1000,6 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
const ImmutableCallSite *CS,
Type *Ty,
unsigned Idx) const {
- const DataLayout *TD = getDataLayout();
unsigned Align = 0;
const Value *DirectCallee = CS->getCalledFunction();
@@ -1043,7 +1040,8 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- return TD->getABITypeAlignment(Ty);
+ auto &DL = CS->getCaller()->getParent()->getDataLayout();
+ return DL.getABITypeAlignment(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -1064,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- const DataLayout *TD = getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
+ auto &DL = MF.getDataLayout();
SDValue tempChain = Chain;
Chain = DAG.getCALLSEQ_START(Chain,
@@ -1096,11 +1094,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// aggregate
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
+ 0);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
MVT::i32),
@@ -1137,10 +1136,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(align, dl, MVT::i32),
@@ -1321,7 +1320,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<uint64_t, 16> Offsets;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
+ vtparts, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -1342,9 +1342,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT elemtype = vtparts[j];
int curOffset = Offsets[j];
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, dl, getPointerTy()));
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
+ DAG.getConstant(curOffset, dl, PtrVT));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
MachinePointerInfo(), false, false, false,
PartAlign);
@@ -1371,12 +1371,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle Result
if (Ins.size() > 0) {
SmallVector<EVT, 16> resvtparts;
- ComputeValueVTs(*this, retTy, resvtparts);
+ ComputeValueVTs(*this, DL, retTy, resvtparts);
// Declare
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
- unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
+ unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
// these three types to match the logic in
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
@@ -1415,7 +1415,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
+ std::string Proto =
+ getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1477,7 +1478,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
if (retTy && retTy->isVectorTy()) {
- EVT ObjectVT = getValueType(retTy);
+ EVT ObjectVT = getValueType(DL, retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
@@ -1590,13 +1591,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
InVals.push_back(Elt);
}
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
}
} else {
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
@@ -1608,8 +1609,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<EVT, 4> LoadRetVTs;
EVT TheLoadType = VTs[i];
- if (retTy->isIntegerTy() &&
- TD->getTypeAllocSizeInBits(retTy) < 32) {
+ if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
// This is for integer types only, and specifically not for
// aggregates.
LoadRetVTs.push_back(MVT::i32);
@@ -1920,11 +1920,11 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
}
MemSDNode *MemSD = cast<MemSDNode>(N);
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
unsigned Align = MemSD->getAlignment();
unsigned PrefAlign =
- TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
+ TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
if (Align < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
// store be scalarized. Note that we may still be able to emit smaller
@@ -2064,7 +2064,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- const DataLayout *TD = getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
@@ -2118,7 +2119,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
- ComputePTXValueVTs(*this, Ty, vtparts);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
@@ -2130,7 +2131,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
for (unsigned parti = 0; parti < NumRegs; ++parti) {
InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
@@ -2156,13 +2157,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// NOTE: Here, we lose the ability to issue vector loads for vectors
// that are a part of a struct. This should be investigated in the
// future.
- ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
+ 0);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
bool aggregateIsPacked = false;
if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
aggregateIsPacked = STy->isPacked();
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
EVT partVT = vtparts[parti];
@@ -2170,12 +2172,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
PointerType::get(partVT.getTypeForEVT(F->getContext()),
llvm::ADDRESS_SPACE_PARAM));
SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(offsets[parti], dl, getPointerTy()));
- unsigned partAlign =
- aggregateIsPacked ? 1
- : TD->getABITypeAlignment(
- partVT.getTypeForEVT(F->getContext()));
+ DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(offsets[parti], dl, PtrVT));
+ unsigned partAlign = aggregateIsPacked
+ ? 1
+ : DL.getABITypeAlignment(
+ partVT.getTypeForEVT(F->getContext()));
SDValue p;
if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
@@ -2198,8 +2200,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ EVT ObjectVT = getValueType(DL, Ty);
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
@@ -2212,9 +2214,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
- false, true,
- TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
+ true,
+ DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2229,9 +2231,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
- false, true,
- TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
+ true,
+ DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2269,13 +2271,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, dl, getPointerTy()));
+ SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(Ofst, dl, PtrVT));
SDValue P = DAG.getLoad(
VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
false, true,
- TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
+ DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2288,7 +2289,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
InVals.push_back(Elt);
}
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
InsIdx += NumElts;
}
@@ -2298,23 +2299,24 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
continue;
}
// A plain scalar.
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
// If ABI, load from the param symbol
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
Value *srcValue = Constant::getNullValue(PointerType::get(
ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue p;
if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
- MachinePointerInfo(srcValue), ObjectVT, false, false,
- false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ p = DAG.getExtLoad(
+ ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
+ ObjectVT, false, false, false,
+ DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
- p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
- MachinePointerInfo(srcValue), false, false, false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ p = DAG.getLoad(
+ Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false,
+ false, false,
+ DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
}
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
@@ -2329,10 +2331,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// machine instruction fails because TargetExternalSymbol
// (not lowered) is target dependent, and CopyToReg assumes
// the source is lowered.
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
assert(ObjectVT == Ins[InsIdx].VT &&
"Ins type did not match function type");
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
@@ -2370,7 +2372,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
Type *RetTy = F->getReturnType();
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
@@ -2384,7 +2386,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(NumElts == Outs.size() && "Bad scalarization of return value");
// const_cast can be removed in later LLVM versions
- EVT EltVT = getValueType(RetTy).getVectorElementType();
+ EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
bool NeedExtend = false;
if (EltVT.getSizeInBits() < 16)
NeedExtend = true;
@@ -2435,7 +2437,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
EVT VecVT =
EVT::getVectorVT(F->getContext(), EltVT, VecSize);
unsigned PerStoreOffset =
- TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
for (unsigned i = 0; i < NumElts; i += VecSize) {
// Get values
@@ -2493,7 +2495,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
} else {
SmallVector<EVT, 16> ValVTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
@@ -2509,8 +2511,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
TheValType.getVectorElementType(), TmpVal,
DAG.getIntPtrConstant(j, dl));
EVT TheStoreType = ValVTs[i];
- if (RetTy->isIntegerTy() &&
- TD->getTypeAllocSizeInBits(RetTy) < 32) {
+ if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
// The following zero-extension is for integer types only, and
// specifically not for aggregates.
TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
@@ -3291,14 +3292,14 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
case Intrinsic::nvvm_ldu_global_p: {
-
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getPointerTy(DL);
else
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
@@ -3311,14 +3312,15 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p: {
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getPointerTy(DL);
else
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
@@ -3731,8 +3733,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
-bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// AddrMode - This represents an addressing mode of:
@@ -3772,7 +3774,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
-NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
@@ -3794,7 +3796,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -4251,7 +4253,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
- const DataLayout *TD,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
SDLoc DL(N);
@@ -4282,8 +4283,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LD = cast<LoadSDNode>(N);
unsigned Align = LD->getAlignment();
+ auto &TD = DAG.getDataLayout();
unsigned PrefAlign =
- TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
+ TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
if (Align < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
@@ -4495,7 +4497,7 @@ void NVPTXTargetLowering::ReplaceNodeResults(
default:
report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
- ReplaceLoadVector(N, DAG, getDataLayout(), Results);
+ ReplaceLoadVector(N, DAG, Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
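
The pattern running through every hunk in this file is the same: nothing reads the DataLayout off the TargetMachine anymore; each call site fetches it from context it already owns and passes it down explicitly. A minimal sketch of the three sources the hunks above switch to, assuming an in-tree build at this revision — the wrapper function dataLayoutSources is hypothetical, but the accessors are exactly the ones used in the diff:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetLowering.h"

using namespace llvm;

// Hypothetical helper, not part of the patch: it just gathers the three
// DataLayout sources that replace TM.getDataLayout() in the hunks above.
static void dataLayoutSources(SelectionDAG &DAG, MachineFunction &MF,
                              const Instruction &I,
                              const TargetLowering &TLI) {
  const DataLayout &FromDAG = DAG.getDataLayout();           // SDAG lowering
  const DataLayout &FromMF = MF.getDataLayout();             // MachineFunction
  const DataLayout &FromIR = I.getModule()->getDataLayout(); // IR intrinsics
  // Pointer and value types now require an explicit DataLayout argument:
  MVT PtrVT = TLI.getPointerTy(FromDAG);
  (void)FromMF; (void)FromIR; (void)PtrVT;
}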
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index ed94775b3002..e5c37321a33b 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -456,24 +456,23 @@ public:
/// Used to guide target specific optimizations, like loop strength
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
+ EVT VT) const override {
if (VT.isVector())
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
SDValue LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -483,7 +482,7 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- std::string getPrototype(Type *, const ArgListTy &,
+ std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
unsigned retAlignment,
const ImmutableCallSite *CS) const;
@@ -501,7 +500,9 @@ public:
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 6ab0fadf9a35..0bf72febc4a0 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -57,7 +57,6 @@ char NVPTXLowerAggrCopies::ID = 0;
// Lower MemTransferInst or load-store pair to loop
static void convertTransferToLoop(
Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
- //unsigned numLoads,
bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
Type *indType = len->getType();
@@ -84,6 +83,8 @@ static void convertTransferToLoop(
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
+ // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
+ // word-sized loads and stores.
Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
srcVolatile);
// store at dstAddr+ind
@@ -137,13 +138,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
//
// Collect all the aggrLoads, aggrMemcpys and addrMemsets.
//
- //const BasicBlock *firstBB = &F.front(); // first BB in F
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- //BasicBlock *bb = BI;
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
-
if (!load->hasOneUse())
continue;
@@ -152,7 +150,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
User *use = load->user_back();
if (StoreInst *store = dyn_cast<StoreInst>(use)) {
- if (store->getOperand(0) != load) //getValueOperand
+ if (store->getOperand(0) != load)
continue;
aggrLoads.push_back(load);
}
@@ -188,8 +186,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
//
// Do the transformation of an aggr load/copy/set to a loop
//
- for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
- LoadInst *load = aggrLoads[i];
+ for (LoadInst *load : aggrLoads) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
@@ -203,20 +200,19 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
load->eraseFromParent();
}
- for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
- MemTransferInst *cpy = aggrMemcpys[i];
- Value *len = cpy->getLength();
- // llvm 2.7 version of memcpy does not have volatile
- // operand yet. So always making it non-volatile
- // optimistically, so that we don't see unnecessary
- // st.volatile in ptx
- convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
- false, Context, F);
+ for (MemTransferInst *cpy : aggrMemcpys) {
+ convertTransferToLoop(/* splitAt */ cpy,
+ /* srcAddr */ cpy->getSource(),
+ /* dstAddr */ cpy->getDest(),
+ /* len */ cpy->getLength(),
+ /* srcVolatile */ cpy->isVolatile(),
+ /* dstVolatile */ cpy->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
cpy->eraseFromParent();
}
- for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
- MemSetInst *memsetinst = aggrMemsets[i];
+ for (MemSetInst *memsetinst : aggrMemsets) {
Value *len = memsetinst->getLength();
Value *val = memsetinst->getValue();
convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
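
For context, convertTransferToLoop lowers each aggregate copy to an explicit byte loop built with IRBuilder. A minimal sketch of the loop body under this revision's IRBuilder API — the helper name emitByteCopyLoop and the pre-split basic-block arguments are illustrative; the real function also splits the block at splitAt itself:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Illustrative helper, not the pass's actual code: builds the byte-wise
// copy loop inside LoopBB, entered from OrigBB and exiting to ExitBB.
static void emitByteCopyLoop(BasicBlock *OrigBB, BasicBlock *LoopBB,
                             BasicBlock *ExitBB, Value *SrcAddr,
                             Value *DstAddr, Value *Len, bool SrcVolatile,
                             bool DstVolatile) {
  IRBuilder<> Loop(LoopBB);
  Type *IndTy = Len->getType();
  // ind = phi [0, OrigBB], [ind + 1, LoopBB]
  PHINode *Ind = Loop.CreatePHI(IndTy, 2);
  Ind->addIncoming(ConstantInt::get(IndTy, 0), OrigBB);
  // val = load i8 from srcAddr + ind; store val to dstAddr + ind
  Value *Val = Loop.CreateLoad(Loop.CreateGEP(Loop.getInt8Ty(), SrcAddr, Ind),
                               SrcVolatile);
  Loop.CreateStore(Val, Loop.CreateGEP(Loop.getInt8Ty(), DstAddr, Ind),
                   DstVolatile);
  // ind += 1; br (ind < len) ? LoopBB : ExitBB
  Value *NewInd = Loop.CreateAdd(Ind, ConstantInt::get(IndTy, 1));
  Ind->addIncoming(NewInd, LoopBB);
  Loop.CreateCondBr(Loop.CreateICmpULT(NewInd, Len), LoopBB, ExitBB);
}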
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e83f735a551e..5a83371b07f1 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
@@ -115,7 +115,7 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
replaceImageHandle(Handle, MF);
- return true;
+ return true;
}
return false;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 71645dca69c5..bd2509a3c8c9 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -48,7 +48,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
- TSInfo(TM.getDataLayout()), FrameLowering() {}
+ FrameLowering() {}
bool NVPTXSubtarget::hasImageHandles() const {
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 9d9072efc382..248f9e117d83 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -148,8 +148,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(NVPTXTTIImpl(this, F));
+ });
}
void NVPTXPassConfig::addIRPasses() {
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index dc81802f4b5a..e7250cdba5ac 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -94,7 +94,7 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost(
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -117,3 +117,15 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo);
}
}
+
+void NVPTXTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ BaseT::getUnrollingPreferences(L, UP);
+
+ // Enable partial unrolling and runtime unrolling, but reduce the
+ // threshold. This partially unrolls small loops which are often
+ // unrolled by the PTX to SASS compiler and unrolling earlier can be
+ // beneficial.
+ UP.Partial = UP.Runtime = true;
+ UP.PartialThreshold = UP.Threshold / 4;
+}
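
For context on why enabling Partial/Runtime with a reduced PartialThreshold matters: the loop unroller queries these preferences and uses PartialThreshold as the size budget for the partially unrolled body, so Threshold / 4 directly caps how far small loops get unrolled before ptxas sees them. A rough consumer-side sketch — choosePartialUnrollCount is hypothetical, and the real LoopUnrollPass logic also weighs trip counts, pragmas, and command-line overrides:

#include "llvm/Analysis/TargetTransformInfo.h"
#include <algorithm>

using namespace llvm;

// Hypothetical sketch of the consumer side: pick an unroll count so the
// unrolled body stays within the target's partial-unrolling budget.
static unsigned choosePartialUnrollCount(const TargetTransformInfo &TTI,
                                         Loop *L, unsigned LoopCost) {
  TargetTransformInfo::UnrollingPreferences UP;
  TTI.getUnrollingPreferences(L, UP); // NVPTX enables Partial and Runtime here
  if (!UP.Partial || LoopCost == 0 || LoopCost > UP.PartialThreshold)
    return 1; // no partial unrolling
  return std::max(1u, UP.PartialThreshold / LoopCost);
}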
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 4280888988f9..5bcd1e27a558 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -37,8 +37,9 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
const NVPTXTargetLowering *getTLI() const { return TLI; };
public:
- explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM)
- : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+ explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
@@ -46,18 +47,6 @@ public:
NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
bool hasBranchDivergence() { return true; }
@@ -69,6 +58,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
};
} // end namespace llvm