diff options
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r-- | lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 12 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 14 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.cpp | 202 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.h | 17 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 32 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 4 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXSubtarget.cpp | 2 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXTargetMachine.cpp | 5 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 14 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 19 |
10 files changed, 162 insertions, 159 deletions
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 221d2f093aeb..ad7302037cad 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -37,7 +37,7 @@ static MCInstrInfo *createNVPTXMCInstrInfo() { return X; } -static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); // PTX does not have a return address register. InitNVPTXMCRegisterInfo(X, 0); @@ -46,13 +46,13 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createNVPTXMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo( - StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); // The default relocation model is used regardless of what the client has diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index cadd7a46cd9d..ecb0f0a1d0a1 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -340,7 +340,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { } void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Type *Ty = F->getReturnType(); @@ -366,20 +366,20 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { O << ".param .b" << size << " func_retval0"; } else if (isa<PointerType>(Ty)) { - O << ".param .b" << TLI->getPointerTy().getSizeInBits() + O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() << " func_retval0"; } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { - unsigned totalsz = TD->getTypeAllocSize(Ty); + unsigned totalsz = DL.getTypeAllocSize(Ty); unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) - retAlignment = TD->getABITypeAlignment(Ty); + retAlignment = DL.getABITypeAlignment(Ty); O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz << "]"; } else llvm_unreachable("Unknown return type"); } else { SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, Ty, vtparts); + ComputeValueVTs(*TLI, DL, Ty, vtparts); unsigned idx = 0; for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; @@ -1433,7 +1433,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget->getSmVersion() >= 20); - MVT thePointerTy = TLI->getPointerTy(); + MVT thePointerTy = TLI->getPointerTy(*TD); O << "(\n"; @@ -1579,7 +1579,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Further, if a part is vector, print the above for // each vector element. SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, ETy, vtparts); + ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts); for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 09e0bd5d3d88..b75cf4040312 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -80,14 +80,14 @@ static bool IsPTXVectorType(MVT VT) { /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the /// same number of types as the Ins/Outs arrays in LowerFormalArguments, /// LowerCall, and LowerReturn. -static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, +static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets = nullptr, uint64_t StartingOffset = 0) { SmallVector<EVT, 16> TempVTs; SmallVector<uint64_t, 16> TempOffsets; - ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); + ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { EVT VT = TempVTs[i]; uint64_t Off = TempOffsets[i]; @@ -885,15 +885,16 @@ SDValue NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); } -std::string -NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment, - const ImmutableCallSite *CS) const { +std::string NVPTXTargetLowering::getPrototype( + const DataLayout &DL, Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, + const ImmutableCallSite *CS) const { + auto PtrVT = getPointerTy(DL); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -921,13 +922,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { - O << ".param .b" << getPointerTy().getSizeInBits() << " _"; + O << ".param .b" << PtrVT.getSizeInBits() << " _"; } else if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { - O << ".param .align " - << retAlignment - << " .b8 _[" - << getDataLayout()->getTypeAllocSize(retTy) << "]"; + auto &DL = CS->getCalledFunction()->getParent()->getDataLayout(); + O << ".param .align " << retAlignment << " .b8 _[" + << DL.getTypeAllocSize(retTy) << "]"; } else { llvm_unreachable("Unknown return type"); } @@ -936,7 +936,6 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << "_ ("; bool first = true; - MVT thePointerTy = getPointerTy(); unsigned OIdx = 0; for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { @@ -950,24 +949,23 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, if (Ty->isAggregateType() || Ty->isVectorTy()) { unsigned align = 0; const CallInst *CallI = cast<CallInst>(CS->getInstruction()); - const DataLayout *TD = getDataLayout(); // +1 because index 0 is reserved for return type alignment if (!llvm::getAlign(*CallI, i + 1, align)) - align = TD->getABITypeAlignment(Ty); - unsigned sz = TD->getTypeAllocSize(Ty); + align = DL.getABITypeAlignment(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; // update the index for Outs SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*this, Ty, vtparts); + ComputeValueVTs(*this, DL, Ty, vtparts); if (unsigned len = vtparts.size()) OIdx += len - 1; continue; } // i8 types in IR will be i16 types in SDAG - assert((getValueType(Ty) == Outs[OIdx].VT || - (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && + assert((getValueType(DL, Ty) == Outs[OIdx].VT || + (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments"); // scalar type unsigned sz = 0; @@ -976,7 +974,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, if (sz < 32) sz = 32; } else if (isa<PointerType>(Ty)) - sz = thePointerTy.getSizeInBits(); + sz = PtrVT.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); O << ".param .b" << sz << " "; @@ -988,7 +986,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, Type *ETy = PTy->getElementType(); unsigned align = Outs[OIdx].Flags.getByValAlign(); - unsigned sz = getDataLayout()->getTypeAllocSize(ETy); + unsigned sz = DL.getTypeAllocSize(ETy); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; @@ -1002,7 +1000,6 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, Type *Ty, unsigned Idx) const { - const DataLayout *TD = getDataLayout(); unsigned Align = 0; const Value *DirectCallee = CS->getCalledFunction(); @@ -1043,7 +1040,8 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // Call is indirect or alignment information is not available, fall back to // the ABI type alignment - return TD->getABITypeAlignment(Ty); + auto &DL = CS->getCaller()->getParent()->getDataLayout(); + return DL.getABITypeAlignment(Ty); } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -1064,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; - const DataLayout *TD = getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); + auto &DL = MF.getDataLayout(); SDValue tempChain = Chain; Chain = DAG.getCALLSEQ_START(Chain, @@ -1096,11 +1094,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // aggregate SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets, + 0); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, MVT::i32), @@ -1137,10 +1136,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, MVT::i32), @@ -1321,7 +1320,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<uint64_t, 16> Offsets; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); - ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), + vtparts, &Offsets, 0); // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = Outs[OIdx].Flags.getByValSize(); @@ -1342,9 +1342,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT elemtype = vtparts[j]; int curOffset = Offsets[j]; unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); - SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], - DAG.getConstant(curOffset, dl, getPointerTy())); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], + DAG.getConstant(curOffset, dl, PtrVT)); SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), false, false, false, PartAlign); @@ -1371,12 +1371,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle Result if (Ins.size() > 0) { SmallVector<EVT, 16> resvtparts; - ComputeValueVTs(*this, retTy, resvtparts); + ComputeValueVTs(*this, DL, retTy, resvtparts); // Declare // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 - unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); + unsigned resultsz = DL.getTypeAllocSizeInBits(retTy); // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for // these three types to match the logic in // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. @@ -1415,7 +1415,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for a // CallPrototype SDNode which will print out to the value of the string. SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); + std::string Proto = + getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS); const char *ProtoStr = nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); SDValue ProtoOps[] = { @@ -1477,7 +1478,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Generate loads from param memory/moves from registers for result if (Ins.size() > 0) { if (retTy && retTy->isVectorTy()) { - EVT ObjectVT = getValueType(retTy); + EVT ObjectVT = getValueType(DL, retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), @@ -1590,13 +1591,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); InVals.push_back(Elt); } - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); } } } else { SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); for (unsigned i = 0, e = Ins.size(); i != e; ++i) { @@ -1608,8 +1609,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<EVT, 4> LoadRetVTs; EVT TheLoadType = VTs[i]; - if (retTy->isIntegerTy() && - TD->getTypeAllocSizeInBits(retTy) < 32) { + if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) { // This is for integer types only, and specifically not for // aggregates. LoadRetVTs.push_back(MVT::i32); @@ -1920,11 +1920,11 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { } MemSDNode *MemSD = cast<MemSDNode>(N); - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); unsigned Align = MemSD->getAlignment(); unsigned PrefAlign = - TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); + TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); if (Align < PrefAlign) { // This store is not sufficiently aligned, so bail out and let this vector // store be scalarized. Note that we may still be able to emit smaller @@ -2064,7 +2064,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - const DataLayout *TD = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); @@ -2118,7 +2119,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ty->isAggregateType()) { SmallVector<EVT, 16> vtparts; - ComputePTXValueVTs(*this, Ty, vtparts); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); assert(vtparts.size() > 0 && "empty aggregate type not expected"); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { @@ -2130,7 +2131,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); for (unsigned parti = 0; parti < NumRegs; ++parti) { InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); @@ -2156,13 +2157,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // NOTE: Here, we lose the ability to issue vector loads for vectors // that are a part of a struct. This should be investigated in the // future. - ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets, + 0); assert(vtparts.size() > 0 && "empty aggregate type not expected"); bool aggregateIsPacked = false; if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) aggregateIsPacked = STy->isPacked(); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { EVT partVT = vtparts[parti]; @@ -2170,12 +2172,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( PointerType::get(partVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(offsets[parti], dl, getPointerTy())); - unsigned partAlign = - aggregateIsPacked ? 1 - : TD->getABITypeAlignment( - partVT.getTypeForEVT(F->getContext())); + DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(offsets[parti], dl, PtrVT)); + unsigned partAlign = aggregateIsPacked + ? 1 + : DL.getABITypeAlignment( + partVT.getTypeForEVT(F->getContext())); SDValue p; if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? @@ -2198,8 +2200,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + EVT ObjectVT = getValueType(DL, Ty); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); unsigned NumElts = ObjectVT.getVectorNumElements(); assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); @@ -2212,9 +2214,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue P = DAG.getLoad( - EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, - false, true, - TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, + true, + DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2229,9 +2231,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue(PointerType::get( VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue P = DAG.getLoad( - VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, - false, true, - TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, + true, + DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2269,13 +2271,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue( PointerType::get(VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, dl, getPointerTy())); + SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(Ofst, dl, PtrVT)); SDValue P = DAG.getLoad( VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, false, true, - TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2288,7 +2289,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); InVals.push_back(Elt); } - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); } InsIdx += NumElts; } @@ -2298,23 +2299,24 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } // A plain scalar. - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); // If ABI, load from the param symbol - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); Value *srcValue = Constant::getNullValue(PointerType::get( ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue p; if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; - p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, - MachinePointerInfo(srcValue), ObjectVT, false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + p = DAG.getExtLoad( + ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), + ObjectVT, false, false, false, + DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } else { - p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + p = DAG.getLoad( + Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false, + false, false, + DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } if (p.getNode()) p.getNode()->setIROrder(idx + 1); @@ -2329,10 +2331,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // machine instruction fails because TargetExternalSymbol // (not lowered) is target dependent, and CopyToReg assumes // the source is lowered. - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); assert(ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type"); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) p.getNode()->setIROrder(idx + 1); @@ -2370,7 +2372,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); Type *RetTy = F->getReturnType(); - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -2384,7 +2386,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(NumElts == Outs.size() && "Bad scalarization of return value"); // const_cast can be removed in later LLVM versions - EVT EltVT = getValueType(RetTy).getVectorElementType(); + EVT EltVT = getValueType(TD, RetTy).getVectorElementType(); bool NeedExtend = false; if (EltVT.getSizeInBits() < 16) NeedExtend = true; @@ -2435,7 +2437,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); unsigned PerStoreOffset = - TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); for (unsigned i = 0; i < NumElts; i += VecSize) { // Get values @@ -2493,7 +2495,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, } else { SmallVector<EVT, 16> ValVTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0); assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); for (unsigned i = 0, e = Outs.size(); i != e; ++i) { @@ -2509,8 +2511,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, TheValType.getVectorElementType(), TmpVal, DAG.getIntPtrConstant(j, dl)); EVT TheStoreType = ValVTs[i]; - if (RetTy->isIntegerTy() && - TD->getTypeAllocSizeInBits(RetTy) < 32) { + if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) { // The following zero-extension is for integer types only, and // specifically not for aggregates. TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); @@ -3291,14 +3292,14 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_p: { - + auto &DL = I.getModule()->getDataLayout(); Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldu_global_i) - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getPointerTy(DL); else - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; @@ -3311,14 +3312,15 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: { + auto &DL = I.getModule()->getDataLayout(); Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldg_global_i) - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getPointerTy(DL); else - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; @@ -3731,8 +3733,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // AddrMode - This represents an addressing mode of: @@ -3772,7 +3774,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. NVPTXTargetLowering::ConstraintType -NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { +NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -3794,7 +3796,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -4251,7 +4253,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - const DataLayout *TD, SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); SDLoc DL(N); @@ -4282,8 +4283,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, LoadSDNode *LD = cast<LoadSDNode>(N); unsigned Align = LD->getAlignment(); + auto &TD = DAG.getDataLayout(); unsigned PrefAlign = - TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); + TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); if (Align < PrefAlign) { // This load is not sufficiently aligned, so bail out and let this vector // load be scalarized. Note that we may still be able to emit smaller @@ -4495,7 +4497,7 @@ void NVPTXTargetLowering::ReplaceNodeResults( default: report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: - ReplaceLoadVector(N, DAG, getDataLayout(), Results); + ReplaceLoadVector(N, DAG, Results); return; case ISD::INTRINSIC_W_CHAIN: ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index ed94775b3002..e5c37321a33b 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -456,24 +456,23 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override { + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const override { if (VT.isVector()) return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); return MVT::i1; } - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -483,7 +482,7 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; - std::string getPrototype(Type *, const ArgListTy &, + std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl<ISD::OutputArg> &, unsigned retAlignment, const ImmutableCallSite *CS) const; @@ -501,7 +500,9 @@ public: const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 6ab0fadf9a35..0bf72febc4a0 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -57,7 +57,6 @@ char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop static void convertTransferToLoop( Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, - //unsigned numLoads, bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); @@ -84,6 +83,8 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind + // TODO: we can leverage the align parameter of llvm.memcpy for more efficient + // word-sized loads and stores. Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), srcVolatile); // store at dstAddr+ind @@ -137,13 +138,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // // Collect all the aggrLoads, aggrMemcpys and addrMemsets. // - //const BasicBlock *firstBB = &F.front(); // first BB in F for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (!load->hasOneUse()) continue; @@ -152,7 +150,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { User *use = load->user_back(); if (StoreInst *store = dyn_cast<StoreInst>(use)) { - if (store->getOperand(0) != load) //getValueOperand + if (store->getOperand(0) != load) continue; aggrLoads.push_back(load); } @@ -188,8 +186,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // // Do the transformation of an aggr load/copy/set to a loop // - for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) { - LoadInst *load = aggrLoads[i]; + for (LoadInst *load : aggrLoads) { StoreInst *store = dyn_cast<StoreInst>(*load->user_begin()); Value *srcAddr = load->getOperand(0); Value *dstAddr = store->getOperand(1); @@ -203,20 +200,19 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { load->eraseFromParent(); } - for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) { - MemTransferInst *cpy = aggrMemcpys[i]; - Value *len = cpy->getLength(); - // llvm 2.7 version of memcpy does not have volatile - // operand yet. So always making it non-volatile - // optimistically, so that we don't see unnecessary - // st.volatile in ptx - convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false, - false, Context, F); + for (MemTransferInst *cpy : aggrMemcpys) { + convertTransferToLoop(/* splitAt */ cpy, + /* srcAddr */ cpy->getSource(), + /* dstAddr */ cpy->getDest(), + /* len */ cpy->getLength(), + /* srcVolatile */ cpy->isVolatile(), + /* dstVolatile */ cpy->isVolatile(), + /* Context */ Context, + /* Function F */ F); cpy->eraseFromParent(); } - for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) { - MemSetInst *memsetinst = aggrMemsets[i]; + for (MemSetInst *memsetinst : aggrMemsets) { Value *len = memsetinst->getLength(); Value *val = memsetinst->getValue(); convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context, diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index e83f735a551e..5a83371b07f1 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -2,7 +2,7 @@ // // The LLVM Compiler Infrastructure // -// This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// @@ -115,7 +115,7 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { replaceImageHandle(Handle, MF); - return true; + return true; } return false; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 71645dca69c5..bd2509a3c8c9 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -48,7 +48,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM), InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), - TSInfo(TM.getDataLayout()), FrameLowering() {} + FrameLowering() {} bool NVPTXSubtarget::hasImageHandles() const { // Enable handles for Kepler+, where CUDA supports indirect surfaces and diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 9d9072efc382..248f9e117d83 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -148,8 +148,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(NVPTXTTIImpl(this, F)); + }); } void NVPTXPassConfig::addIRPasses() { diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index dc81802f4b5a..e7250cdba5ac 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -94,7 +94,7 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -117,3 +117,15 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost( Opd1PropInfo, Opd2PropInfo); } } + +void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + BaseT::getUnrollingPreferences(L, UP); + + // Enable partial unrolling and runtime unrolling, but reduce the + // threshold. This partially unrolls small loops which are often + // unrolled by the PTX to SASS compiler and unrolling earlier can be + // beneficial. + UP.Partial = UP.Runtime = true; + UP.PartialThreshold = UP.Threshold / 4; +} diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 4280888988f9..5bcd1e27a558 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -37,8 +37,9 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { const NVPTXTargetLowering *getTLI() const { return TLI; }; public: - explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. NVPTXTTIImpl(const NVPTXTTIImpl &Arg) @@ -46,18 +47,6 @@ public: NVPTXTTIImpl(NVPTXTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } bool hasBranchDivergence() { return true; } @@ -69,6 +58,8 @@ public: TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); }; } // end namespace llvm |