Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 103
1 files changed, 90 insertions, 13 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5d6f58a77a39..ed28731b8ef2 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) {
 InstructionCost PPCTTIImpl::getUserCost(const User *U,
                                         ArrayRef<const Value *> Operands,
                                         TTI::TargetCostKind CostKind) {
-  // Set the max cost if an MMA type is present (v256i1, v512i1).
-  if (isMMAType(U->getType()))
-    return InstructionCost::getMax();
-
   // We already implement getCastInstrCost and getMemoryOpCost where we perform
   // the vector adjustment there.
   if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -1276,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
-bool PPCTTIImpl::areFunctionArgsABICompatible(
-    const Function *Caller, const Function *Callee,
-    SmallPtrSetImpl<Argument *> &Args) const {
+bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
+                                       const Function *Callee,
+                                       const ArrayRef<Type *> &Types) const {
   // We need to ensure that argument promotion does not
   // attempt to promote pointers to MMA types (__vector_pair
   // and __vector_quad) since these types explicitly cannot be
   // passed as arguments. Both of these types are larger than
   // the 128-bit Altivec vectors and have a scalar size of 1 bit.
-  if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
     return false;
 
-  return llvm::none_of(Args, [](Argument *A) {
-    auto *EltTy = cast<PointerType>(A->getType())->getElementType();
-    if (EltTy->isSized())
-      return (EltTy->isIntOrIntVectorTy(1) &&
-              EltTy->getPrimitiveSizeInBits() > 128);
+  return llvm::none_of(Types, [](Type *Ty) {
+    if (Ty->isSized())
+      return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
     return false;
   });
 }
 
@@ -1388,3 +1382,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 
   return false;
 }
+
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+                                       Align Alignment) const {
+  // Only load and stores instructions can have variable vector length on Power.
+  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+    return false;
+  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+  // therefore cannot be used in 32-bit mode.
+  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+    return false;
+  if (isa<FixedVectorType>(DataType)) {
+    unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+    return VecWidth == 128;
+  }
+  Type *ScalarTy = DataType->getScalarType();
+
+  if (ScalarTy->isPointerTy())
+    return true;
+
+  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+    return true;
+
+  if (!ScalarTy->isIntegerTy())
+    return false;
+
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+                                              Align Alignment,
+                                              unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+                                                  AddressSpace, CostKind, I);
+  if (TLI->getValueType(DL, Src, true) == MVT::Other)
+    return Cost;
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return Cost;
+
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+  assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, SrcVTy);
+
+    InstructionCost CostFactor =
+        vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+    if (!CostFactor.isValid())
+      return InstructionCost::getMax();
+
+    InstructionCost Cost = LT.first * CostFactor;
+    assert(Cost.isValid() && "Expected valid cost");
+
+    // On P9 but not on P10, if the op is misaligned then it will cause a
+    // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+    // ones.
+    const Align DesiredAlignment(16);
+    if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
+      return Cost;
+
+    // Since alignment may be under estimated, we try to compute the probability
+    // that the actual address is aligned to the desired boundary. For example
+    // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
+    // time, while a 4-byte aligned load has a 25% chance of being 16-byte
+    // aligned.
+    float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
+    float MisalignmentProb = 1.0 - AlignmentProb;
+    return (MisalignmentProb * P9PipelineFlushEstimate) +
+           (AlignmentProb * *Cost.getValue());
+  }
+
+  // Usually we should not get to this point, but the following is an attempt to
+  // model the cost of legalization. Currently we can only lower intrinsics with
+  // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
+  return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}
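
Note on the P9-only adjustment in the new getVPMemoryOpCost hunk: the diff weights the pipeline-flush penalty by the probability that the access is actually 16-byte aligned at runtime. The standalone sketch below restates that arithmetic outside the TTI machinery; the helper name and the flush-penalty constant are placeholders for illustration only (the real value is the P9PipelineFlushEstimate constant defined elsewhere in PPCTargetTransformInfo.cpp), so treat this as an assumption-laden paraphrase rather than the committed code.

// Illustrative sketch only, not part of the diff.
#include <algorithm>
#include <cstdint>

// Assumed placeholder for the pipeline-flush penalty used on Power9.
constexpr float kAssumedP9FlushPenalty = 80.0f;

// Expected cost of a VSX load/store with length on Power9 when the
// compile-time alignment may understate the runtime alignment.
float expectedP9VPMemOpCost(uint64_t KnownAlignBytes, float BaseCost) {
  constexpr float DesiredAlign = 16.0f; // quadword alignment avoids the flush
  // An 8-byte aligned access is assumed to be 16-byte aligned half the time,
  // a 4-byte aligned access a quarter of the time, and so on.
  float AlignProb =
      std::min(1.0f, static_cast<float>(KnownAlignBytes) / DesiredAlign);
  float MisalignProb = 1.0f - AlignProb;
  return MisalignProb * kAssumedP9FlushPenalty + AlignProb * BaseCost;
}

For example, with BaseCost of 1 and a known 4-byte alignment this yields 0.75 * 80 + 0.25 * 1 = 60.25 under the assumed penalty, which is why under-aligned VP memory ops are costed heavily on Power9 but not on Power10.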