Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp  103
1 file changed, 90 insertions, 13 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5d6f58a77a39..ed28731b8ef2 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) {
InstructionCost PPCTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
- // Set the max cost if an MMA type is present (v256i1, v512i1).
- if (isMMAType(U->getType()))
- return InstructionCost::getMax();
-
// We already implement getCastInstrCost and getMemoryOpCost where we perform
// the vector adjustment there.
if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -1276,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
-bool PPCTTIImpl::areFunctionArgsABICompatible(
- const Function *Caller, const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const {
+bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ const ArrayRef<Type *> &Types) const {
// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
// and __vector_quad) since these types explicitly cannot be
// passed as arguments. Both of these types are larger than
// the 128-bit Altivec vectors and have a scalar size of 1 bit.
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
- return llvm::none_of(Args, [](Argument *A) {
- auto *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- return (EltTy->isIntOrIntVectorTy(1) &&
- EltTy->getPrimitiveSizeInBits() > 128);
+ return llvm::none_of(Types, [](Type *Ty) {
+ if (Ty->isSized())
+ return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
return false;
});
}
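
Note: the removed Argument-based lambda and its Type-based replacement encode the same rule: a sized type whose elements are i1 and whose total width exceeds 128 bits is taken to be an MMA accumulator (v256i1 for __vector_pair, v512i1 for __vector_quad) and must not be promoted. A minimal standalone sketch of that predicate, outside the patch and with a made-up helper name, might look like this:

// Sketch only, not part of the patch; isLikelyMMAType is a hypothetical name.
// MMA accumulators are modelled as i1-element vectors wider than the 128-bit
// Altivec vectors, so any sized type of that shape is treated as non-promotable.
static bool isLikelyMMAType(llvm::Type *Ty) {
  if (!Ty->isSized())
    return false;
  return Ty->isIntOrIntVectorTy(/*BitWidth=*/1) &&
         Ty->getPrimitiveSizeInBits() > 128;
}
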
@@ -1388,3 +1382,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return false;
}
+
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const {
+  // Only load and store instructions can have variable vector length on Power.
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return false;
+ // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+ // therefore cannot be used in 32-bit mode.
+ if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+ return false;
+ if (isa<FixedVectorType>(DataType)) {
+ unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+ return VecWidth == 128;
+ }
+ Type *ScalarTy = DataType->getScalarType();
+
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+ AddressSpace, CostKind, I);
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return Cost;
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
+
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+ assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+ if (hasActiveVectorLength(Opcode, Src, Alignment)) {
+ std::pair<InstructionCost, MVT> LT =
+ TLI->getTypeLegalizationCost(DL, SrcVTy);
+
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
+ InstructionCost Cost = LT.first * CostFactor;
+ assert(Cost.isValid() && "Expected valid cost");
+
+ // On P9 but not on P10, if the op is misaligned then it will cause a
+ // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+ // ones.
+ const Align DesiredAlignment(16);
+ if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
+ return Cost;
+
+  // Since alignment may be underestimated, we try to compute the probability
+ // that the actual address is aligned to the desired boundary. For example
+ // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
+ // time, while a 4-byte aligned load has a 25% chance of being 16-byte
+ // aligned.
+ float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
+ float MisalignmentProb = 1.0 - AlignmentProb;
+ return (MisalignmentProb * P9PipelineFlushEstimate) +
+ (AlignmentProb * *Cost.getValue());
+ }
+
+ // Usually we should not get to this point, but the following is an attempt to
+ // model the cost of legalization. Currently we can only lower intrinsics with
+ // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
+ return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}
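
Note: this path is only reached when hasActiveVectorLength holds, i.e. 128-bit fixed vectors on a 64-bit P9/P10 subtarget. The expected-cost expression at the end of getVPMemoryOpCost is a simple two-outcome model: with probability Alignment/16 the access is actually 16-byte aligned and costs the legalized op cost; otherwise it is charged the P9 pipeline-flush estimate. A small sketch of that arithmetic, using a placeholder flush value rather than the tuned P9PipelineFlushEstimate option:

// Sketch only, not part of the patch; expectedP9VPMemCost is a made-up helper
// and the flush estimate of 80 is a placeholder. Assumes AlignBytes < 16,
// since the patch returns the plain cost early for 16-byte-or-better alignment.
static float expectedP9VPMemCost(unsigned AlignBytes, float LegalizedCost,
                                 float FlushEstimate = 80.0f) {
  const float DesiredAlign = 16.0f;  // VSX memops want 16-byte alignment
  float AlignmentProb = static_cast<float>(AlignBytes) / DesiredAlign;
  float MisalignmentProb = 1.0f - AlignmentProb;
  return MisalignmentProb * FlushEstimate + AlignmentProb * LegalizedCost;
}
// For an 8-byte aligned access with a legalized cost of 2:
//   AlignmentProb = 8/16 = 0.5, expected cost = 0.5 * 80 + 0.5 * 2 = 41.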