path: root/llvm/lib/Transforms/Vectorize/VPlan.h
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.h')
-rw-r--r--	llvm/lib/Transforms/Vectorize/VPlan.h	1625
1 file changed, 1070 insertions(+), 555 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0b596e7e4f63..a1ff684b2b80 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -28,7 +28,6 @@
#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +37,7 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
@@ -60,6 +60,7 @@ class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
+class VPBuilder;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
@@ -83,9 +84,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
- Loop *CurLoop = nullptr);
-
/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
@@ -175,6 +173,7 @@ private:
Kind LaneKind;
public:
+ VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
@@ -225,132 +224,88 @@ public:
return Lane;
}
}
-
- /// Returns the maximum number of lanes that we are able to consider
- /// caching for \p VF.
- static unsigned getNumCachedLanes(const ElementCount &VF) {
- return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
- }
-};
-
-/// VPIteration represents a single point in the iteration space of the output
-/// (vectorized and/or unrolled) IR loop.
-struct VPIteration {
- /// in [0..UF)
- unsigned Part;
-
- VPLane Lane;
-
- VPIteration(unsigned Part, unsigned Lane,
- VPLane::Kind Kind = VPLane::Kind::First)
- : Part(Part), Lane(Lane, Kind) {}
-
- VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
-
- bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
- VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
- DominatorTree *DT, IRBuilderBase &Builder,
- InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx);
-
- /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
+ VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
+ LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
+ InnerLoopVectorizer *ILV, VPlan *Plan,
+ Loop *CurrentParentLoop, Type *CanonicalIVTy);
+ /// Target Transform Info.
+ const TargetTransformInfo *TTI;
+
+ /// The chosen Vectorization Factor of the loop being vectorized.
ElementCount VF;
- unsigned UF;
- /// Hold the indices to generate specific scalar instructions. Null indicates
+ /// Hold the index to generate specific scalar instructions. Null indicates
/// that all instances are to be generated, using either scalar or vector
/// instructions.
- std::optional<VPIteration> Instance;
+ std::optional<VPLane> Lane;
struct DataState {
- /// A type for vectorized values in the new loop. Each value from the
- /// original loop, when vectorized, is represented by UF vector values in
- /// the new unrolled loop, where UF is the unroll factor.
- typedef SmallVector<Value *, 2> PerPartValuesTy;
+ // Each value from the original loop, when vectorized, is represented by a
+ // vector value in the map.
+ DenseMap<VPValue *, Value *> VPV2Vector;
- DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
-
- using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
- DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
+ DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
} Data;
- /// Get the generated vector Value for a given VPValue \p Def and a given \p
- /// Part if \p IsScalar is false, otherwise return the generated scalar
- /// for \p Part. \See set.
- Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
+ /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
+ /// is false, otherwise return the generated scalar. \See set.
+ Value *get(VPValue *Def, bool IsScalar = false);
/// Get the generated Value for a given VPValue and given Lane.
- Value *get(VPValue *Def, const VPIteration &Instance);
+ Value *get(VPValue *Def, const VPLane &Lane);
- bool hasVectorValue(VPValue *Def, unsigned Part) {
- auto I = Data.PerPartOutput.find(Def);
- return I != Data.PerPartOutput.end() && Part < I->second.size() &&
- I->second[Part];
- }
+ bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
- bool hasScalarValue(VPValue *Def, VPIteration Instance) {
- auto I = Data.PerPartScalars.find(Def);
- if (I == Data.PerPartScalars.end())
+ bool hasScalarValue(VPValue *Def, VPLane Lane) {
+ auto I = Data.VPV2Scalars.find(Def);
+ if (I == Data.VPV2Scalars.end())
return false;
- unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
- return Instance.Part < I->second.size() &&
- CacheIdx < I->second[Instance.Part].size() &&
- I->second[Instance.Part][CacheIdx];
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ return CacheIdx < I->second.size() && I->second[CacheIdx];
}
- /// Set the generated vector Value for a given VPValue and a given Part, if \p
- /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
- void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
+ /// Set the generated vector Value for a given VPValue, if \p
+ /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
+ void set(VPValue *Def, Value *V, bool IsScalar = false) {
if (IsScalar) {
- set(Def, V, VPIteration(Part, 0));
+ set(Def, V, VPLane(0));
return;
}
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
- "scalar values must be stored as (Part, 0)");
- if (!Data.PerPartOutput.count(Def)) {
- DataState::PerPartValuesTy Entry(UF);
- Data.PerPartOutput[Def] = Entry;
- }
- Data.PerPartOutput[Def][Part] = V;
+ "scalar values must be stored as (0, 0)");
+ Data.VPV2Vector[Def] = V;
}
/// Reset an existing vector value for \p Def.
- void reset(VPValue *Def, Value *V, unsigned Part) {
- auto Iter = Data.PerPartOutput.find(Def);
- assert(Iter != Data.PerPartOutput.end() &&
- "need to overwrite existing value");
- Iter->second[Part] = V;
+ void reset(VPValue *Def, Value *V) {
+ assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
+ Data.VPV2Vector[Def] = V;
}
- /// Set the generated scalar \p V for \p Def and the given \p Instance.
- void set(VPValue *Def, Value *V, const VPIteration &Instance) {
- auto Iter = Data.PerPartScalars.insert({Def, {}});
- auto &PerPartVec = Iter.first->second;
- if (PerPartVec.size() <= Instance.Part)
- PerPartVec.resize(Instance.Part + 1);
- auto &Scalars = PerPartVec[Instance.Part];
- unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+ /// Set the generated scalar \p V for \p Def and the given \p Lane.
+ void set(VPValue *Def, Value *V, const VPLane &Lane) {
+ auto &Scalars = Data.VPV2Scalars[Def];
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
if (Scalars.size() <= CacheIdx)
Scalars.resize(CacheIdx + 1);
assert(!Scalars[CacheIdx] && "should overwrite existing value");
Scalars[CacheIdx] = V;
}
- /// Reset an existing scalar value for \p Def and a given \p Instance.
- void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
- auto Iter = Data.PerPartScalars.find(Def);
- assert(Iter != Data.PerPartScalars.end() &&
- "need to overwrite existing value");
- assert(Instance.Part < Iter->second.size() &&
+ /// Reset an existing scalar value for \p Def and a given \p Lane.
+ void reset(VPValue *Def, Value *V, const VPLane &Lane) {
+ auto Iter = Data.VPV2Scalars.find(Def);
+ assert(Iter != Data.VPV2Scalars.end() &&
"need to overwrite existing value");
- unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
- assert(CacheIdx < Iter->second[Instance.Part].size() &&
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ assert(CacheIdx < Iter->second.size() &&
"need to overwrite existing value");
- Iter->second[Instance.Part][CacheIdx] = V;
+ Iter->second[CacheIdx] = V;
}
/// Add additional metadata to \p To that was not present on \p Orig.
@@ -371,7 +326,7 @@ struct VPTransformState {
void setDebugLocFrom(DebugLoc DL);
/// Construct the vector value of a scalarized value \p V one lane at a time.
- void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);
+ void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
@@ -414,8 +369,8 @@ struct VPTransformState {
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
- /// The loop object for the current parent region, or nullptr.
- Loop *CurrentVectorLoop = nullptr;
+ /// The parent loop object for the current scope, or nullptr.
+ Loop *CurrentParentLoop = nullptr;
/// LoopVersioning. It's only set up (non-null) if memchecks were
/// used.
@@ -482,6 +437,26 @@ class VPBlockBase {
Successors.erase(Pos);
}
+ /// This function replaces one predecessor with another, useful when
+ /// trying to replace an old block in the CFG with a new one.
+ void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
+ auto I = find(Predecessors, Old);
+ assert(I != Predecessors.end());
+ assert(Old->getParent() == New->getParent() &&
+ "replaced predecessor must have the same parent");
+ *I = New;
+ }
+
+ /// This function replaces one successor with another, useful when
+ /// trying to replace an old block in the CFG with a new one.
+ void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
+ auto I = find(Successors, Old);
+ assert(I != Successors.end());
+ assert(Old->getParent() == New->getParent() &&
+ "replaced successor must have the same parent");
+ *I = New;
+ }
+
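These helpers are typically used in tandem when splicing a new block in place of an old one; a minimal sketch, assuming Old and New are VPBlockBase pointers with the same parent:

    // Redirect every edge referencing Old to reference New instead.
    // Sketch only: callers must still transfer Old's own edge lists.
    for (VPBlockBase *Pred : Old->getPredecessors())
      Pred->replaceSuccessor(Old, New);
    for (VPBlockBase *Succ : Old->getSuccessors())
      Succ->replacePredecessor(Old, New);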
protected:
VPBlockBase(const unsigned char SC, const std::string &N)
: SubclassID(SC), Name(N) {}
@@ -535,6 +510,7 @@ public:
VPBlocksTy &getSuccessors() { return Successors; }
iterator_range<VPBlockBase **> successors() { return Successors; }
+ iterator_range<VPBlockBase **> predecessors() { return Predecessors; }
const VPBlocksTy &getPredecessors() const { return Predecessors; }
VPBlocksTy &getPredecessors() { return Predecessors; }
@@ -641,6 +617,14 @@ public:
/// Remove all the successors of this block.
void clearSuccessors() { Successors.clear(); }
+ /// Swap successors of the block. The block must have exactly 2 successors.
+ // TODO: This should be part of introducing conditional branch recipes rather
+ // than being independent.
+ void swapSuccessors() {
+ assert(Successors.size() == 2 && "must have 2 successors to swap");
+ std::swap(Successors[0], Successors[1]);
+ }
+
/// The method which generates the output IR that correspond to this
/// VPBlockBase, thereby "executing" the VPlan.
virtual void execute(VPTransformState *State) = 0;
@@ -648,9 +632,6 @@ public:
/// Return the cost of the block.
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
- /// Delete all blocks reachable from a given VPBlockBase, inclusive.
- static void deleteCFG(VPBlockBase *Entry);
-
/// Return true if it is legal to hoist instructions into this block.
bool isLegalToHoistInto() {
// There are currently no constraints that prevent an instruction to be
@@ -658,12 +639,8 @@ public:
return true;
}
- /// Replace all operands of VPUsers in the block with \p NewValue and also
- /// replaces all uses of VPValues defined in the block with NewValue.
- virtual void dropAllReferences(VPValue *NewValue) = 0;
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void printAsOperand(raw_ostream &OS, bool PrintType) const {
+ void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
OS << getName();
}
@@ -696,54 +673,21 @@ public:
virtual VPBlockBase *clone() = 0;
};
-/// A value that is used outside the VPlan. The operand of the user needs to be
-/// added to the associated phi node. The incoming block from VPlan is
-/// determined by where the VPValue is defined: if it is defined by a recipe
-/// outside a region, its parent block is used, otherwise the middle block is
-/// used.
-class VPLiveOut : public VPUser {
- PHINode *Phi;
-
-public:
- VPLiveOut(PHINode *Phi, VPValue *Op)
- : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
-
- static inline bool classof(const VPUser *U) {
- return U->getVPUserID() == VPUser::VPUserID::LiveOut;
- }
-
- /// Fix the wrapped phi node. This means adding an incoming value to exit
- /// block phi's from the vector loop via middle block (values from scalar loop
- /// already reach these phi's), and updating the value to scalar header phi's
- /// from the scalar preheader.
- void fixPhi(VPlan &Plan, VPTransformState &State);
-
- /// Returns true if the VPLiveOut uses scalars of operand \p Op.
- bool usesScalars(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- PHINode *getPhi() const { return Phi; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the VPLiveOut to \p O.
- void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
-#endif
-};
-
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
+ const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;
+ TargetTransformInfo::TargetCostKind CostKind;
- VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
- LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
- : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
+ VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
+ Type *CanIVTy, LoopVectorizationCostModel &CM,
+ TargetTransformInfo::TargetCostKind CostKind)
+ : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
+ CM(CM), CostKind(CostKind) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
@@ -752,6 +696,9 @@ struct VPCostContext {
/// Return true if the cost for \p UI shouldn't be computed, e.g. because it
/// has already been pre-computed.
bool skipCostComputation(Instruction *UI, bool IsVector) const;
+
+ /// Returns the OperandInfo for \p V, if it is a live-in.
+ TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
@@ -774,12 +721,12 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
public:
VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
DebugLoc DL = {})
- : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
+ : VPDef(SC), VPUser(Operands), DL(DL) {}
template <typename IterT>
VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
DebugLoc DL = {})
- : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
+ : VPDef(SC), VPUser(Operands), DL(DL) {}
virtual ~VPRecipeBase() = default;
/// Clone the current recipe.
@@ -796,7 +743,7 @@ public:
/// Return the cost of this recipe, taking into account if the cost
/// computation should be skipped and the ForceTargetInstructionCost flag.
/// Also takes care of printing the cost for debugging.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
+ InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
@@ -833,9 +780,7 @@ public:
return true;
}
- static inline bool classof(const VPUser *U) {
- return U->getVPUserID() == VPUser::VPUserID::Recipe;
- }
+ static inline bool classof(const VPUser *U) { return true; }
/// Returns true if the recipe may have side-effects.
bool mayHaveSideEffects() const;
@@ -860,9 +805,11 @@ public:
DebugLoc getDebugLoc() const { return DL; }
protected:
- /// Compute the cost of this recipe using the legacy cost model and the
- /// underlying instructions.
- InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
+ /// Compute the cost of this recipe either using a recipe's specialized
+ /// implementation or using the legacy cost model and the underlying
+ /// instructions.
+ virtual InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const;
};
// Helper macro to define common classof implementations for recipes.
@@ -914,11 +861,14 @@ public:
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
+ case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
+ case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
+ case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
@@ -930,13 +880,16 @@ public:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
case VPRecipeBase::VPScalarCastSC:
+ case VPRecipeBase::VPPartialReductionSC:
return true;
- case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
+ case VPRecipeBase::VPInterleaveSC:
+ case VPRecipeBase::VPIRInstructionSC:
case VPRecipeBase::VPWidenLoadEVLSC:
case VPRecipeBase::VPWidenLoadSC:
case VPRecipeBase::VPWidenStoreEVLSC:
case VPRecipeBase::VPWidenStoreSC:
+ case VPRecipeBase::VPHistogramSC:
// TODO: Widened stores don't define a value, but widened loads do. Split
// the recipes to be able to make widened loads VPSingleDefRecipes.
return false;
@@ -958,6 +911,11 @@ public:
const Instruction *getUnderlyingInstr() const {
return cast<Instruction>(getUnderlyingValue());
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print this VPSingleDefRecipe to dbgs() (for debugging).
+ LLVM_DUMP_METHOD void dump() const;
+#endif
};
/// Class to record LLVM IR flag for a recipe along with it.
@@ -986,12 +944,6 @@ public:
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
};
-protected:
- struct GEPFlagsTy {
- char IsInBounds : 1;
- GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
- };
-
private:
struct ExactFlagsTy {
char IsExact : 1;
@@ -1018,7 +970,7 @@ private:
WrapFlagsTy WrapFlags;
DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
- GEPFlagsTy GEPFlags;
+ GEPNoWrapFlags GEPFlags;
NonNegFlagsTy NonNegFlags;
FastMathFlagsTy FMFs;
unsigned AllFlags;
@@ -1055,7 +1007,7 @@ public:
ExactFlags.IsExact = Op->isExact();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
OpType = OperationType::GEPOp;
- GEPFlags.IsInBounds = GEP->isInBounds();
+ GEPFlags = GEP->getNoWrapFlags();
} else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
OpType = OperationType::NonNegOp;
NonNegFlags.NonNeg = PNNI->hasNonNeg();
@@ -1095,7 +1047,7 @@ public:
protected:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
- GEPFlagsTy GEPFlags, DebugLoc DL = {})
+ GEPNoWrapFlags GEPFlags, DebugLoc DL = {})
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
GEPFlags(GEPFlags) {}
@@ -1103,9 +1055,11 @@ public:
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1130,7 +1084,7 @@ public:
ExactFlags.IsExact = false;
break;
case OperationType::GEPOp:
- GEPFlags.IsInBounds = false;
+ GEPFlags = GEPNoWrapFlags::none();
break;
case OperationType::FPMathOp:
FMFs.NoNaNs = false;
@@ -1159,10 +1113,7 @@ public:
I->setIsExact(ExactFlags.IsExact);
break;
case OperationType::GEPOp:
- // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
- cast<GetElementPtrInst>(I)->setNoWrapFlags(
- GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
- : GEPNoWrapFlags::none());
+ cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
break;
case OperationType::FPMathOp:
I->setHasAllowReassoc(FMFs.AllowReassoc);
@@ -1188,11 +1139,7 @@ public:
return CmpPredicate;
}
- bool isInBounds() const {
- assert(OpType == OperationType::GEPOp &&
- "recipe doesn't have inbounds flag");
- return GEPFlags.IsInBounds;
- }
+ GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }
/// Returns true if the recipe has fast-math flags.
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
@@ -1222,11 +1169,24 @@ public:
#endif
};
+/// Helper to access the operand that contains the unroll part for this recipe
+/// after unrolling.
+template <unsigned PartOpIdx> class VPUnrollPartAccessor {
+protected:
+ /// Return the VPValue operand containing the unroll part or null if there is
+ /// no such operand.
+ VPValue *getUnrollPartOperand(VPUser &U) const;
+
+ /// Return the unroll part.
+ unsigned getUnrollPart(VPUser &U) const;
+};
+
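A recipe opts into this by additionally inheriting from the accessor, parameterized by the operand index that holds the part; a sketch of the pattern (VPExampleRecipe is hypothetical):

    // Hypothetical recipe whose unroll-part operand, if added by unrolling,
    // is operand 1; getUnrollPart(*this) then returns the part number, and
    // 0 before unrolling, mirroring VPInstruction below.
    class VPExampleRecipe : public VPRecipeWithIRFlags,
                            public VPUnrollPartAccessor<1> {
      // ... execute() can branch on getUnrollPart(*this) ...
    };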
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
-class VPInstruction : public VPRecipeWithIRFlags {
+class VPInstruction : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<1> {
friend class VPlanSlp;
public:
@@ -1253,15 +1213,16 @@ public:
ComputeReductionResult,
// Takes the VPValue to extract from as first operand and the lane or part
// to extract as second operand, counting from the end starting with 1 for
- // last. The second operand must be a positive constant and <= VF when
- // extracting from a vector or <= UF when extracting from an unrolled
- // scalar.
+ // last. The second operand must be a positive constant and <= VF.
ExtractFromEnd,
LogicalAnd, // Non-poison propagating logical And.
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
+ // Returns a scalar boolean value, which is true if any lane of its single
+ // (boolean) vector operand is true.
+ AnyOf,
};
private:
@@ -1283,16 +1244,15 @@ private:
/// needed.
bool canGenerateScalarForFirstLane() const;
- /// Utility methods serving execute(): generates a single instance of the
- /// modeled instruction for a given part. \returns the generated value for \p
- /// Part. In some cases an existing value is returned rather than a generated
- /// one.
- Value *generatePerPart(VPTransformState &State, unsigned Part);
+ /// Utility methods serving execute(): generates a single vector instance of
+ /// the modeled instruction. \returns the generated value. In some cases an
+ /// existing value is returned rather than a generated one.
+ Value *generate(VPTransformState &State);
/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
- Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
+ Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
#if !defined(NDEBUG)
/// Return true if the VPInstruction is a floating point math operation, i.e.
@@ -1326,6 +1286,12 @@ public:
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
}
+ VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags,
+ DebugLoc DL = {}, const Twine &Name = "")
+ : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
+ ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
+ Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
+
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
@@ -1345,6 +1311,13 @@ public:
/// provided.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPInstruction.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the VPInstruction to \p O.
void print(raw_ostream &O, const Twine &Indent,
@@ -1354,14 +1327,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- /// Return true if this instruction may modify memory.
- bool mayWriteToMemory() const {
- // TODO: we can use attributes of the called function to rule out memory
- // modifications.
- return Opcode == Instruction::Store || Opcode == Instruction::Call ||
- Opcode == Instruction::Invoke || Opcode == SLPStore;
- }
-
bool hasResult() const {
// CallInst may or may not have a result, depending on the called function.
// Conservatively return calls have results for now.
@@ -1384,6 +1349,9 @@ public:
}
}
+ /// Returns true if the underlying opcode may read from or write to memory.
+ bool opcodeMayReadOrWriteFromMemory() const;
+
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override;
@@ -1397,6 +1365,69 @@ public:
/// Returns true if this VPInstruction's operands are single scalars and the
/// result is also a single scalar.
bool isSingleScalar() const;
+
+ /// Returns the symbolic name assigned to the VPInstruction.
+ StringRef getName() const { return Name; }
+};
+
+/// A recipe to wrap an original IR instruction that is not modified during
+/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
+/// and it is used to add a new incoming value for the single predecessor VPBB.
+/// Except for PHIs, VPIRInstructions cannot have any operands.
+class VPIRInstruction : public VPRecipeBase {
+ Instruction &I;
+
+public:
+ VPIRInstruction(Instruction &I)
+ : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
+
+ ~VPIRInstruction() override = default;
+
+ VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
+
+ VPIRInstruction *clone() override {
+ auto *R = new VPIRInstruction(I);
+ for (auto *Op : operands())
+ R->addOperand(Op);
+ return R;
+ }
+
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPIRInstruction.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+ Instruction &getInstruction() const { return I; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ bool usesScalars(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ /// Update the recipe's single operand to the last lane of the operand using
+ /// \p Builder. Must only be used for single operand VPIRInstructions wrapping
+ /// a PHINode.
+ void extractLastLaneOfOperand(VPBuilder &Builder);
};
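For illustration, a minimal sketch of wrapping an exit-block PHI, assuming ExitPhi (PHINode &) and IncomingVPV (VPValue *) exist:

    // Wrap an existing PHI; adding the single operand is only legal here
    // because the wrapped instruction is a PHINode.
    auto *WrappedPhi = new VPIRInstruction(ExitPhi);
    WrappedPhi->addOperand(IncomingVPV);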
/// VPWidenRecipe is a recipe for producing a widened instruction using the
@@ -1406,11 +1437,16 @@ public:
class VPWidenRecipe : public VPRecipeWithIRFlags {
unsigned Opcode;
+protected:
+ template <typename IterT>
+ VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
+ iterator_range<IterT> Operands)
+ : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
+
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
- Opcode(I.getOpcode()) {}
+ : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
~VPWidenRecipe() override = default;
@@ -1420,12 +1456,24 @@ public:
return R;
}
- VP_CLASSOF_IMPL(VPDef::VPWidenSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce a widened instruction using the opcode and operands of the recipe,
/// processing State.VF elements.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
unsigned getOpcode() const { return Opcode; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1435,6 +1483,54 @@ public:
#endif
};
+/// A recipe for widening operations with vector-predication intrinsics with
+/// explicit vector length (EVL).
+class VPWidenEVLRecipe : public VPWidenRecipe {
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ template <typename IterT>
+ VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
+ : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
+ addOperand(&EVL);
+ }
+ VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
+ : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
+ transferFlags(W);
+ }
+
+ ~VPWidenEVLRecipe() override = default;
+
+ VPWidenRecipe *clone() override final {
+ llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic using the opcode and operands of the recipe,
+ /// processing EVL elements.
+ void execute(VPTransformState &State) override final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override final;
+#endif
+};
+
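The flag-transferring constructor supports the EVL transform, which swaps an existing widened recipe for its vector-predicated form in place; a minimal sketch, assuming W (VPWidenRecipe *) and EVL (VPValue *) exist and using VPRecipeBase's insertion API:

    // Replace W with a vp-intrinsic based recipe processing EVL elements.
    auto *EVLRecipe = new VPWidenEVLRecipe(*W, *EVL); // also transfers flags
    EVLRecipe->insertBefore(W);
    W->replaceAllUsesWith(EVLRecipe);
    W->eraseFromParent();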
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
@@ -1471,6 +1567,10 @@ public:
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenCastRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1489,23 +1589,32 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
Type *ResultTy;
- Value *generate(VPTransformState &State, unsigned Part);
+ Value *generate(VPTransformState &State);
public:
- VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
+ VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ DebugLoc DL)
+ : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
ResultTy(ResultTy) {}
~VPScalarCastRecipe() override = default;
VPScalarCastRecipe *clone() override {
- return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
+ return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
+ getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPScalarCastRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
@@ -1522,24 +1631,111 @@ public:
}
};
-/// A recipe for widening Call instructions.
-class VPWidenCallRecipe : public VPSingleDefRecipe {
- /// ID of the vector intrinsic to call when widening the call. If set the
- /// Intrinsic::not_intrinsic, a library call will be used instead.
+/// A recipe for widening vector intrinsics.
+class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
+ /// ID of the vector intrinsic to widen.
Intrinsic::ID VectorIntrinsicID;
- /// If this recipe represents a library call, Variant stores a pointer to
- /// the chosen function. There is a 1:1 mapping between a given VF and the
- /// chosen vectorized variant, so there will be a different vplan for each
- /// VF with a valid variant.
+
+ /// Scalar return type of the intrinsic.
+ Type *ResultTy;
+
+ /// True if the intrinsic may read from memory.
+ bool MayReadFromMemory;
+
+ /// True if the intrinsic may write to memory.
+ bool MayWriteToMemory;
+
+ /// True if the intrinsic may have side-effects.
+ bool MayHaveSideEffects;
+
+public:
+ VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
+ ArrayRef<VPValue *> CallArguments, Type *Ty,
+ DebugLoc DL = {})
+ : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
+ VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
+ MayReadFromMemory(CI.mayReadFromMemory()),
+ MayWriteToMemory(CI.mayWriteToMemory()),
+ MayHaveSideEffects(CI.mayHaveSideEffects()) {}
+
+ VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
+ ArrayRef<VPValue *> CallArguments, Type *Ty,
+ DebugLoc DL = {})
+ : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
+ VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
+ LLVMContext &Ctx = Ty->getContext();
+ AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
+ MemoryEffects ME = Attrs.getMemoryEffects();
+ MayReadFromMemory = !ME.onlyWritesMemory();
+ MayWriteToMemory = !ME.onlyReadsMemory();
+ MayHaveSideEffects = MayWriteToMemory ||
+ !Attrs.hasFnAttr(Attribute::NoUnwind) ||
+ !Attrs.hasFnAttr(Attribute::WillReturn);
+ }
+
+ VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
+ std::initializer_list<VPValue *> CallArguments,
+ Type *Ty, DebugLoc DL = {})
+ : VPWidenIntrinsicRecipe(VectorIntrinsicID,
+ ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
+
+ ~VPWidenIntrinsicRecipe() override = default;
+
+ VPWidenIntrinsicRecipe *clone() override {
+ return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
+ VectorIntrinsicID, {op_begin(), op_end()},
+ ResultTy, getDebugLoc());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
+
+ /// Produce a widened version of the vector intrinsic.
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this vector intrinsic.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+ /// Return the ID of the intrinsic.
+ Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
+
+ /// Return the scalar return type of the intrinsic.
+ Type *getResultType() const { return ResultTy; }
+
+ /// Return the name of the intrinsic as a string.
+ StringRef getIntrinsicName() const;
+
+ /// Returns true if the intrinsic may read from memory.
+ bool mayReadFromMemory() const { return MayReadFromMemory; }
+
+ /// Returns true if the intrinsic may write to memory.
+ bool mayWriteToMemory() const { return MayWriteToMemory; }
+
+ /// Returns true if the intrinsic may have side-effects.
+ bool mayHaveSideEffects() const { return MayHaveSideEffects; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override;
+};
+
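A minimal construction sketch for the second constructor, which derives the memory flags from the intrinsic's attributes; A, B (VPValue *) and I32Ty (the scalar result type) are assumed:

    // Widen a umin without an underlying CallInst; the memory flags come
    // from the intrinsic's MemoryEffects in the constructor above.
    auto *R = new VPWidenIntrinsicRecipe(Intrinsic::umin, {A, B}, I32Ty);
    assert(!R->mayWriteToMemory() && "umin should not write memory");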
+/// A recipe for widening Call instructions using library calls.
+class VPWidenCallRecipe : public VPRecipeWithIRFlags {
+ /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
+ /// between a given VF and the chosen vectorized variant, so there will be a
+ /// different VPlan for each VF with a valid variant.
Function *Variant;
public:
- template <typename IterT>
- VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
- Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
- Function *Variant = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
- VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
+ VPWidenCallRecipe(Value *UV, Function *Variant,
+ ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
+ : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
+ *cast<Instruction>(UV)),
+ Variant(Variant) {
assert(
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
"last operand must be the called function");
@@ -1548,8 +1744,8 @@ public:
~VPWidenCallRecipe() override = default;
VPWidenCallRecipe *clone() override {
- return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
- VectorIntrinsicID, getDebugLoc(), Variant);
+ return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
+ {op_begin(), op_end()}, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
@@ -1557,6 +1753,10 @@ public:
/// Produce a widened version of the call instruction.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenCallRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
Function *getCalledScalarFunction() const {
return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
}
@@ -1575,12 +1775,56 @@ public:
#endif
};
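A minimal construction sketch, assuming CI (the scalar CallInst *), VecVariant (the vectorized Function * chosen for this VF) and argument VPValues X and Y, plus CalledFn, the VPValue of the scalar callee, which must come last:

    SmallVector<VPValue *> Args = {X, Y, CalledFn};
    auto *WideCall =
        new VPWidenCallRecipe(CI, VecVariant, Args, CI->getDebugLoc());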
+/// A recipe representing a sequence of load -> update -> store as part of
+/// a histogram operation. This means there may be aliasing between vector
+/// lanes, which is handled by the llvm.experimental.vector.histogram family
+/// of intrinsics. The only update operations currently supported are
+/// 'add' and 'sub' where the other term is loop-invariant.
+class VPHistogramRecipe : public VPRecipeBase {
+ /// Opcode of the update operation, currently either add or sub.
+ unsigned Opcode;
+
+public:
+ template <typename IterT>
+ VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
+ DebugLoc DL = {})
+ : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
+
+ ~VPHistogramRecipe() override = default;
+
+ VPHistogramRecipe *clone() override {
+ return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
+
+ /// Produce a vectorized histogram operation.
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPHistogramRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+ unsigned getOpcode() const { return Opcode; }
+
+ /// Return the mask operand if one was provided, or a null pointer if all
+ /// lanes should be executed unconditionally.
+ VPValue *getMask() const {
+ return getNumOperands() == 3 ? getOperand(2) : nullptr;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
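For intuition, a sketch (pseudo-IR) of the scalar pattern this recipe models and the intrinsic it lowers to:

    // Scalar pattern per iteration:
    //   %gep = getelementptr i32, ptr %buckets, i64 %idx
    //   %old = load i32, ptr %gep
    //   %new = add i32 %old, 1     ; add/sub with a loop-invariant term
    //   store i32 %new, ptr %gep
    // Vectorized, the sequence becomes one call to
    // llvm.experimental.vector.histogram.add, which resolves aliasing
    // between lanes that hit the same bucket.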
/// A recipe for widening select instructions.
-struct VPWidenSelectRecipe : public VPSingleDefRecipe {
+struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
- : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
- I.getDebugLoc()) {}
+ : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I) {}
~VPWidenSelectRecipe() override = default;
@@ -1594,6 +1838,10 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
/// Produce a widened version of the select instruction.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenSelectRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1605,23 +1853,23 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
}
bool isInvariantCond() const {
- return getCond()->isDefinedOutsideVectorRegions();
+ return getCond()->isDefinedOutsideLoopRegions();
}
};
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
bool isPointerLoopInvariant() const {
- return getOperand(0)->isDefinedOutsideVectorRegions();
+ return getOperand(0)->isDefinedOutsideLoopRegions();
}
bool isIndexLoopInvariant(unsigned I) const {
- return getOperand(I + 1)->isDefinedOutsideVectorRegions();
+ return getOperand(I + 1)->isDefinedOutsideLoopRegions();
}
bool areAllOperandsInvariant() const {
return all_of(operands(), [](VPValue *Op) {
- return Op->isDefinedOutsideVectorRegions();
+ return Op->isDefinedOutsideLoopRegions();
});
}
@@ -1642,6 +1890,67 @@ public:
/// Generate the gep nodes.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenGEPRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<2> {
+ Type *IndexedTy;
+
+public:
+ VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+ GEPNoWrapFlags GEPFlags, DebugLoc DL)
+ : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+ ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
+ IndexedTy(IndexedTy) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+ VPValue *getVFValue() { return getOperand(1); }
+ const VPValue *getVFValue() const { return getOperand(1); }
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ /// Return the cost of this VPReverseVectorPointerRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ assert(getNumOperands() <= 2 && "must have at most two operands");
+ return true;
+ }
+
+ VPReverseVectorPointerRecipe *clone() override {
+ return new VPReverseVectorPointerRecipe(getOperand(0), getVFValue(),
+ IndexedTy, getGEPNoWrapFlags(),
+ getDebugLoc());
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1649,19 +1958,17 @@ public:
#endif
};
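A sketch (pseudo-IR) of the address computation this recipe models for unroll part 0, where %vf is the runtime VF operand and T the indexed type:

    //   %lastlane = sub i64 1, %vf                  ; 1 - VF
    //   %gep = getelementptr T, ptr %ptr, i64 %lastlane
    // A subsequent reversed wide load/store then covers the VF elements
    // ending at %ptr.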
-/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
-class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
+/// A recipe to compute the pointers for widened memory accesses of IndexTy.
+class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<1> {
Type *IndexedTy;
- bool IsReverse;
public:
- VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
- bool IsInBounds, DebugLoc DL)
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags,
+ DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
- GEPFlagsTy(IsInBounds), DL),
- IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ GEPFlags, DL),
+ IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1673,9 +1980,24 @@ public:
return true;
}
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ assert(getNumOperands() <= 2 && "must have at most two operands");
+ return true;
+ }
+
VPVectorPointerRecipe *clone() override {
- return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
- isInBounds(), getDebugLoc());
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
+ getGEPNoWrapFlags(), getDebugLoc());
+ }
+
+ /// Return the cost of this VPVectorPointerRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1734,6 +2056,10 @@ public:
/// Generate the phi nodes.
void execute(VPTransformState &State) override = 0;
+ /// Return the cost of this header phi recipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1763,34 +2089,90 @@ public:
}
};
+/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
+/// VPWidenPointerInductionRecipe), providing shared functionality, including
+/// retrieving the step value, induction descriptor and original phi node.
+class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
+ const InductionDescriptor &IndDesc;
+
+public:
+ VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
+ VPValue *Step, const InductionDescriptor &IndDesc,
+ DebugLoc DL)
+ : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
+ addOperand(Step);
+ }
+
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
+ R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
+ }
+
+ static inline bool classof(const VPValue *V) {
+ auto *R = V->getDefiningRecipe();
+ return R && classof(R);
+ }
+
+ static inline bool classof(const VPHeaderPHIRecipe *R) {
+ return classof(static_cast<const VPRecipeBase *>(R));
+ }
+
+ virtual void execute(VPTransformState &State) override = 0;
+
+ /// Returns the step value of the induction.
+ VPValue *getStepValue() { return getOperand(1); }
+ const VPValue *getStepValue() const { return getOperand(1); }
+
+ PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
+
+ /// Returns the induction descriptor for the recipe.
+ const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
+
+ VPValue *getBackedgeValue() override {
+ // TODO: All operands of base recipe must exist and be at same index in
+ // derived recipe.
+ llvm_unreachable(
+ "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ }
+
+ VPRecipeBase &getBackedgeRecipe() override {
+ // TODO: All operands of base recipe must exist and be at same index in
+ // derived recipe.
+ llvm_unreachable(
+ "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ }
+};
+
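The common base lets transforms handle both induction flavors uniformly; a minimal sketch, assuming R is some VPRecipeBase *:

    if (auto *WideIV = dyn_cast<VPWidenInductionRecipe>(R)) {
      VPValue *Step = WideIV->getStepValue();
      const InductionDescriptor &ID = WideIV->getInductionDescriptor();
      (void)Step; (void)ID; // e.g. feed a step-narrowing transform
    }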
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
-class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
- PHINode *IV;
+class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
TruncInst *Trunc;
- const InductionDescriptor &IndDesc;
public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc)
- : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
- Trunc(nullptr), IndDesc(IndDesc) {
- addOperand(Step);
+ VPValue *VF, const InductionDescriptor &IndDesc,
+ DebugLoc DL)
+ : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
+ Step, IndDesc, DL),
+ Trunc(nullptr) {
+ addOperand(VF);
}
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc,
- TruncInst *Trunc)
- : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
- IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
- addOperand(Step);
+ VPValue *VF, const InductionDescriptor &IndDesc,
+ TruncInst *Trunc, DebugLoc DL)
+ : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
+ Step, IndDesc, DL),
+ Trunc(Trunc) {
+ addOperand(VF);
}
~VPWidenIntOrFpInductionRecipe() override = default;
VPWidenIntOrFpInductionRecipe *clone() override {
- return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
- getStepValue(), IndDesc, Trunc);
+ return new VPWidenIntOrFpInductionRecipe(
+ getPHINode(), getStartValue(), getStepValue(), getVFValue(),
+ getInductionDescriptor(), Trunc, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
@@ -1805,34 +2187,20 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- VPValue *getBackedgeValue() override {
- // TODO: All operands of base recipe must exist and be at same index in
- // derived recipe.
- llvm_unreachable(
- "VPWidenIntOrFpInductionRecipe generates its own backedge value");
- }
+ VPValue *getVFValue() { return getOperand(2); }
+ const VPValue *getVFValue() const { return getOperand(2); }
- VPRecipeBase &getBackedgeRecipe() override {
- // TODO: All operands of base recipe must exist and be at same index in
- // derived recipe.
- llvm_unreachable(
- "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ VPValue *getSplatVFValue() {
+ // If the recipe has been unrolled (5 operands), return the VPValue for the
+ // induction increment.
+ return getNumOperands() == 5 ? getOperand(3) : nullptr;
}
- /// Returns the step value of the induction.
- VPValue *getStepValue() { return getOperand(1); }
- const VPValue *getStepValue() const { return getOperand(1); }
-
/// Returns the first defined value as TruncInst, if it is one or nullptr
/// otherwise.
TruncInst *getTruncInst() { return Trunc; }
const TruncInst *getTruncInst() const { return Trunc; }
- PHINode *getPHINode() { return IV; }
-
- /// Returns the induction descriptor for the recipe.
- const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
-
/// Returns true if the induction is canonical, i.e. starting at 0 and
/// incremented by UF * VF (= the original IV is incremented by 1) and has the
/// same type as the canonical induction.
@@ -1840,13 +2208,19 @@ public:
/// Returns the scalar type of the induction.
Type *getScalarType() const {
- return Trunc ? Trunc->getType() : IV->getType();
+ return Trunc ? Trunc->getType() : getPHINode()->getType();
}
-};
-class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
- const InductionDescriptor &IndDesc;
+ /// Returns the VPValue representing the value of this induction at
+ /// the last unrolled part, if it exists. Returns itself if unrolling did not
+ /// take place.
+ VPValue *getLastUnrolledPartOperand() {
+ return getNumOperands() == 5 ? getOperand(4) : this;
+ }
+};
+class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
+ public VPUnrollPartAccessor<3> {
bool IsScalarAfterVectorization;
public:
@@ -1854,20 +2228,17 @@ public:
/// Start.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
const InductionDescriptor &IndDesc,
- bool IsScalarAfterVectorization)
- : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
- IndDesc(IndDesc),
- IsScalarAfterVectorization(IsScalarAfterVectorization) {
- addOperand(Start);
- addOperand(Step);
- }
+ bool IsScalarAfterVectorization, DebugLoc DL)
+ : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
+ Step, IndDesc, DL),
+ IsScalarAfterVectorization(IsScalarAfterVectorization) {}
~VPWidenPointerInductionRecipe() override = default;
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
- IndDesc, IsScalarAfterVectorization);
+ getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -1878,8 +2249,51 @@ public:
/// Returns true if only scalar values will be generated.
bool onlyScalarsGenerated(bool IsScalable);
- /// Returns the induction descriptor for the recipe.
- const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
+ /// Returns the VPValue representing the value of this induction at
+ /// the first unrolled part, if it exists. Returns itself if unrolling did not
+ /// take place.
+ VPValue *getFirstUnrolledPartOperand() {
+ return getUnrollPart(*this) == 0 ? this : getOperand(2);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// Recipe to generate a scalar PHI. Used to generate code for recipes that
+/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
+/// VPEVLBasedIVPHIRecipe.
+class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
+ std::string Name;
+
+public:
+ VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
+ StringRef Name)
+ : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
+ Name(Name.str()) {
+ addOperand(BackedgeValue);
+ }
+
+ ~VPScalarPHIRecipe() override = default;
+
+ VPScalarPHIRecipe *clone() override {
+ llvm_unreachable("cloning not implemented yet");
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
+
+ /// Generate the phi/select nodes.
+ void execute(VPTransformState &State) override;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -1896,9 +2310,10 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
SmallVector<VPBasicBlock *, 2> IncomingBlocks;
public:
- /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+ /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
+ /// debug location \p DL.
+ VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
if (Start)
addOperand(Start);
}
@@ -1953,6 +2368,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
void execute(VPTransformState &State) override;
+ /// Return the cost of this first-order recurrence phi recipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1963,7 +2382,8 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
-class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
+class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
+ public VPUnrollPartAccessor<2> {
/// Descriptor for the reduction.
const RecurrenceDescriptor &RdxDesc;
@@ -1973,23 +2393,28 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
/// The phi is part of an ordered reduction. Requires IsInLoop to be true.
bool IsOrdered;
+ /// When expanding the reduction PHI, the plan's VF element count is divided
+ /// by this factor to form the reduction phi's VF.
+ unsigned VFScaleFactor = 1;
+
public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
/// RdxDesc.
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
VPValue &Start, bool IsInLoop = false,
- bool IsOrdered = false)
+ bool IsOrdered = false, unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
- RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
+ RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
+ VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}
~VPReductionPHIRecipe() override = default;
VPReductionPHIRecipe *clone() override {
- auto *R =
- new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
- *getOperand(0), IsInLoop, IsOrdered);
+ auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
+ RdxDesc, *getOperand(0), IsInLoop,
+ IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
@@ -2020,17 +2445,66 @@ public:
bool isInLoop() const { return IsInLoop; }
};
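
To make VFScaleFactor above concrete: with a plan VF of 16 and a phi feeding a partial reduction with VFScaleFactor = 4, the reduction phi itself is only 4 lanes wide. A one-function sketch of that arithmetic (lane counts are illustrative):

    // Sketch: the reduction phi's lane count under VFScaleFactor.
    unsigned reductionPhiLanes(unsigned PlanVF, unsigned VFScaleFactor) {
      return PlanVF / VFScaleFactor; // e.g. 16 / 4 == 4 lanes
    }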
+/// A recipe for forming partial reductions. In the loop, an accumulator and
+/// vector operand are added together and passed to the next iteration as the
+/// next accumulator. After the loop body, the accumulator is reduced to a
+/// scalar value.
+class VPPartialReductionRecipe : public VPSingleDefRecipe {
+ unsigned Opcode;
+
+public:
+ VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
+ VPValue *Op1)
+ : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1,
+ ReductionInst) {}
+ VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
+ Instruction *ReductionInst = nullptr)
+ : VPSingleDefRecipe(VPDef::VPPartialReductionSC,
+ ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
+ Opcode(Opcode) {
+ [[maybe_unused]] auto *AccumulatorRecipe =
+ getOperand(1)->getDefiningRecipe();
+ assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
+ isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
+ "Unexpected operand order for partial reduction recipe");
+ }
+ ~VPPartialReductionRecipe() override = default;
+
+ VPPartialReductionRecipe *clone() override {
+ return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
+ getUnderlyingInstr());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
+
+ /// Generate the reduction in the loop.
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPPartialReductionRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+ /// Get the binary op's opcode.
+ unsigned getOpcode() const { return Opcode; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
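
The class comment above describes the shape of a partial reduction; the following scalar emulation is a sketch under assumed lane counts (16 inputs folded into a 4-lane accumulator), showing the in-loop accumulation and the after-loop reduction to a single scalar:

    #include <array>
    #include <cstdint>
    #include <numeric>

    // Scalar emulation of a partial reduction: inputs fold into a narrower
    // rotating accumulator; a single scalar is produced after the loop.
    int32_t partialReduceSketch(const std::array<int8_t, 16> &In) {
      std::array<int32_t, 4> Acc{}; // narrower accumulator, zero-initialized
      for (unsigned I = 0; I < In.size(); ++I)
        Acc[I % Acc.size()] += In[I]; // in-loop add into rotating lanes
      return std::accumulate(Acc.begin(), Acc.end(), 0); // final reduction
    }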
+
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {
public:
/// The blend operation is a User of the incoming values and of their
- /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
- /// incoming value does not have a mask associated.
+ /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
+ /// be omitted (implied by passing an odd number of operands) in which case
+ /// all other incoming values are merged into it.
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
- assert((Operands.size() + 1) % 2 == 0 &&
- "Expected an odd number of operands");
+ assert(Operands.size() > 0 && "Expected at least one operand!");
}
VPBlendRecipe *clone() override {
@@ -2040,24 +2514,34 @@ public:
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
- /// Return the number of incoming values, taking into account that the first
- /// incoming value has no mask.
- unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
+ /// A normalized blend is one that has an odd number of operands, whereby the
+ /// first operand does not have an associated mask.
+ bool isNormalized() const { return getNumOperands() % 2; }
+
+ /// Return the number of incoming values, taking into account that, when
+ /// normalized, the first incoming value has no mask.
+ unsigned getNumIncomingValues() const {
+ return (getNumOperands() + isNormalized()) / 2;
+ }
/// Return incoming value number \p Idx.
VPValue *getIncomingValue(unsigned Idx) const {
- return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
+ return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
}
/// Return mask number \p Idx.
VPValue *getMask(unsigned Idx) const {
- assert(Idx > 0 && "First index has no mask associated.");
- return getOperand(Idx * 2);
+ assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
+ return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
}
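
Because the indexing above is easy to misread, here is a standalone sketch of the two operand layouts, with a plain vector standing in for the operand list (purely illustrative, mirroring the accessors above):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Normalized (odd operand count):  [I0, I1, M1, I2, M2, ...], I0 unmasked.
    // Non-normalized (even count):     [I0, M0, I1, M1, I2, M2, ...].
    struct BlendLayoutSketch {
      std::vector<int> Ops; // stand-in operands
      bool normalized() const { return Ops.size() % 2; }
      size_t numIncoming() const { return (Ops.size() + normalized()) / 2; }
      int value(size_t Idx) const {
        return Idx == 0 ? Ops[0] : Ops[Idx * 2 - normalized()];
      }
      int mask(size_t Idx) const {
        assert((Idx > 0 || !normalized()) && "I0 of normalized blend unmasked");
        return Idx == 0 ? Ops[1] : Ops[Idx * 2 + !normalized()];
      }
    };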
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPBlendRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2143,6 +2627,10 @@ public:
/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPInterleaveRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2180,8 +2668,9 @@ class VPReductionRecipe : public VPSingleDefRecipe {
protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
- VPValue *CondOp, bool IsOrdered)
- : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
+ VPValue *CondOp, bool IsOrdered, DebugLoc DL)
+ : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
+ IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
@@ -2191,16 +2680,17 @@ protected:
public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered)
+ bool IsOrdered, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered) {}
+ IsOrdered, DL) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
- getVecOp(), getCondOp(), IsOrdered);
+ getVecOp(), getCondOp(), IsOrdered,
+ getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
@@ -2213,9 +2703,13 @@ public:
return R && classof(R);
}
- /// Generate the reduction in the loop
+ /// Generate the reduction in the loop.
void execute(VPTransformState &State) override;
+ /// Return the cost of VPReductionRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2246,12 +2740,12 @@ public:
/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
class VPReductionEVLRecipe : public VPReductionRecipe {
public:
- VPReductionEVLRecipe(VPReductionRecipe *R, VPValue *EVL, VPValue *CondOp)
+ VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
: VPReductionRecipe(
- VPDef::VPReductionEVLSC, R->getRecurrenceDescriptor(),
- cast_or_null<Instruction>(R->getUnderlyingValue()),
- ArrayRef<VPValue *>({R->getChainOp(), R->getVecOp(), EVL}), CondOp,
- R->isOrdered()) {}
+ VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
+ cast_or_null<Instruction>(R.getUnderlyingValue()),
+ ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
+ R.isOrdered(), R.getDebugLoc()) {}
~VPReductionEVLRecipe() override = default;
@@ -2319,6 +2813,10 @@ public:
/// the \p State.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPReplicateRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2376,6 +2874,10 @@ public:
/// conditional branch.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPBranchOnMaskRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2413,19 +2915,27 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
/// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs phi
/// nodes after merging back from a Branch-on-Mask.
- VPPredInstPHIRecipe(VPValue *PredV)
- : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
+ VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
+ : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
~VPPredInstPHIRecipe() override = default;
VPPredInstPHIRecipe *clone() override {
- return new VPPredInstPHIRecipe(getOperand(0));
+ return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
- /// Generates phi nodes for live-outs as needed to retain SSA form.
+ /// Generates phi nodes for live-outs (from a replicate region) as needed to
+ /// retain SSA form.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPPredInstPHIRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2513,6 +3023,10 @@ public:
llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
}
+ /// Return the cost of this VPWidenMemoryRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
Instruction &getIngredient() const { return Ingredient; }
};
@@ -2558,10 +3072,10 @@ struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
- VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
- : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
- {L->getAddr(), EVL}, L->isConsecutive(),
- L->isReverse(), L->getDebugLoc()),
+ VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
+ : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
+ {L.getAddr(), &EVL}, L.isConsecutive(),
+ L.isReverse(), L.getDebugLoc()),
VPValue(this, &getIngredient()) {
setMask(Mask);
}
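
For readers new to the EVL recipes: conceptually, only the first EVL lanes, further filtered by the optional mask, participate in the access. A scalar model of that semantics, illustrative rather than the actual generated code:

    #include <cstddef>
    #include <vector>

    // Scalar model of an EVL-governed load: lanes >= EVL (or masked off)
    // keep a neutral value instead of touching memory.
    std::vector<int> evlLoadSketch(const int *Addr, size_t VF, size_t EVL,
                                   const std::vector<bool> *Mask) {
      std::vector<int> Result(VF, 0);
      for (size_t Lane = 0; Lane < VF && Lane < EVL; ++Lane)
        if (!Mask || (*Mask)[Lane])
          Result[Lane] = Addr[Lane];
      return Result;
    }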
@@ -2574,6 +3088,10 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
/// Generate the wide load or gather.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenLoadEVLRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2634,11 +3152,10 @@ struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
- VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
- : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
- {S->getAddr(), S->getStoredValue(), EVL},
- S->isConsecutive(), S->isReverse(),
- S->getDebugLoc()) {
+ VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
+ : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
+ {S.getAddr(), S.getStoredValue(), &EVL},
+ S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
setMask(Mask);
}
@@ -2653,6 +3170,10 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
/// Generate the wide store or scatter.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenStoreEVLRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2694,6 +3215,13 @@ public:
/// Generate the SCEV expansion.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPExpandSCEVRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2726,8 +3254,10 @@ public:
return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
}
- /// Generate the canonical scalar induction phi of the vector loop.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -2754,10 +3284,12 @@ public:
return true;
}
- /// Check if the induction described by \p Kind, /p Start and \p Step is
- /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
- bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
- VPValue *Step) const;
+ /// Return the cost of this VPCanonicalIVPHIRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // For now, match the behavior of the legacy cost model.
+ return 0;
+ }
};
/// A recipe for generating the active lane mask for the vector loop that is
@@ -2773,7 +3305,10 @@ public:
~VPActiveLaneMaskPHIRecipe() override = default;
VPActiveLaneMaskPHIRecipe *clone() override {
- return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
+ auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
+ if (getNumOperands() == 2)
+ R->addOperand(getOperand(1));
+ return R;
}
VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
@@ -2813,9 +3348,17 @@ public:
return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
}
- /// Generate phi for handling IV based on EVL over iterations correctly.
- /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
+
+ /// Return the cost of this VPEVLBasedIVPHIRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // For now, match the behavior of the legacy cost model.
+ return 0;
+ }
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -2832,7 +3375,8 @@ public:
};
/// A Recipe for widening the canonical induction variable of the vector loop.
-class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
+class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
+ public VPUnrollPartAccessor<1> {
public:
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
@@ -2851,6 +3395,13 @@ public:
/// step = <VF*UF, VF*UF, ..., VF*UF>.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenCanonicalIVRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2868,19 +3419,23 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// for floating point inductions.
const FPMathOperator *FPBinOp;
+ /// Name to use for the generated IR instruction for the derived IV.
+ std::string Name;
+
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
- VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
+ VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
+ const Twine &Name = "")
: VPDerivedIVRecipe(
IndDesc.getKind(),
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
- Start, CanonicalIV, Step) {}
+ Start, CanonicalIV, Step, Name) {}
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
- VPValue *Step)
+ VPValue *Step, const Twine &Name = "")
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
- FPBinOp(FPBinOp) {}
+ FPBinOp(FPBinOp), Name(Name.str()) {}
~VPDerivedIVRecipe() override = default;
@@ -2895,6 +3450,13 @@ public:
/// operand) + IV (2. operand) * StepValue (3. operand).
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPDerivedIVRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
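
As a worked sketch of the formula above, with plain integers standing in for the recipe's operands (types are illustrative):

    // DerivedIV = Start (1st operand) + IV (2nd operand) * Step (3rd operand).
    long derivedIVSketch(long Start, long IV, long Step) {
      return Start + IV * Step; // e.g. Start=8, IV=3, Step=4 gives 20
    }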
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2918,7 +3480,8 @@ public:
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
-class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
+class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<2> {
Instruction::BinaryOps InductionOpcode;
public:
@@ -2949,6 +3512,13 @@ public:
/// Generate the scalarized versions of the phi node as needed by their users.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPScalarIVStepsRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2969,6 +3539,15 @@ public:
/// holds a sequence of zero or more VPRecipes, each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class VPBasicBlock : public VPBlockBase {
+ friend class VPlan;
+
+ /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
+ VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
+ : VPBlockBase(VPBasicBlockSC, Name.str()) {
+ if (Recipe)
+ appendRecipe(Recipe);
+ }
+
public:
using RecipeListTy = iplist<VPRecipeBase>;
@@ -2980,12 +3559,6 @@ protected:
: VPBlockBase(BlockSC, Name.str()) {}
public:
- VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
- : VPBlockBase(VPBasicBlockSC, Name.str()) {
- if (Recipe)
- appendRecipe(Recipe);
- }
-
~VPBasicBlock() override {
while (!Recipes.empty())
Recipes.pop_back();
@@ -3057,14 +3630,13 @@ public:
return make_range(begin(), getFirstNonPhi());
}
- void dropAllReferences(VPValue *NewValue) override;
-
/// Split current block at \p SplitAt by inserting a new block between the
/// current block and its successors and moving all recipes starting at
/// SplitAt to the new block. Returns the new block.
VPBasicBlock *splitAt(iterator SplitAt);
VPRegionBlock *getEnclosingLoopRegion();
+ const VPRegionBlock *getEnclosingLoopRegion() const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
@@ -3087,17 +3659,16 @@ public:
/// Clone the current block and its recipes, without updating the operands of
/// the cloned recipes.
- VPBasicBlock *clone() override {
- auto *NewBlock = new VPBasicBlock(getName());
- for (VPRecipeBase &R : *this)
- NewBlock->appendRecipe(R.clone());
- return NewBlock;
- }
+ VPBasicBlock *clone() override;
protected:
/// Execute the recipes in the IR basic block \p BB.
void executeRecipes(VPTransformState *State, BasicBlock *BB);
+ /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
+ /// generated for this VPBB.
+ void connectToPredecessors(VPTransformState::CFGState &CFG);
+
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
@@ -3110,14 +3681,17 @@ private:
/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
/// preheader block.
class VPIRBasicBlock : public VPBasicBlock {
+ friend class VPlan;
+
BasicBlock *IRBB;
-public:
+ /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
VPIRBasicBlock(BasicBlock *IRBB)
: VPBasicBlock(VPIRBasicBlockSC,
(Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
IRBB(IRBB) {}
+public:
~VPIRBasicBlock() override {}
static inline bool classof(const VPBlockBase *V) {
@@ -3128,12 +3702,7 @@ public:
/// this VPBasicBlock, thereby "executing" the VPlan.
void execute(VPTransformState *State) override;
- VPIRBasicBlock *clone() override {
- auto *NewBlock = new VPIRBasicBlock(IRBB);
- for (VPRecipeBase &R : Recipes)
- NewBlock->appendRecipe(R.clone());
- return NewBlock;
- }
+ VPIRBasicBlock *clone() override;
BasicBlock *getIRBasicBlock() const { return IRBB; }
};
@@ -3147,6 +3716,8 @@ public:
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
+ friend class VPlan;
+
/// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
VPBlockBase *Entry;
@@ -3158,7 +3729,7 @@ class VPRegionBlock : public VPBlockBase {
/// instances of output IR corresponding to its VPBlockBases.
bool IsReplicator;
-public:
+ /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
const std::string &Name = "", bool IsReplicator = false)
: VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
@@ -3172,13 +3743,8 @@ public:
: VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
IsReplicator(IsReplicator) {}
- ~VPRegionBlock() override {
- if (Entry) {
- VPValue DummyValue;
- Entry->dropAllReferences(&DummyValue);
- deleteCFG(Entry);
- }
- }
+public:
+ ~VPRegionBlock() override {}
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPBlockBase *V) {
@@ -3226,8 +3792,6 @@ public:
// Return the cost of this region.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
- void dropAllReferences(VPValue *NewValue) override;
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
/// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
@@ -3254,14 +3818,15 @@ class VPlan {
friend class VPlanPrinter;
friend class VPSlotTracker;
- /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
- /// preheader of the vector loop.
- VPBasicBlock *Entry;
-
/// VPBasicBlock corresponding to the original preheader. Used to place
/// VPExpandSCEV recipes for expressions used during skeleton creation and the
/// rest of VPlan execution.
- VPBasicBlock *Preheader;
+ /// When this VPlan is used for the epilogue vector loop, the entry will be
+ /// replaced by a new entry block created during skeleton creation.
+ VPBasicBlock *Entry;
+
+ /// VPIRBasicBlock wrapping the header of the original scalar loop.
+ VPIRBasicBlock *ScalarHeader;
/// Holds the VFs applicable to this VPlan.
SmallSetVector<ElementCount, 2> VFs;
@@ -3284,6 +3849,9 @@ class VPlan {
/// Represents the vector trip count.
VPValue VectorTripCount;
+ /// Represents the vectorization factor of the loop.
+ VPValue VF;
+
/// Represents the loop-invariant VF * UF of the vector loop region.
VPValue VFxUF;
@@ -3295,56 +3863,63 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;
- /// Values used outside the plan. It contains live-outs that need fixing. Any
- /// live-out that is fixed outside VPlan needs to be removed. The remaining
- /// live-outs are fixed via VPLiveOut::fixPhi.
- MapVector<PHINode *, VPLiveOut *> LiveOuts;
-
/// Mapping from SCEVs to the VPValues representing their expansions.
/// NOTE: This mapping is temporary and will be removed once all users have
/// been modeled in VPlan directly.
DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
-public:
- /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
- /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
- /// be disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
- : VPlan(Preheader, Entry) {
- TripCount = TC;
- }
+ /// Blocks allocated and owned by the VPlan. They will be deleted once the
+ /// VPlan is destroyed.
+ SmallVector<VPBlockBase *> CreatedBlocks;
- /// Construct a VPlan with original preheader \p Preheader and \p Entry to
- /// the plan. At the moment, \p Preheader and \p Entry need to be
- /// disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
- : Entry(Entry), Preheader(Preheader) {
+ /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
+ /// wrapping the original header of the scalar loop.
+ VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
+ : Entry(Entry), ScalarHeader(ScalarHeader) {
Entry->setPlan(this);
- Preheader->setPlan(this);
- assert(Preheader->getNumSuccessors() == 0 &&
- Preheader->getNumPredecessors() == 0 &&
- "preheader must be disconnected");
+ assert(ScalarHeader->getNumSuccessors() == 0 &&
+ "scalar header must be a leaf node");
+ }
+
+public:
+ /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
+ /// original preheader and scalar header of \p L, to be used as entry and
+ /// scalar header blocks of the new VPlan.
+ VPlan(Loop *L);
+
+ /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
+ /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
+ VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
+ setEntry(createVPBasicBlock("preheader"));
+ ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
+ TripCount = TC;
}
~VPlan();
+ void setEntry(VPBasicBlock *VPBB) {
+ Entry = VPBB;
+ VPBB->setPlan(this);
+ }
+
/// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
- /// original scalar pre-header ) which contains SCEV expansions that need
- /// to happen before the CFG is modified; a VPBasicBlock for the vector
- /// pre-header, followed by a region for the vector loop, followed by the
- /// middle VPBasicBlock. If a check is needed to guard executing the scalar
- /// epilogue loop, it will be added to the middle block, together with
- /// VPBasicBlocks for the scalar preheader and exit blocks.
- static VPlanPtr createInitialVPlan(const SCEV *TripCount,
- ScalarEvolution &PSE,
+ /// original scalar pre-header) which contains SCEV expansions that need
+ /// to happen before the CFG is modified (when executing a VPlan for the
+ /// epilogue vector loop, the original entry needs to be replaced by a new
+ /// one); a VPBasicBlock for the vector pre-header, followed by a region for
+ /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
+ /// to guard executing the scalar epilogue loop, it will be added to the
+ /// middle block, together with VPBasicBlocks for the scalar preheader and
+ /// exit blocks. \p InductionTy is the type of the canonical induction and
+ /// used for related values, like the trip count expression.
+ static VPlanPtr createInitialVPlan(Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop);
/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
- Value *CanonicalIVStartValue, VPTransformState &State);
+ VPTransformState &State);
/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);
@@ -3355,6 +3930,43 @@ public:
VPBasicBlock *getEntry() { return Entry; }
const VPBasicBlock *getEntry() const { return Entry; }
+ /// Returns the preheader of the vector loop region, if one exists, or null
+ /// otherwise.
+ VPBasicBlock *getVectorPreheader() {
+ VPRegionBlock *VectorRegion = getVectorLoopRegion();
+ return VectorRegion
+ ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
+ : nullptr;
+ }
+
+ /// Returns the VPRegionBlock of the vector loop.
+ VPRegionBlock *getVectorLoopRegion();
+ const VPRegionBlock *getVectorLoopRegion() const;
+
+ /// Returns the 'middle' block of the plan, that is the block that selects
+ /// whether to execute the scalar tail loop or to branch to the exit block
+ /// reached from the loop latch.
+ const VPBasicBlock *getMiddleBlock() const {
+ return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
+ }
+ VPBasicBlock *getMiddleBlock() {
+ return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
+ }
+
+ /// Return the VPBasicBlock for the preheader of the scalar loop.
+ VPBasicBlock *getScalarPreheader() const {
+ return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
+ }
+
+ /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
+ VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
+
+ /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
+ /// of the VPlan, that is leaf nodes except the scalar header. Defined in
+ /// VPlanHCFG, as the definition of the type needs access to the definitions
+ /// of VPBlockShallowTraversalWrapper.
+ auto getExitBlocks();
+
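
The accessors above all rely on a fixed skeleton shape; a toy sketch of the same navigation under that assumed shape, with a simplified block type:

    #include <vector>

    // Middle block -> scalar preheader -> scalar header, as navigated above.
    struct ToyBlock {
      std::vector<ToyBlock *> Preds; // predecessor list
    };
    ToyBlock *scalarPreheaderOf(const ToyBlock &ScalarHeader) {
      // The scalar preheader is the scalar header's single predecessor.
      return ScalarHeader.Preds.size() == 1 ? ScalarHeader.Preds[0] : nullptr;
    }
    ToyBlock *middleBlockOf(const ToyBlock &ScalarHeader) {
      // The middle block is the scalar preheader's first predecessor.
      return scalarPreheaderOf(ScalarHeader)->Preds.front();
    }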
/// The trip count of the original loop.
VPValue *getTripCount() const {
assert(TripCount && "trip count needs to be set before accessing it");
@@ -3379,6 +3991,9 @@ public:
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
+ /// Returns the VF of the vector loop region.
+ VPValue &getVF() { return VF; }
+
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
@@ -3405,6 +4020,11 @@ public:
bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
+ unsigned getUF() const {
+ assert(UFs.size() == 1 && "Expected a single UF");
+ return UFs[0];
+ }
+
void setUF(unsigned UF) {
assert(hasUF(UF) && "Cannot set the UF not already in plan");
UFs.clear();
@@ -3451,14 +4071,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- /// Returns the VPRegionBlock of the vector loop.
- VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
- const VPRegionBlock *getVectorLoopRegion() const {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
-
/// Returns the canonical induction recipe of the vector loop.
VPCanonicalIVPHIRecipe *getCanonicalIV() {
VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
@@ -3469,17 +4081,6 @@ public:
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
- void addLiveOut(PHINode *PN, VPValue *V);
-
- void removeLiveOut(PHINode *PN) {
- delete LiveOuts[PN];
- LiveOuts.erase(PN);
- }
-
- const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
- return LiveOuts;
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
@@ -3489,13 +4090,52 @@ public:
SCEVToExpansion[S] = V;
}
- /// \return The block corresponding to the original preheader.
- VPBasicBlock *getPreheader() { return Preheader; }
- const VPBasicBlock *getPreheader() const { return Preheader; }
-
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();
+
+ /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
+ /// present. The returned block is owned by the VPlan and deleted once the
+ /// VPlan is destroyed.
+ VPBasicBlock *createVPBasicBlock(const Twine &Name,
+ VPRecipeBase *Recipe = nullptr) {
+ auto *VPB = new VPBasicBlock(Name, Recipe);
+ CreatedBlocks.push_back(VPB);
+ return VPB;
+ }
+
+ /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
+ /// IsReplicator is true, the region is a replicate region. The returned block
+ /// is owned by the VPlan and deleted once the VPlan is destroyed.
+ VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const std::string &Name = "",
+ bool IsReplicator = false) {
+ auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
+ CreatedBlocks.push_back(VPB);
+ return VPB;
+ }
+
+ /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
+ /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
+ /// The returned block is owned by the VPlan and deleted once the VPlan is
+ /// destroyed.
+ VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
+ bool IsReplicator = false) {
+ auto *VPB = new VPRegionBlock(Name, IsReplicator);
+ CreatedBlocks.push_back(VPB);
+ return VPB;
+ }
+
+ /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
+ /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
+ /// block is owned by the VPlan and deleted once the VPlan is destroyed.
+ VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);
+
+ /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
+ /// instructions in \p IRBB, except its terminator which is managed by the
+ /// successors of the block in VPlan. The returned block is owned by the VPlan
+ /// and deleted once the VPlan is destroyed.
+ VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);
};
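
The createVPBasicBlock/createVPRegionBlock/createVPIRBasicBlock factories above centralize block ownership in the plan; a minimal sketch of that pattern (illustrative names, with unique_ptr standing in for the destructor's cleanup of CreatedBlocks):

    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Blocks are created only through the plan and die with it, so callers
    // never delete a block directly (mirroring createVPBasicBlock et al.).
    struct SketchBlock {
      std::string Name;
    };
    struct SketchPlan {
      std::vector<std::unique_ptr<SketchBlock>> CreatedBlocks;
      SketchBlock *createBlock(std::string Name) {
        CreatedBlocks.push_back(std::make_unique<SketchBlock>());
        CreatedBlocks.back()->Name = std::move(Name);
        return CreatedBlocks.back().get(); // plan retains ownership
      }
    };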
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3567,93 +4207,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
}
#endif
-//===----------------------------------------------------------------------===//
-// VPlan Utilities
-//===----------------------------------------------------------------------===//
-
-/// Class that provides utilities for VPBlockBases in VPlan.
-class VPBlockUtils {
-public:
- VPBlockUtils() = delete;
-
- /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
- /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
- /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
- /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
- /// have neither successors nor predecessors.
- static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
- assert(NewBlock->getSuccessors().empty() &&
- NewBlock->getPredecessors().empty() &&
- "Can't insert new block with predecessors or successors.");
- NewBlock->setParent(BlockPtr->getParent());
- SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
- for (VPBlockBase *Succ : Succs) {
- disconnectBlocks(BlockPtr, Succ);
- connectBlocks(NewBlock, Succ);
- }
- connectBlocks(BlockPtr, NewBlock);
- }
-
- /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
- /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
- /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
- /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
- /// and \p IfTrue and \p IfFalse must have neither successors nor
- /// predecessors.
- static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
- VPBlockBase *BlockPtr) {
- assert(IfTrue->getSuccessors().empty() &&
- "Can't insert IfTrue with successors.");
- assert(IfFalse->getSuccessors().empty() &&
- "Can't insert IfFalse with successors.");
- BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
- IfTrue->setPredecessors({BlockPtr});
- IfFalse->setPredecessors({BlockPtr});
- IfTrue->setParent(BlockPtr->getParent());
- IfFalse->setParent(BlockPtr->getParent());
- }
-
- /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
- /// the successors of \p From and \p From to the predecessors of \p To. Both
- /// VPBlockBases must have the same parent, which can be null. Both
- /// VPBlockBases can be already connected to other VPBlockBases.
- static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
- assert((From->getParent() == To->getParent()) &&
- "Can't connect two block with different parents");
- assert(From->getNumSuccessors() < 2 &&
- "Blocks can't have more than two successors.");
- From->appendSuccessor(To);
- To->appendPredecessor(From);
- }
-
- /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
- /// from the successors of \p From and \p From from the predecessors of \p To.
- static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
- assert(To && "Successor to disconnect is null.");
- From->removeSuccessor(To);
- To->removePredecessor(From);
- }
-
- /// Return an iterator range over \p Range which only includes \p BlockTy
- /// blocks. The accesses are casted to \p BlockTy.
- template <typename BlockTy, typename T>
- static auto blocksOnly(const T &Range) {
- // Create BaseTy with correct const-ness based on BlockTy.
- using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
- const VPBlockBase, VPBlockBase>;
-
- // We need to first create an iterator range over (const) BlocktTy & instead
- // of (const) BlockTy * for filter_range to work properly.
- auto Mapped =
- map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
- auto Filter = make_filter_range(
- Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
- return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
- return cast<BlockTy>(&Block);
- });
- }
-};
-
class VPInterleavedAccessInfo {
DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
InterleaveGroupMap;
@@ -3787,44 +4340,6 @@ public:
/// Return true if all visited instruction can be combined.
bool isCompletelySLP() const { return CompletelySLP; }
};
-
-namespace vputils {
-
-/// Returns true if only the first lane of \p Def is used.
-bool onlyFirstLaneUsed(const VPValue *Def);
-
-/// Returns true if only the first part of \p Def is used.
-bool onlyFirstPartUsed(const VPValue *Def);
-
-/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
-/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
-/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
-/// pre-header already contains a recipe expanding \p Expr, return it. If not,
-/// create a new one.
-VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
- ScalarEvolution &SE);
-
-/// Returns true if \p VPV is uniform after vectorization.
-inline bool isUniformAfterVectorization(VPValue *VPV) {
- // A value defined outside the vector region must be uniform after
- // vectorization inside a vector region.
- if (VPV->isDefinedOutsideVectorRegions())
- return true;
- VPRecipeBase *Def = VPV->getDefiningRecipe();
- assert(Def && "Must have definition for value defined inside vector region");
- if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
- return Rep->isUniform();
- if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
- return all_of(GEP->operands(), isUniformAfterVectorization);
- if (auto *VPI = dyn_cast<VPInstruction>(Def))
- return VPI->isSingleScalar() || VPI->isVectorToScalar();
- return false;
-}
-
-/// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(VPValue *V, VPlan &Plan);
-} // end namespace vputils
-
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H