aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/VPlan.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.h')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h992
1 files changed, 690 insertions, 302 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 2cce127cd4ce..bdf09d15c27f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -40,6 +40,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -60,6 +61,11 @@ class VPRegionBlock;
class VPlan;
class VPlanSlp;
+/// Returns a calculation for the total number of elements for a given \p VF.
+/// For fixed width vectors this value is a constant, whereas for scalable
+/// vectors it is an expression determined at runtime.
+Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF);
+
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 9) = {1, 2, 4, 8}
@@ -89,169 +95,108 @@ using VPlanPtr = std::unique_ptr<VPlan>;
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.
-/// VPIteration represents a single point in the iteration space of the output
-/// (vectorized and/or unrolled) IR loop.
-struct VPIteration {
- /// in [0..UF)
- unsigned Part;
+/// VPLane provides a way to access lanes in both fixed width and scalable
+/// vectors, where for the latter the lane index sometimes needs calculating
+/// as a runtime expression.
+class VPLane {
+public:
+ /// Kind describes how to interpret Lane.
+ enum class Kind : uint8_t {
+ /// For First, Lane is the index into the first N elements of a
+ /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
+ First,
+ /// For ScalableLast, Lane is the offset from the start of the last
+ /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
+ /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
+ /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
+ ScalableLast
+ };
+private:
/// in [0..VF)
unsigned Lane;
-};
-
-/// This is a helper struct for maintaining vectorization state. It's used for
-/// mapping values from the original loop to their corresponding values in
-/// the new loop. Two mappings are maintained: one for vectorized values and
-/// one for scalarized values. Vectorized values are represented with UF
-/// vector values in the new loop, and scalarized values are represented with
-/// UF x VF scalar values in the new loop. UF and VF are the unroll and
-/// vectorization factors, respectively.
-///
-/// Entries can be added to either map with setVectorValue and setScalarValue,
-/// which assert that an entry was not already added before. If an entry is to
-/// replace an existing one, call resetVectorValue and resetScalarValue. This is
-/// currently needed to modify the mapped values during "fix-up" operations that
-/// occur once the first phase of widening is complete. These operations include
-/// type truncation and the second phase of recurrence widening.
-///
-/// Entries from either map can be retrieved using the getVectorValue and
-/// getScalarValue functions, which assert that the desired value exists.
-struct VectorizerValueMap {
- friend struct VPTransformState;
-
-private:
- /// The unroll factor. Each entry in the vector map contains UF vector values.
- unsigned UF;
-
- /// The vectorization factor. Each entry in the scalar map contains UF x VF
- /// scalar values.
- ElementCount VF;
- /// The vector and scalar map storage. We use std::map and not DenseMap
- /// because insertions to DenseMap invalidate its iterators.
- using VectorParts = SmallVector<Value *, 2>;
- using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
- std::map<Value *, VectorParts> VectorMapStorage;
- std::map<Value *, ScalarParts> ScalarMapStorage;
+ /// Indicates how the Lane should be interpreted, as described above.
+ Kind LaneKind;
public:
- /// Construct an empty map with the given unroll and vectorization factors.
- VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}
+ VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
- /// \return True if the map has any vector entry for \p Key.
- bool hasAnyVectorValue(Value *Key) const {
- return VectorMapStorage.count(Key);
- }
+ static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
- /// \return True if the map has a vector entry for \p Key and \p Part.
- bool hasVectorValue(Value *Key, unsigned Part) const {
- assert(Part < UF && "Queried Vector Part is too large.");
- if (!hasAnyVectorValue(Key))
- return false;
- const VectorParts &Entry = VectorMapStorage.find(Key)->second;
- assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
- return Entry[Part] != nullptr;
+ static VPLane getLastLaneForVF(const ElementCount &VF) {
+ unsigned LaneOffset = VF.getKnownMinValue() - 1;
+ Kind LaneKind;
+ if (VF.isScalable())
+ // In this case 'LaneOffset' refers to the offset from the start of the
+ // last subvector with VF.getKnownMinValue() elements.
+ LaneKind = VPLane::Kind::ScalableLast;
+ else
+ LaneKind = VPLane::Kind::First;
+ return VPLane(LaneOffset, LaneKind);
}
- /// \return True if the map has any scalar entry for \p Key.
- bool hasAnyScalarValue(Value *Key) const {
- return ScalarMapStorage.count(Key);
+ /// Returns a compile-time known value for the lane index and asserts if the
+ /// lane can only be calculated at runtime.
+ unsigned getKnownLane() const {
+ assert(LaneKind == Kind::First);
+ return Lane;
}
- /// \return True if the map has a scalar entry for \p Key and \p Instance.
- bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
- assert(Instance.Part < UF && "Queried Scalar Part is too large.");
- assert(Instance.Lane < VF.getKnownMinValue() &&
- "Queried Scalar Lane is too large.");
+ /// Returns an expression describing the lane index that can be used at
+ /// runtime.
+ Value *getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const;
- if (!hasAnyScalarValue(Key))
- return false;
- const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
- assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
- assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
- "ScalarParts has wrong dimensions.");
- return Entry[Instance.Part][Instance.Lane] != nullptr;
- }
-
- /// Retrieve the existing vector value that corresponds to \p Key and
- /// \p Part.
- Value *getVectorValue(Value *Key, unsigned Part) {
- assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
- return VectorMapStorage[Key][Part];
- }
-
- /// Retrieve the existing scalar value that corresponds to \p Key and
- /// \p Instance.
- Value *getScalarValue(Value *Key, const VPIteration &Instance) {
- assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
- return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
- }
-
- /// Set a vector value associated with \p Key and \p Part. Assumes such a
- /// value is not already set. If it is, use resetVectorValue() instead.
- void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
- assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
- if (!VectorMapStorage.count(Key)) {
- VectorParts Entry(UF);
- VectorMapStorage[Key] = Entry;
- }
- VectorMapStorage[Key][Part] = Vector;
- }
-
- /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
- /// value is not already set.
- void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
- assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
- if (!ScalarMapStorage.count(Key)) {
- ScalarParts Entry(UF);
- // TODO: Consider storing uniform values only per-part, as they occupy
- // lane 0 only, keeping the other VF-1 redundant entries null.
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part].resize(VF.getKnownMinValue(), nullptr);
- ScalarMapStorage[Key] = Entry;
- }
- ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
- }
+ /// Returns the Kind of lane offset.
+ Kind getKind() const { return LaneKind; }
- /// Reset the vector value associated with \p Key for the given \p Part.
- /// This function can be used to update values that have already been
- /// vectorized. This is the case for "fix-up" operations including type
- /// truncation and the second phase of recurrence vectorization.
- void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
- assert(hasVectorValue(Key, Part) && "Vector value not set for part");
- VectorMapStorage[Key][Part] = Vector;
+ /// Returns true if this is the first lane of the whole vector.
+ bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
+
+ /// Maps the lane to a cache index based on \p VF.
+ unsigned mapToCacheIndex(const ElementCount &VF) const {
+ switch (LaneKind) {
+ case VPLane::Kind::ScalableLast:
+ assert(VF.isScalable() && Lane < VF.getKnownMinValue());
+ return VF.getKnownMinValue() + Lane;
+ default:
+ assert(Lane < VF.getKnownMinValue());
+ return Lane;
+ }
}
- /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
- /// This function can be used to update values that have already been
- /// scalarized. This is the case for "fix-up" operations including scalar phi
- /// nodes for scalarized and predicated instructions.
- void resetScalarValue(Value *Key, const VPIteration &Instance,
- Value *Scalar) {
- assert(hasScalarValue(Key, Instance) &&
- "Scalar value not set for part and lane");
- ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
+ /// Returns the maxmimum number of lanes that we are able to consider
+ /// caching for \p VF.
+ static unsigned getNumCachedLanes(const ElementCount &VF) {
+ return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
}
};
-/// This class is used to enable the VPlan to invoke a method of ILV. This is
-/// needed until the method is refactored out of ILV and becomes reusable.
-struct VPCallback {
- virtual ~VPCallback() {}
- virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
- virtual Value *getOrCreateScalarValue(Value *V,
- const VPIteration &Instance) = 0;
+/// VPIteration represents a single point in the iteration space of the output
+/// (vectorized and/or unrolled) IR loop.
+struct VPIteration {
+ /// in [0..UF)
+ unsigned Part;
+
+ VPLane Lane;
+
+ VPIteration(unsigned Part, unsigned Lane,
+ VPLane::Kind Kind = VPLane::Kind::First)
+ : Part(Part), Lane(Lane, Kind) {}
+
+ VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
+
+ bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
- VPTransformState(ElementCount VF, unsigned UF, Loop *OrigLoop, LoopInfo *LI,
+ VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
DominatorTree *DT, IRBuilder<> &Builder,
- VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
- VPCallback &Callback)
- : VF(VF), UF(UF), Instance(), OrigLoop(OrigLoop), LI(LI), DT(DT),
- Builder(Builder), ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
+ InnerLoopVectorizer *ILV, VPlan *Plan)
+ : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV),
+ Plan(Plan) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
ElementCount VF;
@@ -279,13 +224,7 @@ struct VPTransformState {
/// method will delegate the call to ILV in such cases in order to provide
/// callers a consistent API.
/// \see set.
- Value *get(VPValue *Def, unsigned Part) {
- // If Values have been set for this Def return the one relevant for \p Part.
- if (Data.PerPartOutput.count(Def))
- return Data.PerPartOutput[Def][Part];
- // Def is managed by ILV: bring the Values from ValueMap.
- return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
- }
+ Value *get(VPValue *Def, unsigned Part);
/// Get the generated Value for a given VPValue and given Part and Lane.
Value *get(VPValue *Def, const VPIteration &Instance);
@@ -296,13 +235,18 @@ struct VPTransformState {
I->second[Part];
}
+ bool hasAnyVectorValue(VPValue *Def) const {
+ return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
+ }
+
bool hasScalarValue(VPValue *Def, VPIteration Instance) {
auto I = Data.PerPartScalars.find(Def);
if (I == Data.PerPartScalars.end())
return false;
+ unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
return Instance.Part < I->second.size() &&
- Instance.Lane < I->second[Instance.Part].size() &&
- I->second[Instance.Part][Instance.Lane];
+ CacheIdx < I->second[Instance.Part].size() &&
+ I->second[Instance.Part][CacheIdx];
}
/// Set the generated Value for a given VPValue and a given Part.
@@ -313,17 +257,39 @@ struct VPTransformState {
}
Data.PerPartOutput[Def][Part] = V;
}
- void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
+ /// Reset an existing vector value for \p Def and a given \p Part.
+ void reset(VPValue *Def, Value *V, unsigned Part) {
+ auto Iter = Data.PerPartOutput.find(Def);
+ assert(Iter != Data.PerPartOutput.end() &&
+ "need to overwrite existing value");
+ Iter->second[Part] = V;
+ }
+ /// Set the generated scalar \p V for \p Def and the given \p Instance.
void set(VPValue *Def, Value *V, const VPIteration &Instance) {
auto Iter = Data.PerPartScalars.insert({Def, {}});
auto &PerPartVec = Iter.first->second;
while (PerPartVec.size() <= Instance.Part)
PerPartVec.emplace_back();
auto &Scalars = PerPartVec[Instance.Part];
- while (Scalars.size() <= Instance.Lane)
+ unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+ while (Scalars.size() <= CacheIdx)
Scalars.push_back(nullptr);
- Scalars[Instance.Lane] = V;
+ assert(!Scalars[CacheIdx] && "should overwrite existing value");
+ Scalars[CacheIdx] = V;
+ }
+
+ /// Reset an existing scalar value for \p Def and a given \p Instance.
+ void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
+ auto Iter = Data.PerPartScalars.find(Def);
+ assert(Iter != Data.PerPartScalars.end() &&
+ "need to overwrite existing value");
+ assert(Instance.Part < Iter->second.size() &&
+ "need to overwrite existing value");
+ unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+ assert(CacheIdx < Iter->second[Instance.Part].size() &&
+ "need to overwrite existing value");
+ Iter->second[Instance.Part][CacheIdx] = V;
}
/// Hold state information used when constructing the CFG of the output IR,
@@ -340,6 +306,13 @@ struct VPTransformState {
/// BasicBlock, used for placing the newly created BasicBlocks.
BasicBlock *LastBB = nullptr;
+ /// The IR BasicBlock that is the preheader of the vector loop in the output
+ /// IR.
+ /// FIXME: The vector preheader should also be modeled in VPlan, so any code
+ /// that needs to be added to the preheader gets directly generated by
+ /// VPlan. There should be no need to manage a pointer to the IR BasicBlock.
+ BasicBlock *VectorPreHeader = nullptr;
+
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
@@ -351,9 +324,6 @@ struct VPTransformState {
CFGState() = default;
} CFG;
- /// Hold a pointer to the original loop.
- Loop *OrigLoop;
-
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
LoopInfo *LI;
@@ -363,12 +333,6 @@ struct VPTransformState {
/// Hold a reference to the IRBuilder used to generate output IR code.
IRBuilder<> &Builder;
- /// Hold a reference to the Value state information used when generating the
- /// Values of the output IR.
- VectorizerValueMap &ValueMap;
-
- /// Hold a reference to a mapping between VPValues in VPlan and original
- /// Values they correspond to.
VPValue2ValueTy VPValue2Value;
/// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
@@ -380,7 +344,43 @@ struct VPTransformState {
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
- VPCallback &Callback;
+ /// Pointer to the VPlan code is generated for.
+ VPlan *Plan;
+};
+
+/// VPUsers instance used by VPBlockBase to manage CondBit and the block
+/// predicate. Currently VPBlockUsers are used in VPBlockBase for historical
+/// reasons, but in the future the only VPUsers should either be recipes or
+/// live-outs.VPBlockBase uses.
+struct VPBlockUser : public VPUser {
+ VPBlockUser() : VPUser({}, VPUserID::Block) {}
+
+ VPValue *getSingleOperandOrNull() {
+ if (getNumOperands() == 1)
+ return getOperand(0);
+
+ return nullptr;
+ }
+ const VPValue *getSingleOperandOrNull() const {
+ if (getNumOperands() == 1)
+ return getOperand(0);
+
+ return nullptr;
+ }
+
+ void resetSingleOpUser(VPValue *NewVal) {
+ assert(getNumOperands() <= 1 && "Didn't expect more than one operand!");
+ if (!NewVal) {
+ if (getNumOperands() == 1)
+ removeLastOperand();
+ return;
+ }
+
+ if (getNumOperands() == 1)
+ setOperand(0, NewVal);
+ else
+ addOperand(NewVal);
+ }
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
@@ -403,11 +403,15 @@ class VPBlockBase {
/// List of successor blocks.
SmallVector<VPBlockBase *, 1> Successors;
- /// Successor selector, null for zero or single successor blocks.
- VPValue *CondBit = nullptr;
+ /// Successor selector managed by a VPUser. For blocks with zero or one
+ /// successors, there is no operand. Otherwise there is exactly one operand
+ /// which is the branch condition.
+ VPBlockUser CondBitUser;
- /// Current block predicate - null if the block does not need a predicate.
- VPValue *Predicate = nullptr;
+ /// If the block is predicated, its predicate is stored as an operand of this
+ /// VPUser to maintain the def-use relations. Otherwise there is no operand
+ /// here.
+ VPBlockUser PredicateUser;
/// VPlan containing the block. Can only be set on the entry block of the
/// plan.
@@ -553,17 +557,18 @@ public:
}
/// \return the condition bit selecting the successor.
- VPValue *getCondBit() { return CondBit; }
-
- const VPValue *getCondBit() const { return CondBit; }
-
- void setCondBit(VPValue *CV) { CondBit = CV; }
-
- VPValue *getPredicate() { return Predicate; }
-
- const VPValue *getPredicate() const { return Predicate; }
+ VPValue *getCondBit();
+ /// \return the condition bit selecting the successor.
+ const VPValue *getCondBit() const;
+ /// Set the condition bit selecting the successor.
+ void setCondBit(VPValue *CV);
- void setPredicate(VPValue *Pred) { Predicate = Pred; }
+ /// \return the block's predicate.
+ VPValue *getPredicate();
+ /// \return the block's predicate.
+ const VPValue *getPredicate() const;
+ /// Set the block's predicate.
+ void setPredicate(VPValue *Pred);
/// Set a given VPBlockBase \p Successor as the single successor of this
/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
@@ -581,7 +586,7 @@ public:
VPValue *Condition) {
assert(Successors.empty() && "Setting two successors when others exist.");
assert(Condition && "Setting two successors without condition!");
- CondBit = Condition;
+ setCondBit(Condition);
appendSuccessor(IfTrue);
appendSuccessor(IfFalse);
}
@@ -601,7 +606,7 @@ public:
/// Remove all the successors of this block and set to null its condition bit
void clearSuccessors() {
Successors.clear();
- CondBit = nullptr;
+ setCondBit(nullptr);
}
/// The method which generates the output IR that correspond to this
@@ -611,16 +616,6 @@ public:
/// Delete all blocks reachable from a given VPBlockBase, inclusive.
static void deleteCFG(VPBlockBase *Entry);
- void printAsOperand(raw_ostream &OS, bool PrintType) const {
- OS << getName();
- }
-
- void print(raw_ostream &OS) const {
- // TODO: Only printing VPBB name for now since we only have dot printing
- // support for VPInstructions/Recipes.
- printAsOperand(OS, false);
- }
-
/// Return true if it is legal to hoist instructions into this block.
bool isLegalToHoistInto() {
// There are currently no constraints that prevent an instruction to be
@@ -631,6 +626,34 @@ public:
/// Replace all operands of VPUsers in the block with \p NewValue and also
/// replaces all uses of VPValues defined in the block with NewValue.
virtual void dropAllReferences(VPValue *NewValue) = 0;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void printAsOperand(raw_ostream &OS, bool PrintType) const {
+ OS << getName();
+ }
+
+ /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
+ /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
+ /// consequtive numbers.
+ ///
+ /// Note that the numbering is applied to the whole VPlan, so printing
+ /// individual blocks is consistent with the whole VPlan printing.
+ virtual void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const = 0;
+
+ /// Print plain-text dump of this VPlan to \p O.
+ void print(raw_ostream &O) const {
+ VPSlotTracker SlotTracker(getPlan());
+ print(O, "", SlotTracker);
+ }
+
+ /// Print the successors of this block to \p O, prefixing all lines with \p
+ /// Indent.
+ void printSuccessors(raw_ostream &O, const Twine &Indent) const;
+
+ /// Dump this VPBlockBase to dbgs().
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
@@ -639,16 +662,21 @@ public:
/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
/// VPRecipeBase before VPValue.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
- public VPDef {
+ public VPDef,
+ public VPUser {
friend VPBasicBlock;
friend class VPBlockUtils;
-
/// Each VPRecipe belongs to a single VPBasicBlock.
VPBasicBlock *Parent = nullptr;
public:
- VPRecipeBase(const unsigned char SC) : VPDef(SC) {}
+ VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands)
+ : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
+
+ template <typename IterT>
+ VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands)
+ : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {}
virtual ~VPRecipeBase() = default;
/// \return the VPBasicBlock which this VPRecipe belongs to.
@@ -685,17 +713,13 @@ public:
/// \returns an iterator pointing to the element after the erased one
iplist<VPRecipeBase>::iterator eraseFromParent();
- /// Returns a pointer to a VPUser, if the recipe inherits from VPUser or
- /// nullptr otherwise.
- VPUser *toVPUser();
-
/// Returns the underlying instruction, if the recipe is a VPValue or nullptr
/// otherwise.
Instruction *getUnderlyingInstr() {
- return cast<Instruction>(getVPValue()->getUnderlyingValue());
+ return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
}
const Instruction *getUnderlyingInstr() const {
- return cast<Instruction>(getVPValue()->getUnderlyingValue());
+ return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -703,6 +727,29 @@ public:
// All VPDefs are also VPRecipeBases.
return true;
}
+
+ static inline bool classof(const VPUser *U) {
+ return U->getVPUserID() == VPUser::VPUserID::Recipe;
+ }
+
+ /// Returns true if the recipe may have side-effects.
+ bool mayHaveSideEffects() const;
+
+ /// Returns true for PHI-like recipes.
+ bool isPhi() const {
+ return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
+ }
+
+ /// Returns true if the recipe may read from memory.
+ bool mayReadFromMemory() const;
+
+ /// Returns true if the recipe may write to memory.
+ bool mayWriteToMemory() const;
+
+ /// Returns true if the recipe may read from or write to memory.
+ bool mayReadOrWriteMemory() const {
+ return mayReadFromMemory() || mayWriteToMemory();
+ }
};
inline bool VPUser::classof(const VPDef *Def) {
@@ -723,13 +770,16 @@ inline bool VPUser::classof(const VPDef *Def) {
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
-class VPInstruction : public VPRecipeBase, public VPUser, public VPValue {
+class VPInstruction : public VPRecipeBase, public VPValue {
friend class VPlanSlp;
public:
/// VPlan opcodes, extending LLVM IR with idiomatics instructions.
enum {
- Not = Instruction::OtherOpsEnd + 1,
+ FirstOrderRecurrenceSplice =
+ Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
+ // values of a first-order recurrence.
+ Not,
ICmpULE,
SLPLoad,
SLPStore,
@@ -749,14 +799,14 @@ protected:
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
- : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser(Operands),
+ : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}
VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands)
- : VPRecipeBase(VPRecipeBase::VPInstructionSC), VPUser({}),
+ : VPRecipeBase(VPRecipeBase::VPInstructionSC, {}),
VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {
for (auto *I : Operands)
- addOperand(I->getVPValue());
+ addOperand(I->getVPSingleValue());
}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
@@ -784,12 +834,14 @@ public:
/// provided.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the VPInstruction to \p O.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
/// Print the VPInstruction to dbgs() (for debugging).
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
+#endif
/// Return true if this instruction may modify memory.
bool mayWriteToMemory() const {
@@ -823,12 +875,12 @@ public:
/// VPWidenRecipe is a recipe for producing a copy of vector type its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
-class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser {
+class VPWidenRecipe : public VPRecipeBase, public VPValue {
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeBase(VPRecipeBase::VPWidenSC),
- VPValue(VPValue::VPVWidenSC, &I, this), VPUser(Operands) {}
+ : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands),
+ VPValue(VPValue::VPVWidenSC, &I, this) {}
~VPWidenRecipe() override = default;
@@ -843,18 +895,20 @@ public:
/// Produce widened copies of all Ingredients.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// A recipe for widening Call instructions.
-class VPWidenCallRecipe : public VPRecipeBase, public VPUser, public VPValue {
+class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
public:
template <typename IterT>
VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
- : VPRecipeBase(VPRecipeBase::VPWidenCallSC), VPUser(CallArguments),
+ : VPRecipeBase(VPRecipeBase::VPWidenCallSC, CallArguments),
VPValue(VPValue::VPVWidenCallSC, &I, this) {}
~VPWidenCallRecipe() override = default;
@@ -867,13 +921,15 @@ public:
/// Produce a widened version of the call instruction.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// A recipe for widening select instructions.
-class VPWidenSelectRecipe : public VPRecipeBase, public VPUser, public VPValue {
+class VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
/// Is the condition of the select loop invariant?
bool InvariantCond;
@@ -882,7 +938,7 @@ public:
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
bool InvariantCond)
- : VPRecipeBase(VPRecipeBase::VPWidenSelectSC), VPUser(Operands),
+ : VPRecipeBase(VPRecipeBase::VPWidenSelectSC, Operands),
VPValue(VPValue::VPVWidenSelectSC, &I, this),
InvariantCond(InvariantCond) {}
@@ -896,29 +952,29 @@ public:
/// Produce a widened version of the select instruction.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// A recipe for handling GEP instructions.
-class VPWidenGEPRecipe : public VPRecipeBase,
- public VPUser,
- public VPValue {
+class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
bool IsPtrLoopInvariant;
SmallBitVector IsIndexLoopInvariant;
public:
template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
- : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPUser(Operands),
+ : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands),
VPValue(VPWidenGEPSC, GEP, this),
IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
Loop *OrigLoop)
- : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPUser(Operands),
+ : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands),
VPValue(VPValue::VPVWidenGEPSC, GEP, this),
IsIndexLoopInvariant(GEP->getNumIndices(), false) {
IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
@@ -936,26 +992,29 @@ public:
/// Generate the gep nodes.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
-class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPUser {
+class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
PHINode *IV;
- TruncInst *Trunc;
public:
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, Instruction *Cast,
TruncInst *Trunc = nullptr)
- : VPRecipeBase(VPWidenIntOrFpInductionSC), VPUser({Start}), IV(IV),
- Trunc(Trunc) {
+ : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), IV(IV) {
if (Trunc)
new VPValue(Trunc, this);
else
new VPValue(IV, this);
+
+ if (Cast)
+ new VPValue(Cast, this);
}
~VPWidenIntOrFpInductionRecipe() override = default;
@@ -968,59 +1027,199 @@ public:
/// needed by their users.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
/// Returns the start value of the induction.
VPValue *getStartValue() { return getOperand(0); }
+
+ /// Returns the cast VPValue, if one is attached, or nullptr otherwise.
+ VPValue *getCastValue() {
+ if (getNumDefinedValues() != 2)
+ return nullptr;
+ return getVPValue(1);
+ }
+
+ /// Returns the first defined value as TruncInst, if it is one or nullptr
+ /// otherwise.
+ TruncInst *getTruncInst() {
+ return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
+ }
+ const TruncInst *getTruncInst() const {
+ return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
+ }
};
-/// A recipe for handling all phi nodes except for integer and FP inductions.
-/// For reduction PHIs, RdxDesc must point to the corresponding recurrence
-/// descriptor and the start value is the first operand of the recipe.
-class VPWidenPHIRecipe : public VPRecipeBase, public VPUser {
- PHINode *Phi;
+/// A recipe for handling first order recurrences and pointer inductions. For
+/// first-order recurrences, the start value is the first operand of the recipe
+/// and the incoming value from the backedge is the second operand. It also
+/// serves as base class for VPReductionPHIRecipe. In the VPlan native path, all
+/// incoming VPValues & VPBasicBlock pairs are managed in the recipe directly.
+class VPWidenPHIRecipe : public VPRecipeBase, public VPValue {
+ /// List of incoming blocks. Only used in the VPlan native path.
+ SmallVector<VPBasicBlock *, 2> IncomingBlocks;
- /// Descriptor for a reduction PHI.
- RecurrenceDescriptor *RdxDesc = nullptr;
+protected:
+ VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi,
+ VPValue *Start = nullptr)
+ : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) {
+ if (Start)
+ addOperand(Start);
+ }
public:
- /// Create a new VPWidenPHIRecipe for the reduction \p Phi described by \p
- /// RdxDesc.
- VPWidenPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc, VPValue &Start)
- : VPWidenPHIRecipe(Phi) {
- this->RdxDesc = &RdxDesc;
+ /// Create a VPWidenPHIRecipe for \p Phi
+ VPWidenPHIRecipe(PHINode *Phi)
+ : VPWidenPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) {}
+
+ /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
+ VPWidenPHIRecipe(PHINode *Phi, VPValue &Start) : VPWidenPHIRecipe(Phi) {
addOperand(&Start);
}
- /// Create a VPWidenPHIRecipe for \p Phi
- VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {
- new VPValue(Phi, this);
- }
~VPWidenPHIRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const VPDef *D) {
- return D->getVPDefID() == VPRecipeBase::VPWidenPHISC;
+ static inline bool classof(const VPRecipeBase *B) {
+ return B->getVPDefID() == VPRecipeBase::VPWidenPHISC ||
+ B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC ||
+ B->getVPDefID() == VPRecipeBase::VPReductionPHISC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVWidenPHISC ||
+ V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC ||
+ V->getVPValueID() == VPValue::VPVReductionPHISC;
}
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
- /// Returns the start value of the phi, if it is a reduction.
+ /// Returns the start value of the phi, if it is a reduction or first-order
+ /// recurrence.
VPValue *getStartValue() {
return getNumOperands() == 0 ? nullptr : getOperand(0);
}
+
+ /// Returns the incoming value from the loop backedge, if it is a reduction or
+ /// first-order recurrence.
+ VPValue *getBackedgeValue() {
+ return getOperand(1);
+ }
+
+ /// Returns the backedge value as a recipe. The backedge value is guaranteed
+ /// to be a recipe.
+ VPRecipeBase *getBackedgeRecipe() {
+ return cast<VPRecipeBase>(getBackedgeValue()->getDef());
+ }
+
+ /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
+ void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
+ addOperand(IncomingV);
+ IncomingBlocks.push_back(IncomingBlock);
+ }
+
+ /// Returns the \p I th incoming VPValue.
+ VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
+
+ /// Returns the \p I th incoming VPBasicBlock.
+ VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
+};
+
+/// A recipe for handling first-order recurrence phis. The start value is the
+/// first operand of the recipe and the incoming value from the backedge is the
+/// second operand.
+struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe {
+ VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
+ : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC,
+ VPFirstOrderRecurrencePHISC, Phi, &Start) {}
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
+ }
+ static inline bool classof(const VPWidenPHIRecipe *D) {
+ return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC;
+ }
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe for handling reduction phis. The start value is the first operand
+/// of the recipe and the incoming value from the backedge is the second
+/// operand.
+class VPReductionPHIRecipe : public VPWidenPHIRecipe {
+ /// Descriptor for the reduction.
+ RecurrenceDescriptor &RdxDesc;
+
+ /// The phi is part of an in-loop reduction.
+ bool IsInLoop;
+
+ /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
+ bool IsOrdered;
+
+public:
+ /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
+ /// RdxDesc.
+ VPReductionPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc,
+ VPValue &Start, bool IsInLoop = false,
+ bool IsOrdered = false)
+ : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start),
+ RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
+ assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
+ }
+
+ ~VPReductionPHIRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVReductionPHISC;
+ }
+ static inline bool classof(const VPWidenPHIRecipe *R) {
+ return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
+ }
+
+ /// Generate the phi/select nodes.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ RecurrenceDescriptor &getRecurrenceDescriptor() { return RdxDesc; }
+
+ /// Returns true, if the phi is part of an ordered reduction.
+ bool isOrdered() const { return IsOrdered; }
+
+ /// Returns true, if the phi is part of an in-loop reduction.
+ bool isInLoop() const { return IsInLoop; }
};
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
-class VPBlendRecipe : public VPRecipeBase, public VPUser {
+class VPBlendRecipe : public VPRecipeBase, public VPValue {
PHINode *Phi;
public:
@@ -1028,8 +1227,8 @@ public:
/// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
/// might be incoming with a full mask for which there is no VPValue.
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
- : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) {
- new VPValue(Phi, this);
+ : VPRecipeBase(VPBlendSC, Operands),
+ VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) {
assert(Operands.size() > 0 &&
((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
"Expected either a single incoming value or a positive even number "
@@ -1054,16 +1253,18 @@ public:
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
/// by an optional mask.
-class VPInterleaveRecipe : public VPRecipeBase, public VPUser {
+class VPInterleaveRecipe : public VPRecipeBase {
const InterleaveGroup<Instruction> *IG;
bool HasMask = false;
@@ -1071,7 +1272,7 @@ class VPInterleaveRecipe : public VPRecipeBase, public VPUser {
public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
ArrayRef<VPValue *> StoredValues, VPValue *Mask)
- : VPRecipeBase(VPInterleaveSC), VPUser(Addr), IG(IG) {
+ : VPRecipeBase(VPInterleaveSC, {Addr}), IG(IG) {
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *I = IG->getMember(i)) {
if (I->getType()->isVoidTy())
@@ -1117,9 +1318,11 @@ public:
/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
};
@@ -1127,21 +1330,18 @@ public:
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
-class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue {
+class VPReductionRecipe : public VPRecipeBase, public VPValue {
/// The recurrence decriptor for the reduction in question.
RecurrenceDescriptor *RdxDesc;
- /// Fast math flags to use for the resulting reduction operation.
- bool NoNaN;
/// Pointer to the TTI, needed to create the target reduction
const TargetTransformInfo *TTI;
public:
VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
- VPValue *VecOp, VPValue *CondOp, bool NoNaN,
+ VPValue *VecOp, VPValue *CondOp,
const TargetTransformInfo *TTI)
- : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}),
- VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN),
- TTI(TTI) {
+ : VPRecipeBase(VPRecipeBase::VPReductionSC, {ChainOp, VecOp}),
+ VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) {
if (CondOp)
addOperand(CondOp);
}
@@ -1153,16 +1353,14 @@ public:
return V->getVPValueID() == VPValue::VPVReductionSC;
}
- static inline bool classof(const VPDef *D) {
- return D->getVPDefID() == VPRecipeBase::VPReductionSC;
- }
-
/// Generate the reduction in the loop
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
/// The VPValue of the scalar Chain being accumulated.
VPValue *getChainOp() const { return getOperand(0); }
@@ -1178,7 +1376,7 @@ public:
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
-class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue {
+class VPReplicateRecipe : public VPRecipeBase, public VPValue {
/// Indicator if only a single replica per lane is needed.
bool IsUniform;
@@ -1192,9 +1390,8 @@ public:
template <typename IterT>
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
bool IsUniform, bool IsPredicated = false)
- : VPRecipeBase(VPReplicateSC), VPUser(Operands),
- VPValue(VPVReplicateSC, I, this), IsUniform(IsUniform),
- IsPredicated(IsPredicated) {
+ : VPRecipeBase(VPReplicateSC, Operands), VPValue(VPVReplicateSC, I, this),
+ IsUniform(IsUniform), IsPredicated(IsPredicated) {
// Retain the previous behavior of predicateInstructions(), where an
// insert-element of a predicated instruction got hoisted into the
// predicated basic block iff it was its only user. This is achieved by
@@ -1221,17 +1418,24 @@ public:
void setAlsoPack(bool Pack) { AlsoPack = Pack; }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
bool isUniform() const { return IsUniform; }
+
+ bool isPacked() const { return AlsoPack; }
+
+ bool isPredicated() const { return IsPredicated; }
};
/// A recipe for generating conditional branches on the bits of a mask.
-class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser {
+class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
- VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
+ VPBranchOnMaskRecipe(VPValue *BlockInMask)
+ : VPRecipeBase(VPBranchOnMaskSC, {}) {
if (BlockInMask) // nullptr means all-one mask.
addOperand(BlockInMask);
}
@@ -1245,16 +1449,17 @@ public:
/// conditional branch.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override {
- O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
+ O << Indent << "BRANCH-ON-MASK ";
if (VPValue *Mask = getMask())
Mask->printAsOperand(O, SlotTracker);
else
O << " All-One";
- O << "\\l\"";
}
+#endif
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
@@ -1270,15 +1475,13 @@ public:
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
-class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser {
-
+class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
public:
/// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
/// nodes after merging back from a Branch-on-Mask.
VPPredInstPHIRecipe(VPValue *PredV)
- : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) {
- new VPValue(PredV->getUnderlyingValue(), this);
- }
+ : VPRecipeBase(VPPredInstPHISC, PredV),
+ VPValue(VPValue::VPVPredInstPHI, nullptr, this) {}
~VPPredInstPHIRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1289,9 +1492,11 @@ public:
/// Generates phi nodes for live-outs as needed to retain SSA form.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// A Recipe for widening load/store operations.
@@ -1300,8 +1505,7 @@ public:
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase,
- public VPUser {
+class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
Instruction &Ingredient;
void setMask(VPValue *Mask) {
@@ -1316,15 +1520,14 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase,
public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr}),
- Ingredient(Load) {
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load) {
new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
VPValue *StoredValue, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr, StoredValue}),
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
Ingredient(Store) {
setMask(Mask);
}
@@ -1358,15 +1561,17 @@ public:
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase {
public:
- VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {
+ VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC, {}) {
new VPValue(nullptr, this);
}
@@ -1382,14 +1587,16 @@ public:
/// step = <VF*UF, VF*UF, ..., VF*UF>.
void execute(VPTransformState &State) override;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
+#endif
};
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
-/// output IR instructions.
+/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
class VPBasicBlock : public VPBlockBase {
public:
using RecipeListTy = iplist<VPRecipeBase>;
@@ -1405,7 +1612,10 @@ public:
appendRecipe(Recipe);
}
- ~VPBasicBlock() override { Recipes.clear(); }
+ ~VPBasicBlock() override {
+ while (!Recipes.empty())
+ Recipes.pop_back();
+ }
/// Instruction iterators...
using iterator = RecipeListTy::iterator;
@@ -1464,8 +1674,29 @@ public:
/// Return the position of the first non-phi node recipe in the block.
iterator getFirstNonPhi();
+ /// Returns an iterator range over the PHI-like recipes in the block.
+ iterator_range<iterator> phis() {
+ return make_range(begin(), getFirstNonPhi());
+ }
+
void dropAllReferences(VPValue *NewValue) override;
+ /// Split current block at \p SplitAt by inserting a new block between the
+ /// current block and its successors and moving all recipes starting at
+ /// SplitAt to the new block. Returns the new block.
+ VPBasicBlock *splitAt(iterator SplitAt);
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
+ /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
+ ///
+ /// Note that the numbering is applied to the whole VPlan, so printing
+ /// individual blocks is consistent with the whole VPlan printing.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+ using VPBlockBase::print; // Get the print(raw_stream &O) version.
+#endif
+
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
@@ -1557,6 +1788,18 @@ public:
void execute(struct VPTransformState *State) override;
void dropAllReferences(VPValue *NewValue) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
+ /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
+ /// consequtive numbers.
+ ///
+ /// Note that the numbering is applied to the whole VPlan, so printing
+ /// individual regions is consistent with the whole VPlan printing.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+ using VPBlockBase::print; // Get the print(raw_stream &O) version.
+#endif
};
//===----------------------------------------------------------------------===//
@@ -1681,6 +1924,136 @@ struct GraphTraits<Inverse<VPRegionBlock *>>
}
};
+/// Iterator to traverse all successors of a VPBlockBase node. This includes the
+/// entry node of VPRegionBlocks. Exit blocks of a region implicitly have their
+/// parent region's successors. This ensures all blocks in a region are visited
+/// before any blocks in a successor region when doing a reverse post-order
+// traversal of the graph.
+template <typename BlockPtrTy>
+class VPAllSuccessorsIterator
+ : public iterator_facade_base<VPAllSuccessorsIterator<BlockPtrTy>,
+ std::forward_iterator_tag, VPBlockBase> {
+ BlockPtrTy Block;
+ /// Index of the current successor. For VPBasicBlock nodes, this simply is the
+ /// index for the successor array. For VPRegionBlock, SuccessorIdx == 0 is
+ /// used for the region's entry block, and SuccessorIdx - 1 are the indices
+ /// for the successor array.
+ size_t SuccessorIdx;
+
+ static BlockPtrTy getBlockWithSuccs(BlockPtrTy Current) {
+ while (Current && Current->getNumSuccessors() == 0)
+ Current = Current->getParent();
+ return Current;
+ }
+
+ /// Templated helper to dereference successor \p SuccIdx of \p Block. Used by
+ /// both the const and non-const operator* implementations.
+ template <typename T1> static T1 deref(T1 Block, unsigned SuccIdx) {
+ if (auto *R = dyn_cast<VPRegionBlock>(Block)) {
+ if (SuccIdx == 0)
+ return R->getEntry();
+ SuccIdx--;
+ }
+
+ // For exit blocks, use the next parent region with successors.
+ return getBlockWithSuccs(Block)->getSuccessors()[SuccIdx];
+ }
+
+public:
+ VPAllSuccessorsIterator(BlockPtrTy Block, size_t Idx = 0)
+ : Block(Block), SuccessorIdx(Idx) {}
+ VPAllSuccessorsIterator(const VPAllSuccessorsIterator &Other)
+ : Block(Other.Block), SuccessorIdx(Other.SuccessorIdx) {}
+
+ VPAllSuccessorsIterator &operator=(const VPAllSuccessorsIterator &R) {
+ Block = R.Block;
+ SuccessorIdx = R.SuccessorIdx;
+ return *this;
+ }
+
+ static VPAllSuccessorsIterator end(BlockPtrTy Block) {
+ BlockPtrTy ParentWithSuccs = getBlockWithSuccs(Block);
+ unsigned NumSuccessors = ParentWithSuccs
+ ? ParentWithSuccs->getNumSuccessors()
+ : Block->getNumSuccessors();
+
+ if (auto *R = dyn_cast<VPRegionBlock>(Block))
+ return {R, NumSuccessors + 1};
+ return {Block, NumSuccessors};
+ }
+
+ bool operator==(const VPAllSuccessorsIterator &R) const {
+ return Block == R.Block && SuccessorIdx == R.SuccessorIdx;
+ }
+
+ const VPBlockBase *operator*() const { return deref(Block, SuccessorIdx); }
+
+ BlockPtrTy operator*() { return deref(Block, SuccessorIdx); }
+
+ VPAllSuccessorsIterator &operator++() {
+ SuccessorIdx++;
+ return *this;
+ }
+
+ VPAllSuccessorsIterator operator++(int X) {
+ VPAllSuccessorsIterator Orig = *this;
+ SuccessorIdx++;
+ return Orig;
+ }
+};
+
+/// Helper for GraphTraits specialization that traverses through VPRegionBlocks.
+template <typename BlockTy> class VPBlockRecursiveTraversalWrapper {
+ BlockTy Entry;
+
+public:
+ VPBlockRecursiveTraversalWrapper(BlockTy Entry) : Entry(Entry) {}
+ BlockTy getEntry() { return Entry; }
+};
+
+/// GraphTraits specialization to recursively traverse VPBlockBase nodes,
+/// including traversing through VPRegionBlocks. Exit blocks of a region
+/// implicitly have their parent region's successors. This ensures all blocks in
+/// a region are visited before any blocks in a successor region when doing a
+/// reverse post-order traversal of the graph.
+template <>
+struct GraphTraits<VPBlockRecursiveTraversalWrapper<VPBlockBase *>> {
+ using NodeRef = VPBlockBase *;
+ using ChildIteratorType = VPAllSuccessorsIterator<VPBlockBase *>;
+
+ static NodeRef
+ getEntryNode(VPBlockRecursiveTraversalWrapper<VPBlockBase *> N) {
+ return N.getEntry();
+ }
+
+ static inline ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N);
+ }
+
+ static inline ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType::end(N);
+ }
+};
+
+template <>
+struct GraphTraits<VPBlockRecursiveTraversalWrapper<const VPBlockBase *>> {
+ using NodeRef = const VPBlockBase *;
+ using ChildIteratorType = VPAllSuccessorsIterator<const VPBlockBase *>;
+
+ static NodeRef
+ getEntryNode(VPBlockRecursiveTraversalWrapper<const VPBlockBase *> N) {
+ return N.getEntry();
+ }
+
+ static inline ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N);
+ }
+
+ static inline ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType::end(N);
+ }
+};
+
/// VPlan models a candidate for vectorization, encoding various decisions take
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
@@ -1704,7 +2077,7 @@ class VPlan {
// VPlan. External definitions must be immutable and hold a pointer to its
// underlying IR that will be used to implement its structural comparison
// (operators '==' and '<').
- SmallPtrSet<VPValue *, 16> VPExternalDefs;
+ SetVector<VPValue *> VPExternalDefs;
/// Represents the backedge taken count of the original loop, for folding
/// the tail.
@@ -1721,9 +2094,6 @@ class VPlan {
/// Holds the VPLoopInfo analysis for this VPlan.
VPLoopInfo VPLInfo;
- /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
- SmallVector<VPValue *, 4> VPCBVs;
-
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
if (Entry)
@@ -1744,8 +2114,6 @@ public:
delete BackedgeTakenCount;
for (VPValue *Def : VPExternalDefs)
delete Def;
- for (VPValue *CBV : VPCBVs)
- delete CBV;
}
/// Generate the IR code for this VPlan.
@@ -1777,14 +2145,7 @@ public:
/// Add \p VPVal to the pool of external definitions if it's not already
/// in the pool.
- void addExternalDef(VPValue *VPVal) {
- VPExternalDefs.insert(VPVal);
- }
-
- /// Add \p CBV to the vector of condition bit values.
- void addCBV(VPValue *CBV) {
- VPCBVs.push_back(CBV);
- }
+ void addExternalDef(VPValue *VPVal) { VPExternalDefs.insert(VPVal); }
void addVPValue(Value *V) {
assert(V && "Trying to add a null Value to VPlan");
@@ -1819,8 +2180,16 @@ public:
VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print this VPlan to \p O.
+ void print(raw_ostream &O) const;
+
+ /// Print this VPlan in DOT format to \p O.
+ void printDOT(raw_ostream &O) const;
+
/// Dump the plan to stderr (for debugging).
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
+#endif
/// Returns a range mapping the values the range \p Operands to their
/// corresponding VPValues.
@@ -1840,14 +2209,10 @@ private:
BasicBlock *LoopExitBB);
};
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
- friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
- friend inline raw_ostream &operator<<(raw_ostream &OS,
- const struct VPlanIngredient &I);
-
-private:
raw_ostream &OS;
const VPlan &Plan;
unsigned Depth = 0;
@@ -1858,9 +2223,6 @@ private:
VPSlotTracker SlotTracker;
- VPlanPrinter(raw_ostream &O, const VPlan &P)
- : OS(O), Plan(P), SlotTracker(&P) {}
-
/// Handle indentation.
void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
@@ -1890,27 +2252,31 @@ private:
void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
const Twine &Label);
- void dump();
+public:
+ VPlanPrinter(raw_ostream &O, const VPlan &P)
+ : OS(O), Plan(P), SlotTracker(&P) {}
- static void printAsIngredient(raw_ostream &O, const Value *V);
+ LLVM_DUMP_METHOD void dump();
};
struct VPlanIngredient {
const Value *V;
VPlanIngredient(const Value *V) : V(V) {}
+
+ void print(raw_ostream &O) const;
};
inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
- VPlanPrinter::printAsIngredient(OS, I.V);
+ I.print(OS);
return OS;
}
inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
- VPlanPrinter Printer(OS, Plan);
- Printer.dump();
+ Plan.print(OS);
return OS;
}
+#endif
//===----------------------------------------------------------------------===//
// VPlan Utilities
@@ -2011,6 +2377,26 @@ public:
}
return Count;
}
+
+ /// Return an iterator range over \p Range which only includes \p BlockTy
+ /// blocks. The accesses are casted to \p BlockTy.
+ template <typename BlockTy, typename T>
+ static auto blocksOnly(const T &Range) {
+ // Create BaseTy with correct const-ness based on BlockTy.
+ using BaseTy =
+ typename std::conditional<std::is_const<BlockTy>::value,
+ const VPBlockBase, VPBlockBase>::type;
+
+ // We need to first create an iterator range over (const) BlocktTy & instead
+ // of (const) BlockTy * for filter_range to work properly.
+ auto Mapped =
+ map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
+ auto Filter = make_filter_range(
+ Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
+ return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
+ return cast<BlockTy>(&Block);
+ });
+ }
};
class VPInterleavedAccessInfo {
@@ -2126,8 +2512,10 @@ class VPlanSlp {
SmallPtrSetImpl<VPValue *> &Candidates,
VPInterleavedAccessInfo &IAI);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print bundle \p Values to dbgs().
void dumpBundle(ArrayRef<VPValue *> Values);
+#endif
public:
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}