diff options
Diffstat (limited to 'llvm/include/llvm/Analysis')
-rw-r--r-- | llvm/include/llvm/Analysis/AliasAnalysis.h | 2 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/AliasSetTracker.h | 244 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h | 5 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/DOTGraphTraitsPass.h | 28 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/RegionInfoImpl.h | 33 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 10 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 9 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/VecFuncs.def | 146 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/VectorUtils.h | 187 |
9 files changed, 258 insertions, 406 deletions
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index e1cfb025fb65..d6f732d35fd4 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -154,6 +154,8 @@ struct CaptureInfo { /// Check whether Object is not captured before instruction I. If OrAt is /// true, captures by instruction I itself are also considered. + /// + /// If I is nullptr, then captures at any point will be considered. virtual bool isNotCapturedBefore(const Value *Object, const Instruction *I, bool OrAt) = 0; }; diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 8afe455e2f08..3030cb30864a 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file defines two classes: AliasSetTracker and AliasSet. These interfaces -// are used to classify a collection of pointer references into a maximal number +// are used to classify a collection of memory locations into a maximal number // of disjoint sets. Each AliasSet object constructed by the AliasSetTracker // object refers to memory disjoint from the other sets. // @@ -19,7 +19,7 @@ #define LLVM_ANALYSIS_ALIASSETTRACKER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/Analysis/MemoryLocation.h" @@ -27,8 +27,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include <cassert> -#include <cstddef> -#include <iterator> #include <vector> namespace llvm { @@ -49,99 +47,12 @@ class Value; class AliasSet : public ilist_node<AliasSet> { friend class AliasSetTracker; - class PointerRec { - Value *Val; // The pointer this record corresponds to. - PointerRec **PrevInList = nullptr; - PointerRec *NextInList = nullptr; - AliasSet *AS = nullptr; - LocationSize Size = LocationSize::mapEmpty(); - AAMDNodes AAInfo; - - // Whether the size for this record has been set at all. This makes no - // guarantees about the size being known. - bool isSizeSet() const { return Size != LocationSize::mapEmpty(); } - - public: - PointerRec(Value *V) - : Val(V), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {} - - Value *getValue() const { return Val; } - - PointerRec *getNext() const { return NextInList; } - bool hasAliasSet() const { return AS != nullptr; } - - PointerRec** setPrevInList(PointerRec **PIL) { - PrevInList = PIL; - return &NextInList; - } - - bool updateSizeAndAAInfo(LocationSize NewSize, const AAMDNodes &NewAAInfo) { - bool SizeChanged = false; - if (NewSize != Size) { - LocationSize OldSize = Size; - Size = isSizeSet() ? Size.unionWith(NewSize) : NewSize; - SizeChanged = OldSize != Size; - } - - if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey()) - // We don't have a AAInfo yet. Set it to NewAAInfo. - AAInfo = NewAAInfo; - else { - AAMDNodes Intersection(AAInfo.intersect(NewAAInfo)); - SizeChanged |= Intersection != AAInfo; - AAInfo = Intersection; - } - return SizeChanged; - } - - LocationSize getSize() const { - assert(isSizeSet() && "Getting an unset size!"); - return Size; - } - - /// Return the AAInfo, or null if there is no information or conflicting - /// information. - AAMDNodes getAAInfo() const { - // If we have missing or conflicting AAInfo, return null. - if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey() || - AAInfo == DenseMapInfo<AAMDNodes>::getTombstoneKey()) - return AAMDNodes(); - return AAInfo; - } - - AliasSet *getAliasSet(AliasSetTracker &AST) { - assert(AS && "No AliasSet yet!"); - if (AS->Forward) { - AliasSet *OldAS = AS; - AS = OldAS->getForwardedTarget(AST); - AS->addRef(); - OldAS->dropRef(AST); - } - return AS; - } - - void setAliasSet(AliasSet *as) { - assert(!AS && "Already have an alias set!"); - AS = as; - } - - void eraseFromList() { - if (NextInList) NextInList->PrevInList = PrevInList; - *PrevInList = NextInList; - if (AS->PtrListEnd == &NextInList) { - AS->PtrListEnd = PrevInList; - assert(*AS->PtrListEnd == nullptr && "List not terminated right!"); - } - delete this; - } - }; - - // Doubly linked list of nodes. - PointerRec *PtrList = nullptr; - PointerRec **PtrListEnd; // Forwarding pointer. AliasSet *Forward = nullptr; + /// Memory locations in this alias set. + SmallVector<MemoryLocation, 0> MemoryLocs; + /// All instructions without a specific address in this alias set. std::vector<AssertingVH<Instruction>> UnknownInsts; @@ -178,8 +89,6 @@ class AliasSet : public ilist_node<AliasSet> { }; unsigned Alias : 1; - unsigned SetSize = 0; - void addRef() { ++RefCount; } void dropRef(AliasSetTracker &AST) { @@ -205,95 +114,40 @@ public: /// Merge the specified alias set into this alias set. void mergeSetIn(AliasSet &AS, AliasSetTracker &AST, BatchAAResults &BatchAA); - // Alias Set iteration - Allow access to all of the pointers which are part of - // this alias set. - class iterator; - iterator begin() const { return iterator(PtrList); } - iterator end() const { return iterator(); } - bool empty() const { return PtrList == nullptr; } + // Alias Set iteration - Allow access to all of the memory locations which are + // part of this alias set. + using iterator = SmallVectorImpl<MemoryLocation>::const_iterator; + iterator begin() const { return MemoryLocs.begin(); } + iterator end() const { return MemoryLocs.end(); } - // Unfortunately, ilist::size() is linear, so we have to add code to keep - // track of the list's exact size. - unsigned size() { return SetSize; } + unsigned size() { return MemoryLocs.size(); } + + /// Retrieve the pointer values for the memory locations in this alias set. + /// The order matches that of the memory locations, but duplicate pointer + /// values are omitted. + using PointerVector = SmallVector<const Value *, 8>; + PointerVector getPointers() const; void print(raw_ostream &OS) const; void dump() const; - /// Define an iterator for alias sets... this is just a forward iterator. - class iterator { - PointerRec *CurNode; - - public: - using iterator_category = std::forward_iterator_tag; - using value_type = PointerRec; - using difference_type = std::ptrdiff_t; - using pointer = value_type *; - using reference = value_type &; - - explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} - - bool operator==(const iterator& x) const { - return CurNode == x.CurNode; - } - bool operator!=(const iterator& x) const { return !operator==(x); } - - value_type &operator*() const { - assert(CurNode && "Dereferencing AliasSet.end()!"); - return *CurNode; - } - value_type *operator->() const { return &operator*(); } - - Value *getPointer() const { return CurNode->getValue(); } - LocationSize getSize() const { return CurNode->getSize(); } - AAMDNodes getAAInfo() const { return CurNode->getAAInfo(); } - - iterator& operator++() { // Preincrement - assert(CurNode && "Advancing past AliasSet.end()!"); - CurNode = CurNode->getNext(); - return *this; - } - iterator operator++(int) { // Postincrement - iterator tmp = *this; ++*this; return tmp; - } - }; - private: // Can only be created by AliasSetTracker. AliasSet() - : PtrListEnd(&PtrList), RefCount(0), AliasAny(false), Access(NoAccess), - Alias(SetMustAlias) {} - - PointerRec *getSomePointer() const { - return PtrList; - } - - /// Return the real alias set this represents. If this has been merged with - /// another set and is forwarding, return the ultimate destination set. This - /// also implements the union-find collapsing as well. - AliasSet *getForwardedTarget(AliasSetTracker &AST) { - if (!Forward) return this; - - AliasSet *Dest = Forward->getForwardedTarget(AST); - if (Dest != Forward) { - Dest->addRef(); - Forward->dropRef(AST); - Forward = Dest; - } - return Dest; - } + : RefCount(0), AliasAny(false), Access(NoAccess), Alias(SetMustAlias) {} void removeFromTracker(AliasSetTracker &AST); - void addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size, - const AAMDNodes &AAInfo, bool KnownMustAlias = false, - bool SkipSizeUpdate = false); + void addMemoryLocation(AliasSetTracker &AST, const MemoryLocation &MemLoc, + bool KnownMustAlias = false); void addUnknownInst(Instruction *I, BatchAAResults &AA); public: - /// If the specified pointer "may" (or must) alias one of the members in the - /// set return the appropriate AliasResult. Otherwise return NoAlias. - AliasResult aliasesPointer(const Value *Ptr, LocationSize Size, - const AAMDNodes &AAInfo, BatchAAResults &AA) const; + /// If the specified memory location "may" (or must) alias one of the members + /// in the set return the appropriate AliasResult. Otherwise return NoAlias. + AliasResult aliasesMemoryLocation(const MemoryLocation &MemLoc, + BatchAAResults &AA) const; + ModRefInfo aliasesUnknownInst(const Instruction *Inst, BatchAAResults &AA) const; }; @@ -307,9 +161,10 @@ class AliasSetTracker { BatchAAResults &AA; ilist<AliasSet> AliasSets; - using PointerMapType = DenseMap<AssertingVH<Value>, AliasSet::PointerRec *>; + using PointerMapType = DenseMap<AssertingVH<const Value>, AliasSet *>; - // Map from pointers to their node + // Map from pointer values to the alias set holding one or more memory + // locations with that pointer value. PointerMapType PointerMap; public: @@ -327,9 +182,6 @@ public: /// 3. If the instruction aliases multiple sets, merge the sets, and add /// the instruction to the result. /// - /// These methods return true if inserting the instruction resulted in the - /// addition of a new alias set (i.e., the pointer did not alias anything). - /// void add(const MemoryLocation &Loc); void add(LoadInst *LI); void add(StoreInst *SI); @@ -370,31 +222,39 @@ public: private: friend class AliasSet; - // The total number of pointers contained in all "may" alias sets. - unsigned TotalMayAliasSetSize = 0; + // The total number of memory locations contained in all alias sets. + unsigned TotalAliasSetSize = 0; // A non-null value signifies this AST is saturated. A saturated AST lumps - // all pointers into a single "May" set. + // all elements into a single "May" set. AliasSet *AliasAnyAS = nullptr; void removeAliasSet(AliasSet *AS); - /// Just like operator[] on the map, except that it creates an entry for the - /// pointer if it doesn't already exist. - AliasSet::PointerRec &getEntryFor(Value *V) { - AliasSet::PointerRec *&Entry = PointerMap[V]; - if (!Entry) - Entry = new AliasSet::PointerRec(V); - return *Entry; + // Update an alias set field to point to its real destination. If the field is + // pointing to a set that has been merged with another set and is forwarding, + // the field is updated to point to the set obtained by following the + // forwarding links. The Forward fields of intermediate alias sets are + // collapsed as well, and alias set reference counts are updated to reflect + // the new situation. + void collapseForwardingIn(AliasSet *&AS) { + if (AS->Forward) { + collapseForwardingIn(AS->Forward); + // Swap out AS for AS->Forward, while updating reference counts. + AliasSet *NewAS = AS->Forward; + NewAS->addRef(); + AS->dropRef(*this); + AS = NewAS; + } } - AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E); - AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size, - const AAMDNodes &AAInfo, - bool &MustAliasAll); + AliasSet &addMemoryLocation(MemoryLocation Loc, AliasSet::AccessLattice E); + AliasSet *mergeAliasSetsForMemoryLocation(const MemoryLocation &MemLoc, + AliasSet *PtrAS, + bool &MustAliasAll); - /// Merge all alias sets into a single set that is considered to alias any - /// pointer. + /// Merge all alias sets into a single set that is considered to alias + /// any memory location or instruction. AliasSet &mergeAllAliasSets(); AliasSet *findAliasSetForUnknownInst(Instruction *Inst); diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 0c05be0b9b8c..8acb75e87254 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1532,8 +1532,7 @@ void BlockFrequencyInfoImpl<BT>::findReachableBlocks( SmallPtrSet<const BlockT *, 8> InverseReachable; for (const BlockT &BB : *F) { // An exit block is a block without any successors - bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) != - GraphTraits<const BlockT *>::child_end(&BB); + bool HasSucc = !llvm::children<const BlockT *>(&BB).empty(); if (!HasSucc && Reachable.count(&BB)) { Queue.push(&BB); InverseReachable.insert(&BB); @@ -1542,7 +1541,7 @@ void BlockFrequencyInfoImpl<BT>::findReachableBlocks( while (!Queue.empty()) { const BlockT *SrcBB = Queue.front(); Queue.pop(); - for (const BlockT *DstBB : children<Inverse<const BlockT *>>(SrcBB)) { + for (const BlockT *DstBB : inverse_children<const BlockT *>(SrcBB)) { auto EP = BPI->getEdgeProbability(DstBB, SrcBB); if (EP.isZero()) continue; diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index 07c08bc1cc3b..da72fb511f82 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -16,6 +16,9 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/GraphWriter.h" +#include <unordered_set> + +static std::unordered_set<std::string> nameObj; namespace llvm { @@ -83,10 +86,28 @@ private: StringRef Name; }; +static inline void shortenFileName(std::string &FN, unsigned char len = 250) { + + FN = FN.substr(0, len); + + auto strLen = FN.length(); + while (strLen > 0) { + if (auto it = nameObj.find(FN); it != nameObj.end()) { + FN = FN.substr(0, --len); + } else { + nameObj.insert(FN); + break; + } + strLen--; + } +} + template <typename GraphT> void printGraphForFunction(Function &F, GraphT Graph, StringRef Name, bool IsSimple) { - std::string Filename = Name.str() + "." + F.getName().str() + ".dot"; + std::string Filename = Name.str() + "." + F.getName().str(); + shortenFileName(Filename); + Filename = Filename + ".dot"; std::error_code EC; errs() << "Writing '" << Filename << "'..."; @@ -272,6 +293,7 @@ public: bool runOnModule(Module &M) override { GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + shortenFileName(Name); std::string Filename = Name + ".dot"; std::error_code EC; @@ -301,7 +323,9 @@ private: template <typename GraphT> void WriteDOTGraphToFile(Function &F, GraphT &&Graph, std::string FileNamePrefix, bool IsSimple) { - std::string Filename = FileNamePrefix + "." + F.getName().str() + ".dot"; + std::string Filename = FileNamePrefix + "." + F.getName().str(); + shortenFileName(Filename); + Filename = Filename + ".dot"; std::error_code EC; errs() << "Writing '" << Filename << "'..."; diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h index ec79b35ae324..c5e8821858fd 100644 --- a/llvm/include/llvm/Analysis/RegionInfoImpl.h +++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h @@ -163,9 +163,7 @@ typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getEnteringBlock() const { assert(!AllowRepeats && "Unexpected parameter value."); return DT->getNode(Pred) && !contains(Pred) ? Pred : nullptr; }; - BlockT *entry = getEntry(); - return find_singleton<BlockT>(make_range(InvBlockTraits::child_begin(entry), - InvBlockTraits::child_end(entry)), + return find_singleton<BlockT>(llvm::inverse_children<BlockT *>(getEntry()), isEnteringBlock); } @@ -177,10 +175,7 @@ bool RegionBase<Tr>::getExitingBlocks( if (!exit) return CoverAll; - for (PredIterTy PI = InvBlockTraits::child_begin(exit), - PE = InvBlockTraits::child_end(exit); - PI != PE; ++PI) { - BlockT *Pred = *PI; + for (BlockT *Pred : llvm::inverse_children<BlockT *>(exit)) { if (contains(Pred)) { Exitings.push_back(Pred); continue; @@ -202,8 +197,7 @@ typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getExitingBlock() const { assert(!AllowRepeats && "Unexpected parameter value."); return contains(Pred) ? Pred : nullptr; }; - return find_singleton<BlockT>(make_range(InvBlockTraits::child_begin(exit), - InvBlockTraits::child_end(exit)), + return find_singleton<BlockT>(llvm::inverse_children<BlockT *>(exit), isContained); } @@ -244,16 +238,14 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const { BlockT *entry = getEntry(), *exit = getExit(); - for (BlockT *Succ : - make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) { + for (BlockT *Succ : llvm::children<BlockT *>(BB)) { if (!contains(Succ) && exit != Succ) report_fatal_error("Broken region found: edges leaving the region must go " "to the exit node!"); } if (entry != BB) { - for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB), - InvBlockTraits::child_end(BB))) { + for (BlockT *Pred : llvm::inverse_children<BlockT *>(BB)) { // Allow predecessors that are unreachable, as these are ignored during // region analysis. if (!contains(Pred) && DT->isReachableFromEntry(Pred)) @@ -271,8 +263,7 @@ void RegionBase<Tr>::verifyWalk(BlockT *BB, std::set<BlockT *> *visited) const { verifyBBInRegion(BB); - for (BlockT *Succ : - make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) { + for (BlockT *Succ : llvm::children<BlockT *>(BB)) { if (Succ != exit && visited->find(Succ) == visited->end()) verifyWalk(Succ, visited); } @@ -453,8 +444,7 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const { RegionT *R = RI->getRegionFor(exit); if (R->getEntry() != exit) { - for (BlockT *Pred : make_range(InvBlockTraits::child_begin(getExit()), - InvBlockTraits::child_end(getExit()))) + for (BlockT *Pred : llvm::inverse_children<BlockT *>(getExit())) if (!contains(Pred)) return nullptr; if (Tr::getNumSuccessors(exit) == 1) @@ -465,8 +455,7 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const { while (R->getParent() && R->getParent()->getEntry() == exit) R = R->getParent(); - for (BlockT *Pred : make_range(InvBlockTraits::child_begin(getExit()), - InvBlockTraits::child_end(getExit()))) { + for (BlockT *Pred : llvm::inverse_children<BlockT *>(getExit())) { if (!(contains(Pred) || R->contains(Pred))) return nullptr; } @@ -553,8 +542,7 @@ void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const { template <class Tr> bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry, BlockT *exit) const { - for (BlockT *P : make_range(InvBlockTraits::child_begin(BB), - InvBlockTraits::child_end(BB))) { + for (BlockT *P : llvm::inverse_children<BlockT *>(BB)) { if (DT->dominates(entry, P) && !DT->dominates(exit, P)) return false; } @@ -837,8 +825,7 @@ RegionInfoBase<Tr>::getMaxRegionExit(BlockT *BB) const { ExitR->getParent()->getEntry() == Exit) ExitR = ExitR->getParent(); - for (BlockT *Pred : make_range(InvBlockTraits::child_begin(Exit), - InvBlockTraits::child_end(Exit))) { + for (BlockT *Pred : llvm::inverse_children<BlockT *>(Exit)) { if (!R->contains(Pred) && !ExitR->contains(Pred)) break; } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 9697278eaeae..3b615bc700bb 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -934,6 +934,12 @@ public: /// Should the Select Optimization pass be enabled and ran. bool enableSelectOptimize() const; + /// Should the Select Optimization pass treat the given instruction like a + /// select, potentially converting it to a conditional branch. This can + /// include select-like instructions like or(zext(c), x) that can be converted + /// to selects. + bool shouldTreatInstructionLikeSelect(const Instruction *I) const; + /// Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; @@ -1878,6 +1884,7 @@ public: virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; virtual bool enableSelectOptimize() = 0; + virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool enableMaskedInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; @@ -2415,6 +2422,9 @@ public: bool enableSelectOptimize() override { return Impl.enableSelectOptimize(); } + bool shouldTreatInstructionLikeSelect(const Instruction *I) override { + return Impl.shouldTreatInstructionLikeSelect(I); + } bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 60eab53fa2f6..9958b4daa6ed 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -378,6 +378,15 @@ public: bool enableSelectOptimize() const { return true; } + bool shouldTreatInstructionLikeSelect(const Instruction *I) { + // If the select is a logical-and/logical-or then it is better treated as a + // and/or by the backend. + using namespace llvm::PatternMatch; + return isa<SelectInst>(I) && + !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), + m_LogicalOr(m_Value(), m_Value()))); + } + bool enableInterleavedAccessVectorization() const { return false; } bool enableMaskedInterleavedAccessVectorization() const { return false; } diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index b22bdd555cd4..f09e12f3038c 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -472,19 +472,33 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16 TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("copysign", "_ZGVnN2vv_copysign", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cospi", "_ZGVnN2v_cospi", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v") @@ -494,8 +508,24 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v" TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("fdim", "_ZGVnN2vv_fdim", FIXED(2), "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("fma", "_ZGVnN2vvv_fma", FIXED(2), "_ZGV_LLVM_N2vvv") + +TLI_DEFINE_VECFUNC("fmax", "_ZGVnN2vv_fmax", FIXED(2), "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("fmin", "_ZGVnN2vv_fmin", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("fmod", "_ZGVnN2vv_fmod", FIXED(2), "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", FIXED(2), "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("ilogb", "_ZGVnN2v_ilogb", FIXED(2), "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("ldexp", "_ZGVnN2vv_ldexp", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") @@ -504,11 +534,15 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") +TLI_DEFINE_VECFUNC("nextafter", "_ZGVnN2vv_nextafter", FIXED(2), "_ZGV_LLVM_N2vv") + TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") @@ -521,6 +555,8 @@ TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2v TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sinpi", "_ZGVnN2v_sinpi", FIXED(2), "_ZGV_LLVM_N2v") + TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v") @@ -533,19 +569,33 @@ TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("copysignf", "_ZGVnN4vv_copysignf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cospif", "_ZGVnN4v_cospif", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erff", "_ZGVnN4v_erff", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN4v_erfcf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v") @@ -555,8 +605,24 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("fdimf", "_ZGVnN4vv_fdimf", FIXED(4), "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("fmaf", "_ZGVnN4vvv_fmaf", FIXED(4), "_ZGV_LLVM_N4vvv") + +TLI_DEFINE_VECFUNC("fmaxf", "_ZGVnN4vv_fmaxf", FIXED(4), "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("fminf", "_ZGVnN4vv_fminf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("fmodf", "_ZGVnN4vv_fmodf", FIXED(4), "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", FIXED(4), "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("ilogbf", "_ZGVnN4v_ilogbf", FIXED(4), "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("ldexpf", "_ZGVnN4vv_ldexpf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") @@ -565,11 +631,15 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") +TLI_DEFINE_VECFUNC("nextafterf", "_ZGVnN4vv_nextafterf", FIXED(4), "_ZGV_LLVM_N4vv") + TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") @@ -582,6 +652,8 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sinpif", "_ZGVnN4v_sinpif", FIXED(4), "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") @@ -595,9 +667,15 @@ TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -607,6 +685,12 @@ TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("copysign", "_ZGVsMxvv_copysign", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("copysignf", "_ZGVsMxvv_copysignf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -615,6 +699,15 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsM TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cospi", "_ZGVsMxv_cospi", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cospif", "_ZGVsMxv_cospif", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -630,9 +723,33 @@ TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("fdim", "_ZGVsMxvv_fdim", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fdimf", "_ZGVsMxvv_fdimf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("fma", "_ZGVsMxvvv_fma", SCALABLE(2), MASKED, "_ZGVsMxvvv") +TLI_DEFINE_VECFUNC("fmaf", "_ZGVsMxvvv_fmaf", SCALABLE(4), MASKED, "_ZGVsMxvvv") + +TLI_DEFINE_VECFUNC("fmax", "_ZGVsMxvv_fmax", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fmaxf", "_ZGVsMxvv_fmaxf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("fmin", "_ZGVsMxvv_fmin", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fminf", "_ZGVsMxvv_fminf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("ilogb", "_ZGVsMxv_ilogb", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("ilogbf", "_ZGVsMxv_ilogbf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("ldexp", "_ZGVsMxvv_ldexp", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("ldexpf", "_ZGVsMxvv_ldexpf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -646,6 +763,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -654,6 +774,9 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGV TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") +TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") @@ -673,6 +796,9 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, " TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sinpi", "_ZGVsMxv_sinpi", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sinpif", "_ZGVsMxv_sinpif", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("sqrt", "_ZGVsMxv_sqrt", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -747,6 +873,11 @@ TLI_DEFINE_VECFUNC("coshf", "armpl_vcoshq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v TLI_DEFINE_VECFUNC("cosh", "armpl_svcosh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("coshf", "armpl_svcosh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cospi", "armpl_vcospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cospif", "armpl_vcospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cospi", "armpl_svcospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cospif", "armpl_svcospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + TLI_DEFINE_VECFUNC("erf", "armpl_verfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("erff", "armpl_verfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("erf", "armpl_sverf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") @@ -802,6 +933,11 @@ TLI_DEFINE_VECFUNC("fmaf", "armpl_vfmaq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vvv TLI_DEFINE_VECFUNC("fma", "armpl_svfma_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvvv") TLI_DEFINE_VECFUNC("fmaf", "armpl_svfma_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvvv") +TLI_DEFINE_VECFUNC("fmax", "armpl_vfmaxq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("fmaxf", "armpl_vfmaxq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("fmax", "armpl_svfmax_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("fmaxf", "armpl_svfmax_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("fmin", "armpl_vfminq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("fminf", "armpl_vfminq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("fmin", "armpl_svfmin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv") @@ -817,6 +953,16 @@ TLI_DEFINE_VECFUNC("hypotf", "armpl_vhypotq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N TLI_DEFINE_VECFUNC("hypot", "armpl_svhypot_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("hypotf", "armpl_svhypot_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("ilogb", "armpl_vilogbq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("ilogbf", "armpl_vilogbq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("ilogb", "armpl_svilogb_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("ilogbf", "armpl_svilogb_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("ldexp", "armpl_vldexpq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("ldexpf", "armpl_vldexpq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("ldexp", "armpl_svldexp_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("ldexpf", "armpl_svldexp_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv") + TLI_DEFINE_VECFUNC("lgamma", "armpl_vlgammaq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("lgammaf", "armpl_vlgammaq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("lgamma", "armpl_svlgamma_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b1ecc8777c16..7a92e62b53c5 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -16,197 +16,12 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/IR/VFABIDemangler.h" #include "llvm/Support/CheckedArithmetic.h" namespace llvm { class TargetLibraryInfo; -/// Describes the type of Parameters -enum class VFParamKind { - Vector, // No semantic information. - OMP_Linear, // declare simd linear(i) - OMP_LinearRef, // declare simd linear(ref(i)) - OMP_LinearVal, // declare simd linear(val(i)) - OMP_LinearUVal, // declare simd linear(uval(i)) - OMP_LinearPos, // declare simd linear(i:c) uniform(c) - OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c) - OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c) - OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c) - OMP_Uniform, // declare simd uniform(i) - GlobalPredicate, // Global logical predicate that acts on all lanes - // of the input and output mask concurrently. For - // example, it is implied by the `M` token in the - // Vector Function ABI mangled name. - Unknown -}; - -/// Describes the type of Instruction Set Architecture -enum class VFISAKind { - AdvancedSIMD, // AArch64 Advanced SIMD (NEON) - SVE, // AArch64 Scalable Vector Extension - SSE, // x86 SSE - AVX, // x86 AVX - AVX2, // x86 AVX2 - AVX512, // x86 AVX512 - LLVM, // LLVM internal ISA for functions that are not - // attached to an existing ABI via name mangling. - Unknown // Unknown ISA -}; - -/// Encapsulates information needed to describe a parameter. -/// -/// The description of the parameter is not linked directly to -/// OpenMP or any other vector function description. This structure -/// is extendible to handle other paradigms that describe vector -/// functions and their parameters. -struct VFParameter { - unsigned ParamPos; // Parameter Position in Scalar Function. - VFParamKind ParamKind; // Kind of Parameter. - int LinearStepOrPos = 0; // Step or Position of the Parameter. - Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1. - - // Comparison operator. - bool operator==(const VFParameter &Other) const { - return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) == - std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos, - Other.Alignment); - } -}; - -/// Contains the information about the kind of vectorization -/// available. -/// -/// This object in independent on the paradigm used to -/// represent vector functions. in particular, it is not attached to -/// any target-specific ABI. -struct VFShape { - ElementCount VF; // Vectorization factor. - SmallVector<VFParameter, 8> Parameters; // List of parameter information. - // Comparison operator. - bool operator==(const VFShape &Other) const { - return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters); - } - - /// Update the parameter in position P.ParamPos to P. - void updateParam(VFParameter P) { - assert(P.ParamPos < Parameters.size() && "Invalid parameter position."); - Parameters[P.ParamPos] = P; - assert(hasValidParameterList() && "Invalid parameter list"); - } - - /// Retrieve the VFShape that can be used to map a scalar function to itself, - /// with VF = 1. - static VFShape getScalarShape(const FunctionType *FTy) { - return VFShape::get(FTy, ElementCount::getFixed(1), - /*HasGlobalPredicate*/ false); - } - - /// Retrieve the basic vectorization shape of the function, where all - /// parameters are mapped to VFParamKind::Vector with \p EC lanes. Specifies - /// whether the function has a Global Predicate argument via \p HasGlobalPred. - static VFShape get(const FunctionType *FTy, ElementCount EC, - bool HasGlobalPred) { - SmallVector<VFParameter, 8> Parameters; - for (unsigned I = 0; I < FTy->getNumParams(); ++I) - Parameters.push_back(VFParameter({I, VFParamKind::Vector})); - if (HasGlobalPred) - Parameters.push_back( - VFParameter({FTy->getNumParams(), VFParamKind::GlobalPredicate})); - - return {EC, Parameters}; - } - /// Validation check on the Parameters in the VFShape. - bool hasValidParameterList() const; -}; - -/// Holds the VFShape for a specific scalar to vector function mapping. -struct VFInfo { - VFShape Shape; /// Classification of the vector function. - std::string ScalarName; /// Scalar Function Name. - std::string VectorName; /// Vector Function Name associated to this VFInfo. - VFISAKind ISA; /// Instruction Set Architecture. - - /// Returns the index of the first parameter with the kind 'GlobalPredicate', - /// if any exist. - std::optional<unsigned> getParamIndexForOptionalMask() const { - unsigned ParamCount = Shape.Parameters.size(); - for (unsigned i = 0; i < ParamCount; ++i) - if (Shape.Parameters[i].ParamKind == VFParamKind::GlobalPredicate) - return i; - - return std::nullopt; - } - - /// Returns true if at least one of the operands to the vectorized function - /// has the kind 'GlobalPredicate'. - bool isMasked() const { return getParamIndexForOptionalMask().has_value(); } -}; - -namespace VFABI { -/// LLVM Internal VFABI ISA token for vector functions. -static constexpr char const *_LLVM_ = "_LLVM_"; -/// Prefix for internal name redirection for vector function that -/// tells the compiler to scalarize the call using the scalar name -/// of the function. For example, a mangled name like -/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the -/// vectorizer to vectorize the scalar call `foo`, and to scalarize -/// it once vectorization is done. -static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_"; - -/// Function to construct a VFInfo out of a mangled names in the -/// following format: -/// -/// <VFABI_name>{(<redirection>)} -/// -/// where <VFABI_name> is the name of the vector function, mangled according -/// to the rules described in the Vector Function ABI of the target vector -/// extension (or <isa> from now on). The <VFABI_name> is in the following -/// format: -/// -/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] -/// -/// This methods support demangling rules for the following <isa>: -/// -/// * AArch64: https://developer.arm.com/docs/101129/latest -/// -/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and -/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt -/// -/// \param MangledName -> input string in the format -/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]. -/// \param FTy -> FunctionType of the scalar function which we're trying to find -/// a vectorized variant for. This is required to determine the vectorization -/// factor for scalable vectors, since the mangled name doesn't encode that; -/// it needs to be derived from the widest element types of vector arguments -/// or return values. -std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, - const FunctionType *FTy); - -/// Retrieve the `VFParamKind` from a string token. -VFParamKind getVFParamKindFromString(const StringRef Token); - -// Name of the attribute where the variant mappings are stored. -static constexpr char const *MappingsAttrName = "vector-function-abi-variant"; - -/// Populates a set of strings representing the Vector Function ABI variants -/// associated to the CallInst CI. If the CI does not contain the -/// vector-function-abi-variant attribute, we return without populating -/// VariantMappings, i.e. callers of getVectorVariantNames need not check for -/// the presence of the attribute (see InjectTLIMappings). -void getVectorVariantNames(const CallInst &CI, - SmallVectorImpl<std::string> &VariantMappings); - -/// Constructs a FunctionType by applying vector function information to the -/// type of a matching scalar function. -/// \param Info gets the vectorization factor (VF) and the VFParamKind of the -/// parameters. -/// \param ScalarFTy gets the Type information of parameters, as it is not -/// stored in \p Info. -/// \returns a pointer to a newly created vector FunctionType -FunctionType *createFunctionType(const VFInfo &Info, - const FunctionType *ScalarFTy); -} // end namespace VFABI - /// The Vector Function Database. /// /// Helper class used to find the vector functions associated to a |