diff options
Diffstat (limited to 'lib/Transforms/Scalar/NewGVN.cpp')
-rw-r--r-- | lib/Transforms/Scalar/NewGVN.cpp | 2750 |
1 files changed, 1970 insertions, 780 deletions
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 57e6e3ddad94..3d8ce888867e 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -17,6 +17,27 @@ /// "A Sparse Algorithm for Predicated Global Value Numbering" from /// Karthik Gargi. /// +/// A brief overview of the algorithm: The algorithm is essentially the same as +/// the standard RPO value numbering algorithm (a good reference is the paper +/// "SCC based value numbering" by L. Taylor Simpson) with one major difference: +/// The RPO algorithm proceeds, on every iteration, to process every reachable +/// block and every instruction in that block. This is because the standard RPO +/// algorithm does not track what things have the same value number, it only +/// tracks what the value number of a given operation is (the mapping is +/// operation -> value number). Thus, when a value number of an operation +/// changes, it must reprocess everything to ensure all uses of a value number +/// get updated properly. In constrast, the sparse algorithm we use *also* +/// tracks what operations have a given value number (IE it also tracks the +/// reverse mapping from value number -> operations with that value number), so +/// that it only needs to reprocess the instructions that are affected when +/// something's value number changes. The rest of the algorithm is devoted to +/// performing symbolic evaluation, forward propagation, and simplification of +/// operations based on the value numbers deduced so far. +/// +/// We also do not perform elimination by using any published algorithm. All +/// published algorithms are O(Instructions). Instead, we use a technique that +/// is O(number of operations with the same value number), enabling us to skip +/// trying to eliminate things that have unique value numbers. //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/NewGVN.h" @@ -40,13 +61,10 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -55,24 +73,25 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/Type.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVNExpression.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/MemorySSA.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/Transforms/Utils/VNCoercion.h" +#include <numeric> #include <unordered_map> #include <utility> #include <vector> using namespace llvm; using namespace PatternMatch; using namespace llvm::GVNExpression; - +using namespace llvm::VNCoercion; #define DEBUG_TYPE "newgvn" STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); @@ -87,6 +106,15 @@ STATISTIC(NumGVNAvoidedSortedLeaderChanges, "Number of avoided sorted leader changes"); STATISTIC(NumGVNNotMostDominatingLeader, "Number of times a member dominated it's new classes' leader"); +STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated"); +DEBUG_COUNTER(VNCounter, "newgvn-vn", + "Controls which instructions are value numbered") + +// Currently store defining access refinement is too slow due to basicaa being +// egregiously slow. This flag lets us keep it working while we work on this +// issue. +static cl::opt<bool> EnableStoreRefinement("enable-store-refinement", + cl::init(false), cl::Hidden); //===----------------------------------------------------------------------===// // GVN Pass @@ -105,6 +133,77 @@ PHIExpression::~PHIExpression() = default; } } +// Tarjan's SCC finding algorithm with Nuutila's improvements +// SCCIterator is actually fairly complex for the simple thing we want. +// It also wants to hand us SCC's that are unrelated to the phi node we ask +// about, and have us process them there or risk redoing work. +// Graph traits over a filter iterator also doesn't work that well here. +// This SCC finder is specialized to walk use-def chains, and only follows instructions, +// not generic values (arguments, etc). +struct TarjanSCC { + + TarjanSCC() : Components(1) {} + + void Start(const Instruction *Start) { + if (Root.lookup(Start) == 0) + FindSCC(Start); + } + + const SmallPtrSetImpl<const Value *> &getComponentFor(const Value *V) const { + unsigned ComponentID = ValueToComponent.lookup(V); + + assert(ComponentID > 0 && + "Asking for a component for a value we never processed"); + return Components[ComponentID]; + } + +private: + void FindSCC(const Instruction *I) { + Root[I] = ++DFSNum; + // Store the DFS Number we had before it possibly gets incremented. + unsigned int OurDFS = DFSNum; + for (auto &Op : I->operands()) { + if (auto *InstOp = dyn_cast<Instruction>(Op)) { + if (Root.lookup(Op) == 0) + FindSCC(InstOp); + if (!InComponent.count(Op)) + Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); + } + } + // See if we really were the root of a component, by seeing if we still have our DFSNumber. + // If we do, we are the root of the component, and we have completed a component. If we do not, + // we are not the root of a component, and belong on the component stack. + if (Root.lookup(I) == OurDFS) { + unsigned ComponentID = Components.size(); + Components.resize(Components.size() + 1); + auto &Component = Components.back(); + Component.insert(I); + DEBUG(dbgs() << "Component root is " << *I << "\n"); + InComponent.insert(I); + ValueToComponent[I] = ComponentID; + // Pop a component off the stack and label it. + while (!Stack.empty() && Root.lookup(Stack.back()) >= OurDFS) { + auto *Member = Stack.back(); + DEBUG(dbgs() << "Component member is " << *Member << "\n"); + Component.insert(Member); + InComponent.insert(Member); + ValueToComponent[Member] = ComponentID; + Stack.pop_back(); + } + } else { + // Part of a component, push to stack + Stack.push_back(I); + } + } + unsigned int DFSNum = 1; + SmallPtrSet<const Value *, 8> InComponent; + DenseMap<const Value *, unsigned int> Root; + SmallVector<const Value *, 8> Stack; + // Store the components as vector of ptr sets, because we need the topo order + // of SCC's, but not individual member order + SmallVector<SmallPtrSet<const Value *, 8>, 8> Components; + DenseMap<const Value *, unsigned> ValueToComponent; +}; // Congruence classes represent the set of expressions/instructions // that are all the same *during some scope in the function*. // That is, because of the way we perform equality propagation, and @@ -115,43 +214,152 @@ PHIExpression::~PHIExpression() = default; // For any Value in the Member set, it is valid to replace any dominated member // with that Value. // -// Every congruence class has a leader, and the leader is used to -// symbolize instructions in a canonical way (IE every operand of an -// instruction that is a member of the same congruence class will -// always be replaced with leader during symbolization). -// To simplify symbolization, we keep the leader as a constant if class can be -// proved to be a constant value. -// Otherwise, the leader is a randomly chosen member of the value set, it does -// not matter which one is chosen. -// Each congruence class also has a defining expression, -// though the expression may be null. If it exists, it can be used for forward -// propagation and reassociation of values. -// -struct CongruenceClass { - using MemberSet = SmallPtrSet<Value *, 4>; +// Every congruence class has a leader, and the leader is used to symbolize +// instructions in a canonical way (IE every operand of an instruction that is a +// member of the same congruence class will always be replaced with leader +// during symbolization). To simplify symbolization, we keep the leader as a +// constant if class can be proved to be a constant value. Otherwise, the +// leader is the member of the value set with the smallest DFS number. Each +// congruence class also has a defining expression, though the expression may be +// null. If it exists, it can be used for forward propagation and reassociation +// of values. + +// For memory, we also track a representative MemoryAccess, and a set of memory +// members for MemoryPhis (which have no real instructions). Note that for +// memory, it seems tempting to try to split the memory members into a +// MemoryCongruenceClass or something. Unfortunately, this does not work +// easily. The value numbering of a given memory expression depends on the +// leader of the memory congruence class, and the leader of memory congruence +// class depends on the value numbering of a given memory expression. This +// leads to wasted propagation, and in some cases, missed optimization. For +// example: If we had value numbered two stores together before, but now do not, +// we move them to a new value congruence class. This in turn will move at one +// of the memorydefs to a new memory congruence class. Which in turn, affects +// the value numbering of the stores we just value numbered (because the memory +// congruence class is part of the value number). So while theoretically +// possible to split them up, it turns out to be *incredibly* complicated to get +// it to work right, because of the interdependency. While structurally +// slightly messier, it is algorithmically much simpler and faster to do what we +// do here, and track them both at once in the same class. +// Note: The default iterators for this class iterate over values +class CongruenceClass { +public: + using MemberType = Value; + using MemberSet = SmallPtrSet<MemberType *, 4>; + using MemoryMemberType = MemoryPhi; + using MemoryMemberSet = SmallPtrSet<const MemoryMemberType *, 2>; + + explicit CongruenceClass(unsigned ID) : ID(ID) {} + CongruenceClass(unsigned ID, Value *Leader, const Expression *E) + : ID(ID), RepLeader(Leader), DefiningExpr(E) {} + unsigned getID() const { return ID; } + // True if this class has no members left. This is mainly used for assertion + // purposes, and for skipping empty classes. + bool isDead() const { + // If it's both dead from a value perspective, and dead from a memory + // perspective, it's really dead. + return empty() && memory_empty(); + } + // Leader functions + Value *getLeader() const { return RepLeader; } + void setLeader(Value *Leader) { RepLeader = Leader; } + const std::pair<Value *, unsigned int> &getNextLeader() const { + return NextLeader; + } + void resetNextLeader() { NextLeader = {nullptr, ~0}; } + + void addPossibleNextLeader(std::pair<Value *, unsigned int> LeaderPair) { + if (LeaderPair.second < NextLeader.second) + NextLeader = LeaderPair; + } + + Value *getStoredValue() const { return RepStoredValue; } + void setStoredValue(Value *Leader) { RepStoredValue = Leader; } + const MemoryAccess *getMemoryLeader() const { return RepMemoryAccess; } + void setMemoryLeader(const MemoryAccess *Leader) { RepMemoryAccess = Leader; } + + // Forward propagation info + const Expression *getDefiningExpr() const { return DefiningExpr; } + void setDefiningExpr(const Expression *E) { DefiningExpr = E; } + + // Value member set + bool empty() const { return Members.empty(); } + unsigned size() const { return Members.size(); } + MemberSet::const_iterator begin() const { return Members.begin(); } + MemberSet::const_iterator end() const { return Members.end(); } + void insert(MemberType *M) { Members.insert(M); } + void erase(MemberType *M) { Members.erase(M); } + void swap(MemberSet &Other) { Members.swap(Other); } + + // Memory member set + bool memory_empty() const { return MemoryMembers.empty(); } + unsigned memory_size() const { return MemoryMembers.size(); } + MemoryMemberSet::const_iterator memory_begin() const { + return MemoryMembers.begin(); + } + MemoryMemberSet::const_iterator memory_end() const { + return MemoryMembers.end(); + } + iterator_range<MemoryMemberSet::const_iterator> memory() const { + return make_range(memory_begin(), memory_end()); + } + void memory_insert(const MemoryMemberType *M) { MemoryMembers.insert(M); } + void memory_erase(const MemoryMemberType *M) { MemoryMembers.erase(M); } + + // Store count + unsigned getStoreCount() const { return StoreCount; } + void incStoreCount() { ++StoreCount; } + void decStoreCount() { + assert(StoreCount != 0 && "Store count went negative"); + --StoreCount; + } + + // Return true if two congruence classes are equivalent to each other. This + // means + // that every field but the ID number and the dead field are equivalent. + bool isEquivalentTo(const CongruenceClass *Other) const { + if (!Other) + return false; + if (this == Other) + return true; + + if (std::tie(StoreCount, RepLeader, RepStoredValue, RepMemoryAccess) != + std::tie(Other->StoreCount, Other->RepLeader, Other->RepStoredValue, + Other->RepMemoryAccess)) + return false; + if (DefiningExpr != Other->DefiningExpr) + if (!DefiningExpr || !Other->DefiningExpr || + *DefiningExpr != *Other->DefiningExpr) + return false; + // We need some ordered set + std::set<Value *> AMembers(Members.begin(), Members.end()); + std::set<Value *> BMembers(Members.begin(), Members.end()); + return AMembers == BMembers; + } + +private: unsigned ID; // Representative leader. Value *RepLeader = nullptr; + // The most dominating leader after our current leader, because the member set + // is not sorted and is expensive to keep sorted all the time. + std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U}; + // If this is represented by a store, the value of the store. + Value *RepStoredValue = nullptr; + // If this class contains MemoryDefs or MemoryPhis, this is the leading memory + // access. + const MemoryAccess *RepMemoryAccess = nullptr; // Defining Expression. const Expression *DefiningExpr = nullptr; // Actual members of this class. MemberSet Members; - - // True if this class has no members left. This is mainly used for assertion - // purposes, and for skipping empty classes. - bool Dead = false; - + // This is the set of MemoryPhis that exist in the class. MemoryDefs and + // MemoryUses have real instructions representing them, so we only need to + // track MemoryPhis here. + MemoryMemberSet MemoryMembers; // Number of stores in this congruence class. // This is used so we can detect store equivalence changes properly. int StoreCount = 0; - - // The most dominating leader after our current leader, because the member set - // is not sorted and is expensive to keep sorted all the time. - std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U}; - - explicit CongruenceClass(unsigned ID) : ID(ID) {} - CongruenceClass(unsigned ID, Value *Leader, const Expression *E) - : ID(ID), RepLeader(Leader), DefiningExpr(E) {} }; namespace llvm { @@ -180,19 +388,34 @@ template <> struct DenseMapInfo<const Expression *> { }; } // end namespace llvm -class NewGVN : public FunctionPass { +namespace { +class NewGVN { + Function &F; DominatorTree *DT; - const DataLayout *DL; - const TargetLibraryInfo *TLI; AssumptionCache *AC; + const TargetLibraryInfo *TLI; AliasAnalysis *AA; MemorySSA *MSSA; MemorySSAWalker *MSSAWalker; + const DataLayout &DL; + std::unique_ptr<PredicateInfo> PredInfo; BumpPtrAllocator ExpressionAllocator; ArrayRecycler<Value *> ArgRecycler; + TarjanSCC SCCFinder; + + // Number of function arguments, used by ranking + unsigned int NumFuncArgs; + + // RPOOrdering of basic blocks + DenseMap<const DomTreeNode *, unsigned> RPOOrdering; // Congruence class info. - CongruenceClass *InitialClass; + + // This class is called INITIAL in the paper. It is the class everything + // startsout in, and represents any value. Being an optimistic analysis, + // anything in the TOP class has the value TOP, which is indeterminate and + // equivalent to everything. + CongruenceClass *TOPClass; std::vector<CongruenceClass *> CongruenceClasses; unsigned NextCongruenceNum; @@ -200,13 +423,38 @@ class NewGVN : public FunctionPass { DenseMap<Value *, CongruenceClass *> ValueToClass; DenseMap<Value *, const Expression *> ValueToExpression; + // Mapping from predicate info we used to the instructions we used it with. + // In order to correctly ensure propagation, we must keep track of what + // comparisons we used, so that when the values of the comparisons change, we + // propagate the information to the places we used the comparison. + DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers; + // Mapping from MemoryAccess we used to the MemoryAccess we used it with. Has + // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for + // stores, we no longer can rely solely on the def-use chains of MemorySSA. + DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> MemoryToUsers; + // A table storing which memorydefs/phis represent a memory state provably // equivalent to another memory state. // We could use the congruence class machinery, but the MemoryAccess's are // abstract memory states, so they can only ever be equivalent to each other, // and not to constants, etc. - DenseMap<const MemoryAccess *, MemoryAccess *> MemoryAccessEquiv; - + DenseMap<const MemoryAccess *, CongruenceClass *> MemoryAccessToClass; + + // We could, if we wanted, build MemoryPhiExpressions and + // MemoryVariableExpressions, etc, and value number them the same way we value + // number phi expressions. For the moment, this seems like overkill. They + // can only exist in one of three states: they can be TOP (equal to + // everything), Equivalent to something else, or unique. Because we do not + // create expressions for them, we need to simulate leader change not just + // when they change class, but when they change state. Note: We can do the + // same thing for phis, and avoid having phi expressions if we wanted, We + // should eventually unify in one direction or the other, so this is a little + // bit of an experiment in which turns out easier to maintain. + enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; + DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState; + + enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle }; + DenseMap<const PHINode *, PhiCycleState> PhiCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>; ExpressionClassMap ExpressionToClass; @@ -231,8 +479,6 @@ class NewGVN : public FunctionPass { BitVector TouchedInstructions; DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange; - DenseMap<const DomTreeNode *, std::pair<unsigned, unsigned>> - DominatedInstRange; #ifndef NDEBUG // Debugging for how many times each block and instruction got processed. @@ -240,56 +486,42 @@ class NewGVN : public FunctionPass { #endif // DFS info. - DenseMap<const BasicBlock *, std::pair<int, int>> DFSDomMap; + // This contains a mapping from Instructions to DFS numbers. + // The numbering starts at 1. An instruction with DFS number zero + // means that the instruction is dead. DenseMap<const Value *, unsigned> InstrDFS; + + // This contains the mapping DFS numbers to instructions. SmallVector<Value *, 32> DFSToInstr; // Deletion info. SmallPtrSet<Instruction *, 8> InstructionsToErase; public: - static char ID; // Pass identification, replacement for typeid. - NewGVN() : FunctionPass(ID) { - initializeNewGVNPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA); + NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, + const DataLayout &DL) + : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), + PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {} + bool runGVN(); private: - // This transformation requires dominator postdominator info. - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<MemorySSAWrapperPass>(); - AU.addRequired<AAResultsWrapperPass>(); - - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } - // Expression handling. - const Expression *createExpression(Instruction *, const BasicBlock *); - const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *, - const BasicBlock *); - PHIExpression *createPHIExpression(Instruction *); + const Expression *createExpression(Instruction *); + const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); + PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge, + bool &AllConstant); const VariableExpression *createVariableExpression(Value *); const ConstantExpression *createConstantExpression(Constant *); - const Expression *createVariableOrConstant(Value *V, const BasicBlock *B); + const Expression *createVariableOrConstant(Value *V); const UnknownExpression *createUnknownExpression(Instruction *); - const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *, - const BasicBlock *); + const StoreExpression *createStoreExpression(StoreInst *, + const MemoryAccess *); LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, - MemoryAccess *, const BasicBlock *); - - const CallExpression *createCallExpression(CallInst *, MemoryAccess *, - const BasicBlock *); - const AggregateValueExpression * - createAggregateValueExpression(Instruction *, const BasicBlock *); - bool setBasicExpressionInfo(Instruction *, BasicExpression *, - const BasicBlock *); + const MemoryAccess *); + const CallExpression *createCallExpression(CallInst *, const MemoryAccess *); + const AggregateValueExpression *createAggregateValueExpression(Instruction *); + bool setBasicExpressionInfo(Instruction *, BasicExpression *); // Congruence class handling. CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { @@ -298,9 +530,21 @@ private: return result; } + CongruenceClass *createMemoryClass(MemoryAccess *MA) { + auto *CC = createCongruenceClass(nullptr, nullptr); + CC->setMemoryLeader(MA); + return CC; + } + CongruenceClass *ensureLeaderOfMemoryClass(MemoryAccess *MA) { + auto *CC = getMemoryClass(MA); + if (CC->getMemoryLeader() != MA) + CC = createMemoryClass(MA); + return CC; + } + CongruenceClass *createSingletonCongruenceClass(Value *Member) { CongruenceClass *CClass = createCongruenceClass(Member, nullptr); - CClass->Members.insert(Member); + CClass->insert(Member); ValueToClass[Member] = CClass; return CClass; } @@ -313,37 +557,49 @@ private: // Symbolic evaluation. const Expression *checkSimplificationResults(Expression *, Instruction *, Value *); - const Expression *performSymbolicEvaluation(Value *, const BasicBlock *); - const Expression *performSymbolicLoadEvaluation(Instruction *, - const BasicBlock *); - const Expression *performSymbolicStoreEvaluation(Instruction *, - const BasicBlock *); - const Expression *performSymbolicCallEvaluation(Instruction *, - const BasicBlock *); - const Expression *performSymbolicPHIEvaluation(Instruction *, - const BasicBlock *); - bool setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To); - const Expression *performSymbolicAggrValueEvaluation(Instruction *, - const BasicBlock *); + const Expression *performSymbolicEvaluation(Value *); + const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, + Instruction *, MemoryAccess *); + const Expression *performSymbolicLoadEvaluation(Instruction *); + const Expression *performSymbolicStoreEvaluation(Instruction *); + const Expression *performSymbolicCallEvaluation(Instruction *); + const Expression *performSymbolicPHIEvaluation(Instruction *); + const Expression *performSymbolicAggrValueEvaluation(Instruction *); + const Expression *performSymbolicCmpEvaluation(Instruction *); + const Expression *performSymbolicPredicateInfoEvaluation(Instruction *); // Congruence finding. - // Templated to allow them to work both on BB's and BB-edges. - template <class T> - Value *lookupOperandLeader(Value *, const User *, const T &) const; + bool someEquivalentDominates(const Instruction *, const Instruction *) const; + Value *lookupOperandLeader(Value *) const; void performCongruenceFinding(Instruction *, const Expression *); - void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *, - CongruenceClass *); + void moveValueToNewCongruenceClass(Instruction *, const Expression *, + CongruenceClass *, CongruenceClass *); + void moveMemoryToNewCongruenceClass(Instruction *, MemoryAccess *, + CongruenceClass *, CongruenceClass *); + Value *getNextValueLeader(CongruenceClass *) const; + const MemoryAccess *getNextMemoryLeader(CongruenceClass *) const; + bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To); + CongruenceClass *getMemoryClass(const MemoryAccess *MA) const; + const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const; + bool isMemoryAccessTop(const MemoryAccess *) const; + + // Ranking + unsigned int getRank(const Value *) const; + bool shouldSwapOperands(const Value *, const Value *) const; + // Reachability handling. void updateReachableEdge(BasicBlock *, BasicBlock *); void processOutgoingEdges(TerminatorInst *, BasicBlock *); - bool isOnlyReachableViaThisEdge(const BasicBlockEdge &) const; - Value *findConditionEquivalence(Value *, BasicBlock *) const; - MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const; + Value *findConditionEquivalence(Value *) const; // Elimination. struct ValueDFS; - void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, - SmallVectorImpl<ValueDFS> &); + void convertClassToDFSOrdered(const CongruenceClass &, + SmallVectorImpl<ValueDFS> &, + DenseMap<const Value *, unsigned int> &, + SmallPtrSetImpl<Instruction *> &) const; + void convertClassToLoadsAndStores(const CongruenceClass &, + SmallVectorImpl<ValueDFS> &) const; bool eliminateInstructions(Function &); void replaceInstruction(Instruction *, Value *); @@ -355,35 +611,58 @@ private: // Various instruction touch utilities void markUsersTouched(Value *); - void markMemoryUsersTouched(MemoryAccess *); - void markLeaderChangeTouched(CongruenceClass *CC); + void markMemoryUsersTouched(const MemoryAccess *); + void markMemoryDefTouched(const MemoryAccess *); + void markPredicateUsersTouched(Instruction *); + void markValueLeaderChangeTouched(CongruenceClass *CC); + void markMemoryLeaderChangeTouched(CongruenceClass *CC); + void addPredicateUsers(const PredicateBase *, Instruction *); + void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U); + + // Main loop of value numbering + void iterateTouchedInstructions(); // Utilities. void cleanupTables(); std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned); void updateProcessedCount(Value *V); void verifyMemoryCongruency() const; + void verifyIterationSettled(Function &F); bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; -}; - -char NewGVN::ID = 0; + BasicBlock *getBlockForValue(Value *V) const; + void deleteExpression(const Expression *E); + unsigned InstrToDFSNum(const Value *V) const { + assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses"); + return InstrDFS.lookup(V); + } -// createGVNPass - The public interface to this file. -FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); } + unsigned InstrToDFSNum(const MemoryAccess *MA) const { + return MemoryToDFSNum(MA); + } + Value *InstrFromDFSNum(unsigned DFSNum) { return DFSToInstr[DFSNum]; } + // Given a MemoryAccess, return the relevant instruction DFS number. Note: + // This deliberately takes a value so it can be used with Use's, which will + // auto-convert to Value's but not to MemoryAccess's. + unsigned MemoryToDFSNum(const Value *MA) const { + assert(isa<MemoryAccess>(MA) && + "This should not be used with instructions"); + return isa<MemoryUseOrDef>(MA) + ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst()) + : InstrDFS.lookup(MA); + } + bool isCycleFree(const PHINode *PN); + template <class T, class Range> T *getMinDFSOfRange(const Range &) const; + // Debug counter info. When verifying, we have to reset the value numbering + // debug counter to the same state it started in to get the same results. + std::pair<int, int> StartingVNCounter; +}; +} // end anonymous namespace template <typename T> static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) { - if ((!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS)) || - !LHS.BasicExpression::equals(RHS)) { + if (!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS)) return false; - } else if (const auto *L = dyn_cast<LoadExpression>(&RHS)) { - if (LHS.getDefiningAccess() != L->getDefiningAccess()) - return false; - } else if (const auto *S = dyn_cast<StoreExpression>(&RHS)) { - if (LHS.getDefiningAccess() != S->getDefiningAccess()) - return false; - } - return true; + return LHS.MemoryExpression::equals(RHS); } bool LoadExpression::equals(const Expression &Other) const { @@ -391,7 +670,13 @@ bool LoadExpression::equals(const Expression &Other) const { } bool StoreExpression::equals(const Expression &Other) const { - return equalsLoadStoreHelper(*this, Other); + if (!equalsLoadStoreHelper(*this, Other)) + return false; + // Make sure that store vs store includes the value operand. + if (const auto *S = dyn_cast<StoreExpression>(&Other)) + if (getStoredValue() != S->getStoredValue()) + return false; + return true; } #ifndef NDEBUG @@ -400,16 +685,28 @@ static std::string getBlockName(const BasicBlock *B) { } #endif -INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) -INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false) +// Get the basic block from an instruction/memory value. +BasicBlock *NewGVN::getBlockForValue(Value *V) const { + if (auto *I = dyn_cast<Instruction>(V)) + return I->getParent(); + else if (auto *MP = dyn_cast<MemoryPhi>(V)) + return MP->getBlock(); + llvm_unreachable("Should have been able to figure out a block for our value"); + return nullptr; +} -PHIExpression *NewGVN::createPHIExpression(Instruction *I) { +// Delete a definitely dead expression, so it can be reused by the expression +// allocator. Some of these are not in creation functions, so we have to accept +// const versions. +void NewGVN::deleteExpression(const Expression *E) { + assert(isa<BasicExpression>(E)); + auto *BE = cast<BasicExpression>(E); + const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); +} + +PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, + bool &AllConstant) { BasicBlock *PHIBlock = I->getParent(); auto *PN = cast<PHINode>(I); auto *E = @@ -419,28 +716,32 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) { E->setType(I->getType()); E->setOpcode(I->getOpcode()); - auto ReachablePhiArg = [&](const Use &U) { - return ReachableBlocks.count(PN->getIncomingBlock(U)); - }; + unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); - // Filter out unreachable operands - auto Filtered = make_filter_range(PN->operands(), ReachablePhiArg); + // Filter out unreachable phi operands. + auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { + return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); + }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use &U) -> Value * { + auto *BB = PN->getIncomingBlock(U); + auto *DTN = DT->getNode(BB); + if (RPOOrdering.lookup(DTN) >= PHIRPO) + HasBackedge = true; + AllConstant &= isa<UndefValue>(U) || isa<Constant>(U); + // Don't try to transform self-defined phis. if (U == PN) return PN; - const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock); - return lookupOperandLeader(U, I, BBE); + return lookupOperandLeader(U); }); return E; } // Set basic expression info (Arguments, type, opcode) for Expression // E from Instruction I in block B. -bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E, - const BasicBlock *B) { +bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { bool AllConstant = true; if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) E->setType(GEP->getSourceElementType()); @@ -452,7 +753,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E, // Transform the operand array into an operand leader array, and keep track of // whether all members are constant. std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { - auto Operand = lookupOperandLeader(O, I, B); + auto Operand = lookupOperandLeader(O); AllConstant &= isa<Constant>(Operand); return Operand; }); @@ -461,8 +762,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E, } const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, - Value *Arg1, Value *Arg2, - const BasicBlock *B) { + Value *Arg1, Value *Arg2) { auto *E = new (ExpressionAllocator) BasicExpression(2); E->setType(T); @@ -473,13 +773,13 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, // of their operands get the same value number by sorting the operand value // numbers. Since all commutative instructions have two operands it is more // efficient to sort by hand rather than using, say, std::sort. - if (Arg1 > Arg2) + if (shouldSwapOperands(Arg1, Arg2)) std::swap(Arg1, Arg2); } - E->op_push_back(lookupOperandLeader(Arg1, nullptr, B)); - E->op_push_back(lookupOperandLeader(Arg2, nullptr, B)); + E->op_push_back(lookupOperandLeader(Arg1)); + E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI, + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, DT, AC); if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) return SimplifiedE; @@ -502,40 +802,32 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, NumGVNOpsSimplified++; assert(isa<BasicExpression>(E) && "We should always have had a basic expression here"); - - cast<BasicExpression>(E)->deallocateOperands(ArgRecycler); - ExpressionAllocator.Deallocate(E); + deleteExpression(E); return createConstantExpression(C); } else if (isa<Argument>(V) || isa<GlobalVariable>(V)) { if (I) DEBUG(dbgs() << "Simplified " << *I << " to " << " variable " << *V << "\n"); - cast<BasicExpression>(E)->deallocateOperands(ArgRecycler); - ExpressionAllocator.Deallocate(E); + deleteExpression(E); return createVariableExpression(V); } CongruenceClass *CC = ValueToClass.lookup(V); - if (CC && CC->DefiningExpr) { + if (CC && CC->getDefiningExpr()) { if (I) DEBUG(dbgs() << "Simplified " << *I << " to " << " expression " << *V << "\n"); NumGVNOpsSimplified++; - assert(isa<BasicExpression>(E) && - "We should always have had a basic expression here"); - cast<BasicExpression>(E)->deallocateOperands(ArgRecycler); - ExpressionAllocator.Deallocate(E); - return CC->DefiningExpr; + deleteExpression(E); + return CC->getDefiningExpr(); } return nullptr; } -const Expression *NewGVN::createExpression(Instruction *I, - const BasicBlock *B) { - +const Expression *NewGVN::createExpression(Instruction *I) { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); - bool AllConstant = setBasicExpressionInfo(I, E, B); + bool AllConstant = setBasicExpressionInfo(I, E); if (I->isCommutative()) { // Ensure that commutative instructions that only differ by a permutation @@ -543,7 +835,7 @@ const Expression *NewGVN::createExpression(Instruction *I, // numbers. Since all commutative instructions have two operands it is more // efficient to sort by hand rather than using, say, std::sort. assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); - if (E->getOperand(0) > E->getOperand(1)) + if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) E->swapOperands(0, 1); } @@ -559,48 +851,43 @@ const Expression *NewGVN::createExpression(Instruction *I, // Sort the operand value numbers so x<y and y>x get the same value // number. CmpInst::Predicate Predicate = CI->getPredicate(); - if (E->getOperand(0) > E->getOperand(1)) { + if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) { E->swapOperands(0, 1); Predicate = CmpInst::getSwappedPredicate(Predicate); } E->setOpcode((CI->getOpcode() << 8) | Predicate); // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands - // TODO: Since we noop bitcasts, we may need to check types before - // simplifying, so that we don't end up simplifying based on a wrong - // type assumption. We should clean this up so we can use constants of the - // wrong type - assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() && "Wrong types on cmp instruction"); - if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() && - E->getOperand(1)->getType() == I->getOperand(1)->getType())) { - Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), - *DL, TLI, DT, AC); - if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) - return SimplifiedE; - } + assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && + E->getOperand(1)->getType() == I->getOperand(1)->getType())); + Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), + DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; } else if (isa<SelectInst>(I)) { if (isa<Constant>(E->getOperand(0)) || - (E->getOperand(1)->getType() == I->getOperand(1)->getType() && - E->getOperand(2)->getType() == I->getOperand(2)->getType())) { + E->getOperand(0) == E->getOperand(1)) { + assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && + E->getOperand(2)->getType() == I->getOperand(2)->getType()); Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), *DL, TLI, DT, AC); + E->getOperand(2), DL, TLI, DT, AC); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } } else if (I->isBinaryOp()) { Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), - *DL, TLI, DT, AC); + DL, TLI, DT, AC); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (auto *BI = dyn_cast<BitCastInst>(I)) { - Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC); + Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<GetElementPtrInst>(I)) { Value *V = SimplifyGEPInst(E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), - *DL, TLI, DT, AC); + DL, TLI, DT, AC); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (AllConstant) { @@ -615,7 +902,7 @@ const Expression *NewGVN::createExpression(Instruction *I, for (Value *Arg : E->operands()) C.emplace_back(cast<Constant>(Arg)); - if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI)) + if (Value *V = ConstantFoldInstOperands(I, C, DL, TLI)) if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } @@ -623,18 +910,18 @@ const Expression *NewGVN::createExpression(Instruction *I, } const AggregateValueExpression * -NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) { +NewGVN::createAggregateValueExpression(Instruction *I) { if (auto *II = dyn_cast<InsertValueInst>(I)) { auto *E = new (ExpressionAllocator) AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); - setBasicExpressionInfo(I, E, B); + setBasicExpressionInfo(I, E); E->allocateIntOperands(ExpressionAllocator); std::copy(II->idx_begin(), II->idx_end(), int_op_inserter(E)); return E; } else if (auto *EI = dyn_cast<ExtractValueInst>(I)) { auto *E = new (ExpressionAllocator) AggregateValueExpression(I->getNumOperands(), EI->getNumIndices()); - setBasicExpressionInfo(EI, E, B); + setBasicExpressionInfo(EI, E); E->allocateIntOperands(ExpressionAllocator); std::copy(EI->idx_begin(), EI->idx_end(), int_op_inserter(E)); return E; @@ -648,12 +935,10 @@ const VariableExpression *NewGVN::createVariableExpression(Value *V) { return E; } -const Expression *NewGVN::createVariableOrConstant(Value *V, - const BasicBlock *B) { - auto Leader = lookupOperandLeader(V, nullptr, B); - if (auto *C = dyn_cast<Constant>(Leader)) +const Expression *NewGVN::createVariableOrConstant(Value *V) { + if (auto *C = dyn_cast<Constant>(V)) return createConstantExpression(C); - return createVariableExpression(Leader); + return createVariableExpression(V); } const ConstantExpression *NewGVN::createConstantExpression(Constant *C) { @@ -669,40 +954,90 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) { } const CallExpression *NewGVN::createCallExpression(CallInst *CI, - MemoryAccess *HV, - const BasicBlock *B) { + const MemoryAccess *MA) { // FIXME: Add operand bundles for calls. auto *E = - new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV); - setBasicExpressionInfo(CI, E, B); + new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); + setBasicExpressionInfo(CI, E); return E; } +// Return true if some equivalent of instruction Inst dominates instruction U. +bool NewGVN::someEquivalentDominates(const Instruction *Inst, + const Instruction *U) const { + auto *CC = ValueToClass.lookup(Inst); + // This must be an instruction because we are only called from phi nodes + // in the case that the value it needs to check against is an instruction. + + // The most likely candiates for dominance are the leader and the next leader. + // The leader or nextleader will dominate in all cases where there is an + // equivalent that is higher up in the dom tree. + // We can't *only* check them, however, because the + // dominator tree could have an infinite number of non-dominating siblings + // with instructions that are in the right congruence class. + // A + // B C D E F G + // | + // H + // Instruction U could be in H, with equivalents in every other sibling. + // Depending on the rpo order picked, the leader could be the equivalent in + // any of these siblings. + if (!CC) + return false; + if (DT->dominates(cast<Instruction>(CC->getLeader()), U)) + return true; + if (CC->getNextLeader().first && + DT->dominates(cast<Instruction>(CC->getNextLeader().first), U)) + return true; + return llvm::any_of(*CC, [&](const Value *Member) { + return Member != CC->getLeader() && + DT->dominates(cast<Instruction>(Member), U); + }); +} + // See if we have a congruence class and leader for this operand, and if so, // return it. Otherwise, return the operand itself. -template <class T> -Value *NewGVN::lookupOperandLeader(Value *V, const User *U, const T &B) const { +Value *NewGVN::lookupOperandLeader(Value *V) const { CongruenceClass *CC = ValueToClass.lookup(V); - if (CC && (CC != InitialClass)) - return CC->RepLeader; + if (CC) { + // Everything in TOP is represneted by undef, as it can be any value. + // We do have to make sure we get the type right though, so we can't set the + // RepLeader to undef. + if (CC == TOPClass) + return UndefValue::get(V->getType()); + return CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader(); + } + return V; } -MemoryAccess *NewGVN::lookupMemoryAccessEquiv(MemoryAccess *MA) const { - MemoryAccess *Result = MemoryAccessEquiv.lookup(MA); - return Result ? Result : MA; +const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { + auto *CC = getMemoryClass(MA); + assert(CC->getMemoryLeader() && + "Every MemoryAccess should be mapped to a " + "congruence class with a represenative memory " + "access"); + return CC->getMemoryLeader(); +} + +// Return true if the MemoryAccess is really equivalent to everything. This is +// equivalent to the lattice value "TOP" in most lattices. This is the initial +// state of all MemoryAccesses. +bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { + return getMemoryClass(MA) == TOPClass; } LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, - LoadInst *LI, MemoryAccess *DA, - const BasicBlock *B) { - auto *E = new (ExpressionAllocator) LoadExpression(1, LI, DA); + LoadInst *LI, + const MemoryAccess *MA) { + auto *E = + new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA)); E->allocateOperands(ArgRecycler, ExpressionAllocator); E->setType(LoadType); // Give store and loads same opcode so they value number together. E->setOpcode(0); - E->op_push_back(lookupOperandLeader(PointerOp, LI, B)); + E->op_push_back(PointerOp); if (LI) E->setAlignment(LI->getAlignment()); @@ -713,16 +1048,16 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, } const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, - MemoryAccess *DA, - const BasicBlock *B) { - auto *E = - new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA); + const MemoryAccess *MA) { + auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand()); + auto *E = new (ExpressionAllocator) + StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA); E->allocateOperands(ArgRecycler, ExpressionAllocator); E->setType(SI->getValueOperand()->getType()); // Give store and loads same opcode so they value number together. E->setOpcode(0); - E->op_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B)); + E->op_push_back(lookupOperandLeader(SI->getPointerOperand())); // TODO: Value number heap versions. We may be able to discover // things alias analysis can't on it's own (IE that a store and a @@ -730,44 +1065,140 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, return E; } -// Utility function to check whether the congruence class has a member other -// than the given instruction. -bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) { - // Either it has more than one store, in which case it must contain something - // other than us (because it's indexed by value), or if it only has one store - // right now, that member should not be us. - return CC->StoreCount > 1 || CC->Members.count(I) == 0; -} - -const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, - const BasicBlock *B) { +const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast<StoreInst>(I); - MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - // See if we are defined by a previous store expression, it already has a - // value, and it's the same value as our current store. FIXME: Right now, we - // only do this for simple stores, we should expand to cover memcpys, etc. + auto *StoreAccess = MSSA->getMemoryAccess(SI); + // Get the expression, if any, for the RHS of the MemoryDef. + const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); + if (EnableStoreRefinement) + StoreRHS = MSSAWalker->getClobberingMemoryAccess(StoreAccess); + // If we bypassed the use-def chains, make sure we add a use. + if (StoreRHS != StoreAccess->getDefiningAccess()) + addMemoryUsers(StoreRHS, StoreAccess); + + StoreRHS = lookupMemoryLeader(StoreRHS); + // If we are defined by ourselves, use the live on entry def. + if (StoreRHS == StoreAccess) + StoreRHS = MSSA->getLiveOnEntryDef(); + if (SI->isSimple()) { - // Get the expression, if any, for the RHS of the MemoryDef. - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast<MemoryDef>(StoreAccess)->getDefiningAccess()); - const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); - CongruenceClass *CC = ExpressionToClass.lookup(OldStore); + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. + const auto *LastStore = createStoreExpression(SI, StoreRHS); + const auto *LastCC = ExpressionToClass.lookup(LastStore); // Basically, check if the congruence class the store is in is defined by a // store that isn't us, and has the same value. MemorySSA takes care of // ensuring the store has the same memory state as us already. - if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) && - CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) && - hasMemberOtherThanUs(CC, I)) - return createStoreExpression(SI, StoreRHS, B); + // The RepStoredValue gets nulled if all the stores disappear in a class, so + // we don't need to check if the class contains a store besides us. + if (LastCC && + LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand())) + return LastStore; + deleteExpression(LastStore); + // Also check if our value operand is defined by a load of the same memory + // location, and the memory state is the same as it was then (otherwise, it + // could have been overwritten later. See test32 in + // transforms/DeadStoreElimination/simple.ll). + if (auto *LI = + dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) { + if ((lookupOperandLeader(LI->getPointerOperand()) == + lookupOperandLeader(SI->getPointerOperand())) && + (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) == + StoreRHS)) + return createVariableExpression(LI); + } } - return createStoreExpression(SI, StoreAccess, B); + // If the store is not equivalent to anything, value number it as a store that + // produces a unique memory state (instead of using it's MemoryUse, we use + // it's MemoryDef). + return createStoreExpression(SI, StoreAccess); } -const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I, - const BasicBlock *B) { +// See if we can extract the value of a loaded pointer from a load, a store, or +// a memory instruction. +const Expression * +NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, + LoadInst *LI, Instruction *DepInst, + MemoryAccess *DefiningAccess) { + assert((!LI || LI->isSimple()) && "Not a simple load"); + if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) { + // Can't forward from non-atomic to atomic without violating memory model. + // Also don't need to coerce if they are the same type, we will just + // propogate.. + if (LI->isAtomic() > DepSI->isAtomic() || + LoadType == DepSI->getValueOperand()->getType()) + return nullptr; + int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI, DL); + if (Offset >= 0) { + if (auto *C = dyn_cast<Constant>( + lookupOperandLeader(DepSI->getValueOperand()))) { + DEBUG(dbgs() << "Coercing load from store " << *DepSI << " to constant " + << *C << "\n"); + return createConstantExpression( + getConstantStoreValueForLoad(C, Offset, LoadType, DL)); + } + } + + } else if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { + // Can't forward from non-atomic to atomic without violating memory model. + if (LI->isAtomic() > DepLI->isAtomic()) + return nullptr; + int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL); + if (Offset >= 0) { + // We can coerce a constant load into a load + if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI))) + if (auto *PossibleConstant = + getConstantLoadValueForLoad(C, Offset, LoadType, DL)) { + DEBUG(dbgs() << "Coercing load from load " << *LI << " to constant " + << *PossibleConstant << "\n"); + return createConstantExpression(PossibleConstant); + } + } + + } else if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) { + int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL); + if (Offset >= 0) { + if (auto *PossibleConstant = + getConstantMemInstValueForLoad(DepMI, Offset, LoadType, DL)) { + DEBUG(dbgs() << "Coercing load from meminst " << *DepMI + << " to constant " << *PossibleConstant << "\n"); + return createConstantExpression(PossibleConstant); + } + } + } + + // All of the below are only true if the loaded pointer is produced + // by the dependent instruction. + if (LoadPtr != lookupOperandLeader(DepInst) && + !AA->isMustAlias(LoadPtr, DepInst)) + return nullptr; + // If this load really doesn't depend on anything, then we must be loading an + // undef value. This can happen when loading for a fresh allocation with no + // intervening stores, for example. Note that this is only true in the case + // that the result of the allocation is pointer equal to the load ptr. + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { + return createConstantExpression(UndefValue::get(LoadType)); + } + // If this load occurs either right after a lifetime begin, + // then the loaded value is undefined. + else if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + return createConstantExpression(UndefValue::get(LoadType)); + } + // If this load follows a calloc (which zero initializes memory), + // then the loaded value is zero + else if (isCallocLikeFn(DepInst, TLI)) { + return createConstantExpression(Constant::getNullValue(LoadType)); + } + + return nullptr; +} + +const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { auto *LI = cast<LoadInst>(I); // We can eliminate in favor of non-simple loads, but we won't be able to @@ -775,7 +1206,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I, if (!LI->isSimple()) return nullptr; - Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand(), I, B); + Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand()); // Load of undef is undef. if (isa<UndefValue>(LoadAddressLeader)) return createConstantExpression(UndefValue::get(LI->getType())); @@ -788,61 +1219,233 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I, // If the defining instruction is not reachable, replace with undef. if (!ReachableBlocks.count(DefiningInst->getParent())) return createConstantExpression(UndefValue::get(LI->getType())); + // This will handle stores and memory insts. We only do if it the + // defining access has a different type, or it is a pointer produced by + // certain memory operations that cause the memory to have a fixed value + // (IE things like calloc). + if (const auto *CoercionResult = + performSymbolicLoadCoercion(LI->getType(), LoadAddressLeader, LI, + DefiningInst, DefiningAccess)) + return CoercionResult; } } - const Expression *E = - createLoadExpression(LI->getType(), LI->getPointerOperand(), LI, - lookupMemoryAccessEquiv(DefiningAccess), B); + const Expression *E = createLoadExpression(LI->getType(), LoadAddressLeader, + LI, DefiningAccess); return E; } +const Expression * +NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { + auto *PI = PredInfo->getPredicateInfoFor(I); + if (!PI) + return nullptr; + + DEBUG(dbgs() << "Found predicate info from instruction !\n"); + + auto *PWC = dyn_cast<PredicateWithCondition>(PI); + if (!PWC) + return nullptr; + + auto *CopyOf = I->getOperand(0); + auto *Cond = PWC->Condition; + + // If this a copy of the condition, it must be either true or false depending + // on the predicate info type and edge + if (CopyOf == Cond) { + // We should not need to add predicate users because the predicate info is + // already a use of this operand. + if (isa<PredicateAssume>(PI)) + return createConstantExpression(ConstantInt::getTrue(Cond->getType())); + if (auto *PBranch = dyn_cast<PredicateBranch>(PI)) { + if (PBranch->TrueEdge) + return createConstantExpression(ConstantInt::getTrue(Cond->getType())); + return createConstantExpression(ConstantInt::getFalse(Cond->getType())); + } + if (auto *PSwitch = dyn_cast<PredicateSwitch>(PI)) + return createConstantExpression(cast<Constant>(PSwitch->CaseValue)); + } + + // Not a copy of the condition, so see what the predicates tell us about this + // value. First, though, we check to make sure the value is actually a copy + // of one of the condition operands. It's possible, in certain cases, for it + // to be a copy of a predicateinfo copy. In particular, if two branch + // operations use the same condition, and one branch dominates the other, we + // will end up with a copy of a copy. This is currently a small deficiency in + // predicateinfo. What will end up happening here is that we will value + // number both copies the same anyway. + + // Everything below relies on the condition being a comparison. + auto *Cmp = dyn_cast<CmpInst>(Cond); + if (!Cmp) + return nullptr; + + if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) { + DEBUG(dbgs() << "Copy is not of any condition operands!"); + return nullptr; + } + Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); + Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1)); + bool SwappedOps = false; + // Sort the ops + if (shouldSwapOperands(FirstOp, SecondOp)) { + std::swap(FirstOp, SecondOp); + SwappedOps = true; + } + CmpInst::Predicate Predicate = + SwappedOps ? Cmp->getSwappedPredicate() : Cmp->getPredicate(); + + if (isa<PredicateAssume>(PI)) { + // If the comparison is true when the operands are equal, then we know the + // operands are equal, because assumes must always be true. + if (CmpInst::isTrueWhenEqual(Predicate)) { + addPredicateUsers(PI, I); + return createVariableOrConstant(FirstOp); + } + } + if (const auto *PBranch = dyn_cast<PredicateBranch>(PI)) { + // If we are *not* a copy of the comparison, we may equal to the other + // operand when the predicate implies something about equality of + // operations. In particular, if the comparison is true/false when the + // operands are equal, and we are on the right edge, we know this operation + // is equal to something. + if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || + (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { + addPredicateUsers(PI, I); + return createVariableOrConstant(FirstOp); + } + // Handle the special case of floating point. + if (((PBranch->TrueEdge && Predicate == CmpInst::FCMP_OEQ) || + (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && + isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) { + addPredicateUsers(PI, I); + return createConstantExpression(cast<Constant>(FirstOp)); + } + } + return nullptr; +} + // Evaluate read only and pure calls, and create an expression result. -const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I, - const BasicBlock *B) { +const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) { auto *CI = cast<CallInst>(I); - if (AA->doesNotAccessMemory(CI)) - return createCallExpression(CI, nullptr, B); - if (AA->onlyReadsMemory(CI)) { + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + // Instrinsics with the returned attribute are copies of arguments. + if (auto *ReturnedValue = II->getReturnedArgOperand()) { + if (II->getIntrinsicID() == Intrinsic::ssa_copy) + if (const auto *Result = performSymbolicPredicateInfoEvaluation(I)) + return Result; + return createVariableOrConstant(ReturnedValue); + } + } + if (AA->doesNotAccessMemory(CI)) { + return createCallExpression(CI, TOPClass->getMemoryLeader()); + } else if (AA->onlyReadsMemory(CI)) { MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI); - return createCallExpression(CI, lookupMemoryAccessEquiv(DefiningAccess), B); + return createCallExpression(CI, DefiningAccess); } return nullptr; } -// Update the memory access equivalence table to say that From is equal to To, +// Retrieve the memory class for a given MemoryAccess. +CongruenceClass *NewGVN::getMemoryClass(const MemoryAccess *MA) const { + + auto *Result = MemoryAccessToClass.lookup(MA); + assert(Result && "Should have found memory class"); + return Result; +} + +// Update the MemoryAccess equivalence table to say that From is equal to To, // and return true if this is different from what already existed in the table. -bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) { - DEBUG(dbgs() << "Setting " << *From << " equivalent to "); - if (!To) - DEBUG(dbgs() << "itself"); - else - DEBUG(dbgs() << *To); +bool NewGVN::setMemoryClass(const MemoryAccess *From, + CongruenceClass *NewClass) { + assert(NewClass && + "Every MemoryAccess should be getting mapped to a non-null class"); + DEBUG(dbgs() << "Setting " << *From); + DEBUG(dbgs() << " equivalent to congruence class "); + DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader "); + DEBUG(dbgs() << *NewClass->getMemoryLeader()); DEBUG(dbgs() << "\n"); - auto LookupResult = MemoryAccessEquiv.find(From); + + auto LookupResult = MemoryAccessToClass.find(From); bool Changed = false; // If it's already in the table, see if the value changed. - if (LookupResult != MemoryAccessEquiv.end()) { - if (To && LookupResult->second != To) { + if (LookupResult != MemoryAccessToClass.end()) { + auto *OldClass = LookupResult->second; + if (OldClass != NewClass) { + // If this is a phi, we have to handle memory member updates. + if (auto *MP = dyn_cast<MemoryPhi>(From)) { + OldClass->memory_erase(MP); + NewClass->memory_insert(MP); + // This may have killed the class if it had no non-memory members + if (OldClass->getMemoryLeader() == From) { + if (OldClass->memory_empty()) { + OldClass->setMemoryLeader(nullptr); + } else { + OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); + DEBUG(dbgs() << "Memory class leader change for class " + << OldClass->getID() << " to " + << *OldClass->getMemoryLeader() + << " due to removal of a memory member " << *From + << "\n"); + markMemoryLeaderChangeTouched(OldClass); + } + } + } // It wasn't equivalent before, and now it is. - LookupResult->second = To; - Changed = true; - } else if (!To) { - // It used to be equivalent to something, and now it's not. - MemoryAccessEquiv.erase(LookupResult); + LookupResult->second = NewClass; Changed = true; } - } else { - assert(!To && - "Memory equivalence should never change from nothing to something"); } return Changed; } + +// Determine if a phi is cycle-free. That means the values in the phi don't +// depend on any expressions that can change value as a result of the phi. +// For example, a non-cycle free phi would be v = phi(0, v+1). +bool NewGVN::isCycleFree(const PHINode *PN) { + // In order to compute cycle-freeness, we do SCC finding on the phi, and see + // what kind of SCC it ends up in. If it is a singleton, it is cycle-free. + // If it is not in a singleton, it is only cycle free if the other members are + // all phi nodes (as they do not compute anything, they are copies). TODO: + // There are likely a few other intrinsics or expressions that could be + // included here, but this happens so infrequently already that it is not + // likely to be worth it. + auto PCS = PhiCycleState.lookup(PN); + if (PCS == PCS_Unknown) { + SCCFinder.Start(PN); + auto &SCC = SCCFinder.getComponentFor(PN); + // It's cycle free if it's size 1 or or the SCC is *only* phi nodes. + if (SCC.size() == 1) + PhiCycleState.insert({PN, PCS_CycleFree}); + else { + bool AllPhis = + llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); }); + PCS = AllPhis ? PCS_CycleFree : PCS_Cycle; + for (auto *Member : SCC) + if (auto *MemberPhi = dyn_cast<PHINode>(Member)) + PhiCycleState.insert({MemberPhi, PCS}); + } + } + if (PCS == PCS_Cycle) + return false; + return true; +} + // Evaluate PHI nodes symbolically, and create an expression result. -const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, - const BasicBlock *B) { - auto *E = cast<PHIExpression>(createPHIExpression(I)); +const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { + // True if one of the incoming phi edges is a backedge. + bool HasBackedge = false; + // All constant tracks the state of whether all the *original* phi operands + // were constant. + // This is really shorthand for "this phi cannot cycle due to forward + // propagation", as any + // change in value of the phi is guaranteed not to later change the value of + // the phi. + // IE it can't be v = phi(undef, v+1) + bool AllConstant = true; + auto *E = + cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. // See if all arguaments are the same. @@ -861,14 +1464,15 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, if (Filtered.begin() == Filtered.end()) { DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" << "\n"); - E->deallocateOperands(ArgRecycler); - ExpressionAllocator.Deallocate(E); + deleteExpression(E); return createConstantExpression(UndefValue::get(I->getType())); } + unsigned NumOps = 0; Value *AllSameValue = *(Filtered.begin()); ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. - if (llvm::all_of(Filtered, [AllSameValue](const Value *V) { + if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) { + ++NumOps; return V == AllSameValue; })) { // In LLVM's non-standard representation of phi nodes, it's possible to have @@ -881,27 +1485,32 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, // We also special case undef, so that if we have an undef, we can't use the // common value unless it dominates the phi block. if (HasUndef) { + // If we have undef and at least one other value, this is really a + // multivalued phi, and we need to know if it's cycle free in order to + // evaluate whether we can ignore the undef. The other parts of this are + // just shortcuts. If there is no backedge, or all operands are + // constants, or all operands are ignored but the undef, it also must be + // cycle free. + if (!AllConstant && HasBackedge && NumOps > 0 && + !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I))) + return E; + // Only have to check for instructions if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue)) - if (!DT->dominates(AllSameInst, I)) + if (!someEquivalentDominates(AllSameInst, I)) return E; } NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); - E->deallocateOperands(ArgRecycler); - ExpressionAllocator.Deallocate(E); - if (auto *C = dyn_cast<Constant>(AllSameValue)) - return createConstantExpression(C); - return createVariableExpression(AllSameValue); + deleteExpression(E); + return createVariableOrConstant(AllSameValue); } return E; } -const Expression * -NewGVN::performSymbolicAggrValueEvaluation(Instruction *I, - const BasicBlock *B) { +const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { if (auto *EI = dyn_cast<ExtractValueInst>(I)) { auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { @@ -931,19 +1540,130 @@ NewGVN::performSymbolicAggrValueEvaluation(Instruction *I, // expression. assert(II->getNumArgOperands() == 2 && "Expect two args for recognised intrinsics."); - return createBinaryExpression(Opcode, EI->getType(), - II->getArgOperand(0), - II->getArgOperand(1), B); + return createBinaryExpression( + Opcode, EI->getType(), II->getArgOperand(0), II->getArgOperand(1)); } } } - return createAggregateValueExpression(I, B); + return createAggregateValueExpression(I); +} +const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { + auto *CI = dyn_cast<CmpInst>(I); + // See if our operands are equal to those of a previous predicate, and if so, + // if it implies true or false. + auto Op0 = lookupOperandLeader(CI->getOperand(0)); + auto Op1 = lookupOperandLeader(CI->getOperand(1)); + auto OurPredicate = CI->getPredicate(); + if (shouldSwapOperands(Op0, Op1)) { + std::swap(Op0, Op1); + OurPredicate = CI->getSwappedPredicate(); + } + + // Avoid processing the same info twice + const PredicateBase *LastPredInfo = nullptr; + // See if we know something about the comparison itself, like it is the target + // of an assume. + auto *CmpPI = PredInfo->getPredicateInfoFor(I); + if (dyn_cast_or_null<PredicateAssume>(CmpPI)) + return createConstantExpression(ConstantInt::getTrue(CI->getType())); + + if (Op0 == Op1) { + // This condition does not depend on predicates, no need to add users + if (CI->isTrueWhenEqual()) + return createConstantExpression(ConstantInt::getTrue(CI->getType())); + else if (CI->isFalseWhenEqual()) + return createConstantExpression(ConstantInt::getFalse(CI->getType())); + } + + // NOTE: Because we are comparing both operands here and below, and using + // previous comparisons, we rely on fact that predicateinfo knows to mark + // comparisons that use renamed operands as users of the earlier comparisons. + // It is *not* enough to just mark predicateinfo renamed operands as users of + // the earlier comparisons, because the *other* operand may have changed in a + // previous iteration. + // Example: + // icmp slt %a, %b + // %b.0 = ssa.copy(%b) + // false branch: + // icmp slt %c, %b.0 + + // %c and %a may start out equal, and thus, the code below will say the second + // %icmp is false. c may become equal to something else, and in that case the + // %second icmp *must* be reexamined, but would not if only the renamed + // %operands are considered users of the icmp. + + // *Currently* we only check one level of comparisons back, and only mark one + // level back as touched when changes appen . If you modify this code to look + // back farther through comparisons, you *must* mark the appropriate + // comparisons as users in PredicateInfo.cpp, or you will cause bugs. See if + // we know something just from the operands themselves + + // See if our operands have predicate info, so that we may be able to derive + // something from a previous comparison. + for (const auto &Op : CI->operands()) { + auto *PI = PredInfo->getPredicateInfoFor(Op); + if (const auto *PBranch = dyn_cast_or_null<PredicateBranch>(PI)) { + if (PI == LastPredInfo) + continue; + LastPredInfo = PI; + + // TODO: Along the false edge, we may know more things too, like icmp of + // same operands is false. + // TODO: We only handle actual comparison conditions below, not and/or. + auto *BranchCond = dyn_cast<CmpInst>(PBranch->Condition); + if (!BranchCond) + continue; + auto *BranchOp0 = lookupOperandLeader(BranchCond->getOperand(0)); + auto *BranchOp1 = lookupOperandLeader(BranchCond->getOperand(1)); + auto BranchPredicate = BranchCond->getPredicate(); + if (shouldSwapOperands(BranchOp0, BranchOp1)) { + std::swap(BranchOp0, BranchOp1); + BranchPredicate = BranchCond->getSwappedPredicate(); + } + if (BranchOp0 == Op0 && BranchOp1 == Op1) { + if (PBranch->TrueEdge) { + // If we know the previous predicate is true and we are in the true + // edge then we may be implied true or false. + if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, + BranchPredicate)) { + addPredicateUsers(PI, I); + return createConstantExpression( + ConstantInt::getTrue(CI->getType())); + } + + if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, + BranchPredicate)) { + addPredicateUsers(PI, I); + return createConstantExpression( + ConstantInt::getFalse(CI->getType())); + } + + } else { + // Just handle the ne and eq cases, where if we have the same + // operands, we may know something. + if (BranchPredicate == OurPredicate) { + addPredicateUsers(PI, I); + // Same predicate, same ops,we know it was false, so this is false. + return createConstantExpression( + ConstantInt::getFalse(CI->getType())); + } else if (BranchPredicate == + CmpInst::getInversePredicate(OurPredicate)) { + addPredicateUsers(PI, I); + // Inverse predicate, we know the other was false, so this is true. + return createConstantExpression( + ConstantInt::getTrue(CI->getType())); + } + } + } + } + } + // Create expression will take care of simplifyCmpInst + return createExpression(I); } // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V, - const BasicBlock *B) { +const Expression *NewGVN::performSymbolicEvaluation(Value *V) { const Expression *E = nullptr; if (auto *C = dyn_cast<Constant>(V)) E = createConstantExpression(C); @@ -957,24 +1677,27 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V, switch (I->getOpcode()) { case Instruction::ExtractValue: case Instruction::InsertValue: - E = performSymbolicAggrValueEvaluation(I, B); + E = performSymbolicAggrValueEvaluation(I); break; case Instruction::PHI: - E = performSymbolicPHIEvaluation(I, B); + E = performSymbolicPHIEvaluation(I); break; case Instruction::Call: - E = performSymbolicCallEvaluation(I, B); + E = performSymbolicCallEvaluation(I); break; case Instruction::Store: - E = performSymbolicStoreEvaluation(I, B); + E = performSymbolicStoreEvaluation(I); break; case Instruction::Load: - E = performSymbolicLoadEvaluation(I, B); + E = performSymbolicLoadEvaluation(I); break; case Instruction::BitCast: { - E = createExpression(I, B); + E = createExpression(I); + } break; + case Instruction::ICmp: + case Instruction::FCmp: { + E = performSymbolicCmpEvaluation(I); } break; - case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -993,8 +1716,6 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V, case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::ICmp: - case Instruction::FCmp: case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -1011,7 +1732,7 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V, case Instruction::InsertElement: case Instruction::ShuffleVector: case Instruction::GetElementPtr: - E = createExpression(I, B); + E = createExpression(I); break; default: return nullptr; @@ -1020,129 +1741,297 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V, return E; } -// There is an edge from 'Src' to 'Dst'. Return true if every path from -// the entry block to 'Dst' passes via this edge. In particular 'Dst' -// must not be reachable via another edge from 'Src'. -bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) const { - - // While in theory it is interesting to consider the case in which Dst has - // more than one predecessor, because Dst might be part of a loop which is - // only reachable from Src, in practice it is pointless since at the time - // GVN runs all such loops have preheaders, which means that Dst will have - // been changed to have only one predecessor, namely Src. - const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); - const BasicBlock *Src = E.getStart(); - assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); - (void)Src; - return Pred != nullptr; -} - void NewGVN::markUsersTouched(Value *V) { // Now mark the users as touched. for (auto *User : V->users()) { assert(isa<Instruction>(User) && "Use of value not within an instruction?"); - TouchedInstructions.set(InstrDFS[User]); + TouchedInstructions.set(InstrToDFSNum(User)); } } -void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) { - for (auto U : MA->users()) { - if (auto *MUD = dyn_cast<MemoryUseOrDef>(U)) - TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]); - else - TouchedInstructions.set(InstrDFS[U]); +void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) { + DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n"); + MemoryToUsers[To].insert(U); +} + +void NewGVN::markMemoryDefTouched(const MemoryAccess *MA) { + TouchedInstructions.set(MemoryToDFSNum(MA)); +} + +void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { + if (isa<MemoryUse>(MA)) + return; + for (auto U : MA->users()) + TouchedInstructions.set(MemoryToDFSNum(U)); + const auto Result = MemoryToUsers.find(MA); + if (Result != MemoryToUsers.end()) { + for (auto *User : Result->second) + TouchedInstructions.set(MemoryToDFSNum(User)); + MemoryToUsers.erase(Result); + } +} + +// Add I to the set of users of a given predicate. +void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) { + if (auto *PBranch = dyn_cast<PredicateBranch>(PB)) + PredicateToUsers[PBranch->Condition].insert(I); + else if (auto *PAssume = dyn_cast<PredicateBranch>(PB)) + PredicateToUsers[PAssume->Condition].insert(I); +} + +// Touch all the predicates that depend on this instruction. +void NewGVN::markPredicateUsersTouched(Instruction *I) { + const auto Result = PredicateToUsers.find(I); + if (Result != PredicateToUsers.end()) { + for (auto *User : Result->second) + TouchedInstructions.set(InstrToDFSNum(User)); + PredicateToUsers.erase(Result); } } +// Mark users affected by a memory leader change. +void NewGVN::markMemoryLeaderChangeTouched(CongruenceClass *CC) { + for (auto M : CC->memory()) + markMemoryDefTouched(M); +} + // Touch the instructions that need to be updated after a congruence class has a // leader change, and mark changed values. -void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) { - for (auto M : CC->Members) { +void NewGVN::markValueLeaderChangeTouched(CongruenceClass *CC) { + for (auto M : *CC) { if (auto *I = dyn_cast<Instruction>(M)) - TouchedInstructions.set(InstrDFS[I]); + TouchedInstructions.set(InstrToDFSNum(I)); LeaderChanges.insert(M); } } +// Give a range of things that have instruction DFS numbers, this will return +// the member of the range with the smallest dfs number. +template <class T, class Range> +T *NewGVN::getMinDFSOfRange(const Range &R) const { + std::pair<T *, unsigned> MinDFS = {nullptr, ~0U}; + for (const auto X : R) { + auto DFSNum = InstrToDFSNum(X); + if (DFSNum < MinDFS.second) + MinDFS = {X, DFSNum}; + } + return MinDFS.first; +} + +// This function returns the MemoryAccess that should be the next leader of +// congruence class CC, under the assumption that the current leader is going to +// disappear. +const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { + // TODO: If this ends up to slow, we can maintain a next memory leader like we + // do for regular leaders. + // Make sure there will be a leader to find + assert((CC->getStoreCount() > 0 || !CC->memory_empty()) && + "Can't get next leader if there is none"); + if (CC->getStoreCount() > 0) { + if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) + return MSSA->getMemoryAccess(NL); + // Find the store with the minimum DFS number. + auto *V = getMinDFSOfRange<Value>(make_filter_range( + *CC, [&](const Value *V) { return isa<StoreInst>(V); })); + return MSSA->getMemoryAccess(cast<StoreInst>(V)); + } + assert(CC->getStoreCount() == 0); + + // Given our assertion, hitting this part must mean + // !OldClass->memory_empty() + if (CC->memory_size() == 1) + return *CC->memory_begin(); + return getMinDFSOfRange<const MemoryPhi>(CC->memory()); +} + +// This function returns the next value leader of a congruence class, under the +// assumption that the current leader is going away. This should end up being +// the next most dominating member. +Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const { + // We don't need to sort members if there is only 1, and we don't care about + // sorting the TOP class because everything either gets out of it or is + // unreachable. + + if (CC->size() == 1 || CC == TOPClass) { + return *(CC->begin()); + } else if (CC->getNextLeader().first) { + ++NumGVNAvoidedSortedLeaderChanges; + return CC->getNextLeader().first; + } else { + ++NumGVNSortedLeaderChanges; + // NOTE: If this ends up to slow, we can maintain a dual structure for + // member testing/insertion, or keep things mostly sorted, and sort only + // here, or use SparseBitVector or .... + return getMinDFSOfRange<Value>(*CC); + } +} + +// Move a MemoryAccess, currently in OldClass, to NewClass, including updates to +// the memory members, etc for the move. +// +// The invariants of this function are: +// +// I must be moving to NewClass from OldClass The StoreCount of OldClass and +// NewClass is expected to have been updated for I already if it is is a store. +// The OldClass memory leader has not been updated yet if I was the leader. +void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, + MemoryAccess *InstMA, + CongruenceClass *OldClass, + CongruenceClass *NewClass) { + // If the leader is I, and we had a represenative MemoryAccess, it should + // be the MemoryAccess of OldClass. + assert((!InstMA || !OldClass->getMemoryLeader() || + OldClass->getLeader() != I || + OldClass->getMemoryLeader() == InstMA) && + "Representative MemoryAccess mismatch"); + // First, see what happens to the new class + if (!NewClass->getMemoryLeader()) { + // Should be a new class, or a store becoming a leader of a new class. + assert(NewClass->size() == 1 || + (isa<StoreInst>(I) && NewClass->getStoreCount() == 1)); + NewClass->setMemoryLeader(InstMA); + // Mark it touched if we didn't just create a singleton + DEBUG(dbgs() << "Memory class leader change for class " << NewClass->getID() + << " due to new memory instruction becoming leader\n"); + markMemoryLeaderChangeTouched(NewClass); + } + setMemoryClass(InstMA, NewClass); + // Now, fixup the old class if necessary + if (OldClass->getMemoryLeader() == InstMA) { + if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) { + OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); + DEBUG(dbgs() << "Memory class leader change for class " + << OldClass->getID() << " to " + << *OldClass->getMemoryLeader() + << " due to removal of old leader " << *InstMA << "\n"); + markMemoryLeaderChangeTouched(OldClass); + } else + OldClass->setMemoryLeader(nullptr); + } +} + // Move a value, currently in OldClass, to be part of NewClass -// Update OldClass for the move (including changing leaders, etc) -void NewGVN::moveValueToNewCongruenceClass(Instruction *I, +// Update OldClass and NewClass for the move (including changing leaders, etc). +void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, CongruenceClass *OldClass, CongruenceClass *NewClass) { - DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID - << "\n"); - - if (I == OldClass->NextLeader.first) - OldClass->NextLeader = {nullptr, ~0U}; + if (I == OldClass->getNextLeader().first) + OldClass->resetNextLeader(); // It's possible, though unlikely, for us to discover equivalences such // that the current leader does not dominate the old one. // This statistic tracks how often this happens. // We assert on phi nodes when this happens, currently, for debugging, because // we want to make sure we name phi node cycles properly. - if (isa<Instruction>(NewClass->RepLeader) && NewClass->RepLeader && - I != NewClass->RepLeader && - DT->properlyDominates( - I->getParent(), - cast<Instruction>(NewClass->RepLeader)->getParent())) { - ++NumGVNNotMostDominatingLeader; - assert(!isa<PHINode>(I) && - "New class for instruction should not be dominated by instruction"); - } - - if (NewClass->RepLeader != I) { - auto DFSNum = InstrDFS.lookup(I); - if (DFSNum < NewClass->NextLeader.second) - NewClass->NextLeader = {I, DFSNum}; + if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() && + I != NewClass->getLeader()) { + auto *IBB = I->getParent(); + auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent(); + bool Dominated = + IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader()); + Dominated = Dominated || DT->properlyDominates(IBB, NCBB); + if (Dominated) { + ++NumGVNNotMostDominatingLeader; + assert( + !isa<PHINode>(I) && + "New class for instruction should not be dominated by instruction"); + } } - OldClass->Members.erase(I); - NewClass->Members.insert(I); - if (isa<StoreInst>(I)) { - --OldClass->StoreCount; - assert(OldClass->StoreCount >= 0); - ++NewClass->StoreCount; - assert(NewClass->StoreCount > 0); + if (NewClass->getLeader() != I) + NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)}); + + OldClass->erase(I); + NewClass->insert(I); + // Handle our special casing of stores. + if (auto *SI = dyn_cast<StoreInst>(I)) { + OldClass->decStoreCount(); + // Okay, so when do we want to make a store a leader of a class? + // If we have a store defined by an earlier load, we want the earlier load + // to lead the class. + // If we have a store defined by something else, we want the store to lead + // the class so everything else gets the "something else" as a value. + // If we have a store as the single member of the class, we want the store + // as the leader + if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) { + // If it's a store expression we are using, it means we are not equivalent + // to something earlier. + if (isa<StoreExpression>(E)) { + assert(lookupOperandLeader(SI->getValueOperand()) != + NewClass->getLeader()); + NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + markValueLeaderChangeTouched(NewClass); + // Shift the new class leader to be the store + DEBUG(dbgs() << "Changing leader of congruence class " + << NewClass->getID() << " from " << *NewClass->getLeader() + << " to " << *SI << " because store joined class\n"); + // If we changed the leader, we have to mark it changed because we don't + // know what it will do to symbolic evlauation. + NewClass->setLeader(SI); + } + // We rely on the code below handling the MemoryAccess change. + } + NewClass->incStoreCount(); } - + // True if there is no memory instructions left in a class that had memory + // instructions before. + + // If it's not a memory use, set the MemoryAccess equivalence + auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I)); + bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA; + if (InstMA) + moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); ValueToClass[I] = NewClass; // See if we destroyed the class or need to swap leaders. - if (OldClass->Members.empty() && OldClass != InitialClass) { - if (OldClass->DefiningExpr) { - OldClass->Dead = true; - DEBUG(dbgs() << "Erasing expression " << OldClass->DefiningExpr + if (OldClass->empty() && OldClass != TOPClass) { + if (OldClass->getDefiningExpr()) { + DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr() << " from table\n"); - ExpressionToClass.erase(OldClass->DefiningExpr); + ExpressionToClass.erase(OldClass->getDefiningExpr()); } - } else if (OldClass->RepLeader == I) { + } else if (OldClass->getLeader() == I) { // When the leader changes, the value numbering of // everything may change due to symbolization changes, so we need to // reprocess. - DEBUG(dbgs() << "Leader change!\n"); + DEBUG(dbgs() << "Value class leader change for class " << OldClass->getID() + << "\n"); ++NumGVNLeaderChanges; - // We don't need to sort members if there is only 1, and we don't care about - // sorting the initial class because everything either gets out of it or is - // unreachable. - if (OldClass->Members.size() == 1 || OldClass == InitialClass) { - OldClass->RepLeader = *(OldClass->Members.begin()); - } else if (OldClass->NextLeader.first) { - ++NumGVNAvoidedSortedLeaderChanges; - OldClass->RepLeader = OldClass->NextLeader.first; - OldClass->NextLeader = {nullptr, ~0U}; - } else { - ++NumGVNSortedLeaderChanges; - // TODO: If this ends up to slow, we can maintain a dual structure for - // member testing/insertion, or keep things mostly sorted, and sort only - // here, or .... - std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U}; - for (const auto X : OldClass->Members) { - auto DFSNum = InstrDFS.lookup(X); - if (DFSNum < MinDFS.second) - MinDFS = {X, DFSNum}; - } - OldClass->RepLeader = MinDFS.first; + // Destroy the stored value if there are no more stores to represent it. + // Note that this is basically clean up for the expression removal that + // happens below. If we remove stores from a class, we may leave it as a + // class of equivalent memory phis. + if (OldClass->getStoreCount() == 0) { + if (OldClass->getStoredValue()) + OldClass->setStoredValue(nullptr); + } + // If we destroy the old access leader and it's a store, we have to + // effectively destroy the congruence class. When it comes to scalars, + // anything with the same value is as good as any other. That means that + // one leader is as good as another, and as long as you have some leader for + // the value, you are good.. When it comes to *memory states*, only one + // particular thing really represents the definition of a given memory + // state. Once it goes away, we need to re-evaluate which pieces of memory + // are really still equivalent. The best way to do this is to re-value + // number things. The only way to really make that happen is to destroy the + // rest of the class. In order to effectively destroy the class, we reset + // ExpressionToClass for each by using the ValueToExpression mapping. The + // members later get marked as touched due to the leader change. We will + // create new congruence classes, and the pieces that are still equivalent + // will end back together in a new class. If this becomes too expensive, it + // is possible to use a versioning scheme for the congruence classes to + // avoid the expressions finding this old class. Note that the situation is + // different for memory phis, becuase they are evaluated anew each time, and + // they become equal not by hashing, but by seeing if all operands are the + // same (or only one is reachable). + if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) { + DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID() + << " because MemoryAccess leader changed"); + for (auto Member : *OldClass) + ExpressionToClass.erase(ValueToExpression.lookup(Member)); } - markLeaderChangeTouched(OldClass); + OldClass->setLeader(getNextValueLeader(OldClass)); + OldClass->resetNextLeader(); + markValueLeaderChangeTouched(OldClass); } } @@ -1150,12 +2039,12 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { ValueToExpression[I] = E; // This is guaranteed to return something, since it will at least find - // INITIAL. + // TOP. CongruenceClass *IClass = ValueToClass[I]; assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. - assert(!IClass->Dead && "Found a dead class"); + assert(!IClass->isDead() && "Found a dead class"); CongruenceClass *EClass; if (const auto *VE = dyn_cast<VariableExpression>(E)) { @@ -1171,79 +2060,52 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { // Constants and variables should always be made the leader. if (const auto *CE = dyn_cast<ConstantExpression>(E)) { - NewClass->RepLeader = CE->getConstantValue(); + NewClass->setLeader(CE->getConstantValue()); } else if (const auto *SE = dyn_cast<StoreExpression>(E)) { StoreInst *SI = SE->getStoreInst(); - NewClass->RepLeader = - lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + NewClass->setLeader(SI); + NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + // The RepMemoryAccess field will be filled in properly by the + // moveValueToNewCongruenceClass call. } else { - NewClass->RepLeader = I; + NewClass->setLeader(I); } assert(!isa<VariableExpression>(E) && "VariableExpression should have been handled already"); EClass = NewClass; DEBUG(dbgs() << "Created new congruence class for " << *I - << " using expression " << *E << " at " << NewClass->ID - << " and leader " << *(NewClass->RepLeader) << "\n"); - DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n"); + << " using expression " << *E << " at " << NewClass->getID() + << " and leader " << *(NewClass->getLeader())); + if (NewClass->getStoredValue()) + DEBUG(dbgs() << " and stored value " << *(NewClass->getStoredValue())); + DEBUG(dbgs() << "\n"); } else { EClass = lookupResult.first->second; if (isa<ConstantExpression>(E)) - assert(isa<Constant>(EClass->RepLeader) && + assert((isa<Constant>(EClass->getLeader()) || + (EClass->getStoredValue() && + isa<Constant>(EClass->getStoredValue()))) && "Any class with a constant expression should have a " "constant leader"); assert(EClass && "Somehow don't have an eclass"); - assert(!EClass->Dead && "We accidentally looked up a dead class"); + assert(!EClass->isDead() && "We accidentally looked up a dead class"); } } bool ClassChanged = IClass != EClass; bool LeaderChanged = LeaderChanges.erase(I); if (ClassChanged || LeaderChanged) { - DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E + DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E << "\n"); - if (ClassChanged) - moveValueToNewCongruenceClass(I, IClass, EClass); + moveValueToNewCongruenceClass(I, E, IClass, EClass); markUsersTouched(I); - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { - // If this is a MemoryDef, we need to update the equivalence table. If - // we determined the expression is congruent to a different memory - // state, use that different memory state. If we determined it didn't, - // we update that as well. Right now, we only support store - // expressions. - if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) && - EClass->Members.size() != 1) { - auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) markMemoryUsersTouched(MA); - } - } else if (auto *SI = dyn_cast<StoreInst>(I)) { - // There is, sadly, one complicating thing for stores. Stores do not - // produce values, only consume them. However, in order to make loads and - // stores value number the same, we ignore the value operand of the store. - // But the value operand will still be the leader of our class, and thus, it - // may change. Because the store is a use, the store will get reprocessed, - // but nothing will change about it, and so nothing above will catch it - // (since the class will not change). In order to make sure everything ends - // up okay, we need to recheck the leader of the class. Since stores of - // different values value number differently due to different memorydefs, we - // are guaranteed the leader is always the same between stores in the same - // class. - DEBUG(dbgs() << "Checking store leader\n"); - auto ProperLeader = - lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); - if (EClass->RepLeader != ProperLeader) { - DEBUG(dbgs() << "Store leader changed, fixing\n"); - EClass->RepLeader = ProperLeader; - markLeaderChangeTouched(EClass); - markMemoryUsersTouched(MSSA->getMemoryAccess(SI)); - } + if (auto *CI = dyn_cast<CmpInst>(I)) + markPredicateUsersTouched(CI); } } @@ -1267,11 +2129,11 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // they are the only thing that depend on new edges. Anything using their // values will get propagated to if necessary. if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To)) - TouchedInstructions.set(InstrDFS[MemPhi]); + TouchedInstructions.set(InstrToDFSNum(MemPhi)); auto BI = To->begin(); while (isa<PHINode>(BI)) { - TouchedInstructions.set(InstrDFS[&*BI]); + TouchedInstructions.set(InstrToDFSNum(&*BI)); ++BI; } } @@ -1280,8 +2142,8 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // Given a predicate condition (from a switch, cmp, or whatever) and a block, // see if we know some constant value for it already. -Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const { - auto Result = lookupOperandLeader(Cond, nullptr, B); +Value *NewGVN::findConditionEquivalence(Value *Cond) const { + auto Result = lookupOperandLeader(Cond); if (isa<Constant>(Result)) return Result; return nullptr; @@ -1293,10 +2155,10 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { BranchInst *BR; if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) { Value *Cond = BR->getCondition(); - Value *CondEvaluated = findConditionEquivalence(Cond, B); + Value *CondEvaluated = findConditionEquivalence(Cond); if (!CondEvaluated) { if (auto *I = dyn_cast<Instruction>(Cond)) { - const Expression *E = createExpression(I, B); + const Expression *E = createExpression(I); if (const auto *CE = dyn_cast<ConstantExpression>(E)) { CondEvaluated = CE->getConstantValue(); } @@ -1329,13 +2191,13 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; Value *SwitchCond = SI->getCondition(); - Value *CondEvaluated = findConditionEquivalence(SwitchCond, B); + Value *CondEvaluated = findConditionEquivalence(SwitchCond); // See if we were able to turn this switch statement into a constant. if (CondEvaluated && isa<ConstantInt>(CondEvaluated)) { auto *CondVal = cast<ConstantInt>(CondEvaluated); // We should be able to get case value for this. - auto CaseVal = SI->findCaseValue(CondVal); - if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) { + auto Case = *SI->findCaseValue(CondVal); + if (Case.getCaseSuccessor() == SI->getDefaultDest()) { // We proved the value is outside of the range of the case. // We can't do anything other than mark the default dest as reachable, // and go home. @@ -1343,7 +2205,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { return; } // Now get where it goes and mark it reachable. - BasicBlock *TargetBlock = CaseVal.getCaseSuccessor(); + BasicBlock *TargetBlock = Case.getCaseSuccessor(); updateReachableEdge(B, TargetBlock); } else { for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { @@ -1361,45 +2223,66 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { } // This also may be a memory defining terminator, in which case, set it - // equivalent to nothing. - if (MemoryAccess *MA = MSSA->getMemoryAccess(TI)) - setMemoryAccessEquivTo(MA, nullptr); + // equivalent only to itself. + // + auto *MA = MSSA->getMemoryAccess(TI); + if (MA && !isa<MemoryUse>(MA)) { + auto *CC = ensureLeaderOfMemoryClass(MA); + if (setMemoryClass(MA, CC)) + markMemoryUsersTouched(MA); + } } } -// The algorithm initially places the values of the routine in the INITIAL -// congruence -// class. The leader of INITIAL is the undetermined value `TOP`. -// When the algorithm has finished, values still in INITIAL are unreachable. +// The algorithm initially places the values of the routine in the TOP +// congruence class. The leader of TOP is the undetermined value `undef`. +// When the algorithm has finished, values still in TOP are unreachable. void NewGVN::initializeCongruenceClasses(Function &F) { - // FIXME now i can't remember why this is 2 - NextCongruenceNum = 2; - // Initialize all other instructions to be in INITIAL class. - CongruenceClass::MemberSet InitialValues; - InitialClass = createCongruenceClass(nullptr, nullptr); + NextCongruenceNum = 0; + + // Note that even though we use the live on entry def as a representative + // MemoryAccess, it is *not* the same as the actual live on entry def. We + // have no real equivalemnt to undef for MemoryAccesses, and so we really + // should be checking whether the MemoryAccess is top if we want to know if it + // is equivalent to everything. Otherwise, what this really signifies is that + // the access "it reaches all the way back to the beginning of the function" + + // Initialize all other instructions to be in TOP class. + TOPClass = createCongruenceClass(nullptr, nullptr); + TOPClass->setMemoryLeader(MSSA->getLiveOnEntryDef()); + // The live on entry def gets put into it's own class + MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = + createMemoryClass(MSSA->getLiveOnEntryDef()); + for (auto &B : F) { - if (auto *MP = MSSA->getMemoryAccess(&B)) - MemoryAccessEquiv.insert({MP, MSSA->getLiveOnEntryDef()}); + // All MemoryAccesses are equivalent to live on entry to start. They must + // be initialized to something so that initial changes are noticed. For + // the maximal answer, we initialize them all to be the same as + // liveOnEntry. + auto *MemoryBlockDefs = MSSA->getBlockDefs(&B); + if (MemoryBlockDefs) + for (const auto &Def : *MemoryBlockDefs) { + MemoryAccessToClass[&Def] = TOPClass; + auto *MD = dyn_cast<MemoryDef>(&Def); + // Insert the memory phis into the member list. + if (!MD) { + const MemoryPhi *MP = cast<MemoryPhi>(&Def); + TOPClass->memory_insert(MP); + MemoryPhiState.insert({MP, MPS_TOP}); + } - for (auto &I : B) { - InitialValues.insert(&I); - ValueToClass[&I] = InitialClass; - // All memory accesses are equivalent to live on entry to start. They must - // be initialized to something so that initial changes are noticed. For - // the maximal answer, we initialize them all to be the same as - // liveOnEntry. Note that to save time, we only initialize the - // MemoryDef's for stores and all MemoryPhis to be equal. Right now, no - // other expression can generate a memory equivalence. If we start - // handling memcpy/etc, we can expand this. - if (isa<StoreInst>(&I)) { - MemoryAccessEquiv.insert( - {MSSA->getMemoryAccess(&I), MSSA->getLiveOnEntryDef()}); - ++InitialClass->StoreCount; - assert(InitialClass->StoreCount > 0); + if (MD && isa<StoreInst>(MD->getMemoryInst())) + TOPClass->incStoreCount(); } + for (auto &I : B) { + // Don't insert void terminators into the class. We don't value number + // them, and they just end up sitting in TOP. + if (isa<TerminatorInst>(I) && I.getType()->isVoidTy()) + continue; + TOPClass->insert(&I); + ValueToClass[&I] = TOPClass; } } - InitialClass->Members.swap(InitialValues); // Initialize arguments to be in their own unique congruence classes for (auto &FA : F.args()) @@ -1408,8 +2291,8 @@ void NewGVN::initializeCongruenceClasses(Function &F) { void NewGVN::cleanupTables() { for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) { - DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has " - << CongruenceClasses[i]->Members.size() << " members\n"); + DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->getID() + << " has " << CongruenceClasses[i]->size() << " members\n"); // Make sure we delete the congruence class (probably worth switching to // a unique_ptr at some point. delete CongruenceClasses[i]; @@ -1427,15 +2310,14 @@ void NewGVN::cleanupTables() { #ifndef NDEBUG ProcessedCount.clear(); #endif - DFSDomMap.clear(); InstrDFS.clear(); InstructionsToErase.clear(); - DFSToInstr.clear(); BlockInstRange.clear(); TouchedInstructions.clear(); - DominatedInstRange.clear(); - MemoryAccessEquiv.clear(); + MemoryAccessToClass.clear(); + PredicateToUsers.clear(); + MemoryToUsers.clear(); } std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, @@ -1447,6 +2329,16 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, } for (auto &I : *B) { + // There's no need to call isInstructionTriviallyDead more than once on + // an instruction. Therefore, once we know that an instruction is dead + // we change its DFS number so that it doesn't get value numbered. + if (isInstructionTriviallyDead(&I, TLI)) { + InstrDFS[&I] = 0; + DEBUG(dbgs() << "Skipping trivially dead instruction " << I << "\n"); + markInstructionForDeletion(&I); + continue; + } + InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } @@ -1462,7 +2354,7 @@ void NewGVN::updateProcessedCount(Value *V) { if (ProcessedCount.count(V) == 0) { ProcessedCount.insert({V, 1}); } else { - ProcessedCount[V] += 1; + ++ProcessedCount[V]; assert(ProcessedCount[V] < 100 && "Seem to have processed the same Value a lot"); } @@ -1472,26 +2364,33 @@ void NewGVN::updateProcessedCount(Value *V) { void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the // argument. - // Filter out unreachable blocks from our operands. + // Filter out unreachable blocks and self phis from our operands. + const BasicBlock *PHIBlock = MP->getBlock(); auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { - return ReachableBlocks.count(MP->getIncomingBlock(U)); + return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP && + !isMemoryAccessTop(cast<MemoryAccess>(U)) && + ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); - - assert(Filtered.begin() != Filtered.end() && - "We should not be processing a MemoryPhi in a completely " - "unreachable block"); + // If all that is left is nothing, our memoryphi is undef. We keep it as + // InitialClass. Note: The only case this should happen is if we have at + // least one self-argument. + if (Filtered.begin() == Filtered.end()) { + if (setMemoryClass(MP, TOPClass)) + markMemoryUsersTouched(MP); + return; + } // Transform the remaining operands into operand leaders. // FIXME: mapped_iterator should have a range version. auto LookupFunc = [&](const Use &U) { - return lookupMemoryAccessEquiv(cast<MemoryAccess>(U)); + return lookupMemoryLeader(cast<MemoryAccess>(U)); }; auto MappedBegin = map_iterator(Filtered.begin(), LookupFunc); auto MappedEnd = map_iterator(Filtered.end(), LookupFunc); // and now check if all the elements are equal. // Sadly, we can't use std::equals since these are random access iterators. - MemoryAccess *AllSameValue = *MappedBegin; + const auto *AllSameValue = *MappedBegin; ++MappedBegin; bool AllEqual = std::all_of( MappedBegin, MappedEnd, @@ -1501,8 +2400,18 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue << "\n"); else DEBUG(dbgs() << "Memory Phi value numbered to itself\n"); - - if (setMemoryAccessEquivTo(MP, AllEqual ? AllSameValue : nullptr)) + // If it's equal to something, it's in that class. Otherwise, it has to be in + // a class where it is the leader (other things may be equivalent to it, but + // it needs to start off in its own class, which means it must have been the + // leader, and it can't have stopped being the leader because it was never + // removed). + CongruenceClass *CC = + AllEqual ? getMemoryClass(AllSameValue) : ensureLeaderOfMemoryClass(MP); + auto OldState = MemoryPhiState.lookup(MP); + assert(OldState != MPS_Invalid && "Invalid memory phi state"); + auto NewState = AllEqual ? MPS_Equivalent : MPS_Unique; + MemoryPhiState[MP] = NewState; + if (setMemoryClass(MP, CC) || OldState != NewState) markMemoryUsersTouched(MP); } @@ -1510,21 +2419,25 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // congruence finding, and updating mappings. void NewGVN::valueNumberInstruction(Instruction *I) { DEBUG(dbgs() << "Processing instruction " << *I << "\n"); - if (isInstructionTriviallyDead(I, TLI)) { - DEBUG(dbgs() << "Skipping unused instruction\n"); - markInstructionForDeletion(I); - return; - } if (!I->isTerminator()) { - const auto *Symbolized = performSymbolicEvaluation(I, I->getParent()); + const Expression *Symbolized = nullptr; + if (DebugCounter::shouldExecute(VNCounter)) { + Symbolized = performSymbolicEvaluation(I); + } else { + // Mark the instruction as unused so we don't value number it again. + InstrDFS[I] = 0; + } // If we couldn't come up with a symbolic expression, use the unknown // expression - if (Symbolized == nullptr) + if (Symbolized == nullptr) { Symbolized = createUnknownExpression(I); + } + performCongruenceFinding(I, Symbolized); } else { // Handle terminators that return values. All of them produce values we - // don't currently understand. + // don't currently understand. We don't place non-value producing + // terminators in a class. if (!I->getType()->isVoidTy()) { auto *Symbolized = createUnknownExpression(I); performCongruenceFinding(I, Symbolized); @@ -1539,72 +2452,102 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, const MemoryAccess *Second) const { if (First == Second) return true; - - if (auto *FirstDef = dyn_cast<MemoryUseOrDef>(First)) { - auto *DefAccess = FirstDef->getDefiningAccess(); - return singleReachablePHIPath(DefAccess, Second); - } else { - auto *MP = cast<MemoryPhi>(First); - auto ReachableOperandPred = [&](const Use &U) { - return ReachableBlocks.count(MP->getIncomingBlock(U)); - }; - auto FilteredPhiArgs = - make_filter_range(MP->operands(), ReachableOperandPred); - SmallVector<const Value *, 32> OperandList; - std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(), - std::back_inserter(OperandList)); - bool Okay = OperandList.size() == 1; - if (!Okay) - Okay = std::equal(OperandList.begin(), OperandList.end(), - OperandList.begin()); - if (Okay) - return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second); + if (MSSA->isLiveOnEntryDef(First)) return false; + + const auto *EndDef = First; + for (auto *ChainDef : optimized_def_chain(First)) { + if (ChainDef == Second) + return true; + if (MSSA->isLiveOnEntryDef(ChainDef)) + return false; + EndDef = ChainDef; } + auto *MP = cast<MemoryPhi>(EndDef); + auto ReachableOperandPred = [&](const Use &U) { + return ReachableEdges.count({MP->getIncomingBlock(U), MP->getBlock()}); + }; + auto FilteredPhiArgs = + make_filter_range(MP->operands(), ReachableOperandPred); + SmallVector<const Value *, 32> OperandList; + std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(), + std::back_inserter(OperandList)); + bool Okay = OperandList.size() == 1; + if (!Okay) + Okay = + std::equal(OperandList.begin(), OperandList.end(), OperandList.begin()); + if (Okay) + return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second); + return false; } // Verify the that the memory equivalence table makes sense relative to the // congruence classes. Note that this checking is not perfect, and is currently -// subject to very rare false negatives. It is only useful for testing/debugging. +// subject to very rare false negatives. It is only useful for +// testing/debugging. void NewGVN::verifyMemoryCongruency() const { - // Anything equivalent in the memory access table should be in the same +#ifndef NDEBUG + // Verify that the memory table equivalence and memory member set match + for (const auto *CC : CongruenceClasses) { + if (CC == TOPClass || CC->isDead()) + continue; + if (CC->getStoreCount() != 0) { + assert((CC->getStoredValue() || !isa<StoreInst>(CC->getLeader())) && + "Any class with a store as a " + "leader should have a " + "representative stored value\n"); + assert(CC->getMemoryLeader() && + "Any congruence class with a store should " + "have a representative access\n"); + } + + if (CC->getMemoryLeader()) + assert(MemoryAccessToClass.lookup(CC->getMemoryLeader()) == CC && + "Representative MemoryAccess does not appear to be reverse " + "mapped properly"); + for (auto M : CC->memory()) + assert(MemoryAccessToClass.lookup(M) == CC && + "Memory member does not appear to be reverse mapped properly"); + } + + // Anything equivalent in the MemoryAccess table should be in the same // congruence class. // Filter out the unreachable and trivially dead entries, because they may // never have been updated if the instructions were not processed. auto ReachableAccessPred = - [&](const std::pair<const MemoryAccess *, MemoryAccess *> Pair) { + [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) { bool Result = ReachableBlocks.count(Pair.first->getBlock()); if (!Result) return false; + if (MSSA->isLiveOnEntryDef(Pair.first)) + return true; if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); + if (MemoryToDFSNum(Pair.first) == 0) + return false; return true; }; - auto Filtered = make_filter_range(MemoryAccessEquiv, ReachableAccessPred); + auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); for (auto KV : Filtered) { - assert(KV.first != KV.second && - "We added a useless equivalence to the memory equivalence table"); - // Unreachable instructions may not have changed because we never process - // them. - if (!ReachableBlocks.count(KV.first->getBlock())) - continue; + assert(KV.second != TOPClass && + "Memory not unreachable but ended up in TOP"); if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { - auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second); + auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader()); if (FirstMUD && SecondMUD) assert((singleReachablePHIPath(FirstMUD, SecondMUD) || - ValueToClass.lookup(FirstMUD->getMemoryInst()) == - ValueToClass.lookup(SecondMUD->getMemoryInst())) && - "The instructions for these memory operations should have " - "been in the same congruence class or reachable through" - "a single argument phi"); + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst())) && + "The instructions for these memory operations should have " + "been in the same congruence class or reachable through" + "a single argument phi"); } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { - // We can only sanely verify that MemoryDefs in the operand list all have // the same class. auto ReachableOperandPred = [&](const Use &U) { - return ReachableBlocks.count(FirstMP->getIncomingBlock(U)) && + return ReachableEdges.count( + {FirstMP->getIncomingBlock(U), FirstMP->getBlock()}) && isa<MemoryDef>(U); }; @@ -1622,19 +2565,127 @@ void NewGVN::verifyMemoryCongruency() const { "All MemoryPhi arguments should be in the same class"); } } +#endif +} + +// Verify that the sparse propagation we did actually found the maximal fixpoint +// We do this by storing the value to class mapping, touching all instructions, +// and redoing the iteration to see if anything changed. +void NewGVN::verifyIterationSettled(Function &F) { +#ifndef NDEBUG + DEBUG(dbgs() << "Beginning iteration verification\n"); + if (DebugCounter::isCounterSet(VNCounter)) + DebugCounter::setCounterValue(VNCounter, StartingVNCounter); + + // Note that we have to store the actual classes, as we may change existing + // classes during iteration. This is because our memory iteration propagation + // is not perfect, and so may waste a little work. But it should generate + // exactly the same congruence classes we have now, with different IDs. + std::map<const Value *, CongruenceClass> BeforeIteration; + + for (auto &KV : ValueToClass) { + if (auto *I = dyn_cast<Instruction>(KV.first)) + // Skip unused/dead instructions. + if (InstrToDFSNum(I) == 0) + continue; + BeforeIteration.insert({KV.first, *KV.second}); + } + + TouchedInstructions.set(); + TouchedInstructions.reset(0); + iterateTouchedInstructions(); + DenseSet<std::pair<const CongruenceClass *, const CongruenceClass *>> + EqualClasses; + for (const auto &KV : ValueToClass) { + if (auto *I = dyn_cast<Instruction>(KV.first)) + // Skip unused/dead instructions. + if (InstrToDFSNum(I) == 0) + continue; + // We could sink these uses, but i think this adds a bit of clarity here as + // to what we are comparing. + auto *BeforeCC = &BeforeIteration.find(KV.first)->second; + auto *AfterCC = KV.second; + // Note that the classes can't change at this point, so we memoize the set + // that are equal. + if (!EqualClasses.count({BeforeCC, AfterCC})) { + assert(BeforeCC->isEquivalentTo(AfterCC) && + "Value number changed after main loop completed!"); + EqualClasses.insert({BeforeCC, AfterCC}); + } + } +#endif +} + +// This is the main value numbering loop, it iterates over the initial touched +// instruction set, propagating value numbers, marking things touched, etc, +// until the set of touched instructions is completely empty. +void NewGVN::iterateTouchedInstructions() { + unsigned int Iterations = 0; + // Figure out where touchedinstructions starts + int FirstInstr = TouchedInstructions.find_first(); + // Nothing set, nothing to iterate, just return. + if (FirstInstr == -1) + return; + BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); + while (TouchedInstructions.any()) { + ++Iterations; + // Walk through all the instructions in all the blocks in RPO. + // TODO: As we hit a new block, we should push and pop equalities into a + // table lookupOperandLeader can use, to catch things PredicateInfo + // might miss, like edge-only equivalences. + for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; + InstrNum = TouchedInstructions.find_next(InstrNum)) { + + // This instruction was found to be dead. We don't bother looking + // at it again. + if (InstrNum == 0) { + TouchedInstructions.reset(InstrNum); + continue; + } + + Value *V = InstrFromDFSNum(InstrNum); + BasicBlock *CurrBlock = getBlockForValue(V); + + // If we hit a new block, do reachability processing. + if (CurrBlock != LastBlock) { + LastBlock = CurrBlock; + bool BlockReachable = ReachableBlocks.count(CurrBlock); + const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock); + + // If it's not reachable, erase any touched instructions and move on. + if (!BlockReachable) { + TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second); + DEBUG(dbgs() << "Skipping instructions in block " + << getBlockName(CurrBlock) + << " because it is unreachable\n"); + continue; + } + updateProcessedCount(CurrBlock); + } + + if (auto *MP = dyn_cast<MemoryPhi>(V)) { + DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n"); + valueNumberMemoryPhi(MP); + } else if (auto *I = dyn_cast<Instruction>(V)) { + valueNumberInstruction(I); + } else { + llvm_unreachable("Should have been a MemoryPhi or Instruction"); + } + updateProcessedCount(V); + // Reset after processing (because we may mark ourselves as touched when + // we propagate equalities). + TouchedInstructions.reset(InstrNum); + } + } + NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); } // This is the main transformation entry point. -bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, - TargetLibraryInfo *_TLI, AliasAnalysis *_AA, - MemorySSA *_MSSA) { +bool NewGVN::runGVN() { + if (DebugCounter::isCounterSet(VNCounter)) + StartingVNCounter = DebugCounter::getCounterValue(VNCounter); bool Changed = false; - DT = _DT; - AC = _AC; - TLI = _TLI; - AA = _AA; - MSSA = _MSSA; - DL = &F.getParent()->getDataLayout(); + NumFuncArgs = F.arg_size(); MSSAWalker = MSSA->getWalker(); // Count number of instructions for sizing of hash tables, and come @@ -1642,15 +2693,14 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, unsigned ICount = 1; // Add an empty instruction to account for the fact that we start at 1 DFSToInstr.emplace_back(nullptr); - // Note: We want RPO traversal of the blocks, which is not quite the same as - // dominator tree order, particularly with regard whether backedges get - // visited first or second, given a block with multiple successors. + // Note: We want ideal RPO traversal of the blocks, which is not quite the + // same as dominator tree order, particularly with regard whether backedges + // get visited first or second, given a block with multiple successors. // If we visit in the wrong order, we will end up performing N times as many // iterations. // The dominator tree does guarantee that, for a given dom tree node, it's // parent must occur before it in the RPO ordering. Thus, we only need to sort // the siblings. - DenseMap<const DomTreeNode *, unsigned> RPOOrdering; ReversePostOrderTraversal<Function *> RPOT(&F); unsigned Counter = 0; for (auto &B : RPOT) { @@ -1663,7 +2713,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, auto *Node = DT->getNode(B); if (Node->getChildren().size() > 1) std::sort(Node->begin(), Node->end(), - [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) { + [&](const DomTreeNode *A, const DomTreeNode *B) { return RPOOrdering[A] < RPOOrdering[B]; }); } @@ -1689,7 +2739,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, } TouchedInstructions.resize(ICount); - DominatedInstRange.reserve(F.size()); // Ensure we don't end up resizing the expressionToClass map, as // that can be quite expensive. At most, we have one expression per // instruction. @@ -1701,62 +2750,10 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, ReachableBlocks.insert(&F.getEntryBlock()); initializeCongruenceClasses(F); - - unsigned int Iterations = 0; - // We start out in the entry block. - BasicBlock *LastBlock = &F.getEntryBlock(); - while (TouchedInstructions.any()) { - ++Iterations; - // Walk through all the instructions in all the blocks in RPO. - for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; - InstrNum = TouchedInstructions.find_next(InstrNum)) { - assert(InstrNum != 0 && "Bit 0 should never be set, something touched an " - "instruction not in the lookup table"); - Value *V = DFSToInstr[InstrNum]; - BasicBlock *CurrBlock = nullptr; - - if (auto *I = dyn_cast<Instruction>(V)) - CurrBlock = I->getParent(); - else if (auto *MP = dyn_cast<MemoryPhi>(V)) - CurrBlock = MP->getBlock(); - else - llvm_unreachable("DFSToInstr gave us an unknown type of instruction"); - - // If we hit a new block, do reachability processing. - if (CurrBlock != LastBlock) { - LastBlock = CurrBlock; - bool BlockReachable = ReachableBlocks.count(CurrBlock); - const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock); - - // If it's not reachable, erase any touched instructions and move on. - if (!BlockReachable) { - TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second); - DEBUG(dbgs() << "Skipping instructions in block " - << getBlockName(CurrBlock) - << " because it is unreachable\n"); - continue; - } - updateProcessedCount(CurrBlock); - } - - if (auto *MP = dyn_cast<MemoryPhi>(V)) { - DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n"); - valueNumberMemoryPhi(MP); - } else if (auto *I = dyn_cast<Instruction>(V)) { - valueNumberInstruction(I); - } else { - llvm_unreachable("Should have been a MemoryPhi or Instruction"); - } - updateProcessedCount(V); - // Reset after processing (because we may mark ourselves as touched when - // we propagate equalities). - TouchedInstructions.reset(InstrNum); - } - } - NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); -#ifndef NDEBUG + iterateTouchedInstructions(); verifyMemoryCongruency(); -#endif + verifyIterationSettled(F); + Changed |= eliminateInstructions(F); // Delete all instructions marked for deletion. @@ -1783,36 +2780,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, return Changed; } -bool NewGVN::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - return runGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), - &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), - &getAnalysis<AAResultsWrapperPass>().getAAResults(), - &getAnalysis<MemorySSAWrapperPass>().getMSSA()); -} - -PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) { - NewGVN Impl; - - // Apparently the order in which we get these results matter for - // the old GVN (see Chandler's comment in GVN.cpp). I'll keep - // the same order here, just in case. - auto &AC = AM.getResult<AssumptionAnalysis>(F); - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); - auto &AA = AM.getResult<AAManager>(F); - auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); - bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA); - if (!Changed) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<GlobalsAA>(); - return PA; -} - // Return true if V is a value that will always be available (IE can // be placed anywhere) in the function. We don't do globals here // because they are often worse to put in place. @@ -1821,21 +2788,15 @@ static bool alwaysAvailable(Value *V) { return isa<Constant>(V) || isa<Argument>(V); } -// Get the basic block from an instruction/value. -static BasicBlock *getBlockForValue(Value *V) { - if (auto *I = dyn_cast<Instruction>(V)) - return I->getParent(); - return nullptr; -} - struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; int LocalNum = 0; - // Only one of these will be set. - Value *Val = nullptr; + // Only one of Def and U will be set. + // The bool in the Def tells us whether the Def is the stored value of a + // store. + PointerIntPair<Value *, 1, bool> Def; Use *U = nullptr; - bool operator<(const ValueDFS &Other) const { // It's not enough that any given field be less than - we have sets // of fields that need to be evaluated together to give a proper ordering. @@ -1875,89 +2836,151 @@ struct NewGVN::ValueDFS { // but .val and .u. // It does not matter what order we replace these operands in. // You will always end up with the same IR, and this is guaranteed. - return std::tie(DFSIn, DFSOut, LocalNum, Val, U) < - std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val, + return std::tie(DFSIn, DFSOut, LocalNum, Def, U) < + std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Def, Other.U); } }; -void NewGVN::convertDenseToDFSOrdered( - CongruenceClass::MemberSet &Dense, - SmallVectorImpl<ValueDFS> &DFSOrderedSet) { +// This function converts the set of members for a congruence class from values, +// to sets of defs and uses with associated DFS info. The total number of +// reachable uses for each value is stored in UseCount, and instructions that +// seem +// dead (have no non-dead uses) are stored in ProbablyDead. +void NewGVN::convertClassToDFSOrdered( + const CongruenceClass &Dense, SmallVectorImpl<ValueDFS> &DFSOrderedSet, + DenseMap<const Value *, unsigned int> &UseCounts, + SmallPtrSetImpl<Instruction *> &ProbablyDead) const { for (auto D : Dense) { // First add the value. BasicBlock *BB = getBlockForValue(D); // Constants are handled prior to ever calling this function, so // we should only be left with instructions as members. assert(BB && "Should have figured out a basic block for value"); - ValueDFS VD; - - std::pair<int, int> DFSPair = DFSDomMap[BB]; - assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair"); - VD.DFSIn = DFSPair.first; - VD.DFSOut = DFSPair.second; - VD.Val = D; - // If it's an instruction, use the real local dfs number. - if (auto *I = dyn_cast<Instruction>(D)) - VD.LocalNum = InstrDFS[I]; - else - llvm_unreachable("Should have been an instruction"); - - DFSOrderedSet.emplace_back(VD); - - // Now add the users. - for (auto &U : D->uses()) { + ValueDFS VDDef; + DomTreeNode *DomNode = DT->getNode(BB); + VDDef.DFSIn = DomNode->getDFSNumIn(); + VDDef.DFSOut = DomNode->getDFSNumOut(); + // If it's a store, use the leader of the value operand, if it's always + // available, or the value operand. TODO: We could do dominance checks to + // find a dominating leader, but not worth it ATM. + if (auto *SI = dyn_cast<StoreInst>(D)) { + auto Leader = lookupOperandLeader(SI->getValueOperand()); + if (alwaysAvailable(Leader)) { + VDDef.Def.setPointer(Leader); + } else { + VDDef.Def.setPointer(SI->getValueOperand()); + VDDef.Def.setInt(true); + } + } else { + VDDef.Def.setPointer(D); + } + assert(isa<Instruction>(D) && + "The dense set member should always be an instruction"); + VDDef.LocalNum = InstrToDFSNum(D); + DFSOrderedSet.emplace_back(VDDef); + Instruction *Def = cast<Instruction>(D); + unsigned int UseCount = 0; + // Now add the uses. + for (auto &U : Def->uses()) { if (auto *I = dyn_cast<Instruction>(U.getUser())) { - ValueDFS VD; + // Don't try to replace into dead uses + if (InstructionsToErase.count(I)) + continue; + ValueDFS VDUse; // Put the phi node uses in the incoming block. BasicBlock *IBlock; if (auto *P = dyn_cast<PHINode>(I)) { IBlock = P->getIncomingBlock(U); // Make phi node users appear last in the incoming block // they are from. - VD.LocalNum = InstrDFS.size() + 1; + VDUse.LocalNum = InstrDFS.size() + 1; } else { IBlock = I->getParent(); - VD.LocalNum = InstrDFS[I]; + VDUse.LocalNum = InstrToDFSNum(I); } - std::pair<int, int> DFSPair = DFSDomMap[IBlock]; - VD.DFSIn = DFSPair.first; - VD.DFSOut = DFSPair.second; - VD.U = &U; - DFSOrderedSet.emplace_back(VD); + + // Skip uses in unreachable blocks, as we're going + // to delete them. + if (ReachableBlocks.count(IBlock) == 0) + continue; + + DomTreeNode *DomNode = DT->getNode(IBlock); + VDUse.DFSIn = DomNode->getDFSNumIn(); + VDUse.DFSOut = DomNode->getDFSNumOut(); + VDUse.U = &U; + ++UseCount; + DFSOrderedSet.emplace_back(VDUse); } } + + // If there are no uses, it's probably dead (but it may have side-effects, + // so not definitely dead. Otherwise, store the number of uses so we can + // track if it becomes dead later). + if (UseCount == 0) + ProbablyDead.insert(Def); + else + UseCounts[Def] = UseCount; } } -static void patchReplacementInstruction(Instruction *I, Value *Repl) { - // Patch the replacement so that it is not more restrictive than the value - // being replaced. - auto *Op = dyn_cast<BinaryOperator>(I); - auto *ReplOp = dyn_cast<BinaryOperator>(Repl); +// This function converts the set of members for a congruence class from values, +// to the set of defs for loads and stores, with associated DFS info. +void NewGVN::convertClassToLoadsAndStores( + const CongruenceClass &Dense, + SmallVectorImpl<ValueDFS> &LoadsAndStores) const { + for (auto D : Dense) { + if (!isa<LoadInst>(D) && !isa<StoreInst>(D)) + continue; - if (Op && ReplOp) - ReplOp->andIRFlags(Op); + BasicBlock *BB = getBlockForValue(D); + ValueDFS VD; + DomTreeNode *DomNode = DT->getNode(BB); + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.Def.setPointer(D); - if (auto *ReplInst = dyn_cast<Instruction>(Repl)) { - // FIXME: If both the original and replacement value are part of the - // same control-flow region (meaning that the execution of one - // guarentees the executation of the other), then we can combine the - // noalias scopes here and do better than the general conservative - // answer used in combineMetadata(). + // If it's an instruction, use the real local dfs number. + if (auto *I = dyn_cast<Instruction>(D)) + VD.LocalNum = InstrToDFSNum(I); + else + llvm_unreachable("Should have been an instruction"); - // In general, GVN unifies expressions over different control-flow - // regions, and so we need a conservative combination of the noalias - // scopes. - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, - LLVMContext::MD_invariant_group}; - combineMetadata(ReplInst, I, KnownIDs); + LoadsAndStores.emplace_back(VD); } } +static void patchReplacementInstruction(Instruction *I, Value *Repl) { + auto *ReplInst = dyn_cast<Instruction>(Repl); + if (!ReplInst) + return; + + // Patch the replacement so that it is not more restrictive than the value + // being replaced. + // Note that if 'I' is a load being replaced by some operation, + // for example, by an arithmetic operation, then andIRFlags() + // would just erase all math flags from the original arithmetic + // operation, which is clearly not wanted and not needed. + if (!isa<LoadInst>(I)) + ReplInst->andIRFlags(I); + + // FIXME: If both the original and replacement value are part of the + // same control-flow region (meaning that the execution of one + // guarantees the execution of the other), then we can combine the + // noalias scopes here and do better than the general conservative + // answer used in combineMetadata(). + + // In general, GVN unifies expressions over different control-flow + // regions, and so we need a conservative combination of the noalias + // scopes. + static const unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group}; + combineMetadata(ReplInst, I, KnownIDs); +} + static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); @@ -1967,10 +2990,6 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { DEBUG(dbgs() << " BasicBlock Dead:" << *BB); ++NumGVNBlocksDeleted; - // Check to see if there are non-terminating instructions to delete. - if (isa<TerminatorInst>(BB->begin())) - return; - // Delete the instructions backwards, as it has a reduced likelihood of having // to update as many def-use and use-def chains. Start after the terminator. auto StartPoint = BB->rbegin(); @@ -1987,6 +3006,11 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { Inst.eraseFromParent(); ++NumGVNInstrDeleted; } + // Now insert something that simplifycfg will turn into an unreachable. + Type *Int8Ty = Type::getInt8Ty(BB->getContext()); + new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + BB->getTerminator()); } void NewGVN::markInstructionForDeletion(Instruction *I) { @@ -2086,59 +3110,59 @@ bool NewGVN::eliminateInstructions(Function &F) { } } } - DomTreeNode *Node = DT->getNode(&B); - if (Node) - DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()}; } - for (CongruenceClass *CC : CongruenceClasses) { - // FIXME: We should eventually be able to replace everything still - // in the initial class with undef, as they should be unreachable. - // Right now, initial still contains some things we skip value - // numbering of (UNREACHABLE's, for example). - if (CC == InitialClass || CC->Dead) + // Map to store the use counts + DenseMap<const Value *, unsigned int> UseCounts; + for (CongruenceClass *CC : reverse(CongruenceClasses)) { + // Track the equivalent store info so we can decide whether to try + // dead store elimination. + SmallVector<ValueDFS, 8> PossibleDeadStores; + SmallPtrSet<Instruction *, 8> ProbablyDead; + if (CC->isDead() || CC->empty()) continue; - assert(CC->RepLeader && "We should have had a leader"); + // Everything still in the TOP class is unreachable or dead. + if (CC == TOPClass) { +#ifndef NDEBUG + for (auto M : *CC) + assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) || + InstructionsToErase.count(cast<Instruction>(M))) && + "Everything in TOP should be unreachable or dead at this " + "point"); +#endif + continue; + } + assert(CC->getLeader() && "We should have had a leader"); // If this is a leader that is always available, and it's a // constant or has no equivalences, just replace everything with // it. We then update the congruence class with whatever members // are left. - if (alwaysAvailable(CC->RepLeader)) { - SmallPtrSet<Value *, 4> MembersLeft; - for (auto M : CC->Members) { - + Value *Leader = + CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader(); + if (alwaysAvailable(Leader)) { + CongruenceClass::MemberSet MembersLeft; + for (auto M : *CC) { Value *Member = M; - // Void things have no uses we can replace. - if (Member == CC->RepLeader || Member->getType()->isVoidTy()) { + if (Member == Leader || !isa<Instruction>(Member) || + Member->getType()->isVoidTy()) { MembersLeft.insert(Member); continue; } - - DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for " - << *Member << "\n"); - // Due to equality propagation, these may not always be - // instructions, they may be real values. We don't really - // care about trying to replace the non-instructions. - if (auto *I = dyn_cast<Instruction>(Member)) { - assert(CC->RepLeader != I && - "About to accidentally remove our leader"); - replaceInstruction(I, CC->RepLeader); - AnythingReplaced = true; - - continue; - } else { - MembersLeft.insert(I); - } + DEBUG(dbgs() << "Found replacement " << *(Leader) << " for " << *Member + << "\n"); + auto *I = cast<Instruction>(Member); + assert(Leader != I && "About to accidentally remove our leader"); + replaceInstruction(I, Leader); + AnythingReplaced = true; } - CC->Members.swap(MembersLeft); - + CC->swap(MembersLeft); } else { - DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n"); + DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() + << "\n"); // If this is a singleton, we can skip it. - if (CC->Members.size() != 1) { - + if (CC->size() != 1) { // This is a stack because equality replacement/etc may place // constants in the middle of the member list, and we want to use // those constant values in preference to the current leader, over @@ -2147,24 +3171,19 @@ bool NewGVN::eliminateInstructions(Function &F) { // Convert the members to DFS ordered sets and then merge them. SmallVector<ValueDFS, 8> DFSOrderedSet; - convertDenseToDFSOrdered(CC->Members, DFSOrderedSet); + convertClassToDFSOrdered(*CC, DFSOrderedSet, UseCounts, ProbablyDead); // Sort the whole thing. std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); - for (auto &VD : DFSOrderedSet) { int MemberDFSIn = VD.DFSIn; int MemberDFSOut = VD.DFSOut; - Value *Member = VD.Val; - Use *MemberUse = VD.U; - - if (Member) { - // We ignore void things because we can't get a value from them. - // FIXME: We could actually use this to kill dead stores that are - // dominated by equivalent earlier stores. - if (Member->getType()->isVoidTy()) - continue; - } + Value *Def = VD.Def.getPointer(); + bool FromStore = VD.Def.getInt(); + Use *U = VD.U; + // We ignore void things because we can't get a value from them. + if (Def && Def->getType()->isVoidTy()) + continue; if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -2189,69 +3208,240 @@ bool NewGVN::eliminateInstructions(Function &F) { // start using, we also push. // Otherwise, we walk along, processing members who are // dominated by this scope, and eliminate them. - bool ShouldPush = - Member && (EliminationStack.empty() || isa<Constant>(Member)); + bool ShouldPush = Def && EliminationStack.empty(); bool OutOfScope = !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut); if (OutOfScope || ShouldPush) { // Sync to our current scope. EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); - ShouldPush |= Member && EliminationStack.empty(); + bool ShouldPush = Def && EliminationStack.empty(); if (ShouldPush) { - EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); + EliminationStack.push_back(Def, MemberDFSIn, MemberDFSOut); + } + } + + // Skip the Def's, we only want to eliminate on their uses. But mark + // dominated defs as dead. + if (Def) { + // For anything in this case, what and how we value number + // guarantees that any side-effets that would have occurred (ie + // throwing, etc) can be proven to either still occur (because it's + // dominated by something that has the same side-effects), or never + // occur. Otherwise, we would not have been able to prove it value + // equivalent to something else. For these things, we can just mark + // it all dead. Note that this is different from the "ProbablyDead" + // set, which may not be dominated by anything, and thus, are only + // easy to prove dead if they are also side-effect free. Note that + // because stores are put in terms of the stored value, we skip + // stored values here. If the stored value is really dead, it will + // still be marked for deletion when we process it in its own class. + if (!EliminationStack.empty() && Def != EliminationStack.back() && + isa<Instruction>(Def) && !FromStore) + markInstructionForDeletion(cast<Instruction>(Def)); + continue; + } + // At this point, we know it is a Use we are trying to possibly + // replace. + + assert(isa<Instruction>(U->get()) && + "Current def should have been an instruction"); + assert(isa<Instruction>(U->getUser()) && + "Current user should have been an instruction"); + + // If the thing we are replacing into is already marked to be dead, + // this use is dead. Note that this is true regardless of whether + // we have anything dominating the use or not. We do this here + // because we are already walking all the uses anyway. + Instruction *InstUse = cast<Instruction>(U->getUser()); + if (InstructionsToErase.count(InstUse)) { + auto &UseCount = UseCounts[U->get()]; + if (--UseCount == 0) { + ProbablyDead.insert(cast<Instruction>(U->get())); } } // If we get to this point, and the stack is empty we must have a use - // with nothing we can use to eliminate it, just skip it. + // with nothing we can use to eliminate this use, so just skip it. if (EliminationStack.empty()) continue; - // Skip the Value's, we only want to eliminate on their uses. - if (Member) - continue; - Value *Result = EliminationStack.back(); + Value *DominatingLeader = EliminationStack.back(); // Don't replace our existing users with ourselves. - if (MemberUse->get() == Result) + if (U->get() == DominatingLeader) continue; - - DEBUG(dbgs() << "Found replacement " << *Result << " for " - << *MemberUse->get() << " in " << *(MemberUse->getUser()) - << "\n"); + DEBUG(dbgs() << "Found replacement " << *DominatingLeader << " for " + << *U->get() << " in " << *(U->getUser()) << "\n"); // If we replaced something in an instruction, handle the patching of - // metadata. - if (auto *ReplacedInst = dyn_cast<Instruction>(MemberUse->get())) - patchReplacementInstruction(ReplacedInst, Result); - - assert(isa<Instruction>(MemberUse->getUser())); - MemberUse->set(Result); + // metadata. Skip this if we are replacing predicateinfo with its + // original operand, as we already know we can just drop it. + auto *ReplacedInst = cast<Instruction>(U->get()); + auto *PI = PredInfo->getPredicateInfoFor(ReplacedInst); + if (!PI || DominatingLeader != PI->OriginalOp) + patchReplacementInstruction(ReplacedInst, DominatingLeader); + U->set(DominatingLeader); + // This is now a use of the dominating leader, which means if the + // dominating leader was dead, it's now live! + auto &LeaderUseCount = UseCounts[DominatingLeader]; + // It's about to be alive again. + if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader)) + ProbablyDead.erase(cast<Instruction>(DominatingLeader)); + ++LeaderUseCount; AnythingReplaced = true; } } } + // At this point, anything still in the ProbablyDead set is actually dead if + // would be trivially dead. + for (auto *I : ProbablyDead) + if (wouldInstructionBeTriviallyDead(I)) + markInstructionForDeletion(I); + // Cleanup the congruence class. - SmallPtrSet<Value *, 4> MembersLeft; - for (Value *Member : CC->Members) { - if (Member->getType()->isVoidTy()) { + CongruenceClass::MemberSet MembersLeft; + for (auto *Member : *CC) + if (!isa<Instruction>(Member) || + !InstructionsToErase.count(cast<Instruction>(Member))) MembersLeft.insert(Member); - continue; - } - - if (auto *MemberInst = dyn_cast<Instruction>(Member)) { - if (isInstructionTriviallyDead(MemberInst)) { - // TODO: Don't mark loads of undefs. - markInstructionForDeletion(MemberInst); - continue; + CC->swap(MembersLeft); + + // If we have possible dead stores to look at, try to eliminate them. + if (CC->getStoreCount() > 0) { + convertClassToLoadsAndStores(*CC, PossibleDeadStores); + std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end()); + ValueDFSStack EliminationStack; + for (auto &VD : PossibleDeadStores) { + int MemberDFSIn = VD.DFSIn; + int MemberDFSOut = VD.DFSOut; + Instruction *Member = cast<Instruction>(VD.Def.getPointer()); + if (EliminationStack.empty() || + !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)) { + // Sync to our current scope. + EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); + if (EliminationStack.empty()) { + EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); + continue; + } } + // We already did load elimination, so nothing to do here. + if (isa<LoadInst>(Member)) + continue; + assert(!EliminationStack.empty()); + Instruction *Leader = cast<Instruction>(EliminationStack.back()); + (void)Leader; + assert(DT->dominates(Leader->getParent(), Member->getParent())); + // Member is dominater by Leader, and thus dead + DEBUG(dbgs() << "Marking dead store " << *Member + << " that is dominated by " << *Leader << "\n"); + markInstructionForDeletion(Member); + CC->erase(Member); + ++NumGVNDeadStores; } - MembersLeft.insert(Member); } - CC->Members.swap(MembersLeft); } return AnythingReplaced; } + +// This function provides global ranking of operations so that we can place them +// in a canonical order. Note that rank alone is not necessarily enough for a +// complete ordering, as constants all have the same rank. However, generally, +// we will simplify an operation with all constants so that it doesn't matter +// what order they appear in. +unsigned int NewGVN::getRank(const Value *V) const { + // Prefer undef to anything else + if (isa<UndefValue>(V)) + return 0; + if (isa<Constant>(V)) + return 1; + else if (auto *A = dyn_cast<Argument>(V)) + return 2 + A->getArgNo(); + + // Need to shift the instruction DFS by number of arguments + 3 to account for + // the constant and argument ranking above. + unsigned Result = InstrToDFSNum(V); + if (Result > 0) + return 3 + NumFuncArgs + Result; + // Unreachable or something else, just return a really large number. + return ~0; +} + +// This is a function that says whether two commutative operations should +// have their order swapped when canonicalizing. +bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { + // Because we only care about a total ordering, and don't rewrite expressions + // in this order, we order by rank, which will give a strict weak ordering to + // everything but constants, and then we order by pointer address. + return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B); +} + +class NewGVNLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid. + NewGVNLegacyPass() : FunctionPass(ID) { + initializeNewGVNLegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<MemorySSAWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; + +bool NewGVNLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + return NewGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + &getAnalysis<AAResultsWrapperPass>().getAAResults(), + &getAnalysis<MemorySSAWrapperPass>().getMSSA(), + F.getParent()->getDataLayout()) + .runGVN(); +} + +INITIALIZE_PASS_BEGIN(NewGVNLegacyPass, "newgvn", "Global Value Numbering", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(NewGVNLegacyPass, "newgvn", "Global Value Numbering", false, + false) + +char NewGVNLegacyPass::ID = 0; + +// createGVNPass - The public interface to this file. +FunctionPass *llvm::createNewGVNPass() { return new NewGVNLegacyPass(); } + +PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) { + // Apparently the order in which we get these results matter for + // the old GVN (see Chandler's comment in GVN.cpp). I'll keep + // the same order here, just in case. + auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); + bool Changed = + NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout()) + .runGVN(); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<GlobalsAA>(); + return PA; +} |