diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp | 1525 |
1 files changed, 1525 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp b/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp new file mode 100644 index 000000000000..883629a300dc --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp @@ -0,0 +1,1525 @@ +//===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/CalledOnceCheck.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include <memory> + +using namespace clang; + +namespace { +static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2; +template <class T> +using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>; +static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; +template <class T> +using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; +constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { + "completionHandler", "completion", "withCompletionHandler"}; +constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { + "WithCompletionHandler", "WithCompletion"}; +constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { + "error", "cancel", "shouldCall", "done", "OK", "success"}; + +class ParameterStatus { +public: + // Status kind is basically the main part of parameter's status. + // The kind represents our knowledge (so far) about a tracked parameter + // in the context of this analysis. + // + // Since we want to report on missing and extraneous calls, we need to + // track the fact whether paramater was called or not. This automatically + // decides two kinds: `NotCalled` and `Called`. + // + // One of the erroneous situations is the case when parameter is called only + // on some of the paths. We could've considered it `NotCalled`, but we want + // to report double call warnings even if these two calls are not guaranteed + // to happen in every execution. We also don't want to have it as `Called` + // because not calling tracked parameter on all of the paths is an error + // on its own. For these reasons, we need to have a separate kind, + // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid + // confusion. + // + // Two violations of calling parameter more than once and not calling it on + // every path are not, however, mutually exclusive. In situations where both + // violations take place, we prefer to report ONLY double call. It's always + // harder to pinpoint a bug that has arisen when a user neglects to take the + // right action (and therefore, no action is taken), than when a user takes + // the wrong action. And, in order to remember that we already reported + // a double call, we need another kind: `Reported`. + // + // Our analysis is intra-procedural and, while in the perfect world, + // developers only use tracked parameters to call them, in the real world, + // the picture might be different. Parameters can be stored in global + // variables or leaked into other functions that we know nothing about. + // We try to be lenient and trust users. Another kind `Escaped` reflects + // such situations. We don't know if it gets called there or not, but we + // should always think of `Escaped` as the best possible option. + // + // Some of the paths in the analyzed functions might end with a call + // to noreturn functions. Such paths are not required to have parameter + // calls and we want to track that. For the purposes of better diagnostics, + // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`. + // + // Additionally, we have `NotVisited` kind that tells us nothing about + // a tracked parameter, but is used for tracking analyzed (aka visited) + // basic blocks. + // + // If we consider `|` to be a JOIN operation of two kinds coming from + // two different paths, the following properties must hold: + // + // 1. for any Kind K: K | K == K + // Joining two identical kinds should result in the same kind. + // + // 2. for any Kind K: Reported | K == Reported + // Doesn't matter on which path it was reported, it still is. + // + // 3. for any Kind K: NoReturn | K == K + // We can totally ignore noreturn paths during merges. + // + // 4. DefinitelyCalled | NotCalled == MaybeCalled + // Called on one path, not called on another - that's simply + // a definition for MaybeCalled. + // + // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]: + // Escaped | K == K + // Escaped mirrors other statuses after joins. + // Every situation, when we join any of the listed kinds K, + // is a violation. For this reason, in order to assume the + // best outcome for this escape, we consider it to be the + // same as the other path. + // + // 6. for any Kind K in [DefinitelyCalled, NotCalled]: + // MaybeCalled | K == MaybeCalled + // MaybeCalled should basically stay after almost every join. + enum Kind { + // No-return paths should be absolutely transparent for the analysis. + // 0x0 is the identity element for selected join operation (binary or). + NoReturn = 0x0, /* 0000 */ + // Escaped marks situations when marked parameter escaped into + // another function (so we can assume that it was possibly called there). + Escaped = 0x1, /* 0001 */ + // Parameter was definitely called once at this point. + DefinitelyCalled = 0x3, /* 0011 */ + // Kinds less or equal to NON_ERROR_STATUS are not considered errors. + NON_ERROR_STATUS = DefinitelyCalled, + // Parameter was not yet called. + NotCalled = 0x5, /* 0101 */ + // Parameter was not called at least on one path leading to this point, + // while there is also at least one path that it gets called. + MaybeCalled = 0x7, /* 0111 */ + // Parameter was not yet analyzed. + NotVisited = 0x8, /* 1000 */ + // We already reported a violation and stopped tracking calls for this + // parameter. + Reported = 0x15, /* 1111 */ + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) + }; + + constexpr ParameterStatus() = default; + /* implicit */ ParameterStatus(Kind K) : StatusKind(K) { + assert(!seenAnyCalls(K) && "Can't initialize status without a call"); + } + ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) { + assert(seenAnyCalls(K) && "This kind is not supposed to have a call"); + } + + const Expr &getCall() const { + assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call"); + return *Call; + } + static bool seenAnyCalls(Kind K) { + return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported; + } + bool seenAnyCalls() const { return seenAnyCalls(getKind()); } + + static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; } + bool isErrorStatus() const { return isErrorStatus(getKind()); } + + Kind getKind() const { return StatusKind; } + + void join(const ParameterStatus &Other) { + // If we have a pointer already, let's keep it. + // For the purposes of the analysis, it doesn't really matter + // which call we report. + // + // If we don't have a pointer, let's take whatever gets joined. + if (!Call) { + Call = Other.Call; + } + // Join kinds. + StatusKind |= Other.getKind(); + } + + bool operator==(const ParameterStatus &Other) const { + // We compare only kinds, pointers on their own is only additional + // information. + return getKind() == Other.getKind(); + } + +private: + // It would've been a perfect place to use llvm::PointerIntPair, but + // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2. + Kind StatusKind = NotVisited; + const Expr *Call = nullptr; +}; + +/// State aggregates statuses of all tracked parameters. +class State { +public: + State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited) + : ParamData(Size, K) {} + + /// Return status of a parameter with the given index. + /// \{ + ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; } + const ParameterStatus &getStatusFor(unsigned Index) const { + return ParamData[Index]; + } + /// \} + + /// Return true if parameter with the given index can be called. + bool seenAnyCalls(unsigned Index) const { + return getStatusFor(Index).seenAnyCalls(); + } + /// Return a reference that we consider a call. + /// + /// Should only be used for parameters that can be called. + const Expr &getCallFor(unsigned Index) const { + return getStatusFor(Index).getCall(); + } + /// Return status kind of parameter with the given index. + ParameterStatus::Kind getKindFor(unsigned Index) const { + return getStatusFor(Index).getKind(); + } + + bool isVisited() const { + return llvm::all_of(ParamData, [](const ParameterStatus &S) { + return S.getKind() != ParameterStatus::NotVisited; + }); + } + + // Join other state into the current state. + void join(const State &Other) { + assert(ParamData.size() == Other.ParamData.size() && + "Couldn't join statuses with different sizes"); + for (auto Pair : llvm::zip(ParamData, Other.ParamData)) { + std::get<0>(Pair).join(std::get<1>(Pair)); + } + } + + using iterator = ParamSizedVector<ParameterStatus>::iterator; + using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator; + + iterator begin() { return ParamData.begin(); } + iterator end() { return ParamData.end(); } + + const_iterator begin() const { return ParamData.begin(); } + const_iterator end() const { return ParamData.end(); } + + bool operator==(const State &Other) const { + return ParamData == Other.ParamData; + } + +private: + ParamSizedVector<ParameterStatus> ParamData; +}; + +/// A simple class that finds DeclRefExpr in the given expression. +/// +/// However, we don't want to find ANY nested DeclRefExpr skipping whatever +/// expressions on our way. Only certain expressions considered "no-op" +/// for our task are indeed skipped. +class DeclRefFinder + : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> { +public: + /// Find a DeclRefExpr in the given expression. + /// + /// In its most basic form (ShouldRetrieveFromComparisons == false), + /// this function can be simply reduced to the following question: + /// + /// - If expression E is used as a function argument, could we say + /// that DeclRefExpr nested in E is used as an argument? + /// + /// According to this rule, we can say that parens, casts and dereferencing + /// (dereferencing only applied to function pointers, but this is our case) + /// can be skipped. + /// + /// When we should look into comparisons the question changes to: + /// + /// - If expression E is used as a condition, could we say that + /// DeclRefExpr is being checked? + /// + /// And even though, these are two different questions, they have quite a lot + /// in common. Actually, we can say that whatever expression answers + /// positively the first question also fits the second question as well. + /// + /// In addition, we skip binary operators == and !=, and unary opeartor !. + static const DeclRefExpr *find(const Expr *E, + bool ShouldRetrieveFromComparisons = false) { + return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E); + } + + const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; } + + const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) { + switch (UO->getOpcode()) { + case UO_LNot: + // We care about logical not only if we care about comparisons. + if (!ShouldRetrieveFromComparisons) + return nullptr; + LLVM_FALLTHROUGH; + // Function pointer/references can be dereferenced before a call. + // That doesn't make it, however, any different from a regular call. + // For this reason, dereference operation is a "no-op". + case UO_Deref: + return Visit(UO->getSubExpr()); + default: + return nullptr; + } + } + + const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) { + if (!ShouldRetrieveFromComparisons) + return nullptr; + + switch (BO->getOpcode()) { + case BO_EQ: + case BO_NE: { + const DeclRefExpr *LHS = Visit(BO->getLHS()); + return LHS ? LHS : Visit(BO->getRHS()); + } + default: + return nullptr; + } + } + + const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) { + return Visit(OVE->getSourceExpr()); + } + + const DeclRefExpr *VisitExpr(const Expr *E) { + // It is a fallback method that gets called whenever the actual type + // of the given expression is not covered. + // + // We first check if we have anything to skip. And then repeat the whole + // procedure for a nested expression instead. + const Expr *DeclutteredExpr = E->IgnoreParenCasts(); + return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr; + } + +private: + DeclRefFinder(bool ShouldRetrieveFromComparisons) + : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {} + + bool ShouldRetrieveFromComparisons; +}; + +const DeclRefExpr *findDeclRefExpr(const Expr *In, + bool ShouldRetrieveFromComparisons = false) { + return DeclRefFinder::find(In, ShouldRetrieveFromComparisons); +} + +const ParmVarDecl * +findReferencedParmVarDecl(const Expr *In, + bool ShouldRetrieveFromComparisons = false) { + if (const DeclRefExpr *DR = + findDeclRefExpr(In, ShouldRetrieveFromComparisons)) { + return dyn_cast<ParmVarDecl>(DR->getDecl()); + } + + return nullptr; +} + +/// Return conditions expression of a statement if it has one. +const Expr *getCondition(const Stmt *S) { + if (!S) { + return nullptr; + } + + if (const auto *If = dyn_cast<IfStmt>(S)) { + return If->getCond(); + } + if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) { + return Ternary->getCond(); + } + + return nullptr; +} + +/// A small helper class that collects all named identifiers in the given +/// expression. It traverses it recursively, so names from deeper levels +/// of the AST will end up in the results. +/// Results might have duplicate names, if this is a problem, convert to +/// string sets afterwards. +class NamesCollector : public RecursiveASTVisitor<NamesCollector> { +public: + static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; + using NameCollection = + llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; + + static NameCollection collect(const Expr *From) { + NamesCollector Impl; + Impl.TraverseStmt(const_cast<Expr *>(From)); + return Impl.Result; + } + + bool VisitDeclRefExpr(const DeclRefExpr *E) { + Result.push_back(E->getDecl()->getName()); + return true; + } + + bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) { + llvm::StringRef Name; + + if (E->isImplicitProperty()) { + ObjCMethodDecl *PropertyMethodDecl = nullptr; + if (E->isMessagingGetter()) { + PropertyMethodDecl = E->getImplicitPropertyGetter(); + } else { + PropertyMethodDecl = E->getImplicitPropertySetter(); + } + assert(PropertyMethodDecl && + "Implicit property must have associated declaration"); + Name = PropertyMethodDecl->getSelector().getNameForSlot(0); + } else { + assert(E->isExplicitProperty()); + Name = E->getExplicitProperty()->getName(); + } + + Result.push_back(Name); + return true; + } + +private: + NamesCollector() = default; + NameCollection Result; +}; + +/// Check whether the given expression mentions any of conventional names. +bool mentionsAnyOfConventionalNames(const Expr *E) { + NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); + + return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { + return llvm::any_of( + CONVENTIONAL_CONDITIONS, + [ConditionName](const llvm::StringLiteral &Conventional) { + return ConditionName.contains_lower(Conventional); + }); + }); +} + +/// Clarification is a simple pair of a reason why parameter is not called +/// on every path and a statement to blame. +struct Clarification { + NeverCalledReason Reason; + const Stmt *Location; +}; + +/// A helper class that can produce a clarification based on the given pair +/// of basic blocks. +class NotCalledClarifier + : public ConstStmtVisitor<NotCalledClarifier, + llvm::Optional<Clarification>> { +public: + /// The main entrypoint for the class, the function that tries to find the + /// clarification of how to explain which sub-path starts with a CFG edge + /// from Conditional to SuccWithoutCall. + /// + /// This means that this function has one precondition: + /// SuccWithoutCall should be a successor block for Conditional. + /// + /// Because clarification is not needed for non-trivial pairs of blocks + /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful + /// results only for such cases. For this very reason, the parent basic + /// block, Conditional, is named that way, so it is clear what kind of + /// block is expected. + static llvm::Optional<Clarification> + clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) { + if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { + return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); + } + return llvm::None; + } + + llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) { + return VisitBranchingBlock(If, NeverCalledReason::IfThen); + } + + llvm::Optional<Clarification> + VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { + return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); + } + + llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { + const Stmt *CaseToBlame = SuccInQuestion->getLabel(); + if (!CaseToBlame) { + // If interesting basic block is not labeled, it means that this + // basic block does not represent any of the cases. + return Clarification{NeverCalledReason::SwitchSkipped, Switch}; + } + + for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case; + Case = Case->getNextSwitchCase()) { + if (Case == CaseToBlame) { + return Clarification{NeverCalledReason::Switch, Case}; + } + } + + llvm_unreachable("Found unexpected switch structure"); + } + + llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) { + return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); + } + + llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) { + return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); + } + + llvm::Optional<Clarification> + VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { + assert(Parent->succ_size() == 2 && + "Branching block should have exactly two successors"); + unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion); + NeverCalledReason ActualReason = + updateForSuccessor(DefaultReason, SuccessorIndex); + return Clarification{ActualReason, Terminator}; + } + + llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { + // We don't want to report on short-curcuit logical operations. + return llvm::None; + } + + llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) { + // If we got here, we didn't have a visit function for more derived + // classes of statement that this terminator actually belongs to. + // + // This is not a good scenario and should not happen in practice, but + // at least we'll warn the user. + return Clarification{NeverCalledReason::FallbackReason, Terminator}; + } + + static unsigned getSuccessorIndex(const CFGBlock *Parent, + const CFGBlock *Child) { + CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); + assert(It != Parent->succ_end() && + "Given blocks should be in parent-child relationship"); + return It - Parent->succ_begin(); + } + + static NeverCalledReason + updateForSuccessor(NeverCalledReason ReasonForTrueBranch, + unsigned SuccessorIndex) { + assert(SuccessorIndex <= 1); + unsigned RawReason = + static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; + assert(RawReason <= + static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); + return static_cast<NeverCalledReason>(RawReason); + } + +private: + NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) + : Parent(Parent), SuccInQuestion(SuccInQuestion) {} + + const CFGBlock *Parent, *SuccInQuestion; +}; + +class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { +public: + static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, + bool CheckConventionalParameters) { + CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); + } + +private: + CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, + bool CheckConventionalParameters) + : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), + CheckConventionalParameters(CheckConventionalParameters), + CurrentState(0) { + initDataStructures(); + assert((size() == 0 || !States.empty()) && + "Data structures are inconsistent"); + } + + //===----------------------------------------------------------------------===// + // Initializing functions + //===----------------------------------------------------------------------===// + + void initDataStructures() { + const Decl *AnalyzedDecl = AC.getDecl(); + + if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { + findParamsToTrack(Function); + } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { + findParamsToTrack(Method); + } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { + findCapturesToTrack(Block); + findParamsToTrack(Block); + } + + // Have something to track, let's init states for every block from the CFG. + if (size() != 0) { + States = + CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); + } + } + + void findCapturesToTrack(const BlockDecl *Block) { + for (const auto &Capture : Block->captures()) { + if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { + // Parameter DeclContext is its owning function or method. + const DeclContext *ParamContext = P->getDeclContext(); + if (shouldBeCalledOnce(ParamContext, P)) { + TrackedParams.push_back(P); + } + } + } + } + + template <class FunctionLikeDecl> + void findParamsToTrack(const FunctionLikeDecl *Function) { + for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { + if (shouldBeCalledOnce(Function, Index)) { + TrackedParams.push_back(Function->getParamDecl(Index)); + } + } + } + + //===----------------------------------------------------------------------===// + // Main logic 'check' functions + //===----------------------------------------------------------------------===// + + void check() { + // Nothing to check here: we don't have marked parameters. + if (size() == 0 || isPossiblyEmptyImpl()) + return; + + assert( + llvm::none_of(States, [](const State &S) { return S.isVisited(); }) && + "None of the blocks should be 'visited' before the analysis"); + + // For our task, both backward and forward approaches suite well. + // However, in order to report better diagnostics, we decided to go with + // backward analysis. + // + // Let's consider the following CFG and how forward and backward analyses + // will work for it. + // + // FORWARD: | BACKWARD: + // #1 | #1 + // +---------+ | +-----------+ + // | if | | |MaybeCalled| + // +---------+ | +-----------+ + // |NotCalled| | | if | + // +---------+ | +-----------+ + // / \ | / \ + // #2 / \ #3 | #2 / \ #3 + // +----------------+ +---------+ | +----------------+ +---------+ + // | foo() | | ... | | |DefinitelyCalled| |NotCalled| + // +----------------+ +---------+ | +----------------+ +---------+ + // |DefinitelyCalled| |NotCalled| | | foo() | | ... | + // +----------------+ +---------+ | +----------------+ +---------+ + // \ / | \ / + // \ #4 / | \ #4 / + // +-----------+ | +---------+ + // | ... | | |NotCalled| + // +-----------+ | +---------+ + // |MaybeCalled| | | ... | + // +-----------+ | +---------+ + // + // The most natural way to report lacking call in the block #3 would be to + // message that the false branch of the if statement in the block #1 doesn't + // have a call. And while with the forward approach we'll need to find a + // least common ancestor or something like that to find the 'if' to blame, + // backward analysis gives it to us out of the box. + BackwardDataflowWorklist Worklist(FunctionCFG, AC); + + // Let's visit EXIT. + const CFGBlock *Exit = &FunctionCFG.getExit(); + assignState(Exit, State(size(), ParameterStatus::NotCalled)); + Worklist.enqueuePredecessors(Exit); + + while (const CFGBlock *BB = Worklist.dequeue()) { + assert(BB && "Worklist should filter out null blocks"); + check(BB); + assert(CurrentState.isVisited() && + "After the check, basic block should be visited"); + + // Traverse successor basic blocks if the status of this block + // has changed. + if (assignState(BB, CurrentState)) { + Worklist.enqueuePredecessors(BB); + } + } + + // Check that we have all tracked parameters at the last block. + // As we are performing a backward version of the analysis, + // it should be the ENTRY block. + checkEntry(&FunctionCFG.getEntry()); + } + + void check(const CFGBlock *BB) { + // We start with a state 'inherited' from all the successors. + CurrentState = joinSuccessors(BB); + assert(CurrentState.isVisited() && + "Shouldn't start with a 'not visited' state"); + + // This is the 'exit' situation, broken promises are probably OK + // in such scenarios. + if (BB->hasNoReturnElement()) { + markNoReturn(); + // This block still can have calls (even multiple calls) and + // for this reason there is no early return here. + } + + // We use a backward dataflow propagation and for this reason we + // should traverse basic blocks bottom-up. + for (const CFGElement &Element : llvm::reverse(*BB)) { + if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { + check(S->getStmt()); + } + } + } + void check(const Stmt *S) { Visit(S); } + + void checkEntry(const CFGBlock *Entry) { + // We finalize this algorithm with the ENTRY block because + // we use a backward version of the analysis. This is where + // we can judge that some of the tracked parameters are not called on + // every path from ENTRY to EXIT. + + const State &EntryStatus = getState(Entry); + llvm::BitVector NotCalledOnEveryPath(size(), false); + llvm::BitVector NotUsedOnEveryPath(size(), false); + + // Check if there are no calls of the marked parameter at all + for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) { + const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); + + switch (IndexedStatus.value().getKind()) { + case ParameterStatus::NotCalled: + // If there were places where this parameter escapes (aka being used), + // we can provide a more useful diagnostic by pointing at the exact + // branches where it is not even mentioned. + if (!hasEverEscaped(IndexedStatus.index())) { + // This parameter is was not used at all, so we should report the + // most generic version of the warning. + if (isCaptured(Parameter)) { + // We want to specify that it was captured by the block. + Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(), + !isExplicitlyMarked(Parameter)); + } else { + Handler.handleNeverCalled(Parameter, + !isExplicitlyMarked(Parameter)); + } + } else { + // Mark it as 'interesting' to figure out which paths don't even + // have escapes. + NotUsedOnEveryPath[IndexedStatus.index()] = true; + } + + break; + case ParameterStatus::MaybeCalled: + // If we have 'maybe called' at this point, we have an error + // that there is at least one path where this parameter + // is not called. + // + // However, reporting the warning with only that information can be + // too vague for the users. For this reason, we mark such parameters + // as "interesting" for further analysis. + NotCalledOnEveryPath[IndexedStatus.index()] = true; + break; + default: + break; + } + } + + // Early exit if we don't have parameters for extra analysis. + if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none()) + return; + + // We are looking for a pair of blocks A, B so that the following is true: + // * A is a predecessor of B + // * B is marked as NotCalled + // * A has at least one successor marked as either + // Escaped or DefinitelyCalled + // + // In that situation, it is guaranteed that B is the first block of the path + // where the user doesn't call or use parameter in question. + // + // For this reason, branch A -> B can be used for reporting. + // + // This part of the algorithm is guarded by a condition that the function + // does indeed have a violation of contract. For this reason, we can + // spend more time to find a good spot to place the warning. + // + // The following algorithm has the worst case complexity of O(V + E), + // where V is the number of basic blocks in FunctionCFG, + // E is the number of edges between blocks in FunctionCFG. + for (const CFGBlock *BB : FunctionCFG) { + if (!BB) + continue; + + const State &BlockState = getState(BB); + + for (unsigned Index : llvm::seq(0u, size())) { + // We don't want to use 'isLosingCall' here because we want to report + // the following situation as well: + // + // MaybeCalled + // | ... | + // MaybeCalled NotCalled + // + // Even though successor is not 'DefinitelyCalled', it is still useful + // to report it, it is still a path without a call. + if (NotCalledOnEveryPath[Index] && + BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { + + findAndReportNotCalledBranches(BB, Index); + } else if (NotUsedOnEveryPath[Index] && + isLosingEscape(BlockState, BB, Index)) { + + findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); + } + } + } + } + + /// Check potential call of a tracked parameter. + void checkDirectCall(const CallExpr *Call) { + if (auto Index = getIndexOfCallee(Call)) { + processCallFor(*Index, Call); + } + } + + /// Check the call expression for being an indirect call of one of the tracked + /// parameters. It is indirect in the sense that this particular call is not + /// calling the parameter itself, but rather uses it as the argument. + template <class CallLikeExpr> + void checkIndirectCall(const CallLikeExpr *CallOrMessage) { + // CallExpr::arguments does not interact nicely with llvm::enumerate. + llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef( + CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); + + // Let's check if any of the call arguments is a point of interest. + for (const auto &Argument : llvm::enumerate(Arguments)) { + if (auto Index = getIndexOfExpression(Argument.value())) { + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); + + if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { + // If the corresponding parameter is marked as 'called_once' we should + // consider it as a call. + processCallFor(*Index, CallOrMessage); + } else if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { + // Otherwise, we mark this parameter as escaped, which can be + // interpreted both as called or not called depending on the context. + CurrentParamStatus = ParameterStatus::Escaped; + } + // Otherwise, let's keep the state as it is. + } + } + } + + /// Process call of the parameter with the given index + void processCallFor(unsigned Index, const Expr *Call) { + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); + + if (CurrentParamStatus.seenAnyCalls()) { + + // At this point, this parameter was called, so this is a second call. + const ParmVarDecl *Parameter = getParameter(Index); + Handler.handleDoubleCall( + Parameter, &CurrentState.getCallFor(Index), Call, + !isExplicitlyMarked(Parameter), + // We are sure that the second call is definitely + // going to happen if the status is 'DefinitelyCalled'. + CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); + + // Mark this parameter as already reported on, so we don't repeat + // warnings. + CurrentParamStatus = ParameterStatus::Reported; + + } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { + // If we didn't report anything yet, let's mark this parameter + // as called. + ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); + CurrentParamStatus = Called; + } + } + + void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, + bool IsEscape = false) { + for (const CFGBlock *Succ : Parent->succs()) { + if (!Succ) + continue; + + if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) { + assert(Parent->succ_size() >= 2 && + "Block should have at least two successors at this point"); + if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { + const ParmVarDecl *Parameter = getParameter(Index); + Handler.handleNeverCalled(Parameter, Clarification->Location, + Clarification->Reason, !IsEscape, + !isExplicitlyMarked(Parameter)); + } + } + } + } + + //===----------------------------------------------------------------------===// + // Predicate functions to check parameters + //===----------------------------------------------------------------------===// + + /// Return true if parameter is explicitly marked as 'called_once'. + static bool isExplicitlyMarked(const ParmVarDecl *Parameter) { + return Parameter->hasAttr<CalledOnceAttr>(); + } + + /// Return true if the given name matches conventional pattens. + static bool isConventional(llvm::StringRef Name) { + return llvm::count(CONVENTIONAL_NAMES, Name) != 0; + } + + /// Return true if the given name has conventional suffixes. + static bool hasConventionalSuffix(llvm::StringRef Name) { + return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { + return Name.endswith(Suffix); + }); + } + + /// Return true if the given type can be used for conventional parameters. + static bool isConventional(QualType Ty) { + if (!Ty->isBlockPointerType()) { + return false; + } + + QualType BlockType = Ty->getAs<BlockPointerType>()->getPointeeType(); + // Completion handlers should have a block type with void return type. + return BlockType->getAs<FunctionType>()->getReturnType()->isVoidType(); + } + + /// Return true if the only parameter of the function is conventional. + static bool isOnlyParameterConventional(const FunctionDecl *Function) { + IdentifierInfo *II = Function->getIdentifier(); + return Function->getNumParams() == 1 && II && + hasConventionalSuffix(II->getName()); + } + + /// Return true/false if 'swift_async' attribute states that the given + /// parameter is conventionally called once. + /// Return llvm::None if the given declaration doesn't have 'swift_async' + /// attribute. + static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D, + unsigned ParamIndex) { + if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { + if (A->getKind() == SwiftAsyncAttr::None) { + return false; + } + + return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; + } + return llvm::None; + } + + /// Return true if the specified selector piece matches conventions. + static bool isConventionalSelectorPiece(Selector MethodSelector, + unsigned PieceIndex, + QualType PieceType) { + if (!isConventional(PieceType)) { + return false; + } + + if (MethodSelector.getNumArgs() == 1) { + assert(PieceIndex == 0); + return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); + } + + return isConventional(MethodSelector.getNameForSlot(PieceIndex)); + } + + bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { + return isExplicitlyMarked(Parameter) || + (CheckConventionalParameters && + isConventional(Parameter->getName()) && + isConventional(Parameter->getType())); + } + + bool shouldBeCalledOnce(const DeclContext *ParamContext, + const ParmVarDecl *Param) { + unsigned ParamIndex = Param->getFunctionScopeIndex(); + if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { + return shouldBeCalledOnce(Function, ParamIndex); + } + if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { + return shouldBeCalledOnce(Method, ParamIndex); + } + return shouldBeCalledOnce(Param); + } + + bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { + return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); + } + + bool shouldBeCalledOnce(const FunctionDecl *Function, + unsigned ParamIndex) const { + if (ParamIndex >= Function->getNumParams()) { + return false; + } + // 'swift_async' goes first and overrides anything else. + if (auto ConventionalAsync = + isConventionalSwiftAsync(Function, ParamIndex)) { + return ConventionalAsync.getValue(); + } + + return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || + (CheckConventionalParameters && + isOnlyParameterConventional(Function)); + } + + bool shouldBeCalledOnce(const ObjCMethodDecl *Method, + unsigned ParamIndex) const { + Selector MethodSelector = Method->getSelector(); + if (ParamIndex >= MethodSelector.getNumArgs()) { + return false; + } + + // 'swift_async' goes first and overrides anything else. + if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { + return ConventionalAsync.getValue(); + } + + const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); + return shouldBeCalledOnce(Parameter) || + (CheckConventionalParameters && + isConventionalSelectorPiece(MethodSelector, ParamIndex, + Parameter->getType())); + } + + bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const { + const FunctionDecl *Function = Call->getDirectCallee(); + return Function && shouldBeCalledOnce(Function, ParamIndex); + } + + bool shouldBeCalledOnce(const ObjCMessageExpr *Message, + unsigned ParamIndex) const { + const ObjCMethodDecl *Method = Message->getMethodDecl(); + return Method && ParamIndex < Method->param_size() && + shouldBeCalledOnce(Method, ParamIndex); + } + + //===----------------------------------------------------------------------===// + // Utility methods + //===----------------------------------------------------------------------===// + + bool isCaptured(const ParmVarDecl *Parameter) const { + if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) { + return Block->capturesVariable(Parameter); + } + return false; + } + + /// Return true if the analyzed function is actually a default implementation + /// of the method that has to be overriden. + /// + /// These functions can have tracked parameters, but wouldn't call them + /// because they are not designed to perform any meaningful actions. + /// + /// There are a couple of flavors of such default implementations: + /// 1. Empty methods or methods with a single return statement + /// 2. Methods that have one block with a call to no return function + /// 3. Methods with only assertion-like operations + bool isPossiblyEmptyImpl() const { + if (!isa<ObjCMethodDecl>(AC.getDecl())) { + // We care only about functions that are not supposed to be called. + // Only methods can be overriden. + return false; + } + + // Case #1 (without return statements) + if (FunctionCFG.size() == 2) { + // Method has only two blocks: ENTRY and EXIT. + // This is equivalent to empty function. + return true; + } + + // Case #2 + if (FunctionCFG.size() == 3) { + const CFGBlock &Entry = FunctionCFG.getEntry(); + if (Entry.succ_empty()) { + return false; + } + + const CFGBlock *OnlyBlock = *Entry.succ_begin(); + // Method has only one block, let's see if it has a no-return + // element. + if (OnlyBlock && OnlyBlock->hasNoReturnElement()) { + return true; + } + // Fallthrough, CFGs with only one block can fall into #1 and #3 as well. + } + + // Cases #1 (return statements) and #3. + // + // It is hard to detect that something is an assertion or came + // from assertion. Here we use a simple heuristic: + // + // - If it came from a macro, it can be an assertion. + // + // Additionally, we can't assume a number of basic blocks or the CFG's + // structure because assertions might include loops and conditions. + return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) { + if (!BB) { + // Unreachable blocks are totally fine. + return true; + } + + // Return statements can have sub-expressions that are represented as + // separate statements of a basic block. We should allow this. + // This parent map will be initialized with a parent tree for all + // subexpressions of the block's return statement (if it has one). + std::unique_ptr<ParentMap> ReturnChildren; + + return llvm::all_of( + llvm::reverse(*BB), // we should start with return statements, if we + // have any, i.e. from the bottom of the block + [&ReturnChildren](const CFGElement &Element) { + if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { + const Stmt *SuspiciousStmt = S->getStmt(); + + if (isa<ReturnStmt>(SuspiciousStmt)) { + // Let's initialize this structure to test whether + // some further statement is a part of this return. + ReturnChildren = std::make_unique<ParentMap>( + const_cast<Stmt *>(SuspiciousStmt)); + // Return statements are allowed as part of #1. + return true; + } + + return SuspiciousStmt->getBeginLoc().isMacroID() || + (ReturnChildren && + ReturnChildren->hasParent(SuspiciousStmt)); + } + return true; + }); + }); + } + + /// Check if parameter with the given index has ever escaped. + bool hasEverEscaped(unsigned Index) const { + return llvm::any_of(States, [Index](const State &StateForOneBB) { + return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; + }); + } + + /// Return status stored for the given basic block. + /// \{ + State &getState(const CFGBlock *BB) { + assert(BB); + return States[BB->getBlockID()]; + } + const State &getState(const CFGBlock *BB) const { + assert(BB); + return States[BB->getBlockID()]; + } + /// \} + + /// Assign status to the given basic block. + /// + /// Returns true when the stored status changed. + bool assignState(const CFGBlock *BB, const State &ToAssign) { + State &Current = getState(BB); + if (Current == ToAssign) { + return false; + } + + Current = ToAssign; + return true; + } + + /// Join all incoming statuses for the given basic block. + State joinSuccessors(const CFGBlock *BB) const { + auto Succs = + llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { + return Succ && this->getState(Succ).isVisited(); + }); + // We came to this block from somewhere after all. + assert(!Succs.empty() && + "Basic block should have at least one visited successor"); + + State Result = getState(*Succs.begin()); + + for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { + Result.join(getState(Succ)); + } + + if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { + handleConditional(BB, Condition, Result); + } + + return Result; + } + + void handleConditional(const CFGBlock *BB, const Expr *Condition, + State &ToAlter) const { + handleParameterCheck(BB, Condition, ToAlter); + if (SuppressOnConventionalErrorPaths) { + handleConventionalCheck(BB, Condition, ToAlter); + } + } + + void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, + State &ToAlter) const { + // In this function, we try to deal with the following pattern: + // + // if (parameter) + // parameter(...); + // + // It's not good to show a warning here because clearly 'parameter' + // couldn't and shouldn't be called on the 'else' path. + // + // Let's check if this if statement has a check involving one of + // the tracked parameters. + if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( + Condition, + /* ShouldRetrieveFromComparisons = */ true)) { + if (const auto Index = getIndex(*Parameter)) { + ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); + + // We don't want to deep dive into semantics of the check and + // figure out if that check was for null or something else. + // We simply trust the user that they know what they are doing. + // + // For this reason, in the following loop we look for the + // best-looking option. + for (const CFGBlock *Succ : BB->succs()) { + if (!Succ) + continue; + + const ParameterStatus &StatusInSucc = + getState(Succ).getStatusFor(*Index); + + if (StatusInSucc.isErrorStatus()) { + continue; + } + + // Let's use this status instead. + CurrentStatus = StatusInSucc; + + if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) { + // This is the best option to have and we already found it. + break; + } + + // If we found 'Escaped' first, we still might find 'DefinitelyCalled' + // on the other branch. And we prefer the latter. + } + } + } + } + + void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition, + State &ToAlter) const { + // Even when the analysis is technically correct, it is a widespread pattern + // not to call completion handlers in some scenarios. These usually have + // typical conditional names, such as 'error' or 'cancel'. + if (!mentionsAnyOfConventionalNames(Condition)) { + return; + } + + for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) { + const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); + // Conventions do not apply to explicitly marked parameters. + if (isExplicitlyMarked(Parameter)) { + continue; + } + + ParameterStatus &CurrentStatus = IndexedStatus.value(); + // If we did find that on one of the branches the user uses the callback + // and doesn't on the other path, we believe that they know what they are + // doing and trust them. + // + // There are two possible scenarios for that: + // 1. Current status is 'MaybeCalled' and one of the branches is + // 'DefinitelyCalled' + // 2. Current status is 'NotCalled' and one of the branches is 'Escaped' + if (isLosingCall(ToAlter, BB, IndexedStatus.index()) || + isLosingEscape(ToAlter, BB, IndexedStatus.index())) { + CurrentStatus = ParameterStatus::Escaped; + } + } + } + + bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock, + unsigned ParameterIndex) const { + // Let's check if the block represents DefinitelyCalled -> MaybeCalled + // transition. + return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, + ParameterStatus::MaybeCalled, + ParameterStatus::DefinitelyCalled); + } + + bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock, + unsigned ParameterIndex) const { + // Let's check if the block represents Escaped -> NotCalled transition. + return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, + ParameterStatus::NotCalled, ParameterStatus::Escaped); + } + + bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock, + unsigned ParameterIndex, ParameterStatus::Kind AfterJoin, + ParameterStatus::Kind BeforeJoin) const { + assert(!ParameterStatus::isErrorStatus(BeforeJoin) && + ParameterStatus::isErrorStatus(AfterJoin) && + "It's not a losing join if statuses do not represent " + "correct-to-error transition"); + + const ParameterStatus &CurrentStatus = + StateAfterJoin.getStatusFor(ParameterIndex); + + return CurrentStatus.getKind() == AfterJoin && + anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin); + } + + /// Return true if any of the successors of the given basic block has + /// a specified status for the given parameter. + bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex, + ParameterStatus::Kind ToFind) const { + return llvm::any_of( + Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) { + return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind; + }); + } + + /// Check given expression that was discovered to escape. + void checkEscapee(const Expr *E) { + if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { + checkEscapee(*Parameter); + } + } + + /// Check given parameter that was discovered to escape. + void checkEscapee(const ParmVarDecl &Parameter) { + if (auto Index = getIndex(Parameter)) { + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); + + if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { + CurrentParamStatus = ParameterStatus::Escaped; + } + } + } + + /// Mark all parameters in the current state as 'no-return'. + void markNoReturn() { + for (ParameterStatus &PS : CurrentState) { + PS = ParameterStatus::NoReturn; + } + } + + /// Check if the given assignment represents suppression and act on it. + void checkSuppression(const BinaryOperator *Assignment) { + // Suppression has the following form: + // parameter = 0; + // 0 can be of any form (NULL, nil, etc.) + if (auto Index = getIndexOfExpression(Assignment->getLHS())) { + + // We don't care what is written in the RHS, it could be whatever + // we can interpret as 0. + if (auto Constant = + Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr( + AC.getASTContext())) { + + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); + + if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) { + // Even though this suppression mechanism is introduced to tackle + // false positives for multiple calls, the fact that the user has + // to use suppression can also tell us that we couldn't figure out + // how different paths cancel each other out. And if that is true, + // we will most certainly have false positives about parameters not + // being called on certain paths. + // + // For this reason, we abandon tracking this parameter altogether. + CurrentParamStatus = ParameterStatus::Reported; + } + } + } + } + +public: + //===----------------------------------------------------------------------===// + // Tree traversal methods + //===----------------------------------------------------------------------===// + + void VisitCallExpr(const CallExpr *Call) { + // This call might be a direct call, i.e. a parameter call... + checkDirectCall(Call); + // ... or an indirect call, i.e. when parameter is an argument. + checkIndirectCall(Call); + } + + void VisitObjCMessageExpr(const ObjCMessageExpr *Message) { + // The most common situation that we are defending against here is + // copying a tracked parameter. + if (const Expr *Receiver = Message->getInstanceReceiver()) { + checkEscapee(Receiver); + } + // Message expressions unlike calls, could not be direct. + checkIndirectCall(Message); + } + + void VisitBlockExpr(const BlockExpr *Block) { + for (const auto &Capture : Block->getBlockDecl()->captures()) { + // If a block captures a tracked parameter, it should be + // considered escaped. + // On one hand, blocks that do that should definitely call it on + // every path. However, it is not guaranteed that the block + // itself gets called whenever it gets created. + // + // Because we don't want to track blocks and whether they get called, + // we consider such parameters simply escaped. + if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { + checkEscapee(*Param); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *Op) { + if (Op->getOpcode() == clang::BO_Assign) { + // Let's check if one of the tracked parameters is assigned into + // something, and if it is we don't want to track extra variables, so we + // consider it as an escapee. + checkEscapee(Op->getRHS()); + + // Let's check whether this assignment is a suppression. + checkSuppression(Op); + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + // Variable initialization is not assignment and should be handled + // separately. + // + // Multiple declarations can be a part of declaration statement. + for (const auto *Declaration : DS->getDeclGroup()) { + if (const auto *Var = dyn_cast<VarDecl>(Declaration)) { + if (Var->getInit()) { + checkEscapee(Var->getInit()); + } + } + } + } + + void VisitCStyleCastExpr(const CStyleCastExpr *Cast) { + // We consider '(void)parameter' as a manual no-op escape. + // It should be used to explicitly tell the analysis that this parameter + // is intentionally not called on this path. + if (Cast->getType().getCanonicalType()->isVoidType()) { + checkEscapee(Cast->getSubExpr()); + } + } + + void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) { + // It is OK not to call marked parameters on exceptional paths. + markNoReturn(); + } + +private: + unsigned size() const { return TrackedParams.size(); } + + llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { + return getIndexOfExpression(Call->getCallee()); + } + + llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const { + if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { + return getIndex(*Parameter); + } + + return llvm::None; + } + + llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { + // Expected number of parameters that we actually track is 1. + // + // Also, the maximum number of declared parameters could not be on a scale + // of hundreds of thousands. + // + // In this setting, linear search seems reasonable and even performs better + // than bisection. + ParamSizedVector<const ParmVarDecl *>::const_iterator It = + llvm::find(TrackedParams, &Parameter); + + if (It != TrackedParams.end()) { + return It - TrackedParams.begin(); + } + + return llvm::None; + } + + const ParmVarDecl *getParameter(unsigned Index) const { + assert(Index < TrackedParams.size()); + return TrackedParams[Index]; + } + + const CFG &FunctionCFG; + AnalysisDeclContext &AC; + CalledOnceCheckHandler &Handler; + bool CheckConventionalParameters; + // As of now, we turn this behavior off. So, we still are going to report + // missing calls on paths that look like it was intentional. + // Technically such reports are true positives, but they can make some users + // grumpy because of the sheer number of warnings. + // It can be turned back on if we decide that we want to have the other way + // around. + bool SuppressOnConventionalErrorPaths = false; + + State CurrentState; + ParamSizedVector<const ParmVarDecl *> TrackedParams; + CFGSizedVector<State> States; +}; + +} // end anonymous namespace + +namespace clang { +void checkCalledOnceParameters(AnalysisDeclContext &AC, + CalledOnceCheckHandler &Handler, + bool CheckConventionalParameters) { + CalledOnceChecker::check(AC, Handler, CheckConventionalParameters); +} +} // end namespace clang |