Diffstat (limited to 'contrib/llvm-project/clang/lib/Analysis')
44 files changed, 13371 insertions, 1558 deletions
diff --git a/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp b/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp
index 783de6442645..d3a1a993711f 100644
--- a/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp
+++ b/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp
@@ -142,7 +142,7 @@ bool AnalysisDeclContext::isBodyAutosynthesizedFromModelFile() const {
 /// Returns true if \param VD is an Objective-C implicit 'self' parameter.
 static bool isSelfDecl(const VarDecl *VD) {
-  return isa<ImplicitParamDecl>(VD) && VD->getName() == "self";
+  return isa_and_nonnull<ImplicitParamDecl>(VD) && VD->getName() == "self";
 }
 
 const ImplicitParamDecl *AnalysisDeclContext::getSelfDecl() const {
@@ -169,8 +169,8 @@ const ImplicitParamDecl *AnalysisDeclContext::getSelfDecl() const {
     if (!LC.capturesVariable())
       continue;
 
-    VarDecl *VD = LC.getCapturedVar();
-    if (isSelfDecl(VD))
+    ValueDecl *VD = LC.getCapturedVar();
+    if (isSelfDecl(dyn_cast<VarDecl>(VD)))
       return dyn_cast<ImplicitParamDecl>(VD);
   }
 
@@ -231,8 +231,7 @@ CFG *AnalysisDeclContext::getCFG() {
 CFG *AnalysisDeclContext::getUnoptimizedCFG() {
   if (!builtCompleteCFG) {
-    SaveAndRestore<bool> NotPrune(cfgBuildOptions.PruneTriviallyFalseEdges,
-                                  false);
+    SaveAndRestore NotPrune(cfgBuildOptions.PruneTriviallyFalseEdges, false);
     completeCFG =
         CFG::buildCFG(D, getBody(), &D->getASTContext(), cfgBuildOptions);
     // Even when the cfg is not successfully built, we don't
@@ -337,6 +336,59 @@ bool AnalysisDeclContext::isInStdNamespace(const Decl *D) {
   return ND->isStdNamespace();
 }
 
+std::string AnalysisDeclContext::getFunctionName(const Decl *D) {
+  std::string Str;
+  llvm::raw_string_ostream OS(Str);
+  const ASTContext &Ctx = D->getASTContext();
+
+  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    OS << FD->getQualifiedNameAsString();
+
+    // In C++, there are overloads.
+    if (Ctx.getLangOpts().CPlusPlus) {
+      OS << '(';
+      for (const auto &P : FD->parameters()) {
+        if (P != *FD->param_begin())
+          OS << ", ";
+        OS << P->getType();
+      }
+      OS << ')';
+    }
+
+  } else if (isa<BlockDecl>(D)) {
+    PresumedLoc Loc = Ctx.getSourceManager().getPresumedLoc(D->getLocation());
+
+    if (Loc.isValid()) {
+      OS << "block (line: " << Loc.getLine() << ", col: " << Loc.getColumn()
+         << ')';
+    }
+
+  } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) {
+
+    // FIXME: copy-pasted from CGDebugInfo.cpp.
+    OS << (OMD->isInstanceMethod() ? '-' : '+') << '[';
+    const DeclContext *DC = OMD->getDeclContext();
+    if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) {
+      OS << OID->getName();
+    } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) {
+      OS << OID->getName();
+    } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) {
+      if (OC->IsClassExtension()) {
+        OS << OC->getClassInterface()->getName();
+      } else {
+        OS << OC->getIdentifier()->getNameStart() << '('
+           << OC->getIdentifier()->getNameStart() << ')';
+      }
+    } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
+      OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')';
+    }
+    OS << ' ' << OMD->getSelector().getAsString() << ']';
+  }
+
+  return Str;
+}
+
 LocationContextManager &AnalysisDeclContext::getLocationContextManager() {
   assert(
       ADCMgr &&
@@ -456,7 +508,7 @@ void LocationContext::dumpStack(raw_ostream &Out) const {
     Out << "\t#" << Frame << ' ';
     ++Frame;
     if (const auto *D = dyn_cast<NamedDecl>(LCtx->getDecl()))
-      Out << "Calling " << D->getQualifiedNameAsString();
+      Out << "Calling " << AnalysisDeclContext::getFunctionName(D);
    else
      Out << "Calling anonymous code";
    if (const Stmt *S = cast<StackFrameContext>(LCtx)->getCallSite()) {
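The practical payoff of the new getFunctionName is in the analyzer's stack
dumps: for C++ overload sets the frame now shows parameter types. A minimal
model of the formatting, not the clang API itself (displayName is an
illustrative stand-in):

    #include <string>
    #include <vector>

    // Sketch of "qualified name + parameter type list" formatting.
    std::string displayName(const std::string &Qualified,
                            const std::vector<std::string> &ParamTypes) {
      std::string Out = Qualified + "(";
      for (std::size_t I = 0; I < ParamTypes.size(); ++I) {
        if (I)
          Out += ", ";
        Out += ParamTypes[I];
      }
      return Out + ")";
    }

    // displayName("ns::f", {"int", "double"}) == "ns::f(int, double)",
    // so two frames calling different overloads of ns::f stay distinguishable.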
diff --git a/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp b/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp
index 603da6715625..127e843d4ead 100644
--- a/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp
+++ b/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp
@@ -20,9 +20,11 @@
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/Analysis/CodeInjector.h"
+#include "clang/Basic/Builtins.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Debug.h"
+#include <optional>
 
 #define DEBUG_TYPE "body-farm"
 
@@ -86,6 +88,9 @@ public:
   ImplicitCastExpr *makeImplicitCast(const Expr *Arg, QualType Ty,
                                      CastKind CK = CK_LValueToRValue);
 
+  /// Create a cast to reference type.
+  CastExpr *makeReferenceCast(const Expr *Arg, QualType Ty);
+
   /// Create an Objective-C bool literal.
   ObjCBoolLiteralExpr *makeObjCBool(bool Val);
@@ -116,7 +121,7 @@ BinaryOperator *ASTMaker::makeAssignment(const Expr *LHS, const Expr *RHS,
                                          QualType Ty) {
   return BinaryOperator::Create(
       C, const_cast<Expr *>(LHS), const_cast<Expr *>(RHS), BO_Assign, Ty,
-      VK_RValue, OK_Ordinary, SourceLocation(), FPOptionsOverride());
+      VK_PRValue, OK_Ordinary, SourceLocation(), FPOptionsOverride());
 }
 
 BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS,
@@ -125,12 +130,13 @@ BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS,
                  BinaryOperator::isComparisonOp(Op));
   return BinaryOperator::Create(
       C, const_cast<Expr *>(LHS), const_cast<Expr *>(RHS), Op,
-      C.getLogicalOperationType(), VK_RValue, OK_Ordinary, SourceLocation(),
+      C.getLogicalOperationType(), VK_PRValue, OK_Ordinary, SourceLocation(),
       FPOptionsOverride());
 }
 
 CompoundStmt *ASTMaker::makeCompound(ArrayRef<Stmt *> Stmts) {
-  return CompoundStmt::Create(C, Stmts, SourceLocation(), SourceLocation());
+  return CompoundStmt::Create(C, Stmts, FPOptionsOverride(), SourceLocation(),
+                              SourceLocation());
 }
 
 DeclRefExpr *ASTMaker::makeDeclRefExpr(
@@ -169,10 +175,20 @@ ImplicitCastExpr *ASTMaker::makeImplicitCast(const Expr *Arg, QualType Ty,
       /* CastKind=*/CK,
       /* Expr=*/const_cast<Expr *>(Arg),
       /* CXXCastPath=*/nullptr,
-      /* ExprValueKind=*/VK_RValue,
+      /* ExprValueKind=*/VK_PRValue,
       /* FPFeatures */ FPOptionsOverride());
 }
 
+CastExpr *ASTMaker::makeReferenceCast(const Expr *Arg, QualType Ty) {
+  assert(Ty->isReferenceType());
+  return CXXStaticCastExpr::Create(
+      C, Ty.getNonReferenceType(),
+      Ty->isLValueReferenceType() ? VK_LValue : VK_XValue, CK_NoOp,
+      const_cast<Expr *>(Arg), /*CXXCastPath=*/nullptr,
+      /*Written=*/C.getTrivialTypeSourceInfo(Ty), FPOptionsOverride(),
+      SourceLocation(), SourceLocation(), SourceRange());
+}
+
 Expr *ASTMaker::makeIntegralCast(const Expr *Arg, QualType Ty) {
   if (Arg->getType() == Ty)
     return const_cast<Expr*>(Arg);
@@ -264,7 +280,7 @@ static CallExpr *create_call_once_funcptr_call(ASTContext &C, ASTMaker M,
     llvm_unreachable("Unexpected state");
   }
 
-  return CallExpr::Create(C, SubExpr, CallArgs, C.VoidTy, VK_RValue,
+  return CallExpr::Create(C, SubExpr, CallArgs, C.VoidTy, VK_PRValue,
                           SourceLocation(), FPOptionsOverride());
 }
 
@@ -291,11 +307,27 @@ static CallExpr *create_call_once_lambda_call(ASTContext &C, ASTMaker M,
       /*AstContext=*/C, OO_Call, callOperatorDeclRef,
       /*Args=*/CallArgs,
      /*QualType=*/C.VoidTy,
-      /*ExprValueType=*/VK_RValue,
+      /*ExprValueType=*/VK_PRValue,
      /*SourceLocation=*/SourceLocation(),
      /*FPFeatures=*/FPOptionsOverride());
 }
 
+/// Create a fake body for 'std::move' or 'std::forward'. This is just:
+///
+/// \code
+/// return static_cast<return_type>(param);
+/// \endcode
+static Stmt *create_std_move_forward(ASTContext &C, const FunctionDecl *D) {
+  LLVM_DEBUG(llvm::dbgs() << "Generating body for std::move / std::forward\n");
+
+  ASTMaker M(C);
+
+  QualType ReturnType = D->getType()->castAs<FunctionType>()->getReturnType();
+  Expr *Param = M.makeDeclRefExpr(D->getParamDecl(0));
+  Expr *Cast = M.makeReferenceCast(Param, ReturnType);
+  return M.makeReturn(Cast);
+}
+
 /// Create a fake body for std::call_once.
 /// Emulates the following function body:
 ///
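In source terms, the body that create_std_move_forward farms for std::move
corresponds to the single cast-and-return below (a sketch assuming the
standard signature; move_model is an illustrative name, since BodyFarm builds
the equivalent AST directly via makeReferenceCast and makeReturn):

    #include <type_traits>

    template <class T>
    constexpr typename std::remove_reference<T>::type &&move_model(T &&param) {
      // BodyFarm emits: return static_cast<return_type>(param);
      return static_cast<typename std::remove_reference<T>::type &&>(param);
    }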
@@ -451,7 +483,7 @@ static Stmt *create_call_once(ASTContext &C, const FunctionDecl *D) {
                           CK_IntegralToBoolean),
       /* opc=*/UO_LNot,
       /* QualType=*/C.IntTy,
-      /* ExprValueKind=*/VK_RValue,
+      /* ExprValueKind=*/VK_PRValue,
       /* ExprObjectKind=*/OK_Ordinary, SourceLocation(),
       /* CanOverflow*/ false, FPOptionsOverride());
 
@@ -461,8 +493,7 @@ static Stmt *create_call_once(ASTContext &C, const FunctionDecl *D) {
                                 DerefType);
 
   auto *Out =
-      IfStmt::Create(C, SourceLocation(),
-                     /* IsConstexpr=*/false,
+      IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary,
                      /* Init=*/nullptr,
                      /* Var=*/nullptr,
                      /* Cond=*/FlagCheck,
@@ -511,15 +542,15 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) {
   CallExpr *CE = CallExpr::Create(
       /*ASTContext=*/C,
       /*StmtClass=*/M.makeLvalueToRvalue(/*Expr=*/Block),
-      /*Args=*/None,
+      /*Args=*/std::nullopt,
      /*QualType=*/C.VoidTy,
-      /*ExprValueType=*/VK_RValue,
+      /*ExprValueType=*/VK_PRValue,
       /*SourceLocation=*/SourceLocation(), FPOptionsOverride());
 
   // (2) Create the assignment to the predicate.
   Expr *DoneValue =
       UnaryOperator::Create(C, M.makeIntegerLiteral(0, C.LongTy), UO_Not,
-                            C.LongTy, VK_RValue, OK_Ordinary, SourceLocation(),
+                            C.LongTy, VK_PRValue, OK_Ordinary, SourceLocation(),
                             /*CanOverflow*/ false, FPOptionsOverride());
 
   BinaryOperator *B =
@@ -547,8 +578,7 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) {
   Expr *GuardCondition = M.makeComparison(LValToRval, DoneValue, BO_NE);
   // (5) Create the 'if' statement.
-  auto *If = IfStmt::Create(C, SourceLocation(),
-                            /* IsConstexpr=*/false,
+  auto *If = IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary,
                             /* Init=*/nullptr,
                             /* Var=*/nullptr,
                             /* Cond=*/GuardCondition,
@@ -580,7 +610,7 @@ static Stmt *create_dispatch_sync(ASTContext &C, const FunctionDecl *D) {
   ASTMaker M(C);
   DeclRefExpr *DR = M.makeDeclRefExpr(PV);
   ImplicitCastExpr *ICE = M.makeLvalueToRvalue(DR, Ty);
-  CallExpr *CE = CallExpr::Create(C, ICE, None, C.VoidTy, VK_RValue,
+  CallExpr *CE = CallExpr::Create(C, ICE, std::nullopt, C.VoidTy, VK_PRValue,
                                   SourceLocation(), FPOptionsOverride());
   return CE;
 }
@@ -658,8 +688,7 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D)
 
   /// Construct the If.
   auto *If =
-      IfStmt::Create(C, SourceLocation(),
-                     /* IsConstexpr=*/false,
+      IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary,
                      /* Init=*/nullptr,
                      /* Var=*/nullptr, Comparison,
                      /* LPL=*/SourceLocation(),
@@ -669,9 +698,9 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D)
 }
 
 Stmt *BodyFarm::getBody(const FunctionDecl *D) {
-  Optional<Stmt *> &Val = Bodies[D];
-  if (Val.hasValue())
-    return Val.getValue();
+  std::optional<Stmt *> &Val = Bodies[D];
+  if (Val)
+    return *Val;
 
   Val = nullptr;
 
@@ -684,8 +713,21 @@ Stmt *BodyFarm::getBody(const FunctionDecl *D) {
 
   FunctionFarmer FF;
 
-  if (Name.startswith("OSAtomicCompareAndSwap") ||
-      Name.startswith("objc_atomicCompareAndSwap")) {
+  if (unsigned BuiltinID = D->getBuiltinID()) {
+    switch (BuiltinID) {
+    case Builtin::BIas_const:
+    case Builtin::BIforward:
+    case Builtin::BIforward_like:
+    case Builtin::BImove:
+    case Builtin::BImove_if_noexcept:
+      FF = create_std_move_forward;
+      break;
+    default:
+      FF = nullptr;
+      break;
+    }
+  } else if (Name.starts_with("OSAtomicCompareAndSwap") ||
+             Name.starts_with("objc_atomicCompareAndSwap")) {
     FF = create_OSAtomicCompareAndSwap;
   } else if (Name == "call_once" && D->getDeclContext()->isStdNamespace()) {
     FF = create_call_once;
@@ -698,7 +740,7 @@ Stmt *BodyFarm::getBody(const FunctionDecl *D) {
   if (FF) { Val = FF(C, D); }
   else if (Injector) { Val = Injector->getBody(D); }
 
-  return Val.getValue();
+  return *Val;
 }
 
 static const ObjCIvarDecl *findBackingIvar(const ObjCPropertyDecl *Prop) {
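The user-visible effect of the new builtin-ID dispatch (illustrative code,
not from the patch): calls to std::move, std::forward, std::as_const and
friends stop being opaque library calls, so the analyzer can track values
straight through them.

    #include <utility>

    struct Buf { int *p; };

    void test(Buf b) {
      // With the farmed body, std::move(b) is modeled as
      // static_cast<Buf &&>(b) rather than an unknown call,
      // so the value of b.p is preserved across the "call".
      Buf c = std::move(b);
      (void)c;
    }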
@@ -742,8 +784,9 @@ static const ObjCIvarDecl *findBackingIvar(const ObjCPropertyDecl *Prop) {
 
 static Stmt *createObjCPropertyGetter(ASTContext &Ctx,
                                       const ObjCMethodDecl *MD) {
-  // First, find the backing ivar. 
+  // First, find the backing ivar.
   const ObjCIvarDecl *IVar = nullptr;
+  const ObjCPropertyDecl *Prop = nullptr;
 
   // Property accessor stubs sometimes do not correspond to any property decl
   // in the current interface (but in a superclass). They still have a
@@ -751,54 +794,56 @@ static Stmt *createObjCPropertyGetter(ASTContext &Ctx,
   if (MD->isSynthesizedAccessorStub()) {
     const ObjCInterfaceDecl *IntD = MD->getClassInterface();
     const ObjCImplementationDecl *ImpD = IntD->getImplementation();
-    for (const auto *PI: ImpD->property_impls()) {
-      if (const ObjCPropertyDecl *P = PI->getPropertyDecl()) {
-        if (P->getGetterName() == MD->getSelector())
-          IVar = P->getPropertyIvarDecl();
+    for (const auto *PI : ImpD->property_impls()) {
+      if (const ObjCPropertyDecl *Candidate = PI->getPropertyDecl()) {
+        if (Candidate->getGetterName() == MD->getSelector()) {
+          Prop = Candidate;
+          IVar = Prop->getPropertyIvarDecl();
+        }
       }
     }
   }
 
   if (!IVar) {
-    const ObjCPropertyDecl *Prop = MD->findPropertyDecl();
-    IVar = findBackingIvar(Prop);
-    if (!IVar)
-      return nullptr;
+    Prop = MD->findPropertyDecl();
+    IVar = Prop ? findBackingIvar(Prop) : nullptr;
+  }
 
-    // Ignore weak variables, which have special behavior.
-    if (Prop->getPropertyAttributes() & ObjCPropertyAttribute::kind_weak)
-      return nullptr;
+  if (!IVar || !Prop)
+    return nullptr;
 
-    // Look to see if Sema has synthesized a body for us. This happens in
-    // Objective-C++ because the return value may be a C++ class type with a
-    // non-trivial copy constructor. We can only do this if we can find the
-    // @synthesize for this property, though (or if we know it's been auto-
-    // synthesized).
-    const ObjCImplementationDecl *ImplDecl =
+  // Ignore weak variables, which have special behavior.
+  if (Prop->getPropertyAttributes() & ObjCPropertyAttribute::kind_weak)
+    return nullptr;
+
+  // Look to see if Sema has synthesized a body for us. This happens in
+  // Objective-C++ because the return value may be a C++ class type with a
+  // non-trivial copy constructor. We can only do this if we can find the
+  // @synthesize for this property, though (or if we know it's been auto-
+  // synthesized).
+  const ObjCImplementationDecl *ImplDecl =
       IVar->getContainingInterface()->getImplementation();
-    if (ImplDecl) {
-      for (const auto *I : ImplDecl->property_impls()) {
-        if (I->getPropertyDecl() != Prop)
-          continue;
-
-        if (I->getGetterCXXConstructor()) {
-          ASTMaker M(Ctx);
-          return M.makeReturn(I->getGetterCXXConstructor());
-        }
+  if (ImplDecl) {
+    for (const auto *I : ImplDecl->property_impls()) {
+      if (I->getPropertyDecl() != Prop)
+        continue;
+
+      if (I->getGetterCXXConstructor()) {
+        ASTMaker M(Ctx);
+        return M.makeReturn(I->getGetterCXXConstructor());
       }
     }
-
-    // Sanity check that the property is the same type as the ivar, or a
-    // reference to it, and that it is either an object pointer or trivially
-    // copyable.
-    if (!Ctx.hasSameUnqualifiedType(IVar->getType(),
-                                    Prop->getType().getNonReferenceType()))
-      return nullptr;
-    if (!IVar->getType()->isObjCLifetimeType() &&
-        !IVar->getType().isTriviallyCopyableType(Ctx))
-      return nullptr;
   }
 
+  // We expect that the property is the same type as the ivar, or a reference to
+  // it, and that it is either an object pointer or trivially copyable.
+  if (!Ctx.hasSameUnqualifiedType(IVar->getType(),
+                                  Prop->getType().getNonReferenceType()))
+    return nullptr;
+  if (!IVar->getType()->isObjCLifetimeType() &&
+      !IVar->getType().isTriviallyCopyableType(Ctx))
+    return nullptr;
+
   // Generate our body:
   //   return self->_ivar;
   ASTMaker M(Ctx);
@@ -807,11 +852,8 @@ static Stmt *createObjCPropertyGetter(ASTContext &Ctx,
   if (!selfVar)
     return nullptr;
 
-  Expr *loadedIVar =
-      M.makeObjCIvarRef(
-        M.makeLvalueToRvalue(
-          M.makeDeclRefExpr(selfVar),
-          selfVar->getType()),
+  Expr *loadedIVar = M.makeObjCIvarRef(
+      M.makeLvalueToRvalue(M.makeDeclRefExpr(selfVar), selfVar->getType()),
       IVar);
 
   if (!MD->getReturnType()->isReferenceType())
@@ -832,9 +874,9 @@ Stmt *BodyFarm::getBody(const ObjCMethodDecl *D) {
   if (!D->isImplicit())
     return nullptr;
 
-  Optional<Stmt *> &Val = Bodies[D];
-  if (Val.hasValue())
-    return Val.getValue();
+  std::optional<Stmt *> &Val = Bodies[D];
+  if (Val)
+    return *Val;
   Val = nullptr;
 
   // For now, we only synthesize getters.
@@ -861,5 +903,5 @@ Stmt *BodyFarm::getBody(const ObjCMethodDecl *D) {
 
   Val = createObjCPropertyGetter(C, D);
 
-  return Val.getValue();
+  return *Val;
 }
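Worth noting in the Optional-to-std::optional migration above: the Bodies
cache deliberately uses three states. A sketch of the idiom with illustrative
names (Bodies, getBodyCached and the int key are not the real types):

    #include <map>
    #include <optional>

    struct Stmt;

    std::map<int, std::optional<Stmt *>> Bodies;

    Stmt *getBodyCached(int Key) {
      std::optional<Stmt *> &Val = Bodies[Key]; // empty slot on first touch
      if (Val)
        return *Val;   // cached result, possibly nullptr ("tried and failed")
      Val = nullptr;   // mark "computed, nothing synthesized" up front
      // ... attempt synthesis here, assigning a real pointer on success ...
      return *Val;
    }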
diff --git a/contrib/llvm-project/clang/lib/Analysis/CFG.cpp b/contrib/llvm-project/clang/lib/Analysis/CFG.cpp
index edc86c41c3b9..64e6155de090 100644
--- a/contrib/llvm-project/clang/lib/Analysis/CFG.cpp
+++ b/contrib/llvm-project/clang/lib/Analysis/CFG.cpp
@@ -40,7 +40,6 @@
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -56,6 +55,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <memory>
+#include <optional>
 #include <string>
 #include <tuple>
 #include <utility>
@@ -72,6 +72,10 @@ static SourceLocation GetEndLoc(Decl *D) {
 
 /// Returns true on constant values based around a single IntegerLiteral.
 /// Allow for use of parentheses, integer casts, and negative signs.
+/// FIXME: it would be good to unify this function with
+/// getIntegerLiteralSubexpressionValue at some point given the similarity
+/// between the functions.
+
 static bool IsIntegerLiteralConstantExpr(const Expr *E) {
   // Allow parentheses
   E = E->IgnoreParens();
@@ -296,6 +300,7 @@ public:
     int distance(const_iterator L);
     const_iterator shared_parent(const_iterator L);
     bool pointsToFirstDeclaredVar() { return VarIter == 1; }
+    bool inSameLocalScope(const_iterator rhs) { return Scope == rhs.Scope; }
   };
 
 private:
@@ -345,18 +350,33 @@ int LocalScope::const_iterator::distance(LocalScope::const_iterator L) {
 /// between this and shared_parent(L) end.
 LocalScope::const_iterator
 LocalScope::const_iterator::shared_parent(LocalScope::const_iterator L) {
-  llvm::SmallPtrSet<const LocalScope *, 4> ScopesOfL;
+  // one of iterators is not valid (we are not in scope), so common
+  // parent is const_iterator() (i.e. sentinel).
+  if ((*this == const_iterator()) || (L == const_iterator())) {
+    return const_iterator();
+  }
+
+  const_iterator F = *this;
+  if (F.inSameLocalScope(L)) {
+    // Iterators are in the same scope, get common subset of variables.
+    F.VarIter = std::min(F.VarIter, L.VarIter);
+    return F;
+  }
+
+  llvm::SmallDenseMap<const LocalScope *, unsigned, 4> ScopesOfL;
   while (true) {
-    ScopesOfL.insert(L.Scope);
+    ScopesOfL.try_emplace(L.Scope, L.VarIter);
    if (L == const_iterator())
      break;
    L = L.Scope->Prev;
   }
 
-  const_iterator F = *this;
   while (true) {
-    if (ScopesOfL.count(F.Scope))
+    if (auto LIt = ScopesOfL.find(F.Scope); LIt != ScopesOfL.end()) {
+      // Get common subset of variables in given scope
+      F.VarIter = std::min(F.VarIter, LIt->getSecond());
       return F;
+    }
     assert(F != const_iterator() &&
            "L iterator is not reachable from F iterator.");
     F = F.Scope->Prev;
@@ -432,8 +452,8 @@ reverse_children::reverse_children(Stmt *S) {
     // Note: Fill in this switch with more cases we want to optimize.
     case Stmt::InitListExprClass: {
       InitListExpr *IE = cast<InitListExpr>(S);
-      children = llvm::makeArrayRef(reinterpret_cast<Stmt**>(IE->getInits()),
-                                    IE->getNumInits());
+      children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()),
+                                IE->getNumInits());
       return;
     }
     default:
       break;
  }
 
  // Default case for all other statements.
-  for (Stmt *SubStmt : S->children())
-    childrenBuf.push_back(SubStmt);
+  llvm::append_range(childrenBuf, S->children());
 
   // This needs to be done *after* childrenBuf has been populated.
   children = childrenBuf;
@@ -482,8 +501,10 @@ class CFGBuilder {
   CFGBlock *SwitchTerminatedBlock = nullptr;
   CFGBlock *DefaultCaseBlock = nullptr;
 
-  // This can point either to a try or a __try block. The frontend forbids
-  // mixing both kinds in one function, so having one for both is enough.
+  // This can point to either a C++ try, an Objective-C @try, or an SEH __try.
+  // try and @try can be mixed and generally work the same.
+  // The frontend forbids mixing SEH __try with either try or @try.
+  // So having one for all three is enough.
   CFGBlock *TryTerminatedBlock = nullptr;
 
   // Current position in local scope.
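The shared_parent rewrite above can be summarized as: record, for every
ancestor scope of L, the variable position at which it was seen; then walk up
from *this and stop at the first recorded scope, keeping the smaller of the
two variable positions. A simplified stand-alone model (plain STL, all names
illustrative; the real code walks const_iterators inside LocalScope):

    #include <algorithm>
    #include <unordered_map>

    struct Scope {
      Scope *Prev = nullptr;   // enclosing scope
      unsigned PosInPrev = 0;  // vars of Prev declared before entering this
    };
    struct Iter { Scope *S = nullptr; unsigned VarIter = 0; };

    Iter parentOf(Iter I) { return {I.S->Prev, I.S->PosInPrev}; }

    Iter sharedParent(Iter F, Iter L) {
      if (!F.S || !L.S)
        return {};                                    // sentinel
      if (F.S == L.S)
        return {F.S, std::min(F.VarIter, L.VarIter)}; // common variable subset
      std::unordered_map<Scope *, unsigned> OfL;      // scope -> VarIter seen
      for (Iter I = L; I.S; I = parentOf(I))
        OfL.emplace(I.S, I.VarIter);
      for (Iter I = F; I.S; I = parentOf(I)) {
        auto It = OfL.find(I.S);
        if (It != OfL.end())
          return {I.S, std::min(I.VarIter, It->second)};
      }
      return {};
    }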
@@ -508,9 +529,6 @@ class CFGBuilder {
   llvm::DenseMap<Expr *, const ConstructionContextLayer *>
       ConstructionContextMap;
 
-  using DeclsWithEndedScopeSetTy = llvm::SmallSetVector<VarDecl *, 16>;
-  DeclsWithEndedScopeSetTy DeclsWithEndedScope;
-
   bool badCFG = false;
   const CFG::BuildOptions &BuildOpts;
 
@@ -529,9 +547,7 @@ class CFGBuilder {
 public:
   explicit CFGBuilder(ASTContext *astContext,
                       const CFG::BuildOptions &buildOpts)
-      : Context(astContext), cfg(new CFG()), // crew a new CFG
-        ConstructionContextMap(), BuildOpts(buildOpts) {}
-
+      : Context(astContext), cfg(new CFG()), BuildOpts(buildOpts) {}
 
   // buildCFG - Used by external clients to construct the CFG.
   std::unique_ptr<CFG> buildCFG(const Decl *D, Stmt *Statement);
 
private:
   // Visitors to walk an AST and construct the CFG.
   CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc);
   CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc);
+  CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc);
   CFGBlock *VisitBinaryOperator(BinaryOperator *B, AddStmtChoice asc);
   CFGBlock *VisitBreakStmt(BreakStmt *B);
   CFGBlock *VisitCallExpr(CallExpr *C, AddStmtChoice asc);
@@ -564,6 +581,7 @@ private:
                                       AddStmtChoice asc);
   CFGBlock *VisitCXXThrowExpr(CXXThrowExpr *T);
   CFGBlock *VisitCXXTryStmt(CXXTryStmt *S);
+  CFGBlock *VisitCXXTypeidExpr(CXXTypeidExpr *S, AddStmtChoice asc);
   CFGBlock *VisitDeclStmt(DeclStmt *DS);
   CFGBlock *VisitDeclSubExpr(DeclStmt *DS);
   CFGBlock *VisitDefaultStmt(DefaultStmt *D);
@@ -597,6 +615,8 @@ private:
   CFGBlock *VisitObjCMessageExpr(ObjCMessageExpr *E, AddStmtChoice asc);
   CFGBlock *VisitPseudoObjectExpr(PseudoObjectExpr *E);
   CFGBlock *VisitReturnStmt(Stmt *S);
+  CFGBlock *VisitCoroutineSuspendExpr(CoroutineSuspendExpr *S,
+                                      AddStmtChoice asc);
   CFGBlock *VisitSEHExceptStmt(SEHExceptStmt *S);
   CFGBlock *VisitSEHFinallyStmt(SEHFinallyStmt *S);
   CFGBlock *VisitSEHLeaveStmt(SEHLeaveStmt *S);
@@ -607,6 +627,7 @@ private:
                                           AddStmtChoice asc);
   CFGBlock *VisitUnaryOperator(UnaryOperator *U, AddStmtChoice asc);
   CFGBlock *VisitWhileStmt(WhileStmt *W);
+  CFGBlock *VisitArrayInitLoopExpr(ArrayInitLoopExpr *A, AddStmtChoice asc);
 
   CFGBlock *Visit(Stmt *S, AddStmtChoice asc = AddStmtChoice::NotAlwaysAdd,
                   bool ExternallyDestructed = false);
@@ -719,9 +740,9 @@ private:
   // hence strict duck-typing.
   template <typename CallLikeExpr,
             typename = std::enable_if_t<
-                std::is_base_of<CallExpr, CallLikeExpr>::value ||
-                std::is_base_of<CXXConstructExpr, CallLikeExpr>::value ||
-                std::is_base_of<ObjCMessageExpr, CallLikeExpr>::value>>
+                std::is_base_of_v<CallExpr, CallLikeExpr> ||
+                std::is_base_of_v<CXXConstructExpr, CallLikeExpr> ||
+                std::is_base_of_v<ObjCMessageExpr, CallLikeExpr>>>
   void findConstructionContextsForArguments(CallLikeExpr *E) {
     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
       Expr *Arg = E->getArg(i);
@@ -748,18 +769,20 @@ private:
   CFGBlock *addInitializer(CXXCtorInitializer *I);
   void addLoopExit(const Stmt *LoopStmt);
-  void addAutomaticObjDtors(LocalScope::const_iterator B,
-                            LocalScope::const_iterator E, Stmt *S);
-  void addLifetimeEnds(LocalScope::const_iterator B,
-                       LocalScope::const_iterator E, Stmt *S);
   void addAutomaticObjHandling(LocalScope::const_iterator B,
                                LocalScope::const_iterator E, Stmt *S);
+  void addAutomaticObjDestruction(LocalScope::const_iterator B,
+                                  LocalScope::const_iterator E, Stmt *S);
+  void addScopeExitHandling(LocalScope::const_iterator B,
+                            LocalScope::const_iterator E, Stmt *S);
   void addImplicitDtorsForDestructor(const CXXDestructorDecl *DD);
-  void addScopesEnd(LocalScope::const_iterator B, LocalScope::const_iterator E,
-                    Stmt *S);
-
-  void getDeclsWithEndedScope(LocalScope::const_iterator B,
-                              LocalScope::const_iterator E, Stmt *S);
+  void addScopeChangesHandling(LocalScope::const_iterator SrcPos,
+                               LocalScope::const_iterator DstPos,
+                               Stmt *S);
+  CFGBlock *createScopeChangesHandlingBlock(LocalScope::const_iterator SrcPos,
+                                            CFGBlock *SrcBlk,
+                                            LocalScope::const_iterator DstPost,
+                                            CFGBlock *DstBlk);
 
   // Local scopes creation.
   LocalScope* createOrReuseLocalScope(LocalScope* Scope);
@@ -858,6 +881,10 @@ private:
       B->appendAutomaticObjDtor(VD, S, cfg->getBumpVectorContext());
   }
 
+  void appendCleanupFunction(CFGBlock *B, VarDecl *VD) {
+    B->appendCleanupFunction(VD, cfg->getBumpVectorContext());
+  }
+
   void appendLifetimeEnds(CFGBlock *B, VarDecl *VD, Stmt *S) {
     B->appendLifetimeEnds(VD, S, cfg->getBumpVectorContext());
   }
@@ -870,18 +897,6 @@ private:
     B->appendDeleteDtor(RD, DE, cfg->getBumpVectorContext());
   }
 
-  void prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk,
-      LocalScope::const_iterator B, LocalScope::const_iterator E);
-
-  void prependAutomaticObjLifetimeWithTerminator(CFGBlock *Blk,
-                                                 LocalScope::const_iterator B,
-                                                 LocalScope::const_iterator E);
-
-  const VarDecl *
-  prependAutomaticObjScopeEndWithTerminator(CFGBlock *Blk,
-                                            LocalScope::const_iterator B,
-                                            LocalScope::const_iterator E);
-
   void addSuccessor(CFGBlock *B, CFGBlock *S, bool IsReachable = true) {
     B->addSuccessor(CFGBlock::AdjacentBlock(S, IsReachable),
                     cfg->getBumpVectorContext());
@@ -899,21 +914,11 @@ private:
       B->appendScopeBegin(VD, S, cfg->getBumpVectorContext());
   }
 
-  void prependScopeBegin(CFGBlock *B, const VarDecl *VD, const Stmt *S) {
-    if (BuildOpts.AddScopes)
-      B->prependScopeBegin(VD, S, cfg->getBumpVectorContext());
-  }
-
   void appendScopeEnd(CFGBlock *B, const VarDecl *VD, const Stmt *S) {
     if (BuildOpts.AddScopes)
       B->appendScopeEnd(VD, S, cfg->getBumpVectorContext());
   }
 
-  void prependScopeEnd(CFGBlock *B, const VarDecl *VD, const Stmt *S) {
-    if (BuildOpts.AddScopes)
-      B->prependScopeEnd(VD, S, cfg->getBumpVectorContext());
-  }
-
   /// Find a relational comparison with an expression evaluating to a
   /// boolean and a constant other than 0 and 1.
   /// e.g. if ((x < y) == 10)
@@ -960,15 +965,16 @@ private:
     const Expr *LHSExpr = B->getLHS()->IgnoreParens();
     const Expr *RHSExpr = B->getRHS()->IgnoreParens();
 
-    const IntegerLiteral *IntLiteral = dyn_cast<IntegerLiteral>(LHSExpr);
+    std::optional<llvm::APInt> IntLiteral1 =
+        getIntegerLiteralSubexpressionValue(LHSExpr);
     const Expr *BoolExpr = RHSExpr;
 
-    if (!IntLiteral) {
-      IntLiteral = dyn_cast<IntegerLiteral>(RHSExpr);
+    if (!IntLiteral1) {
+      IntLiteral1 = getIntegerLiteralSubexpressionValue(RHSExpr);
       BoolExpr = LHSExpr;
     }
 
-    if (!IntLiteral)
+    if (!IntLiteral1)
       return TryResult();
 
     const BinaryOperator *BitOp = dyn_cast<BinaryOperator>(BoolExpr);
@@ -977,26 +983,26 @@ private:
       const Expr *LHSExpr2 = BitOp->getLHS()->IgnoreParens();
       const Expr *RHSExpr2 = BitOp->getRHS()->IgnoreParens();
 
-      const IntegerLiteral *IntLiteral2 = dyn_cast<IntegerLiteral>(LHSExpr2);
+      std::optional<llvm::APInt> IntLiteral2 =
+          getIntegerLiteralSubexpressionValue(LHSExpr2);
 
       if (!IntLiteral2)
-        IntLiteral2 = dyn_cast<IntegerLiteral>(RHSExpr2);
+        IntLiteral2 = getIntegerLiteralSubexpressionValue(RHSExpr2);
 
       if (!IntLiteral2)
         return TryResult();
 
-      llvm::APInt L1 = IntLiteral->getValue();
-      llvm::APInt L2 = IntLiteral2->getValue();
-      if ((BitOp->getOpcode() == BO_And && (L2 & L1) != L1) ||
-          (BitOp->getOpcode() == BO_Or && (L2 | L1) != L1)) {
+      if ((BitOp->getOpcode() == BO_And &&
+           (*IntLiteral2 & *IntLiteral1) != *IntLiteral1) ||
+          (BitOp->getOpcode() == BO_Or &&
+           (*IntLiteral2 | *IntLiteral1) != *IntLiteral1)) {
         if (BuildOpts.Observer)
          BuildOpts.Observer->compareBitwiseEquality(B,
                                                     B->getOpcode() != BO_EQ);
-        TryResult(B->getOpcode() != BO_EQ);
+        return TryResult(B->getOpcode() != BO_EQ);
       }
     } else if (BoolExpr->isKnownToHaveBooleanValue()) {
-      llvm::APInt IntValue = IntLiteral->getValue();
-      if ((IntValue == 1) || (IntValue == 0)) {
+      if ((*IntLiteral1 == 1) || (*IntLiteral1 == 0)) {
         return TryResult();
       }
       return TryResult(B->getOpcode() != BO_EQ);
@@ -1005,6 +1011,47 @@ private:
     return TryResult();
   }
 
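Switching from bare IntegerLiteral matching to the folding helper means a
constant hidden behind a unary operator no longer defeats the bitwise
comparison check. An illustrative trigger (user code, not from the patch):

    void f(int x) {
      // -8 is UnaryOperator(UO_Minus) over the literal 8; after folding,
      // (x & 8) can never equal -8, so '==' is always false here and the
      // CFG observer can flag it.
      if ((x & 8) == -8) {
      }
    }

Note also the drive-by bug fix in this hunk: the old code constructed
TryResult(B->getOpcode() != BO_EQ) without returning it.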
+  // Helper function to get an APInt from an expression. Supports expressions
+  // which are an IntegerLiteral or a UnaryOperator and returns the value with
+  // all operations performed on it.
+  // FIXME: it would be good to unify this function with
+  // IsIntegerLiteralConstantExpr at some point given the similarity between the
+  // functions.
+  std::optional<llvm::APInt>
+  getIntegerLiteralSubexpressionValue(const Expr *E) {
+
+    // If unary.
+    if (const auto *UnOp = dyn_cast<UnaryOperator>(E->IgnoreParens())) {
+      // Get the sub expression of the unary expression and get the Integer
+      // Literal.
+      const Expr *SubExpr = UnOp->getSubExpr()->IgnoreParens();
+
+      if (const auto *IntLiteral = dyn_cast<IntegerLiteral>(SubExpr)) {
+
+        llvm::APInt Value = IntLiteral->getValue();
+
+        // Perform the operation manually.
+        switch (UnOp->getOpcode()) {
+        case UO_Plus:
+          return Value;
+        case UO_Minus:
+          return -Value;
+        case UO_Not:
+          return ~Value;
+        case UO_LNot:
+          return llvm::APInt(Context->getTypeSize(Context->IntTy), !Value);
+        default:
+          assert(false && "Unexpected unary operator!");
+          return std::nullopt;
+        }
+      }
+    } else if (const auto *IntLiteral =
+                   dyn_cast<IntegerLiteral>(E->IgnoreParens()))
+      return IntLiteral->getValue();
+
+    return std::nullopt;
+  }
+
   TryResult analyzeLogicOperatorCondition(BinaryOperatorKind Relation,
                                           const llvm::APSInt &Value1,
                                           const llvm::APSInt &Value2) {
@@ -1027,16 +1074,41 @@ private:
     }
   }
 
-  /// Find a pair of comparison expressions with or without parentheses
+  /// There are two checks handled by this function:
+  /// 1. Find a law-of-excluded-middle or law-of-noncontradiction expression
+  /// e.g. if (x || !x), if (x && !x)
+  /// 2. Find a pair of comparison expressions with or without parentheses
   /// with a shared variable and constants and a logical operator between them
   /// that always evaluates to either true or false.
   /// e.g. if (x != 3 || x != 4)
   TryResult checkIncorrectLogicOperator(const BinaryOperator *B) {
     assert(B->isLogicalOp());
-    const BinaryOperator *LHS =
-        dyn_cast<BinaryOperator>(B->getLHS()->IgnoreParens());
-    const BinaryOperator *RHS =
-        dyn_cast<BinaryOperator>(B->getRHS()->IgnoreParens());
+    const Expr *LHSExpr = B->getLHS()->IgnoreParens();
+    const Expr *RHSExpr = B->getRHS()->IgnoreParens();
+
+    auto CheckLogicalOpWithNegatedVariable = [this, B](const Expr *E1,
+                                                       const Expr *E2) {
+      if (const auto *Negate = dyn_cast<UnaryOperator>(E1)) {
+        if (Negate->getOpcode() == UO_LNot &&
+            Expr::isSameComparisonOperand(Negate->getSubExpr(), E2)) {
+          bool AlwaysTrue = B->getOpcode() == BO_LOr;
+          if (BuildOpts.Observer)
+            BuildOpts.Observer->logicAlwaysTrue(B, AlwaysTrue);
+          return TryResult(AlwaysTrue);
+        }
+      }
+      return TryResult();
+    };
+
+    TryResult Result = CheckLogicalOpWithNegatedVariable(LHSExpr, RHSExpr);
+    if (Result.isKnown())
+      return Result;
+    Result = CheckLogicalOpWithNegatedVariable(RHSExpr, LHSExpr);
+    if (Result.isKnown())
+      return Result;
+
+    const auto *LHS = dyn_cast<BinaryOperator>(LHSExpr);
+    const auto *RHS = dyn_cast<BinaryOperator>(RHSExpr);
     if (!LHS || !RHS)
       return {};
 
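The new CheckLogicalOpWithNegatedVariable lambda is applied in both operand
orders, so it catches tautologies and contradictions built from a value and
its logical negation on either side. Illustrative triggers:

    int g(int x, bool b) {
      if (x || !x)    // law of excluded middle: always true
        return 1;
      if (b && !b)    // law of noncontradiction: always false
        return 2;
      if (!x && x)    // negation on the left, handled symmetrically
        return 3;
      return 0;
    }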
@@ -1278,11 +1350,24 @@ private:
     return {};
   }
 
-  bool hasTrivialDestructor(VarDecl *VD);
+  bool hasTrivialDestructor(const VarDecl *VD) const;
+  bool needsAutomaticDestruction(const VarDecl *VD) const;
 };
 
 } // namespace
 
+Expr *
+clang::extractElementInitializerFromNestedAILE(const ArrayInitLoopExpr *AILE) {
+  if (!AILE)
+    return nullptr;
+
+  Expr *AILEInit = AILE->getSubExpr();
+  while (const auto *E = dyn_cast<ArrayInitLoopExpr>(AILEInit))
+    AILEInit = E->getSubExpr();
+
+  return AILEInit;
+}
+
 inline bool AddStmtChoice::alwaysAdd(CFGBuilder &builder,
                                      const Stmt *stmt) const {
   return builder.alwaysAdd(stmt) || kind == AlwaysAdd;
@@ -1456,6 +1541,13 @@ void CFGBuilder::findConstructionContexts(
     // TODO: Handle other cases. For now, fail to find construction contexts.
     break;
   }
+  case Stmt::ParenExprClass: {
+    // If expression is placed into parenthesis we should propagate the parent
+    // construction context to subexpressions.
+    auto *PE = cast<ParenExpr>(Child);
+    findConstructionContexts(Layer, PE->getSubExpr());
+    break;
+  }
   default:
     break;
   }
@@ -1469,7 +1561,6 @@ void CFGBuilder::cleanupConstructionContext(Expr *E) {
   ConstructionContextMap.erase(E);
 }
 
-
 /// BuildCFG - Constructs a CFG from an AST (a Stmt*). The AST can represent an
 /// arbitrary statement. Examples include a single expression or a function
 /// body (compound statement). The ownership of the returned CFG is
@@ -1487,9 +1578,6 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
   assert(Succ == &cfg->getExit());
   Block = nullptr; // the EXIT block is empty. Create all other blocks lazily.
 
-  assert(!(BuildOpts.AddImplicitDtors && BuildOpts.AddLifetime) &&
-         "AddImplicitDtors and AddLifetime cannot be used at the same time");
-
   if (BuildOpts.AddImplicitDtors)
     if (const CXXDestructorDecl *DD = dyn_cast_or_null<CXXDestructorDecl>(D))
       addImplicitDtorsForDestructor(DD);
@@ -1553,16 +1641,11 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
       if (LI == LabelMap.end())
        continue;
       JumpTarget JT = LI->second;
-      prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
-                                                JT.scopePosition);
-      prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
-                                             JT.scopePosition);
-      const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
-          B, I->scopePosition, JT.scopePosition);
-      appendScopeBegin(JT.block, VD, G);
-      addSuccessor(B, JT.block);
-    };
-    if (auto *G = dyn_cast<GCCAsmStmt>(B->getTerminator())) {
+
+      CFGBlock *SuccBlk = createScopeChangesHandlingBlock(
+          I->scopePosition, B, JT.scopePosition, JT.block);
+      addSuccessor(B, SuccBlk);
+    } else if (auto *G = dyn_cast<GCCAsmStmt>(B->getTerminator())) {
       CFGBlock *Successor = (I+1)->block;
       for (auto *L : G->labels()) {
        LabelMapTy::iterator LI = LabelMap.find(L->getLabel());
@@ -1605,7 +1688,7 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
 }
 
 /// createBlock - Used to lazily create blocks that are connected
-/// to the current (global) succcessor.
+/// to the current (global) successor.
 CFGBlock *CFGBuilder::createBlock(bool add_successor) {
   CFGBlock *B = cfg->createBlock();
   if (add_successor && Succ)
@@ -1648,9 +1731,14 @@ CFGBlock *CFGBuilder::addInitializer(CXXCtorInitializer *I) {
     appendInitializer(Block, I);
 
   if (Init) {
+    // If the initializer is an ArrayInitLoopExpr, we want to extract the
+    // initializer, that's used for each element.
+    auto *AILEInit = extractElementInitializerFromNestedAILE(
+        dyn_cast<ArrayInitLoopExpr>(Init));
+
     findConstructionContexts(
         ConstructionContextLayer::create(cfg->getBumpVectorContext(), I),
-        Init);
+        AILEInit ? AILEInit : Init);
 
     if (HasTemporaries) {
       // For expression with temporaries go directly to subexpression to omit
@@ -1700,10 +1788,7 @@ static QualType getReferenceInitTemporaryType(const Expr *Init,
     }
 
     // Skip sub-object accesses into rvalues.
-    SmallVector<const Expr *, 2> CommaLHSs;
-    SmallVector<SubobjectAdjustment, 2> Adjustments;
-    const Expr *SkippedInit =
-        Init->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments);
+    const Expr *SkippedInit = Init->skipRValueSubobjectAdjustments();
     if (SkippedInit != Init) {
       Init = SkippedInit;
       continue;
    }
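Where the nested ArrayInitLoopExprs handled by the new helper come from
(illustrative user code): copying an array, for example in a by-copy lambda
capture, is modeled with one ArrayInitLoopExpr per dimension, and the
construction context belongs on the innermost per-element initializer that
extractElementInitializerFromNestedAILE digs out.

    void capture() {
      int grid[2][3] = {};
      // Copy-capturing the array copies it element by element; the AST models
      // this as ArrayInitLoopExpr(ArrayInitLoopExpr(<element-initializer>)).
      auto l = [grid] { return grid[0][0]; };
      (void)l;
    }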
@@ -1724,153 +1809,198 @@ void CFGBuilder::addLoopExit(const Stmt *LoopStmt){
   appendLoopExit(Block, LoopStmt);
 }
 
-void CFGBuilder::getDeclsWithEndedScope(LocalScope::const_iterator B,
-                                        LocalScope::const_iterator E, Stmt *S) {
-  if (!BuildOpts.AddScopes)
+/// Adds the CFG elements for leaving the scope of automatic objects in
+/// range [B, E). This include following:
+///   * AutomaticObjectDtor for variables with non-trivial destructor
+///   * LifetimeEnds for all variables
+///   * ScopeEnd for each scope left
+void CFGBuilder::addAutomaticObjHandling(LocalScope::const_iterator B,
+                                         LocalScope::const_iterator E,
+                                         Stmt *S) {
+  if (!BuildOpts.AddScopes && !BuildOpts.AddImplicitDtors &&
+      !BuildOpts.AddLifetime)
     return;
 
   if (B == E)
     return;
 
-  // To go from B to E, one first goes up the scopes from B to P
-  // then sideways in one scope from P to P' and then down
-  // the scopes from P' to E.
-  // The lifetime of all objects between B and P end.
-  LocalScope::const_iterator P = B.shared_parent(E);
-  int Dist = B.distance(P);
-  if (Dist <= 0)
+  // Not leaving the scope, only need to handle destruction and lifetime
+  if (B.inSameLocalScope(E)) {
+    addAutomaticObjDestruction(B, E, S);
     return;
+  }
 
-  for (LocalScope::const_iterator I = B; I != P; ++I)
-    if (I.pointsToFirstDeclaredVar())
-      DeclsWithEndedScope.insert(*I);
-}
+  // Extract information about all local scopes that are left
+  SmallVector<LocalScope::const_iterator, 10> LocalScopeEndMarkers;
+  LocalScopeEndMarkers.push_back(B);
+  for (LocalScope::const_iterator I = B; I != E; ++I) {
+    if (!I.inSameLocalScope(LocalScopeEndMarkers.back()))
+      LocalScopeEndMarkers.push_back(I);
+  }
+  LocalScopeEndMarkers.push_back(E);
+
+  // We need to leave the scope in reverse order, so we reverse the end
+  // markers
+  std::reverse(LocalScopeEndMarkers.begin(), LocalScopeEndMarkers.end());
+  auto Pairwise =
+      llvm::zip(LocalScopeEndMarkers, llvm::drop_begin(LocalScopeEndMarkers));
+  for (auto [E, B] : Pairwise) {
+    if (!B.inSameLocalScope(E))
+      addScopeExitHandling(B, E, S);
+    addAutomaticObjDestruction(B, E, S);
+  }
+}
+
+/// Add CFG elements corresponding to call destructor and end of lifetime
+/// of all automatic variables with non-trivial destructor in range [B, E).
+/// This include AutomaticObjectDtor and LifetimeEnds elements.
+void CFGBuilder::addAutomaticObjDestruction(LocalScope::const_iterator B,
+                                            LocalScope::const_iterator E,
+                                            Stmt *S) {
+  if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime)
+    return;
 
-void CFGBuilder::addAutomaticObjHandling(LocalScope::const_iterator B,
-                                         LocalScope::const_iterator E,
-                                         Stmt *S) {
-  getDeclsWithEndedScope(B, E, S);
-  if (BuildOpts.AddScopes)
-    addScopesEnd(B, E, S);
-  if (BuildOpts.AddImplicitDtors)
-    addAutomaticObjDtors(B, E, S);
-  if (BuildOpts.AddLifetime)
-    addLifetimeEnds(B, E, S);
+  if (B == E)
+    return;
+
+  SmallVector<VarDecl *, 10> DeclsNeedDestruction;
+  DeclsNeedDestruction.reserve(B.distance(E));
+
+  for (VarDecl* D : llvm::make_range(B, E))
+    if (needsAutomaticDestruction(D))
+      DeclsNeedDestruction.push_back(D);
+
+  for (VarDecl *VD : llvm::reverse(DeclsNeedDestruction)) {
+    if (BuildOpts.AddImplicitDtors) {
+      // If this destructor is marked as a no-return destructor, we need to
+      // create a new block for the destructor which does not have as a
+      // successor anything built thus far: control won't flow out of this
+      // block.
+      QualType Ty = VD->getType();
+      if (Ty->isReferenceType())
+        Ty = getReferenceInitTemporaryType(VD->getInit());
+      Ty = Context->getBaseElementType(Ty);
+
+      const CXXRecordDecl *CRD = Ty->getAsCXXRecordDecl();
+      if (CRD && CRD->isAnyDestructorNoReturn())
+        Block = createNoReturnBlock();
+    }
+
+    autoCreateBlock();
+
+    // Add LifetimeEnd after automatic obj with non-trivial destructors,
+    // as they end their lifetime when the destructor returns. For trivial
+    // objects, we end lifetime with scope end.
+    if (BuildOpts.AddLifetime)
+      appendLifetimeEnds(Block, VD, S);
+    if (BuildOpts.AddImplicitDtors && !hasTrivialDestructor(VD))
+      appendAutomaticObjDtor(Block, VD, S);
+    if (VD->hasAttr<CleanupAttr>())
+      appendCleanupFunction(Block, VD);
+  }
+}
 
-/// Add to current block automatic objects that leave the scope.
-void CFGBuilder::addLifetimeEnds(LocalScope::const_iterator B,
-                                 LocalScope::const_iterator E, Stmt *S) {
-  if (!BuildOpts.AddLifetime)
+/// Add CFG elements corresponding to leaving a scope.
+/// Assumes that range [B, E) corresponds to single scope.
+/// This add following elements:
+///   * LifetimeEnds for all variables with non-trivial destructor
+///   * ScopeEnd for each scope left
+void CFGBuilder::addScopeExitHandling(LocalScope::const_iterator B,
+                                      LocalScope::const_iterator E, Stmt *S) {
+  assert(!B.inSameLocalScope(E));
+  if (!BuildOpts.AddLifetime && !BuildOpts.AddScopes)
     return;
 
-  if (B == E)
-    return;
+  if (BuildOpts.AddScopes) {
+    autoCreateBlock();
+    appendScopeEnd(Block, B.getFirstVarInScope(), S);
+  }
 
-  // To go from B to E, one first goes up the scopes from B to P
-  // then sideways in one scope from P to P' and then down
-  // the scopes from P' to E.
-  // The lifetime of all objects between B and P end.
-  LocalScope::const_iterator P = B.shared_parent(E);
-  int dist = B.distance(P);
-  if (dist <= 0)
+  if (!BuildOpts.AddLifetime)
     return;
 
   // We need to perform the scope leaving in reverse order
   SmallVector<VarDecl *, 10> DeclsTrivial;
-  SmallVector<VarDecl *, 10> DeclsNonTrivial;
-  DeclsTrivial.reserve(dist);
-  DeclsNonTrivial.reserve(dist);
+  DeclsTrivial.reserve(B.distance(E));
 
-  for (LocalScope::const_iterator I = B; I != P; ++I)
-    if (hasTrivialDestructor(*I))
-      DeclsTrivial.push_back(*I);
-    else
-      DeclsNonTrivial.push_back(*I);
+  // Objects with trivial destructor ends their lifetime when their storage
+  // is destroyed, for automatic variables, this happens when the end of the
+  // scope is added.
+  for (VarDecl* D : llvm::make_range(B, E))
+    if (!needsAutomaticDestruction(D))
+      DeclsTrivial.push_back(D);
 
-  autoCreateBlock();
-  // object with trivial destructor end their lifetime last (when storage
-  // duration ends)
-  for (SmallVectorImpl<VarDecl *>::reverse_iterator I = DeclsTrivial.rbegin(),
-                                                    E = DeclsTrivial.rend();
-       I != E; ++I)
-    appendLifetimeEnds(Block, *I, S);
-
-  for (SmallVectorImpl<VarDecl *>::reverse_iterator
-           I = DeclsNonTrivial.rbegin(),
-           E = DeclsNonTrivial.rend();
-       I != E; ++I)
-    appendLifetimeEnds(Block, *I, S);
-}
-
-/// Add to current block markers for ending scopes.
-void CFGBuilder::addScopesEnd(LocalScope::const_iterator B,
-                              LocalScope::const_iterator E, Stmt *S) {
-  // If implicit destructors are enabled, we'll add scope ends in
-  // addAutomaticObjDtors.
-  if (BuildOpts.AddImplicitDtors)
+  if (DeclsTrivial.empty())
     return;
 
   autoCreateBlock();
-
-  for (auto I = DeclsWithEndedScope.rbegin(), E = DeclsWithEndedScope.rend();
-       I != E; ++I)
-    appendScopeEnd(Block, *I, S);
-
-  return;
+  for (VarDecl *VD : llvm::reverse(DeclsTrivial))
+    appendLifetimeEnds(Block, VD, S);
 }
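The new needsAutomaticDestruction predicate also counts variables carrying a
GCC-style cleanup attribute, which is what the CleanupFunction element above
models. An illustrative trigger (a GNU extension; closep is a hypothetical
cleanup callback):

    void closep(int *fd);

    void use(void) {
      __attribute__((cleanup(closep))) int fd = -1;
      // On scope exit the CFG now carries a CleanupFunction element for 'fd',
      // so analyses see the closep(&fd) call implied by the attribute.
    }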
-/// addAutomaticObjDtors - Add to current block automatic objects destructors
-/// for objects in range of local scope positions. Use S as trigger statement
-/// for destructors.
-void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B,
-                                      LocalScope::const_iterator E, Stmt *S) {
-  if (!BuildOpts.AddImplicitDtors)
+/// addScopeChangesHandling - appends information about destruction, lifetime
+/// and cfgScopeEnd for variables in the scope that was left by the jump, and
+/// appends cfgScopeBegin for all scopes that where entered.
+/// We insert the cfgScopeBegin at the end of the jump node, as depending on
+/// the sourceBlock, each goto, may enter different amount of scopes.
+void CFGBuilder::addScopeChangesHandling(LocalScope::const_iterator SrcPos,
+                                         LocalScope::const_iterator DstPos,
+                                         Stmt *S) {
+  assert(Block && "Source block should be always crated");
+  if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime &&
+      !BuildOpts.AddScopes) {
     return;
+  }
 
-  if (B == E)
+  if (SrcPos == DstPos)
     return;
 
-  // We need to append the destructors in reverse order, but any one of them
-  // may be a no-return destructor which changes the CFG. As a result, buffer
-  // this sequence up and replay them in reverse order when appending onto the
-  // CFGBlock(s).
-  SmallVector<VarDecl*, 10> Decls;
-  Decls.reserve(B.distance(E));
-  for (LocalScope::const_iterator I = B; I != E; ++I)
-    Decls.push_back(*I);
-
-  for (SmallVectorImpl<VarDecl*>::reverse_iterator I = Decls.rbegin(),
-                                                   E = Decls.rend();
-       I != E; ++I) {
-    if (hasTrivialDestructor(*I)) {
-      // If AddScopes is enabled and *I is a first variable in a scope, add a
-      // ScopeEnd marker in a Block.
-      if (BuildOpts.AddScopes && DeclsWithEndedScope.count(*I)) {
-        autoCreateBlock();
-        appendScopeEnd(Block, *I, S);
-      }
-      continue;
-    }
+  // Get common scope, the jump leaves all scopes [SrcPos, BasePos), and
+  // enter all scopes between [DstPos, BasePos)
+  LocalScope::const_iterator BasePos = SrcPos.shared_parent(DstPos);
 
-    // If this destructor is marked as a no-return destructor, we need to
-    // create a new block for the destructor which does not have as a successor
-    // anything built thus far: control won't flow out of this block.
-    QualType Ty = (*I)->getType();
-    if (Ty->isReferenceType()) {
-      Ty = getReferenceInitTemporaryType((*I)->getInit());
-    }
-    Ty = Context->getBaseElementType(Ty);
-
-    if (Ty->getAsCXXRecordDecl()->isAnyDestructorNoReturn())
-      Block = createNoReturnBlock();
-    else
-      autoCreateBlock();
-
-    // Add ScopeEnd just after automatic obj destructor.
-    if (BuildOpts.AddScopes && DeclsWithEndedScope.count(*I))
-      appendScopeEnd(Block, *I, S);
-    appendAutomaticObjDtor(Block, *I, S);
-  }
+  // Append scope begins for scopes entered by goto
+  if (BuildOpts.AddScopes && !DstPos.inSameLocalScope(BasePos)) {
+    for (LocalScope::const_iterator I = DstPos; I != BasePos; ++I)
+      if (I.pointsToFirstDeclaredVar())
+        appendScopeBegin(Block, *I, S);
+  }
+
+  // Append scopeEnds, destructor and lifetime with the terminator for
+  // block left by goto.
+  addAutomaticObjHandling(SrcPos, BasePos, S);
 }
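The jump scenario this rewrite targets (illustrative): a goto that leaves a
scope must run destructors and scope-end markers on the edge itself, so they
now live in a dedicated block that takes over the goto terminator instead of
being prepended into the source block.

    struct Guard { ~Guard(); };

    void jump(bool c) {
      {
        Guard g;
        if (c)
          goto out;   // this edge gets its own block: ~Guard(), ScopeEnd, ...
      }
    out:
      return;
    }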
+/// createScopeChangesHandlingBlock - Creates a block with cfgElements
+/// corresponding to changing the scope from the source scope of the GotoStmt,
+/// to destination scope. Add destructor, lifetime and cfgScopeEnd
+/// CFGElements to newly created CFGBlock, that will have the CFG terminator
+/// transferred.
+CFGBlock *CFGBuilder::createScopeChangesHandlingBlock(
+    LocalScope::const_iterator SrcPos, CFGBlock *SrcBlk,
+    LocalScope::const_iterator DstPos, CFGBlock *DstBlk) {
+  if (SrcPos == DstPos)
+    return DstBlk;
+
+  if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime &&
+      (!BuildOpts.AddScopes || SrcPos.inSameLocalScope(DstPos)))
+    return DstBlk;
+
+  // We will update CFBBuilder when creating new block, restore the
+  // previous state at exit.
+  SaveAndRestore save_Block(Block), save_Succ(Succ);
+
+  // Create a new block, and transfer terminator
+  Block = createBlock(false);
+  Block->setTerminator(SrcBlk->getTerminator());
+  SrcBlk->setTerminator(CFGTerminator());
+  addSuccessor(Block, DstBlk);
+
+  // Fill the created Block with the required elements.
+  addScopeChangesHandling(SrcPos, DstPos, Block->getTerminatorStmt());
+
+  assert(Block && "There should be at least one scope changing Block");
+  return Block;
+}
 
 /// addImplicitDtorsForDestructor - Add implicit destructors generated for
@@ -1886,7 +2016,7 @@ void CFGBuilder::addImplicitDtorsForDestructor(const CXXDestructorDecl *DD) {
       // (which is different from the current class) is responsible for
       // destroying them.
       const CXXRecordDecl *CD = VI.getType()->getAsCXXRecordDecl();
-      if (!CD->hasTrivialDestructor()) {
+      if (CD && !CD->hasTrivialDestructor()) {
        autoCreateBlock();
         appendBaseDtor(Block, &VI);
       }
@@ -1896,7 +2026,7 @@ void CFGBuilder::addImplicitDtorsForDestructor(const CXXDestructorDecl *DD) {
   for (const auto &BI : RD->bases()) {
     if (!BI.isVirtual()) {
       const CXXRecordDecl *CD = BI.getType()->getAsCXXRecordDecl();
-      if (!CD->hasTrivialDestructor()) {
+      if (CD && !CD->hasTrivialDestructor()) {
         autoCreateBlock();
         appendBaseDtor(Block, &BI);
       }
@@ -1907,9 +2037,10 @@ void CFGBuilder::addImplicitDtorsForDestructor(const CXXDestructorDecl *DD) {
   for (auto *FI : RD->fields()) {
     // Check for constant size array. Set type to array element type.
     QualType QT = FI->getType();
-    if (const ConstantArrayType *AT = Context->getAsConstantArrayType(QT)) {
-      if (AT->getSize() == 0)
-        continue;
+    // It may be a multidimensional array.
+    while (const ConstantArrayType *AT = Context->getAsConstantArrayType(QT)) {
+      if (AT->isZeroSize())
+        break;
       QT = AT->getElementType();
     }
 
@@ -1927,8 +2058,7 @@ LocalScope* CFGBuilder::createOrReuseLocalScope(LocalScope* Scope) {
   if (Scope)
     return Scope;
   llvm::BumpPtrAllocator &alloc = cfg->getAllocator();
-  return new (alloc.Allocate<LocalScope>())
-      LocalScope(BumpVectorContext(alloc), ScopePos);
+  return new (alloc) LocalScope(BumpVectorContext(alloc), ScopePos);
 }
 
 /// addLocalScopeForStmt - Add LocalScope to local scopes tree for statement
@@ -1970,7 +2100,11 @@ LocalScope* CFGBuilder::addLocalScopeForDeclStmt(DeclStmt *DS,
   return Scope;
 }
 
-bool CFGBuilder::hasTrivialDestructor(VarDecl *VD) {
+bool CFGBuilder::needsAutomaticDestruction(const VarDecl *VD) const {
+  return !hasTrivialDestructor(VD) || VD->hasAttr<CleanupAttr>();
+}
+
+bool CFGBuilder::hasTrivialDestructor(const VarDecl *VD) const {
   // Check for const references bound to temporary. Set type to pointee.
   QualType QT = VD->getType();
   if (QT->isReferenceType()) {
@@ -1999,7 +2133,7 @@ bool CFGBuilder::hasTrivialDestructor(VarDecl *VD) {
   // Check for constant size array. Set type to array element type.
   while (const ConstantArrayType *AT = Context->getAsConstantArrayType(QT)) {
-    if (AT->getSize() == 0)
+    if (AT->isZeroSize())
       return true;
     QT = AT->getElementType();
   }
@@ -2015,32 +2149,20 @@ bool CFGBuilder::hasTrivialDestructor(VarDecl *VD) {
 /// const reference. Will reuse Scope if not NULL.
 LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD,
                                                 LocalScope* Scope) {
-  assert(!(BuildOpts.AddImplicitDtors && BuildOpts.AddLifetime) &&
-         "AddImplicitDtors and AddLifetime cannot be used at the same time");
   if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime &&
       !BuildOpts.AddScopes)
     return Scope;
 
   // Check if variable is local.
-  switch (VD->getStorageClass()) {
-  case SC_None:
-  case SC_Auto:
-  case SC_Register:
-    break;
-  default: return Scope;
-  }
+  if (!VD->hasLocalStorage())
+    return Scope;
 
-  if (BuildOpts.AddImplicitDtors) {
-    if (!hasTrivialDestructor(VD) || BuildOpts.AddScopes) {
-      // Add the variable to scope
-      Scope = createOrReuseLocalScope(Scope);
-      Scope->addVar(VD);
-      ScopePos = Scope->begin();
-    }
+  if (!BuildOpts.AddLifetime && !BuildOpts.AddScopes &&
+      !needsAutomaticDestruction(VD)) {
+    assert(BuildOpts.AddImplicitDtors);
     return Scope;
   }
 
-  assert(BuildOpts.AddLifetime);
   // Add the variable to scope
   Scope = createOrReuseLocalScope(Scope);
   Scope->addVar(VD);
@@ -2056,63 +2178,6 @@ void CFGBuilder::addLocalScopeAndDtors(Stmt *S) {
   addAutomaticObjHandling(ScopePos, scopeBeginPos, S);
 }
 
-/// prependAutomaticObjDtorsWithTerminator - Prepend destructor CFGElements for
-/// variables with automatic storage duration to CFGBlock's elements vector.
-/// Elements will be prepended to physical beginning of the vector which
-/// happens to be logical end. Use blocks terminator as statement that specifies
-/// destructors call site.
-/// FIXME: This mechanism for adding automatic destructors doesn't handle
-/// no-return destructors properly.
-void CFGBuilder::prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk,
-    LocalScope::const_iterator B, LocalScope::const_iterator E) {
-  if (!BuildOpts.AddImplicitDtors)
-    return;
-  BumpVectorContext &C = cfg->getBumpVectorContext();
-  CFGBlock::iterator InsertPos
-    = Blk->beginAutomaticObjDtorsInsert(Blk->end(), B.distance(E), C);
-  for (LocalScope::const_iterator I = B; I != E; ++I)
-    InsertPos = Blk->insertAutomaticObjDtor(InsertPos, *I,
-                                            Blk->getTerminatorStmt());
-}
-
-/// prependAutomaticObjLifetimeWithTerminator - Prepend lifetime CFGElements for
-/// variables with automatic storage duration to CFGBlock's elements vector.
-/// Elements will be prepended to physical beginning of the vector which
-/// happens to be logical end. Use blocks terminator as statement that specifies
-/// where lifetime ends.
-void CFGBuilder::prependAutomaticObjLifetimeWithTerminator(
-    CFGBlock *Blk, LocalScope::const_iterator B, LocalScope::const_iterator E) {
-  if (!BuildOpts.AddLifetime)
-    return;
-  BumpVectorContext &C = cfg->getBumpVectorContext();
-  CFGBlock::iterator InsertPos =
-      Blk->beginLifetimeEndsInsert(Blk->end(), B.distance(E), C);
-  for (LocalScope::const_iterator I = B; I != E; ++I) {
-    InsertPos =
-        Blk->insertLifetimeEnds(InsertPos, *I, Blk->getTerminatorStmt());
-  }
-}
-
-/// prependAutomaticObjScopeEndWithTerminator - Prepend scope end CFGElements for
-/// variables with automatic storage duration to CFGBlock's elements vector.
-/// Elements will be prepended to physical beginning of the vector which
-/// happens to be logical end. Use blocks terminator as statement that specifies
-/// where scope ends.
-const VarDecl *
-CFGBuilder::prependAutomaticObjScopeEndWithTerminator(
-    CFGBlock *Blk, LocalScope::const_iterator B, LocalScope::const_iterator E) {
-  if (!BuildOpts.AddScopes)
-    return nullptr;
-  BumpVectorContext &C = cfg->getBumpVectorContext();
-  CFGBlock::iterator InsertPos =
-      Blk->beginScopeEndInsert(Blk->end(), 1, C);
-  LocalScope::const_iterator PlaceToInsert = B;
-  for (LocalScope::const_iterator I = B; I != E; ++I)
-    PlaceToInsert = I;
-  Blk->insertScopeEnd(InsertPos, *PlaceToInsert, Blk->getTerminatorStmt());
-  return *PlaceToInsert;
-}
-
 /// Visit - Walk the subtree of a statement and add extra
 /// blocks for ternary operators, &&, and ||. We also process "," and
 /// DeclStmts (which may contain nested control-flow).
@@ -2142,6 +2207,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
     case Stmt::InitListExprClass:
       return VisitInitListExpr(cast<InitListExpr>(S), asc);
 
+    case Stmt::AttributedStmtClass:
+      return VisitAttributedStmt(cast<AttributedStmt>(S), asc);
+
     case Stmt::AddrLabelExprClass:
       return VisitAddrLabelExpr(cast<AddrLabelExpr>(S), asc);
 
@@ -2190,8 +2258,7 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
      // FIXME: The expression inside a CXXDefaultArgExpr is owned by the
      // called function's declaration, not by the caller. If we simply add
      // this expression to the CFG, we could end up with the same Expr
-      // appearing multiple times.
-      // PR13385 / <rdar://problem/12156507>
+      // appearing multiple times (PR13385).
      //
      // It's likewise possible for multiple CXXDefaultInitExprs for the same
      // expression to be used in the same function (through aggregate
@@ -2222,6 +2289,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
     case Stmt::CXXTryStmtClass:
       return VisitCXXTryStmt(cast<CXXTryStmt>(S));
 
+    case Stmt::CXXTypeidExprClass:
+      return VisitCXXTypeidExpr(cast<CXXTypeidExpr>(S), asc);
+
     case Stmt::CXXForRangeStmtClass:
       return VisitCXXForRangeStmt(cast<CXXForRangeStmt>(S));
 
@@ -2275,7 +2345,7 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
       return VisitObjCAtCatchStmt(cast<ObjCAtCatchStmt>(S));
 
     case Stmt::ObjCAutoreleasePoolStmtClass:
-    return VisitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(S));
+      return VisitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(S));
 
     case Stmt::ObjCAtSynchronizedStmtClass:
       return VisitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(S));
@@ -2302,6 +2372,10 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
     case Stmt::CoreturnStmtClass:
       return VisitReturnStmt(S);
 
+    case Stmt::CoyieldExprClass:
+    case Stmt::CoawaitExprClass:
+      return VisitCoroutineSuspendExpr(cast<CoroutineSuspendExpr>(S), asc);
+
     case Stmt::SEHExceptStmtClass:
       return VisitSEHExceptStmt(cast<SEHExceptStmt>(S));
 
@@ -2329,6 +2403,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
 
     case Stmt::WhileStmtClass:
       return VisitWhileStmt(cast<WhileStmt>(S));
+
+    case Stmt::ArrayInitLoopExprClass:
+      return VisitArrayInitLoopExpr(cast<ArrayInitLoopExpr>(S), asc);
   }
 }
 
@@ -2391,8 +2468,32 @@ CFGBlock *CFGBuilder::VisitAddrLabelExpr(AddrLabelExpr *A,
   return Block;
 }
 
-CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U,
-                                         AddStmtChoice asc) {
+static bool isFallthroughStatement(const AttributedStmt *A) {
+  bool isFallthrough = hasSpecificAttr<FallThroughAttr>(A->getAttrs());
+  assert((!isFallthrough || isa<NullStmt>(A->getSubStmt())) &&
+         "expected fallthrough not to have children");
+  return isFallthrough;
+}
+
+CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
+                                          AddStmtChoice asc) {
+  // AttributedStmts for [[likely]] can have arbitrary statements as children,
+  // and the current visitation order here would add the AttributedStmts
+  // for [[likely]] after the child nodes, which is undesirable: For example,
+  // if the child contains an unconditional return, the [[likely]] would be
+  // considered unreachable.
+  // So only add the AttributedStmt for FallThrough, which has CFG effects and
+  // also no children, and omit the others. None of the other current StmtAttrs
+  // have semantic meaning for the CFG.
+  if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) {
+    autoCreateBlock();
+    appendStmt(Block, A);
+  }
+
+  return VisitChildren(A);
+}
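Concretely (illustrative C++20 source): only the fallthrough attribute
becomes a CFG element, so a [[likely]] child that contains a return is no
longer shadowed by an unreachable attribute node.

    int classify(int x) {
      switch (x) {
      case 0:
        ++x;
        [[fallthrough]];     // added to the CFG: it has CFG semantics
      case 1:
        [[likely]] return 1; // attribute node omitted; only the child is added
      default:
        return 0;
      }
    }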
@@ -3010,7 +3129,7 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) { { Stmt *Then = I->getThen(); assert(Then); - SaveAndRestore<CFGBlock*> sv(Succ); + SaveAndRestore sv(Succ); Block = nullptr; // If branch is not a compound statement create implicit scope @@ -3040,7 +3159,7 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) { // control-flow transfer of '&&' or '||' go directly into the then/else // blocks directly. BinaryOperator *Cond = - I->getConditionVariable() + (I->isConsteval() || I->getConditionVariable()) ? nullptr : dyn_cast<BinaryOperator>(I->getCond()->IgnoreParens()); CFGBlock *LastBlock; @@ -3054,7 +3173,9 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) { Block->setTerminator(I); // See if this is a known constant. - const TryResult &KnownVal = tryEvaluateBool(I->getCond()); + TryResult KnownVal; + if (!I->isConsteval()) + KnownVal = tryEvaluateBool(I->getCond()); // Add the successors. If we know that specific branches are // unreachable, inform addSuccessor() of that knowledge. @@ -3115,9 +3236,41 @@ CFGBlock *CFGBuilder::VisitReturnStmt(Stmt *S) { if (Expr *O = RS->getRetValue()) return Visit(O, AddStmtChoice::AlwaysAdd, /*ExternallyDestructed=*/true); return Block; - } else { // co_return - return VisitChildren(S); } + + CoreturnStmt *CRS = cast<CoreturnStmt>(S); + auto *B = Block; + if (CFGBlock *R = Visit(CRS->getPromiseCall())) + B = R; + + if (Expr *RV = CRS->getOperand()) + if (RV->getType()->isVoidType() && !isa<InitListExpr>(RV)) + // A non-initlist void expression. + if (CFGBlock *R = Visit(RV)) + B = R; + + return B; +} + +CFGBlock *CFGBuilder::VisitCoroutineSuspendExpr(CoroutineSuspendExpr *E, + AddStmtChoice asc) { + // We're modelling the pre-coro-xform CFG. Thus just evaluate the various + // active components of the co_await or co_yield. Note we do not model the + // edge from the builtin_suspend to the exit node. + if (asc.alwaysAdd(*this, E)) { + autoCreateBlock(); + appendStmt(Block, E); + } + CFGBlock *B = Block; + if (auto *R = Visit(E->getResumeExpr())) + B = R; + if (auto *R = Visit(E->getSuspendExpr())) + B = R; + if (auto *R = Visit(E->getReadyExpr())) + B = R; + if (auto *R = Visit(E->getCommonExpr())) + B = R; + return B; } CFGBlock *CFGBuilder::VisitSEHExceptStmt(SEHExceptStmt *ES) { @@ -3126,7 +3279,7 @@ CFGBlock *CFGBuilder::VisitSEHExceptStmt(SEHExceptStmt *ES) { // Save local scope position because in case of exception variable ScopePos // won't be restored when traversing AST. - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); addStmt(ES->getBlock()); CFGBlock *SEHExceptBlock = Block; @@ -3216,14 +3369,13 @@ CFGBlock *CFGBuilder::VisitSEHTryStmt(SEHTryStmt *Terminator) { Succ = SEHTrySuccessor; // Save the current "__try" context. - SaveAndRestore<CFGBlock *> save_try(TryTerminatedBlock, - NewTryTerminatedBlock); + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); cfg->addTryDispatchBlock(TryTerminatedBlock); // Save the current value for the __leave target. // All __leaves should go to the code following the __try // (FIXME: or if the __try has a __finally, to the __finally.) - SaveAndRestore<JumpTarget> save_break(SEHLeaveJumpTarget); + SaveAndRestore save_break(SEHLeaveJumpTarget); SEHLeaveJumpTarget = JumpTarget(SEHTrySuccessor, ScopePos); assert(Terminator->getTryBlock() && "__try must contain a non-NULL body"); @@ -3239,8 +3391,7 @@ CFGBlock *CFGBuilder::VisitLabelStmt(LabelStmt *L) { if (!LabelBlock) // This can happen when the body is empty, i.e.
LabelBlock = createBlock(); // scopes that only contains NullStmts. - assert(LabelMap.find(L->getDecl()) == LabelMap.end() && - "label already in map"); + assert(!LabelMap.contains(L->getDecl()) && "label already in map"); LabelMap[L->getDecl()] = JumpTarget(LabelBlock, ScopePos); // Labels partition blocks, so this is the end of the basic block we were @@ -3251,7 +3402,7 @@ CFGBlock *CFGBuilder::VisitLabelStmt(LabelStmt *L) { if (badCFG) return nullptr; - // We set Block to NULL to allow lazy creation of a new block (if necessary); + // We set Block to NULL to allow lazy creation of a new block (if necessary). Block = nullptr; // This block is now the implicit successor of other blocks. @@ -3274,9 +3425,21 @@ CFGBlock *CFGBuilder::VisitBlockExpr(BlockExpr *E, AddStmtChoice asc) { CFGBlock *CFGBuilder::VisitLambdaExpr(LambdaExpr *E, AddStmtChoice asc) { CFGBlock *LastBlock = VisitNoRecurse(E, asc); + + unsigned Idx = 0; for (LambdaExpr::capture_init_iterator it = E->capture_init_begin(), - et = E->capture_init_end(); it != et; ++it) { + et = E->capture_init_end(); + it != et; ++it, ++Idx) { if (Expr *Init = *it) { + // If the initializer is an ArrayInitLoopExpr, we want to extract the + // initializer, that's used for each element. + auto *AILEInit = extractElementInitializerFromNestedAILE( + dyn_cast<ArrayInitLoopExpr>(Init)); + + findConstructionContexts(ConstructionContextLayer::create( + cfg->getBumpVectorContext(), {E, Idx}), + AILEInit ? AILEInit : Init); + CFGBlock *Tmp = Visit(Init); if (Tmp) LastBlock = Tmp; @@ -3300,8 +3463,8 @@ CFGBlock *CFGBuilder::VisitGotoStmt(GotoStmt *G) { BackpatchBlocks.push_back(JumpSource(Block, ScopePos)); else { JumpTarget JT = I->second; - addAutomaticObjHandling(ScopePos, JT.scopePosition, G); addSuccessor(Block, JT.block); + addScopeChangesHandling(ScopePos, JT.scopePosition, G); } return Block; @@ -3326,7 +3489,7 @@ CFGBlock *CFGBuilder::VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc) { // Save "Succ" in BackpatchBlocks. In the backpatch processing, "Succ" is // used to avoid adding "Succ" again. BackpatchBlocks.push_back(JumpSource(Succ, ScopePos)); - return Block; + return VisitChildren(G); } CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { @@ -3334,7 +3497,7 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for init statement and possible condition variable. // Add destructor for init statement and condition variable. @@ -3362,7 +3525,7 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { // Save the current value for the break targets. // All breaks should go to the code following the loop. - SaveAndRestore<JumpTarget> save_break(BreakJumpTarget); + SaveAndRestore save_break(BreakJumpTarget); BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); CFGBlock *BodyBlock = nullptr, *TransitionBlock = nullptr; @@ -3372,8 +3535,8 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { assert(F->getBody()); // Save the current values for Block, Succ, continue and break targets. 
- SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ); - SaveAndRestore<JumpTarget> save_continue(ContinueJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget); // Create an empty block to represent the transition block for looping back // to the head of the loop. If we have increment code, it will @@ -3381,6 +3544,11 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { Block = Succ = TransitionBlock = createBlock(false); TransitionBlock->setLoopTarget(F); + + // Loop iteration (after increment) should end with destructor of Condition + // variable (if any). + addAutomaticObjHandling(ScopePos, LoopBeginScopePos, F); + if (Stmt *I = F->getInc()) { // Generate increment code in its own basic block. This is the target of // continue statements. @@ -3400,8 +3568,6 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { ContinueJumpTarget = JumpTarget(Succ, ContinueScopePos); ContinueJumpTarget.block->setLoopTarget(F); - // Loop body should end with destructor of Condition variable (if any). - addAutomaticObjHandling(ScopePos, LoopBeginScopePos, F); // If body is not a compound statement create implicit scope // and add destructors. @@ -3428,7 +3594,7 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { do { Expr *C = F->getCond(); - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Specially handle logical operators, which have a slightly // more optimal CFG representation. @@ -3494,7 +3660,7 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { // If the loop contains initialization, create a new block for those // statements. This block can also contain statements that precede the loop. if (Stmt *I = F->getInit()) { - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); ScopePos = LoopBeginScopePos; Block = createBlock(); return addStmt(I); @@ -3597,9 +3763,9 @@ CFGBlock *CFGBuilder::VisitObjCForCollectionStmt(ObjCForCollectionStmt *S) { // Now create the true branch. { // Save the current values for Succ, continue and break targets. - SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ); - SaveAndRestore<JumpTarget> save_continue(ContinueJumpTarget), - save_break(BreakJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); // Add an intermediate block between the BodyBlock and the // EntryConditionBlock to represent the "loop back" transition, for looping @@ -3663,11 +3829,6 @@ CFGBlock *CFGBuilder::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S) { return addStmt(S->getSynchExpr()); } -CFGBlock *CFGBuilder::VisitObjCAtTryStmt(ObjCAtTryStmt *S) { - // FIXME - return NYS(); -} - CFGBlock *CFGBuilder::VisitPseudoObjectExpr(PseudoObjectExpr *E) { autoCreateBlock(); @@ -3698,7 +3859,7 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) { // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for possible condition variable. // Store scope position for continue statement. @@ -3727,9 +3888,9 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) { assert(W->getBody()); // Save the current values for Block, Succ, continue and break targets. 
- SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ); - SaveAndRestore<JumpTarget> save_continue(ContinueJumpTarget), - save_break(BreakJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); // Create an empty block to represent the transition block for looping back // to the head of the loop. @@ -3828,16 +3989,58 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) { return EntryConditionBlock; } -CFGBlock *CFGBuilder::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) { - // FIXME: For now we pretend that @catch and the code it contains does not - // exit. - return Block; +CFGBlock *CFGBuilder::VisitArrayInitLoopExpr(ArrayInitLoopExpr *A, + AddStmtChoice asc) { + if (asc.alwaysAdd(*this, A)) { + autoCreateBlock(); + appendStmt(Block, A); + } + + CFGBlock *B = Block; + + if (CFGBlock *R = Visit(A->getSubExpr())) + B = R; + + auto *OVE = dyn_cast<OpaqueValueExpr>(A->getCommonExpr()); + assert(OVE && "ArrayInitLoopExpr->getCommonExpr() should be wrapped in an " + "OpaqueValueExpr!"); + if (CFGBlock *R = Visit(OVE->getSourceExpr())) + B = R; + + return B; } -CFGBlock *CFGBuilder::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) { - // FIXME: This isn't complete. We basically treat @throw like a return - // statement. +CFGBlock *CFGBuilder::VisitObjCAtCatchStmt(ObjCAtCatchStmt *CS) { + // ObjCAtCatchStmt are treated like labels, so they are the first statement + // in a block. + // Save local scope position because in case of exception variable ScopePos + // won't be restored when traversing AST. + SaveAndRestore save_scope_pos(ScopePos); + + if (CS->getCatchBody()) + addStmt(CS->getCatchBody()); + + CFGBlock *CatchBlock = Block; + if (!CatchBlock) + CatchBlock = createBlock(); + + appendStmt(CatchBlock, CS); + + // Also add the ObjCAtCatchStmt as a label, like with regular labels. + CatchBlock->setLabel(CS); + + // Bail out if the CFG is bad. + if (badCFG) + return nullptr; + + // We set Block to NULL to allow lazy creation of a new block (if necessary). + Block = nullptr; + + return CatchBlock; +} + +CFGBlock *CFGBuilder::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) { // If we were in the middle of a block we stop processing that block. if (badCFG) return nullptr; @@ -3845,14 +4048,77 @@ CFGBlock *CFGBuilder::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) { // Create the new block. Block = createBlock(false); - // The Exit block is the only successor. - addSuccessor(Block, &cfg->getExit()); + if (TryTerminatedBlock) + // The current try statement is the only successor. + addSuccessor(Block, TryTerminatedBlock); + else + // otherwise the Exit block is the only successor. + addSuccessor(Block, &cfg->getExit()); // Add the statement to the block. This may create new blocks if S contains // control-flow (short-circuit operations). return VisitStmt(S, AddStmtChoice::AlwaysAdd); } +CFGBlock *CFGBuilder::VisitObjCAtTryStmt(ObjCAtTryStmt *Terminator) { + // "@try"/"@catch" is a control-flow statement. Thus we stop processing the + // current block. + CFGBlock *TrySuccessor = nullptr; + + if (Block) { + if (badCFG) + return nullptr; + TrySuccessor = Block; + } else + TrySuccessor = Succ; + + // FIXME: Implement @finally support. + if (Terminator->getFinallyStmt()) + return NYS(); + + CFGBlock *PrevTryTerminatedBlock = TryTerminatedBlock; + + // Create a new block that will contain the try statement. + CFGBlock *NewTryTerminatedBlock = createBlock(false); + // Add the terminator in the try block. 
+ NewTryTerminatedBlock->setTerminator(Terminator); + + bool HasCatchAll = false; + for (ObjCAtCatchStmt *CS : Terminator->catch_stmts()) { + // The code after the try is the implicit successor. + Succ = TrySuccessor; + if (CS->hasEllipsis()) { + HasCatchAll = true; + } + Block = nullptr; + CFGBlock *CatchBlock = VisitObjCAtCatchStmt(CS); + if (!CatchBlock) + return nullptr; + // Add this block to the list of successors for the block with the try + // statement. + addSuccessor(NewTryTerminatedBlock, CatchBlock); + } + + // FIXME: This needs updating when @finally support is added. + if (!HasCatchAll) { + if (PrevTryTerminatedBlock) + addSuccessor(NewTryTerminatedBlock, PrevTryTerminatedBlock); + else + addSuccessor(NewTryTerminatedBlock, &cfg->getExit()); + } + + // The code after the try is the implicit successor. + Succ = TrySuccessor; + + // Save the current "try" context. + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); + cfg->addTryDispatchBlock(TryTerminatedBlock); + + assert(Terminator->getTryBody() && "try must contain a non-NULL body"); + Block = nullptr; + return addStmt(Terminator->getTryBody()); +} + CFGBlock *CFGBuilder::VisitObjCMessageExpr(ObjCMessageExpr *ME, AddStmtChoice asc) { findConstructionContextsForArguments(ME); @@ -3883,6 +4149,25 @@ CFGBlock *CFGBuilder::VisitCXXThrowExpr(CXXThrowExpr *T) { return VisitStmt(T, AddStmtChoice::AlwaysAdd); } +CFGBlock *CFGBuilder::VisitCXXTypeidExpr(CXXTypeidExpr *S, AddStmtChoice asc) { + if (asc.alwaysAdd(*this, S)) { + autoCreateBlock(); + appendStmt(Block, S); + } + + // C++ [expr.typeid]p3: + // When typeid is applied to an expression other than an glvalue of a + // polymorphic class type [...] [the] expression is an unevaluated + // operand. [...] + // We add only potentially evaluated statements to the block to avoid + // CFG generation for unevaluated operands. + if (!S->isTypeDependent() && S->isPotentiallyEvaluated()) + return VisitChildren(S); + + // Return block without CFG for unevaluated operands. + return Block; +} + CFGBlock *CFGBuilder::VisitDoStmt(DoStmt *D) { CFGBlock *LoopSuccessor = nullptr; @@ -3929,8 +4214,8 @@ CFGBlock *CFGBuilder::VisitDoStmt(DoStmt *D) { assert(D->getBody()); // Save the current values for Block, Succ, and continue and break targets - SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ); - SaveAndRestore<JumpTarget> save_continue(ContinueJumpTarget), + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), save_break(BreakJumpTarget); // All continues within this loop should go to the condition block @@ -4048,7 +4333,7 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) { // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for C++17 switch init-stmt if one exists. if (Stmt *Init = Terminator->getInit()) @@ -4068,9 +4353,9 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) { } else SwitchSuccessor = Succ; // Save the current "switch" context. 
- SaveAndRestore<CFGBlock*> save_switch(SwitchTerminatedBlock), - save_default(DefaultCaseBlock); - SaveAndRestore<JumpTarget> save_break(BreakJumpTarget); + SaveAndRestore save_switch(SwitchTerminatedBlock), + save_default(DefaultCaseBlock); + SaveAndRestore save_break(BreakJumpTarget); // Set the "default" case to be the block after the switch statement. If the // switch statement contains a "default:", this value will be overwritten with @@ -4093,15 +4378,13 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) { // For pruning unreachable case statements, save the current state // for tracking the condition value. - SaveAndRestore<bool> save_switchExclusivelyCovered(switchExclusivelyCovered, - false); + SaveAndRestore save_switchExclusivelyCovered(switchExclusivelyCovered, false); // Determine if the switch condition can be explicitly evaluated. assert(Terminator->getCond() && "switch condition must be non-NULL"); Expr::EvalResult result; bool b = tryEvaluate(Terminator->getCond(), result); - SaveAndRestore<Expr::EvalResult*> save_switchCond(switchCond, - b ? &result : nullptr); + SaveAndRestore save_switchCond(switchCond, b ? &result : nullptr); // If body is not a compound statement create implicit scope // and add destructors. @@ -4237,7 +4520,7 @@ CFGBlock *CFGBuilder::VisitCaseStmt(CaseStmt *CS) { shouldAddCase(switchExclusivelyCovered, switchCond, CS, *Context)); - // We set Block to NULL to allow lazy creation of a new block (if necessary) + // We set Block to NULL to allow lazy creation of a new block (if necessary). Block = nullptr; if (TopBlock) { @@ -4273,7 +4556,7 @@ CFGBlock *CFGBuilder::VisitDefaultStmt(DefaultStmt *Terminator) { // (including a fall-through to the code after the switch statement) to always // be the last successor of a switch-terminated block. - // We set Block to NULL to allow lazy creation of a new block (if necessary) + // We set Block to NULL to allow lazy creation of a new block (if necessary). Block = nullptr; // This block is now the implicit successor of other blocks. @@ -4291,7 +4574,8 @@ CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) { if (badCFG) return nullptr; TrySuccessor = Block; - } else TrySuccessor = Succ; + } else + TrySuccessor = Succ; CFGBlock *PrevTryTerminatedBlock = TryTerminatedBlock; @@ -4301,10 +4585,10 @@ CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) { NewTryTerminatedBlock->setTerminator(Terminator); bool HasCatchAll = false; - for (unsigned h = 0; h <Terminator->getNumHandlers(); ++h) { + for (unsigned I = 0, E = Terminator->getNumHandlers(); I != E; ++I) { // The code after the try is the implicit successor. Succ = TrySuccessor; - CXXCatchStmt *CS = Terminator->getHandler(h); + CXXCatchStmt *CS = Terminator->getHandler(I); if (CS->getExceptionDecl() == nullptr) { HasCatchAll = true; } @@ -4327,7 +4611,7 @@ CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) { Succ = TrySuccessor; // Save the current "try" context. - SaveAndRestore<CFGBlock*> save_try(TryTerminatedBlock, NewTryTerminatedBlock); + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); cfg->addTryDispatchBlock(TryTerminatedBlock); assert(Terminator->getTryBlock() && "try must contain a non-NULL body"); @@ -4341,7 +4625,7 @@ CFGBlock *CFGBuilder::VisitCXXCatchStmt(CXXCatchStmt *CS) { // Save local scope position because in case of exception variable ScopePos // won't be restored when traversing AST. 
- SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for possible exception variable. // Store scope position. Add implicit destructor. @@ -4372,7 +4656,7 @@ CFGBlock *CFGBuilder::VisitCXXCatchStmt(CXXCatchStmt *CS) { if (badCFG) return nullptr; - // We set Block to NULL to allow lazy creation of a new block (if necessary) + // We set Block to NULL to allow lazy creation of a new block (if necessary). Block = nullptr; return CatchBlock; @@ -4393,7 +4677,7 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) { // } // Save local scope position before the addition of the implicit variables. - SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scopes and destructors for range, begin and end variables. if (Stmt *Range = S->getRangeStmt()) @@ -4418,7 +4702,7 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) { // Save the current value for the break targets. // All breaks should go to the code following the loop. - SaveAndRestore<JumpTarget> save_break(BreakJumpTarget); + SaveAndRestore save_break(BreakJumpTarget); BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); // The block for the __begin != __end expression. @@ -4451,8 +4735,8 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) { assert(S->getBody()); // Save the current values for Block, Succ, and continue targets. - SaveAndRestore<CFGBlock*> save_Block(Block), save_Succ(Succ); - SaveAndRestore<JumpTarget> save_continue(ContinueJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget); // Generate increment code in its own basic block. This is the target of // continue statements. @@ -4475,8 +4759,14 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) { // Add implicit scope and dtors for loop variable. addLocalScopeAndDtors(S->getLoopVarStmt()); + // If body is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(S->getBody())) + addLocalScopeAndDtors(S->getBody()); + // Populate a new block to contain the loop body and loop variable. addStmt(S->getBody()); + if (badCFG) return nullptr; CFGBlock *LoopVarStmtBlock = addStmt(S->getLoopVarStmt()); @@ -4931,8 +5221,7 @@ CFGBlock *CFG::createBlock() { bool first_block = begin() == end(); // Create the block. - CFGBlock *Mem = getAllocator().Allocate<CFGBlock>(); - new (Mem) CFGBlock(NumBlockIDs++, BlkBVC, this); + CFGBlock *Mem = new (getAllocator()) CFGBlock(NumBlockIDs++, BlkBVC, this); Blocks.push_back(Mem, BlkBVC); // If this is the first block, set it as the Entry and Exit. 
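The createBlock hunk above folds the separate Allocate/placement-new pair into a single placement new into the CFG's bump allocator. A minimal self-contained sketch of the same idiom, assuming a hypothetical Arena type in place of llvm::BumpPtrAllocator and the placement operator new that LLVM declares for its allocator:

#include <cstddef>
#include <new>

struct Arena { // hypothetical stand-in for llvm::BumpPtrAllocator
  alignas(std::max_align_t) char Buf[4096];
  std::size_t Used = 0;
  void *allocate(std::size_t N) {
    void *P = Buf + Used; // bump-pointer: no per-object frees
    Used += (N + alignof(std::max_align_t) - 1) &
            ~(alignof(std::max_align_t) - 1);
    return P;
  }
};

// This overload is what makes `new (A) T(...)` legal for an arena.
void *operator new(std::size_t N, Arena &A) { return A.allocate(N); }
void operator delete(void *, Arena &) noexcept {} // used only if a ctor throws

struct Block {
  unsigned ID;
  explicit Block(unsigned I) : ID(I) {}
};

int main() {
  Arena A;
  Block *B = new (A) Block(0); // allocate from the arena and construct in one step
  return static_cast<int>(B->ID);
}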
@@ -5007,6 +5296,7 @@ CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const { case CFGElement::CXXRecordTypedCall: case CFGElement::ScopeBegin: case CFGElement::ScopeEnd: + case CFGElement::CleanupFunction: llvm_unreachable("getDestructorDecl should only be used with " "ImplicitDtors"); case CFGElement::AutomaticObjectDtor: { @@ -5049,8 +5339,19 @@ CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const { const CXXTemporary *temp = bindExpr->getTemporary(); return temp->getDestructor(); } + case CFGElement::MemberDtor: { + const FieldDecl *field = castAs<CFGMemberDtor>().getFieldDecl(); + QualType ty = field->getType(); + + while (const ArrayType *arrayType = astContext.getAsArrayType(ty)) { + ty = arrayType->getElementType(); + } + + const CXXRecordDecl *classDecl = ty->getAsCXXRecordDecl(); + assert(classDecl); + return classDecl->getDestructor(); + } case CFGElement::BaseDtor: - case CFGElement::MemberDtor: // Not yet supported. return nullptr; } @@ -5128,7 +5429,7 @@ public: unsigned j = 1; for (CFGBlock::const_iterator BI = (*I)->begin(), BEnd = (*I)->end() ; BI != BEnd; ++BI, ++j ) { - if (Optional<CFGStmt> SE = BI->getAs<CFGStmt>()) { + if (std::optional<CFGStmt> SE = BI->getAs<CFGStmt>()) { const Stmt *stmt= SE->getStmt(); std::pair<unsigned, unsigned> P((*I)->getBlockID(), j); StmtMap[stmt] = P; @@ -5274,13 +5575,11 @@ public: Terminator->getCond()->printPretty(OS, Helper, Policy); } - void VisitCXXTryStmt(CXXTryStmt *CS) { - OS << "try ..."; - } + void VisitCXXTryStmt(CXXTryStmt *) { OS << "try ..."; } - void VisitSEHTryStmt(SEHTryStmt *CS) { - OS << "__try ..."; - } + void VisitObjCAtTryStmt(ObjCAtTryStmt *) { OS << "@try ..."; } + + void VisitSEHTryStmt(SEHTryStmt *CS) { OS << "__try ..."; } void VisitAbstractConditionalOperator(AbstractConditionalOperator* C) { if (Stmt *Cond = C->getCond()) @@ -5426,6 +5725,12 @@ static void print_construction_context(raw_ostream &OS, Stmts.push_back(TOCC->getConstructorAfterElision()); break; } + case ConstructionContext::LambdaCaptureKind: { + const auto *LCC = cast<LambdaCaptureConstructionContext>(CC); + Helper.handledStmt(const_cast<LambdaExpr *>(LCC->getLambdaExpr()), OS); + OS << "+" << LCC->getIndex(); + return; + } case ConstructionContext::ArgumentKind: { const auto *ACC = cast<ArgumentConstructionContext>(CC); if (const Stmt *BTE = ACC->getCXXBindTemporaryExpr()) { @@ -5449,7 +5754,8 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper, const CFGElement &E); void CFGElement::dumpToStream(llvm::raw_ostream &OS) const { - StmtPrinterHelper Helper(nullptr, {}); + LangOptions LangOpts; + StmtPrinterHelper Helper(nullptr, LangOpts); print_elem(OS, Helper, *this); } @@ -5498,15 +5804,13 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper, OS << " (BindTemporary)"; } else if (const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(S)) { OS << " (CXXConstructExpr"; - if (Optional<CFGConstructor> CE = E.getAs<CFGConstructor>()) { + if (std::optional<CFGConstructor> CE = E.getAs<CFGConstructor>()) { print_construction_context(OS, Helper, CE->getConstructionContext()); } - OS << ", " << CCE->getType().getAsString() << ")"; + OS << ", " << CCE->getType() << ")"; } else if (const CastExpr *CE = dyn_cast<CastExpr>(S)) { - OS << " (" << CE->getStmtClassName() << ", " - << CE->getCastKindName() - << ", " << CE->getType().getAsString() - << ")"; + OS << " (" << CE->getStmtClassName() << ", " << CE->getCastKindName() + << ", " << CE->getType() << ")"; } // Expressions need a newline. 
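Regarding the new CFGElement::MemberDtor case above: the getAsArrayType loop exists because a member of (possibly nested) array type is destroyed element by element, so the destructor to report is that of the element's class. A small illustration with hypothetical types:

#include <type_traits>

struct Widget { ~Widget() {} };

struct Holder {
  Widget Ws[2][3]; // ~Holder() destroys all six Widget elements, so the
                   // MemberDtor CFG element resolves to Widget's destructor
};

// The getAsArrayType loop corresponds to peeling off array extents:
static_assert(std::is_same_v<std::remove_all_extents_t<Widget[2][3]>, Widget>);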
@@ -5536,6 +5840,11 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper, break; } + case CFGElement::Kind::CleanupFunction: + OS << "CleanupFunction (" + << E.castAs<CFGCleanupFunction>().getFunctionDecl()->getName() << ")\n"; + break; + case CFGElement::Kind::LifetimeEnds: Helper.handleDecl(E.castAs<CFGLifetimeEnds>().getVarDecl(), OS); OS << " (Lifetime ends)\n"; @@ -5596,7 +5905,8 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper, } case CFGElement::Kind::TemporaryDtor: { - const CXXBindTemporaryExpr *BT = E.castAs<CFGTemporaryDtor>().getBindTemporaryExpr(); + const CXXBindTemporaryExpr *BT = + E.castAs<CFGTemporaryDtor>().getBindTemporaryExpr(); OS << "~"; BT->getType().print(OS, PrintingPolicy(Helper.getLangOpts())); OS << "() (Temporary object destructor)\n"; @@ -5640,21 +5950,25 @@ static void print_block(raw_ostream &OS, const CFG* cfg, OS << L->getName(); else if (CaseStmt *C = dyn_cast<CaseStmt>(Label)) { OS << "case "; - if (C->getLHS()) - C->getLHS()->printPretty(OS, &Helper, - PrintingPolicy(Helper.getLangOpts())); - if (C->getRHS()) { + if (const Expr *LHS = C->getLHS()) + LHS->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts())); + if (const Expr *RHS = C->getRHS()) { OS << " ... "; - C->getRHS()->printPretty(OS, &Helper, - PrintingPolicy(Helper.getLangOpts())); + RHS->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts())); } } else if (isa<DefaultStmt>(Label)) OS << "default"; else if (CXXCatchStmt *CS = dyn_cast<CXXCatchStmt>(Label)) { OS << "catch ("; - if (CS->getExceptionDecl()) - CS->getExceptionDecl()->print(OS, PrintingPolicy(Helper.getLangOpts()), - 0); + if (const VarDecl *ED = CS->getExceptionDecl()) + ED->print(OS, PrintingPolicy(Helper.getLangOpts()), 0); + else + OS << "..."; + OS << ")"; + } else if (ObjCAtCatchStmt *CS = dyn_cast<ObjCAtCatchStmt>(Label)) { + OS << "@catch ("; + if (const VarDecl *PD = CS->getCatchParamDecl()) + PD->print(OS, PrintingPolicy(Helper.getLangOpts()), 0); else OS << "..."; OS << ")"; @@ -5869,8 +6183,8 @@ static bool isImmediateSinkBlock(const CFGBlock *Blk) { // at least for now, but once we have better support for exceptions, // we'd need to carefully handle the case when the throw is being // immediately caught. 
- if (std::any_of(Blk->begin(), Blk->end(), [](const CFGElement &Elm) { - if (Optional<CFGStmt> StmtElm = Elm.getAs<CFGStmt>()) + if (llvm::any_of(*Blk, [](const CFGElement &Elm) { + if (std::optional<CFGStmt> StmtElm = Elm.getAs<CFGStmt>()) if (isa<CXXThrowExpr>(StmtElm->getStmt())) return true; return false; @@ -6015,17 +6329,13 @@ Stmt *CFGBlock::getTerminatorCondition(bool StripParens) { // CFG Graphviz Visualization //===----------------------------------------------------------------------===// -#ifndef NDEBUG -static StmtPrinterHelper* GraphHelper; -#endif +static StmtPrinterHelper *GraphHelper; void CFG::viewCFG(const LangOptions &LO) const { -#ifndef NDEBUG StmtPrinterHelper H(this, LO); GraphHelper = &H; llvm::ViewGraph(this,"CFG"); GraphHelper = nullptr; -#endif } namespace llvm { @@ -6034,8 +6344,7 @@ template<> struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits { DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getNodeLabel(const CFGBlock *Node, const CFG* Graph) { -#ifndef NDEBUG + static std::string getNodeLabel(const CFGBlock *Node, const CFG *Graph) { std::string OutSStr; llvm::raw_string_ostream Out(OutSStr); print_block(Out,Graph, *Node, *GraphHelper, false, false); @@ -6051,9 +6360,6 @@ struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits { } return OutStr; -#else - return {}; -#endif } }; diff --git a/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp b/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp index d1c23e3c879b..c3a4581e1fb1 100644 --- a/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp @@ -15,6 +15,7 @@ #include "clang/AST/ParentMap.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/CFGStmtMap.h" +#include <optional> using namespace clang; @@ -49,7 +50,7 @@ static void Accumulate(SMap &SM, CFGBlock *B) { // First walk the block-level expressions. 
for (CFGBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { const CFGElement &CE = *I; - Optional<CFGStmt> CS = CE.getAs<CFGStmt>(); + std::optional<CFGStmt> CS = CE.getAs<CFGStmt>(); if (!CS) continue; diff --git a/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp index 59cc939b6fd1..f892980ed313 100644 --- a/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp @@ -168,7 +168,7 @@ bool CallGraph::includeCalleeInGraph(const Decl *D) { return false; IdentifierInfo *II = FD->getIdentifier(); - if (II && II->getName().startswith("__inline")) + if (II && II->getName().starts_with("__inline")) return false; } diff --git a/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp b/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp index 883629a300dc..30cbd257b65e 100644 --- a/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/CalledOnceCheck.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" @@ -22,11 +23,11 @@ #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/BitmaskEnum.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" @@ -36,6 +37,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include <memory> +#include <optional> using namespace clang; @@ -47,12 +49,29 @@ static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; template <class T> using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { - "completionHandler", "completion", "withCompletionHandler"}; + "completionHandler", "completion", "withCompletionHandler", + "withCompletion", "completionBlock", "withCompletionBlock", + "replyTo", "reply", "withReplyTo"}; constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { - "WithCompletionHandler", "WithCompletion"}; + "WithCompletionHandler", "WithCompletion", "WithCompletionBlock", + "WithReplyTo", "WithReply"}; constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { "error", "cancel", "shouldCall", "done", "OK", "success"}; +struct KnownCalledOnceParameter { + llvm::StringLiteral FunctionName; + unsigned ParamIndex; +}; +constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = { + {llvm::StringLiteral{"dispatch_async"}, 1}, + {llvm::StringLiteral{"dispatch_async_and_wait"}, 1}, + {llvm::StringLiteral{"dispatch_after"}, 2}, + {llvm::StringLiteral{"dispatch_sync"}, 1}, + {llvm::StringLiteral{"dispatch_once"}, 1}, + {llvm::StringLiteral{"dispatch_barrier_async"}, 1}, + {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1}, + {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}}; + class ParameterStatus { public: // Status kind is basically the main part of parameter's status. @@ -144,7 +163,7 @@ public: NotVisited = 0x8, /* 1000 */ // We already reported a violation and stopped tracking calls for this // parameter. 
- Reported = 0x15, /* 1111 */ + Reported = 0xF, /* 1111 */ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) }; @@ -300,7 +319,7 @@ public: // We care about logical not only if we care about comparisons. if (!ShouldRetrieveFromComparisons) return nullptr; - LLVM_FALLTHROUGH; + [[fallthrough]]; // Function pointer/references can be dereferenced before a call. // That doesn't make it, however, any different from a regular call. // For this reason, dereference operation is a "no-op". @@ -330,6 +349,29 @@ public: return Visit(OVE->getSourceExpr()); } + const DeclRefExpr *VisitCallExpr(const CallExpr *CE) { + if (!ShouldRetrieveFromComparisons) + return nullptr; + + // We want to see through some of the boolean builtin functions + // that we are likely to see in conditions. + switch (CE->getBuiltinCallee()) { + case Builtin::BI__builtin_expect: + case Builtin::BI__builtin_expect_with_probability: { + assert(CE->getNumArgs() >= 2); + + const DeclRefExpr *Candidate = Visit(CE->getArg(0)); + return Candidate != nullptr ? Candidate : Visit(CE->getArg(1)); + } + + case Builtin::BI__builtin_unpredictable: + return Visit(CE->getArg(0)); + + default: + return nullptr; + } + } + const DeclRefExpr *VisitExpr(const Expr *E) { // It is a fallback method that gets called whenever the actual type // of the given expression is not covered. @@ -436,7 +478,7 @@ bool mentionsAnyOfConventionalNames(const Expr *E) { return llvm::any_of( CONVENTIONAL_CONDITIONS, [ConditionName](const llvm::StringLiteral &Conventional) { - return ConditionName.contains_lower(Conventional); + return ConditionName.contains_insensitive(Conventional); }); }); } @@ -452,7 +494,7 @@ struct Clarification { /// of basic blocks. class NotCalledClarifier : public ConstStmtVisitor<NotCalledClarifier, - llvm::Optional<Clarification>> { + std::optional<Clarification>> { public: /// The main entrypoint for the class, the function that tries to find the /// clarification of how to explain which sub-path starts with a CFG edge @@ -466,24 +508,24 @@ public: /// results only for such cases. For this very reason, the parent basic /// block, Conditional, is named that way, so it is clear what kind of /// block is expected. 
- static llvm::Optional<Clarification> - clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) { + static std::optional<Clarification> clarify(const CFGBlock *Conditional, + const CFGBlock *SuccWithoutCall) { if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); } - return llvm::None; + return std::nullopt; } - llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) { + std::optional<Clarification> VisitIfStmt(const IfStmt *If) { return VisitBranchingBlock(If, NeverCalledReason::IfThen); } - llvm::Optional<Clarification> + std::optional<Clarification> VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); } - llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { + std::optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { const Stmt *CaseToBlame = SuccInQuestion->getLabel(); if (!CaseToBlame) { // If interesting basic block is not labeled, it means that this @@ -501,15 +543,15 @@ public: llvm_unreachable("Found unexpected switch structure"); } - llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) { + std::optional<Clarification> VisitForStmt(const ForStmt *For) { return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); } - llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) { + std::optional<Clarification> VisitWhileStmt(const WhileStmt *While) { return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); } - llvm::Optional<Clarification> + std::optional<Clarification> VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { assert(Parent->succ_size() == 2 && "Branching block should have exactly two successors"); @@ -519,12 +561,12 @@ public: return Clarification{ActualReason, Terminator}; } - llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { + std::optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { // We don't want to report on short-curcuit logical operations. - return llvm::None; + return std::nullopt; } - llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) { + std::optional<Clarification> VisitStmt(const Stmt *Terminator) { // If we got here, we didn't have a visit function for more derived // classes of statement that this terminator actually belongs to. // @@ -711,7 +753,7 @@ private: // We use a backward dataflow propagation and for this reason we // should traverse basic blocks bottom-up. for (const CFGElement &Element : llvm::reverse(*BB)) { - if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { + if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { check(S->getStmt()); } } @@ -770,8 +812,12 @@ private: } } - // Early exit if we don't have parameters for extra analysis. - if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none()) + // Early exit if we don't have parameters for extra analysis... + if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() && + // ... or if we've seen variables with cleanup functions. + // We can't reason that we've seen every path in this case, + // and thus abandon reporting any warnings that imply that. 
+ !FunctionHasCleanupVars) return; // We are looking for a pair of blocks A, B so that the following is true: @@ -834,22 +880,20 @@ private: template <class CallLikeExpr> void checkIndirectCall(const CallLikeExpr *CallOrMessage) { // CallExpr::arguments does not interact nicely with llvm::enumerate. - llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef( - CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); + llvm::ArrayRef<const Expr *> Arguments = + llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); // Let's check if any of the call arguments is a point of interest. for (const auto &Argument : llvm::enumerate(Arguments)) { if (auto Index = getIndexOfExpression(Argument.value())) { - ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); - if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { // If the corresponding parameter is marked as 'called_once' we should // consider it as a call. processCallFor(*Index, CallOrMessage); - } else if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { + } else { // Otherwise, we mark this parameter as escaped, which can be // interpreted both as called or not called depending on the context. - CurrentParamStatus = ParameterStatus::Escaped; + processEscapeFor(*Index); } // Otherwise, let's keep the state as it is. } @@ -883,6 +927,17 @@ private: } } + /// Process escape of the parameter with the given index + void processEscapeFor(unsigned Index) { + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); + + // Escape overrides whatever error we think happened. + if (CurrentParamStatus.isErrorStatus() && + CurrentParamStatus.getKind() != ParameterStatus::Kind::Reported) { + CurrentParamStatus = ParameterStatus::Escaped; + } + } + void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, bool IsEscape = false) { for (const CFGBlock *Succ : Parent->succs()) { @@ -894,9 +949,9 @@ private: "Block should have at least two successors at this point"); if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { const ParmVarDecl *Parameter = getParameter(Index); - Handler.handleNeverCalled(Parameter, Clarification->Location, - Clarification->Reason, !IsEscape, - !isExplicitlyMarked(Parameter)); + Handler.handleNeverCalled( + Parameter, AC.getDecl(), Clarification->Location, + Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter)); } } } @@ -919,7 +974,7 @@ private: /// Return true if the given name has conventional suffixes. static bool hasConventionalSuffix(llvm::StringRef Name) { return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { - return Name.endswith(Suffix); + return Name.ends_with(Suffix); }); } @@ -929,9 +984,9 @@ private: return false; } - QualType BlockType = Ty->getAs<BlockPointerType>()->getPointeeType(); + QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType(); // Completion handlers should have a block type with void return type. - return BlockType->getAs<FunctionType>()->getReturnType()->isVoidType(); + return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType(); } /// Return true if the only parameter of the function is conventional. @@ -943,10 +998,10 @@ private: /// Return true/false if 'swift_async' attribute states that the given /// parameter is conventionally called once. - /// Return llvm::None if the given declaration doesn't have 'swift_async' + /// Return std::nullopt if the given declaration doesn't have 'swift_async' /// attribute. 
- static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D, - unsigned ParamIndex) { + static std::optional<bool> isConventionalSwiftAsync(const Decl *D, + unsigned ParamIndex) { if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { if (A->getKind() == SwiftAsyncAttr::None) { return false; @@ -954,14 +1009,19 @@ private: return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; } - return llvm::None; + return std::nullopt; + } + + /// Return true if the specified selector represents an init method. + static bool isInitMethod(Selector MethodSelector) { + return MethodSelector.getMethodFamily() == OMF_init; } /// Return true if the specified selector piece matches conventions. static bool isConventionalSelectorPiece(Selector MethodSelector, unsigned PieceIndex, QualType PieceType) { - if (!isConventional(PieceType)) { + if (!isConventional(PieceType) || isInitMethod(MethodSelector)) { return false; } @@ -970,13 +1030,15 @@ private: return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); } - return isConventional(MethodSelector.getNameForSlot(PieceIndex)); + llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex); + return isConventional(PieceName) || hasConventionalSuffix(PieceName); } bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { return isExplicitlyMarked(Parameter) || (CheckConventionalParameters && - isConventional(Parameter->getName()) && + (isConventional(Parameter->getName()) || + hasConventionalSuffix(Parameter->getName())) && isConventional(Parameter->getType())); } @@ -1004,7 +1066,7 @@ private: // 'swift_async' goes first and overrides anything else. if (auto ConventionalAsync = isConventionalSwiftAsync(Function, ParamIndex)) { - return ConventionalAsync.getValue(); + return *ConventionalAsync; } return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || @@ -1021,7 +1083,7 @@ private: // 'swift_async' goes first and overrides anything else. if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { - return ConventionalAsync.getValue(); + return *ConventionalAsync; } const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); @@ -1054,6 +1116,91 @@ private: return false; } + + // Return a call site where the block is called exactly once, or null otherwise. + const Expr *getBlockGuaraneedCallSite(const BlockExpr *Block) const { + ParentMap &PM = AC.getParentMap(); + + // We don't want to track the block through assignments and so on, instead + // we simply see how the block is used, and if it's used directly in a call, + // we decide based on what call it is. + // + // In order to do this, we go up the parents of the block looking for + // a call or a message expression. These might not be immediate parents + // of the actual block expression due to casts and parens, so we skip them. + for (const Stmt *Prev = Block, *Current = PM.getParent(Block); + Current != nullptr; Prev = Current, Current = PM.getParent(Current)) { + // Skip no-op (for our case) operations. + if (isa<CastExpr>(Current) || isa<ParenExpr>(Current)) + continue; + + // At this point, Prev represents our block as an immediate child of the + // call. + if (const auto *Call = dyn_cast<CallExpr>(Current)) { + // It might be the call of the Block itself... + if (Call->getCallee() == Prev) + return Call; + + // ...or it can be an indirect call of the block. + return shouldBlockArgumentBeCalledOnce(Call, Prev) ?
Call : nullptr; + } + if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) { + return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message + : nullptr; + } + + break; + } + + return nullptr; + } + + template <class CallLikeExpr> + bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage, + const Stmt *BlockArgument) const { + // CallExpr::arguments does not interact nicely with llvm::enumerate. + llvm::ArrayRef<const Expr *> Arguments = + llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); + + for (const auto &Argument : llvm::enumerate(Arguments)) { + if (Argument.value() == BlockArgument) { + return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index()); + } + } + + return false; + } + + bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call, + unsigned ParamIndex) const { + const FunctionDecl *Function = Call->getDirectCallee(); + return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) || + shouldBeCalledOnce(Call, ParamIndex); + } + + bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message, + unsigned ParamIndex) const { + // At the moment, we don't have any Obj-C methods we want to specifically + // check in here. + return shouldBeCalledOnce(Message, ParamIndex); + } + + static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function, + unsigned ParamIndex) { + // There is a list of important API functions that, while not following + // conventions nor being directly annotated, still guarantee that the + // callback parameter will be called exactly once. + // + // Here we check if this is the case. + return Function && + llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS, + [Function, ParamIndex]( + const KnownCalledOnceParameter &Reference) { + return Reference.FunctionName == + Function->getName() && + Reference.ParamIndex == ParamIndex; + }); + } + /// Return true if the analyzed function is actually a default implementation /// of the method that has to be overridden. /// @@ -1119,7 +1266,7 @@ private: llvm::reverse(*BB), // we should start with return statements, if we // have any, i.e. from the bottom of the block [&ReturnChildren](const CFGElement &Element) { - if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { + if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { const Stmt *SuspiciousStmt = S->getStmt(); if (isa<ReturnStmt>(SuspiciousStmt)) { @@ -1336,11 +1483,7 @@ private: /// Check given parameter that was discovered to escape. void checkEscapee(const ParmVarDecl &Parameter) { if (auto Index = getIndex(Parameter)) { - ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); - - if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) { - CurrentParamStatus = ParameterStatus::Escaped; - } + processEscapeFor(*Index); } } @@ -1404,17 +1547,44 @@ public: } void VisitBlockExpr(const BlockExpr *Block) { + // Block expressions are tricky. It is a very common practice to capture + // completion handlers by blocks and use them there. + // For this reason, it is important to analyze blocks and report warnings + // for completion handler misuse in blocks. + // + // However, it can be quite difficult to track how the block itself is being + // used. The full precise analysis of that will be similar to alias analysis + // for completion handlers and can be too heavyweight for a compile-time + // diagnostic. Instead, we judge by the immediate use of the block.
+ // + // Here, we try to find a call expression where we know due to conventions, + // annotations, or other reasons that the block is called once and only + // once. + const Expr *CalledOnceCallSite = getBlockGuaraneedCallSite(Block); + + // We need to report this information to the handler because in the + // situation when we know that the block is called exactly once, we can be + // stricter in terms of reported diagnostics. + if (CalledOnceCallSite) { + Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl()); + } else { + Handler.handleBlockWithNoGuarantees(Block->getBlockDecl()); + } + for (const auto &Capture : Block->getBlockDecl()->captures()) { - // If a block captures a tracked parameter, it should be - // considered escaped. - // On one hand, blocks that do that should definitely call it on - // every path. However, it is not guaranteed that the block - // itself gets called whenever it gets created. - // - // Because we don't want to track blocks and whether they get called, - // we consider such parameters simply escaped. if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { - checkEscapee(*Param); + if (auto Index = getIndex(*Param)) { + if (CalledOnceCallSite) { + // The call site of a block can be considered a call site of the + // captured parameter we track. + processCallFor(*Index, CalledOnceCallSite); + } else { + // We still should consider this block as an escape for parameter, + // if we don't know about its call site or the number of time it + // can be invoked. + processEscapeFor(*Index); + } + } } } } @@ -1441,6 +1611,10 @@ public: if (Var->getInit()) { checkEscapee(Var->getInit()); } + + if (Var->hasAttr<CleanupAttr>()) { + FunctionHasCleanupVars = true; + } } } } @@ -1462,19 +1636,19 @@ public: private: unsigned size() const { return TrackedParams.size(); } - llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { + std::optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { return getIndexOfExpression(Call->getCallee()); } - llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const { + std::optional<unsigned> getIndexOfExpression(const Expr *E) const { if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { return getIndex(*Parameter); } - return llvm::None; + return std::nullopt; } - llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { + std::optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { // Expected number of parameters that we actually track is 1. // // Also, the maximum number of declared parameters could not be on a scale @@ -1489,7 +1663,7 @@ private: return It - TrackedParams.begin(); } - return llvm::None; + return std::nullopt; } const ParmVarDecl *getParameter(unsigned Index) const { @@ -1509,6 +1683,13 @@ private: // around. bool SuppressOnConventionalErrorPaths = false; + // The user can annotate variable declarations with cleanup functions, which + // essentially imposes a custom destructor logic on that variable. + // It is possible to use it, however, to call tracked parameters on all exits + // from the function. For this reason, we track the fact that the function + // actually has these. 
+ bool FunctionHasCleanupVars = false; + State CurrentState; ParamSizedVector<const ParmVarDecl *> TrackedParams; CFGSizedVector<State> States; diff --git a/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp b/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp index 0a1122bd5a4a..65ac4ad6a5e5 100644 --- a/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp @@ -147,9 +147,8 @@ void OnlyLargestCloneConstraint::constrain( // Erasing a list of indexes from the vector should be done with decreasing // indexes. As IndexesToRemove is constructed with increasing values, we just // reverse iterate over it to get the desired order. - for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) { - Result.erase(Result.begin() + *I); - } + for (unsigned I : llvm::reverse(IndexesToRemove)) + Result.erase(Result.begin() + I); } bool FilenamePatternConstraint::isAutoGenerated( diff --git a/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp b/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp index 571d72e1a841..836859c22345 100644 --- a/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp @@ -26,10 +26,10 @@ bool cocoa::isRefType(QualType RetTy, StringRef Prefix, // Recursively walk the typedef stack, allowing typedefs of reference types. while (const TypedefType *TD = RetTy->getAs<TypedefType>()) { StringRef TDName = TD->getDecl()->getIdentifier()->getName(); - if (TDName.startswith(Prefix) && TDName.endswith("Ref")) + if (TDName.starts_with(Prefix) && TDName.ends_with("Ref")) return true; // XPC unfortunately uses CF-style function names, but aren't CF types. - if (TDName.startswith("xpc_")) + if (TDName.starts_with("xpc_")) return false; RetTy = TD->getDecl()->getUnderlyingType(); } @@ -43,7 +43,7 @@ bool cocoa::isRefType(QualType RetTy, StringRef Prefix, return false; // Does the name start with the prefix? - return Name.startswith(Prefix); + return Name.starts_with(Prefix); } /// Returns true when the passed-in type is a CF-style reference-counted @@ -127,10 +127,9 @@ bool coreFoundation::followsCreateRule(const FunctionDecl *fn) { // Scan for *lowercase* 'reate' or 'opy', followed by no lowercase // character. StringRef suffix = functionName.substr(it - start); - if (suffix.startswith("reate")) { + if (suffix.starts_with("reate")) { it += 5; - } - else if (suffix.startswith("opy")) { + } else if (suffix.starts_with("opy")) { it += 3; } else { // Keep scanning. 
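The OnlyLargestCloneConstraint hunk above keeps the same invariant while letting llvm::reverse spell the loop: IndexesToRemove is built in increasing order, and erasing back-to-front means each erase only shifts elements at higher indexes, which have already been processed. A standalone sketch with illustrative data:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> Result = {10, 11, 12, 13, 14};
  std::vector<unsigned> IndexesToRemove = {1, 3}; // collected in increasing order

  // Erase back-to-front so the pending (smaller) indexes stay valid.
  for (auto It = IndexesToRemove.rbegin(); It != IndexesToRemove.rend(); ++It)
    Result.erase(Result.begin() + *It);

  assert((Result == std::vector<int>{10, 12, 14}));
}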
diff --git a/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp b/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp index 6ba1e2173d2c..8a862c06f13a 100644 --- a/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp @@ -156,6 +156,12 @@ const ConstructionContext *ConstructionContext::createBoundTemporaryFromLayers( return create<CXX17ElidedCopyConstructorInitializerConstructionContext>( C, I, BTE); } + case ConstructionContextItem::LambdaCaptureKind: { + assert(ParentLayer->isLast()); + const auto *E = cast<LambdaExpr>(ParentItem.getStmt()); + return create<LambdaCaptureConstructionContext>(C, E, + ParentItem.getIndex()); + } } // switch (ParentItem.getKind()) llvm_unreachable("Unexpected construction context with destructor!"); @@ -200,6 +206,11 @@ const ConstructionContext *ConstructionContext::createFromLayers( case ConstructionContextItem::ElidableConstructorKind: { llvm_unreachable("The argument needs to be materialized first!"); } + case ConstructionContextItem::LambdaCaptureKind: { + assert(TopLayer->isLast()); + const auto *E = cast<LambdaExpr>(TopItem.getStmt()); + return create<LambdaCaptureConstructionContext>(C, E, TopItem.getIndex()); + } case ConstructionContextItem::InitializerKind: { assert(TopLayer->isLast()); const CXXCtorInitializer *I = TopItem.getCXXCtorInitializer(); diff --git a/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp b/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp index 9560248b173f..d01c7f688e8b 100644 --- a/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp @@ -27,13 +27,13 @@ #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <memory> +#include <optional> #include <utility> // TODO: Adjust states of args to constructors in the same way that arguments to @@ -62,7 +62,7 @@ static SourceLocation getFirstStmtLoc(const CFGBlock *Block) { // Find the source location of the first statement in the block, if the block // is not empty. for (const auto &B : *Block) - if (Optional<CFGStmt> CS = B.getAs<CFGStmt>()) + if (std::optional<CFGStmt> CS = B.getAs<CFGStmt>()) return CS->getStmt()->getBeginLoc(); // Block is empty. 
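The Consumed.cpp hunks that follow apply the same mechanical migration that runs through this whole diff: llvm::Optional<T> becomes std::optional<T>, llvm::None becomes std::nullopt, and .getValue() becomes operator* (compare the CalledOnceCheck changes above). Reduced to a self-contained pattern (names here are illustrative):

#include <optional>

// was: llvm::Optional<unsigned> findIndex(bool Found)
std::optional<unsigned> findIndex(bool Found) {
  if (Found)
    return 42u;
  return std::nullopt; // was: return llvm::None;
}

unsigned use() {
  if (std::optional<unsigned> I = findIndex(true))
    return *I; // was: I.getValue()
  return 0;
}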
@@ -81,7 +81,7 @@ static SourceLocation getLastStmtLoc(const CFGBlock *Block) { } else { for (CFGBlock::const_reverse_iterator BI = Block->rbegin(), BE = Block->rend(); BI != BE; ++BI) { - if (Optional<CFGStmt> CS = BI->getAs<CFGStmt>()) + if (std::optional<CFGStmt> CS = BI->getAs<CFGStmt>()) return CS->getStmt()->getBeginLoc(); } } @@ -771,7 +771,7 @@ void ConsumedStmtVisitor::VisitCXXBindTemporaryExpr( void ConsumedStmtVisitor::VisitCXXConstructExpr(const CXXConstructExpr *Call) { CXXConstructorDecl *Constructor = Call->getConstructor(); - QualType ThisType = Constructor->getThisType()->getPointeeType(); + QualType ThisType = Constructor->getFunctionObjectParameterType(); if (!isConsumableType(ThisType)) return; @@ -1199,7 +1199,7 @@ void ConsumedAnalyzer::determineExpectedReturnState(AnalysisDeclContext &AC, const FunctionDecl *D) { QualType ReturnType; if (const auto *Constructor = dyn_cast<CXXConstructorDecl>(D)) { - ReturnType = Constructor->getThisType()->getPointeeType(); + ReturnType = Constructor->getFunctionObjectParameterType(); } else ReturnType = D->getCallResultType(); diff --git a/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp index e9ff5e5e8765..6d726ae44104 100644 --- a/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -15,6 +15,81 @@ namespace clang { using namespace ast_matchers; +// Check if result of Source expression could be a Target expression. +// Checks: +// - Implicit Casts +// - Binary Operators +// - ConditionalOperator +// - BinaryConditionalOperator +static bool canExprResolveTo(const Expr *Source, const Expr *Target) { + + const auto IgnoreDerivedToBase = [](const Expr *E, auto Matcher) { + if (Matcher(E)) + return true; + if (const auto *Cast = dyn_cast<ImplicitCastExpr>(E)) { + if ((Cast->getCastKind() == CK_DerivedToBase || + Cast->getCastKind() == CK_UncheckedDerivedToBase) && + Matcher(Cast->getSubExpr())) + return true; + } + return false; + }; + + const auto EvalCommaExpr = [](const Expr *E, auto Matcher) { + const Expr *Result = E; + while (const auto *BOComma = + dyn_cast_or_null<BinaryOperator>(Result->IgnoreParens())) { + if (!BOComma->isCommaOp()) + break; + Result = BOComma->getRHS(); + } + + return Result != E && Matcher(Result); + }; + + // The 'ConditionalOperatorM' matches on `<anything> ? <expr> : <expr>`. + // This matching must be recursive because `<expr>` can be anything resolving + // to the `InnerMatcher`, for example another conditional operator. + // The edge-case `BaseClass &b = <cond> ? DerivedVar1 : DerivedVar2;` + // is handled, too. The implicit cast happens outside of the conditional. + // This is matched by `IgnoreDerivedToBase(canResolveToExpr(InnerMatcher))` + // below. 
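// Editor's illustration (not part of the patch): concrete source forms that
// canExprResolveTo accepts for a target expression `x`:
//   (x)          -- parentheses are ignored
//   c ? x : y    -- either branch of a conditional, recursively
//   c ?: x       -- the GNU binary conditional ("Elvis") operator
//   (f(), x)     -- a comma chain resolves to its right-most operand
// with implicit Derived-to-Base casts looked through around each form.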
+ const auto ConditionalOperatorM = [Target](const Expr *E) { + if (const auto *OP = dyn_cast<ConditionalOperator>(E)) { + if (const auto *TE = OP->getTrueExpr()->IgnoreParens()) + if (canExprResolveTo(TE, Target)) + return true; + if (const auto *FE = OP->getFalseExpr()->IgnoreParens()) + if (canExprResolveTo(FE, Target)) + return true; + } + return false; + }; + + const auto ElvisOperator = [Target](const Expr *E) { + if (const auto *OP = dyn_cast<BinaryConditionalOperator>(E)) { + if (const auto *TE = OP->getTrueExpr()->IgnoreParens()) + if (canExprResolveTo(TE, Target)) + return true; + if (const auto *FE = OP->getFalseExpr()->IgnoreParens()) + if (canExprResolveTo(FE, Target)) + return true; + } + return false; + }; + + const Expr *SourceExprP = Source->IgnoreParens(); + return IgnoreDerivedToBase(SourceExprP, + [&](const Expr *E) { + return E == Target || ConditionalOperatorM(E) || + ElvisOperator(E); + }) || + EvalCommaExpr(SourceExprP, [&](const Expr *E) { + return IgnoreDerivedToBase( + E->IgnoreParens(), [&](const Expr *EE) { return EE == Target; }); + }); +} + namespace { AST_MATCHER_P(LambdaExpr, hasCaptureInit, const Expr *, E) { @@ -27,51 +102,14 @@ AST_MATCHER_P(CXXForRangeStmt, hasRangeStmt, return InnerMatcher.matches(*Range, Finder, Builder); } -AST_MATCHER_P(Expr, maybeEvalCommaExpr, ast_matchers::internal::Matcher<Expr>, - InnerMatcher) { - const Expr *Result = &Node; - while (const auto *BOComma = - dyn_cast_or_null<BinaryOperator>(Result->IgnoreParens())) { - if (!BOComma->isCommaOp()) - break; - Result = BOComma->getRHS(); - } - return InnerMatcher.matches(*Result, Finder, Builder); -} - -AST_MATCHER_P(Expr, canResolveToExpr, ast_matchers::internal::Matcher<Expr>, - InnerMatcher) { - auto DerivedToBase = [](const ast_matchers::internal::Matcher<Expr> &Inner) { - return implicitCastExpr(anyOf(hasCastKind(CK_DerivedToBase), - hasCastKind(CK_UncheckedDerivedToBase)), - hasSourceExpression(Inner)); - }; - auto IgnoreDerivedToBase = - [&DerivedToBase](const ast_matchers::internal::Matcher<Expr> &Inner) { - return ignoringParens(expr(anyOf(Inner, DerivedToBase(Inner)))); - }; - - // The 'ConditionalOperator' matches on `<anything> ? <expr> : <expr>`. - // This matching must be recursive because `<expr>` can be anything resolving - // to the `InnerMatcher`, for example another conditional operator. - // The edge-case `BaseClass &b = <cond> ? DerivedVar1 : DerivedVar2;` - // is handled, too. The implicit cast happens outside of the conditional. - // This is matched by `IgnoreDerivedToBase(canResolveToExpr(InnerMatcher))` - // below. 
- auto const ConditionalOperator = conditionalOperator(anyOf( - hasTrueExpression(ignoringParens(canResolveToExpr(InnerMatcher))), - hasFalseExpression(ignoringParens(canResolveToExpr(InnerMatcher))))); - auto const ElvisOperator = binaryConditionalOperator(anyOf( - hasTrueExpression(ignoringParens(canResolveToExpr(InnerMatcher))), - hasFalseExpression(ignoringParens(canResolveToExpr(InnerMatcher))))); - - auto const ComplexMatcher = ignoringParens( - expr(anyOf(IgnoreDerivedToBase(InnerMatcher), - maybeEvalCommaExpr(IgnoreDerivedToBase(InnerMatcher)), - IgnoreDerivedToBase(ConditionalOperator), - IgnoreDerivedToBase(ElvisOperator)))); - - return ComplexMatcher.matches(Node, Finder, Builder); +AST_MATCHER_P(Stmt, canResolveToExpr, const Stmt *, Inner) { + auto *Exp = dyn_cast<Expr>(&Node); + if (!Exp) + return true; + auto *Target = dyn_cast<Expr>(Inner); + if (!Target) + return false; + return canExprResolveTo(Exp, Target); } // Similar to 'hasAnyArgument', but does not work because 'InitListExpr' does @@ -95,11 +133,33 @@ AST_MATCHER(CXXTypeidExpr, isPotentiallyEvaluated) { return Node.isPotentiallyEvaluated(); } +AST_MATCHER(CXXMemberCallExpr, isConstCallee) { + const Decl *CalleeDecl = Node.getCalleeDecl(); + const auto *VD = dyn_cast_or_null<ValueDecl>(CalleeDecl); + if (!VD) + return false; + const QualType T = VD->getType().getCanonicalType(); + const auto *MPT = dyn_cast<MemberPointerType>(T); + const auto *FPT = MPT ? cast<FunctionProtoType>(MPT->getPointeeType()) + : dyn_cast<FunctionProtoType>(T); + if (!FPT) + return false; + return FPT->isConst(); +} + AST_MATCHER_P(GenericSelectionExpr, hasControllingExpr, ast_matchers::internal::Matcher<Expr>, InnerMatcher) { + if (Node.isTypePredicate()) + return false; return InnerMatcher.matches(*Node.getControllingExpr(), Finder, Builder); } +template <typename T> +ast_matchers::internal::Matcher<T> +findFirst(const ast_matchers::internal::Matcher<T> &Matcher) { + return anyOf(Matcher, hasDescendant(Matcher)); +} + const auto nonConstReferenceType = [] { return hasUnqualifiedDesugaredType( referenceType(pointee(unless(isConstQualified())))); @@ -126,9 +186,10 @@ template <> struct NodeID<Decl> { static constexpr StringRef value = "decl"; }; constexpr StringRef NodeID<Expr>::value; constexpr StringRef NodeID<Decl>::value; -template <class T, class F = const Stmt *(ExprMutationAnalyzer::*)(const T *)> +template <class T, + class F = const Stmt *(ExprMutationAnalyzer::Analyzer::*)(const T *)> const Stmt *tryEachMatch(ArrayRef<ast_matchers::BoundNodes> Matches, - ExprMutationAnalyzer *Analyzer, F Finder) { + ExprMutationAnalyzer::Analyzer *Analyzer, F Finder) { const StringRef ID = NodeID<T>::value; for (const auto &Nodes : Matches) { if (const Stmt *S = (Analyzer->*Finder)(Nodes.getNodeAs<T>(ID))) @@ -139,53 +200,67 @@ const Stmt *tryEachMatch(ArrayRef<ast_matchers::BoundNodes> Matches, } // namespace -const Stmt *ExprMutationAnalyzer::findMutation(const Expr *Exp) { - return findMutationMemoized(Exp, - {&ExprMutationAnalyzer::findDirectMutation, - &ExprMutationAnalyzer::findMemberMutation, - &ExprMutationAnalyzer::findArrayElementMutation, - &ExprMutationAnalyzer::findCastMutation, - &ExprMutationAnalyzer::findRangeLoopMutation, - &ExprMutationAnalyzer::findReferenceMutation, - &ExprMutationAnalyzer::findFunctionArgMutation}, - Results); +const Stmt *ExprMutationAnalyzer::Analyzer::findMutation(const Expr *Exp) { + return findMutationMemoized( + Exp, + {&ExprMutationAnalyzer::Analyzer::findDirectMutation, + 
&ExprMutationAnalyzer::Analyzer::findMemberMutation, + &ExprMutationAnalyzer::Analyzer::findArrayElementMutation, + &ExprMutationAnalyzer::Analyzer::findCastMutation, + &ExprMutationAnalyzer::Analyzer::findRangeLoopMutation, + &ExprMutationAnalyzer::Analyzer::findReferenceMutation, + &ExprMutationAnalyzer::Analyzer::findFunctionArgMutation}, + Memorized.Results); } -const Stmt *ExprMutationAnalyzer::findMutation(const Decl *Dec) { - return tryEachDeclRef(Dec, &ExprMutationAnalyzer::findMutation); +const Stmt *ExprMutationAnalyzer::Analyzer::findMutation(const Decl *Dec) { + return tryEachDeclRef(Dec, &ExprMutationAnalyzer::Analyzer::findMutation); } -const Stmt *ExprMutationAnalyzer::findPointeeMutation(const Expr *Exp) { - return findMutationMemoized(Exp, {/*TODO*/}, PointeeResults); +const Stmt * +ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Expr *Exp) { + return findMutationMemoized(Exp, {/*TODO*/}, Memorized.PointeeResults); } -const Stmt *ExprMutationAnalyzer::findPointeeMutation(const Decl *Dec) { - return tryEachDeclRef(Dec, &ExprMutationAnalyzer::findPointeeMutation); +const Stmt * +ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Decl *Dec) { + return tryEachDeclRef(Dec, + &ExprMutationAnalyzer::Analyzer::findPointeeMutation); } -const Stmt *ExprMutationAnalyzer::findMutationMemoized( +const Stmt *ExprMutationAnalyzer::Analyzer::findMutationMemoized( const Expr *Exp, llvm::ArrayRef<MutationFinder> Finders, - ResultMap &MemoizedResults) { + Memoized::ResultMap &MemoizedResults) { const auto Memoized = MemoizedResults.find(Exp); if (Memoized != MemoizedResults.end()) return Memoized->second; + // Assume Exp is not mutated before analyzing Exp. + MemoizedResults[Exp] = nullptr; if (isUnevaluated(Exp)) - return MemoizedResults[Exp] = nullptr; + return nullptr; for (const auto &Finder : Finders) { if (const Stmt *S = (this->*Finder)(Exp)) return MemoizedResults[Exp] = S; } - return MemoizedResults[Exp] = nullptr; + return nullptr; } -const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, - MutationFinder Finder) { - const auto Refs = - match(findAll(declRefExpr(to(equalsNode(Dec))).bind(NodeID<Expr>::value)), - Stm, Context); +const Stmt * +ExprMutationAnalyzer::Analyzer::tryEachDeclRef(const Decl *Dec, + MutationFinder Finder) { + const auto Refs = match( + findAll( + declRefExpr(to( + // `Dec` or a binding if `Dec` is a decomposition. 
+ anyOf(equalsNode(Dec), + bindingDecl(forDecomposition(equalsNode(Dec)))) + // + )) + .bind(NodeID<Expr>::value)), + Stm, Context); for (const auto &RefNodes : Refs) { const auto *E = RefNodes.getNodeAs<Expr>(NodeID<Expr>::value); if ((this->*Finder)(E)) @@ -194,12 +269,14 @@ const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } -bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { - return selectFirst<Expr>( +bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Stmt *Exp, + const Stmt &Stm, + ASTContext &Context) { + return selectFirst<Stmt>( NodeID<Expr>::value, match( - findAll( - expr(canResolveToExpr(equalsNode(Exp)), + findFirst( + stmt(canResolveToExpr(Exp), anyOf( // `Exp` is part of the underlying expression of // decltype/typeof if it has an ancestor of @@ -225,68 +302,76 @@ bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { Stm, Context)) != nullptr; } +bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Expr *Exp) { + return isUnevaluated(Exp, Stm, Context); +} + const Stmt * -ExprMutationAnalyzer::findExprMutation(ArrayRef<BoundNodes> Matches) { - return tryEachMatch<Expr>(Matches, this, &ExprMutationAnalyzer::findMutation); +ExprMutationAnalyzer::Analyzer::findExprMutation(ArrayRef<BoundNodes> Matches) { + return tryEachMatch<Expr>(Matches, this, + &ExprMutationAnalyzer::Analyzer::findMutation); } const Stmt * -ExprMutationAnalyzer::findDeclMutation(ArrayRef<BoundNodes> Matches) { - return tryEachMatch<Decl>(Matches, this, &ExprMutationAnalyzer::findMutation); +ExprMutationAnalyzer::Analyzer::findDeclMutation(ArrayRef<BoundNodes> Matches) { + return tryEachMatch<Decl>(Matches, this, + &ExprMutationAnalyzer::Analyzer::findMutation); } -const Stmt *ExprMutationAnalyzer::findExprPointeeMutation( +const Stmt *ExprMutationAnalyzer::Analyzer::findExprPointeeMutation( ArrayRef<ast_matchers::BoundNodes> Matches) { - return tryEachMatch<Expr>(Matches, this, - &ExprMutationAnalyzer::findPointeeMutation); + return tryEachMatch<Expr>( + Matches, this, &ExprMutationAnalyzer::Analyzer::findPointeeMutation); } -const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( +const Stmt *ExprMutationAnalyzer::Analyzer::findDeclPointeeMutation( ArrayRef<ast_matchers::BoundNodes> Matches) { - return tryEachMatch<Decl>(Matches, this, - &ExprMutationAnalyzer::findPointeeMutation); + return tryEachMatch<Decl>( + Matches, this, &ExprMutationAnalyzer::Analyzer::findPointeeMutation); } -const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findDirectMutation(const Expr *Exp) { // LHS of any assignment operators. - const auto AsAssignmentLhs = binaryOperator( - isAssignmentOperator(), hasLHS(canResolveToExpr(equalsNode(Exp)))); + const auto AsAssignmentLhs = + binaryOperator(isAssignmentOperator(), hasLHS(canResolveToExpr(Exp))); // Operand of increment/decrement operators. const auto AsIncDecOperand = unaryOperator(anyOf(hasOperatorName("++"), hasOperatorName("--")), - hasUnaryOperand(canResolveToExpr(equalsNode(Exp)))); + hasUnaryOperand(canResolveToExpr(Exp))); // Invoking non-const member function. // A member function is assumed to be non-const when it is unresolved. 
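// Editor's illustration (not part of the patch): in a template such as
//   template <typename T> void f(T t) { t.update(); }
// the call `t.update()` stays unresolved until instantiation, so it is
// conservatively treated as a mutation of `t`.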
const auto NonConstMethod = cxxMethodDecl(unless(isConst())); const auto AsNonConstThis = expr(anyOf( - cxxMemberCallExpr(callee(NonConstMethod), - on(canResolveToExpr(equalsNode(Exp)))), + cxxMemberCallExpr(on(canResolveToExpr(Exp)), unless(isConstCallee())), cxxOperatorCallExpr(callee(NonConstMethod), - hasArgument(0, canResolveToExpr(equalsNode(Exp)))), + hasArgument(0, canResolveToExpr(Exp))), // In case of a templated type, calling overloaded operators is not // resolved and modelled as `binaryOperator` on a dependent type. // Such instances are considered a modification, because they can modify // in different instantiations of the template. - binaryOperator(hasEitherOperand( - allOf(ignoringImpCasts(canResolveToExpr(equalsNode(Exp))), - isTypeDependent()))), + binaryOperator(isTypeDependent(), + hasEitherOperand(ignoringImpCasts(canResolveToExpr(Exp)))), + // A fold expression may contain `Exp` as it's initializer. + // We don't know if the operator modifies `Exp` because the + // operator is type dependent due to the parameter pack. + cxxFoldExpr(hasFoldInit(ignoringImpCasts(canResolveToExpr(Exp)))), // Within class templates and member functions the member expression might // not be resolved. In that case, the `callExpr` is considered to be a // modification. - callExpr( - callee(expr(anyOf(unresolvedMemberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))), - cxxDependentScopeMemberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))))))), + callExpr(callee(expr(anyOf( + unresolvedMemberExpr(hasObjectExpression(canResolveToExpr(Exp))), + cxxDependentScopeMemberExpr( + hasObjectExpression(canResolveToExpr(Exp))))))), // Match on a call to a known method, but the call itself is type // dependent (e.g. `vector<T> v; v.push(T{});` in a templated function). - callExpr(allOf(isTypeDependent(), - callee(memberExpr(hasDeclaration(NonConstMethod), - hasObjectExpression(canResolveToExpr( - equalsNode(Exp))))))))); + callExpr(allOf( + isTypeDependent(), + callee(memberExpr(hasDeclaration(NonConstMethod), + hasObjectExpression(canResolveToExpr(Exp)))))))); // Taking address of 'Exp'. // We're assuming 'Exp' is mutated as soon as its address is taken, though in @@ -296,11 +381,10 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { unaryOperator(hasOperatorName("&"), // A NoOp implicit cast is adding const. unless(hasParent(implicitCastExpr(hasCastKind(CK_NoOp)))), - hasUnaryOperand(canResolveToExpr(equalsNode(Exp)))); - const auto AsPointerFromArrayDecay = - castExpr(hasCastKind(CK_ArrayToPointerDecay), - unless(hasParent(arraySubscriptExpr())), - has(canResolveToExpr(equalsNode(Exp)))); + hasUnaryOperand(canResolveToExpr(Exp))); + const auto AsPointerFromArrayDecay = castExpr( + hasCastKind(CK_ArrayToPointerDecay), + unless(hasParent(arraySubscriptExpr())), has(canResolveToExpr(Exp))); // Treat calling `operator->()` of move-only classes as taking address. // These are typically smart pointers with unique ownership so we treat // mutation of pointee as mutation of the smart pointer itself. @@ -308,7 +392,7 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { hasOverloadedOperatorName("->"), callee( cxxMethodDecl(ofClass(isMoveOnly()), returns(nonConstPointerType()))), - argumentCountIs(1), hasArgument(0, canResolveToExpr(equalsNode(Exp)))); + argumentCountIs(1), hasArgument(0, canResolveToExpr(Exp))); // Used as non-const-ref argument when calling a function. 
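// (Editor's illustration, not part of the patch: given `void g(int &);`,
// the call `g(x)` counts as mutating `x`, while `void h(const int &);`
// called as `h(x)` does not.)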
// An argument is assumed to be non-const-ref when the function is unresolved. @@ -316,31 +400,28 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // findFunctionArgMutation which has additional smarts for handling forwarding // references. const auto NonConstRefParam = forEachArgumentWithParamType( - anyOf(canResolveToExpr(equalsNode(Exp)), - memberExpr(hasObjectExpression(canResolveToExpr(equalsNode(Exp))))), + anyOf(canResolveToExpr(Exp), + memberExpr(hasObjectExpression(canResolveToExpr(Exp)))), nonConstReferenceType()); const auto NotInstantiated = unless(hasDeclaration(isInstantiated())); - const auto TypeDependentCallee = - callee(expr(anyOf(unresolvedLookupExpr(), unresolvedMemberExpr(), - cxxDependentScopeMemberExpr(), - hasType(templateTypeParmType()), isTypeDependent()))); - - const auto AsNonConstRefArg = anyOf( - callExpr(NonConstRefParam, NotInstantiated), - cxxConstructExpr(NonConstRefParam, NotInstantiated), - callExpr(TypeDependentCallee, - hasAnyArgument(canResolveToExpr(equalsNode(Exp)))), - cxxUnresolvedConstructExpr( - hasAnyArgument(canResolveToExpr(equalsNode(Exp)))), - // Previous False Positive in the following Code: - // `template <typename T> void f() { int i = 42; new Type<T>(i); }` - // Where the constructor of `Type` takes its argument as reference. - // The AST does not resolve in a `cxxConstructExpr` because it is - // type-dependent. - parenListExpr(hasDescendant(expr(canResolveToExpr(equalsNode(Exp))))), - // If the initializer is for a reference type, there is no cast for - // the variable. Values are cast to RValue first. - initListExpr(hasAnyInit(expr(canResolveToExpr(equalsNode(Exp)))))); + + const auto AsNonConstRefArg = + anyOf(callExpr(NonConstRefParam, NotInstantiated), + cxxConstructExpr(NonConstRefParam, NotInstantiated), + // If the call is type-dependent, we can't properly process any + // argument because required type conversions and implicit casts + // will be inserted only after specialization. + callExpr(isTypeDependent(), hasAnyArgument(canResolveToExpr(Exp))), + cxxUnresolvedConstructExpr(hasAnyArgument(canResolveToExpr(Exp))), + // Previous False Positive in the following Code: + // `template <typename T> void f() { int i = 42; new Type<T>(i); }` + // Where the constructor of `Type` takes its argument as reference. + // The AST does not resolve in a `cxxConstructExpr` because it is + // type-dependent. + parenListExpr(hasDescendant(expr(canResolveToExpr(Exp)))), + // If the initializer is for a reference type, there is no cast for + // the variable. Values are cast to RValue first. + initListExpr(hasAnyInit(expr(canResolveToExpr(Exp))))); // Captured by a lambda by reference. // If we're initializing a capture with 'Exp' directly then we're initializing @@ -354,74 +435,74 @@ const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // For returning by const-ref there will be an ImplicitCastExpr <NoOp> (for // adding const.) const auto AsNonConstRefReturn = - returnStmt(hasReturnValue(canResolveToExpr(equalsNode(Exp)))); + returnStmt(hasReturnValue(canResolveToExpr(Exp))); - // It is used as a non-const-reference for initalizing a range-for loop. - const auto AsNonConstRefRangeInit = cxxForRangeStmt( - hasRangeInit(declRefExpr(allOf(canResolveToExpr(equalsNode(Exp)), - hasType(nonConstReferenceType()))))); + // It is used as a non-const-reference for initializing a range-for loop. 
+ const auto AsNonConstRefRangeInit = cxxForRangeStmt(hasRangeInit(declRefExpr( + allOf(canResolveToExpr(Exp), hasType(nonConstReferenceType()))))); const auto Matches = match( - traverse(TK_AsIs, - findAll(stmt(anyOf(AsAssignmentLhs, AsIncDecOperand, - AsNonConstThis, AsAmpersandOperand, - AsPointerFromArrayDecay, AsOperatorArrowThis, - AsNonConstRefArg, AsLambdaRefCaptureInit, - AsNonConstRefReturn, AsNonConstRefRangeInit)) - .bind("stmt"))), + traverse( + TK_AsIs, + findFirst(stmt(anyOf(AsAssignmentLhs, AsIncDecOperand, AsNonConstThis, + AsAmpersandOperand, AsPointerFromArrayDecay, + AsOperatorArrowThis, AsNonConstRefArg, + AsLambdaRefCaptureInit, AsNonConstRefReturn, + AsNonConstRefRangeInit)) + .bind("stmt"))), Stm, Context); return selectFirst<Stmt>("stmt", Matches); } -const Stmt *ExprMutationAnalyzer::findMemberMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findMemberMutation(const Expr *Exp) { // Check whether any member of 'Exp' is mutated. - const auto MemberExprs = - match(findAll(expr(anyOf(memberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))), - cxxDependentScopeMemberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))))) - .bind(NodeID<Expr>::value)), - Stm, Context); + const auto MemberExprs = match( + findAll(expr(anyOf(memberExpr(hasObjectExpression(canResolveToExpr(Exp))), + cxxDependentScopeMemberExpr( + hasObjectExpression(canResolveToExpr(Exp))), + binaryOperator(hasOperatorName(".*"), + hasLHS(equalsNode(Exp))))) + .bind(NodeID<Expr>::value)), + Stm, Context); return findExprMutation(MemberExprs); } -const Stmt *ExprMutationAnalyzer::findArrayElementMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findArrayElementMutation(const Expr *Exp) { // Check whether any element of an array is mutated. - const auto SubscriptExprs = - match(findAll(arraySubscriptExpr( - anyOf(hasBase(canResolveToExpr(equalsNode(Exp))), - hasBase(implicitCastExpr( - allOf(hasCastKind(CK_ArrayToPointerDecay), - hasSourceExpression(canResolveToExpr( - equalsNode(Exp)))))))) - .bind(NodeID<Expr>::value)), - Stm, Context); + const auto SubscriptExprs = match( + findAll(arraySubscriptExpr( + anyOf(hasBase(canResolveToExpr(Exp)), + hasBase(implicitCastExpr(allOf( + hasCastKind(CK_ArrayToPointerDecay), + hasSourceExpression(canResolveToExpr(Exp))))))) + .bind(NodeID<Expr>::value)), + Stm, Context); return findExprMutation(SubscriptExprs); } -const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) { +const Stmt *ExprMutationAnalyzer::Analyzer::findCastMutation(const Expr *Exp) { // If the 'Exp' is explicitly casted to a non-const reference type the // 'Exp' is considered to be modified. - const auto ExplicitCast = match( - findAll( - stmt(castExpr(hasSourceExpression(canResolveToExpr(equalsNode(Exp))), - explicitCastExpr( - hasDestinationType(nonConstReferenceType())))) - .bind("stmt")), - Stm, Context); + const auto ExplicitCast = + match(findFirst(stmt(castExpr(hasSourceExpression(canResolveToExpr(Exp)), + explicitCastExpr(hasDestinationType( + nonConstReferenceType())))) + .bind("stmt")), + Stm, Context); if (const auto *CastStmt = selectFirst<Stmt>("stmt", ExplicitCast)) return CastStmt; // If 'Exp' is casted to any non-const reference type, check the castExpr. 
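// Editor's illustration (not part of the patch): an explicit cast such as
// `const_cast<T &>(x)` is reported as a mutation of `x` outright by the
// ExplicitCast match above; for `std::move(x)` / `std::forward<T>(x)` the
// *result* of the call is analyzed for mutation instead (Calls, below).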
const auto Casts = match( - findAll( - expr(castExpr(hasSourceExpression(canResolveToExpr(equalsNode(Exp))), - anyOf(explicitCastExpr( - hasDestinationType(nonConstReferenceType())), - implicitCastExpr(hasImplicitDestinationType( - nonConstReferenceType()))))) - .bind(NodeID<Expr>::value)), + findAll(expr(castExpr(hasSourceExpression(canResolveToExpr(Exp)), + anyOf(explicitCastExpr(hasDestinationType( + nonConstReferenceType())), + implicitCastExpr(hasImplicitDestinationType( + nonConstReferenceType()))))) + .bind(NodeID<Expr>::value)), Stm, Context); if (const Stmt *S = findExprMutation(Casts)) @@ -430,13 +511,14 @@ const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) { const auto Calls = match(findAll(callExpr(callee(namedDecl( hasAnyName("::std::move", "::std::forward"))), - hasArgument(0, canResolveToExpr(equalsNode(Exp)))) + hasArgument(0, canResolveToExpr(Exp))) .bind("expr")), Stm, Context); return findExprMutation(Calls); } -const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findRangeLoopMutation(const Expr *Exp) { // Keep the ordering for the specific initialization matches to happen first, // because it is cheaper to match all potential modifications of the loop // variable. @@ -446,12 +528,14 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { const auto DeclStmtToNonRefToArray = declStmt(hasSingleDecl(varDecl(hasType( hasUnqualifiedDesugaredType(referenceType(pointee(arrayType()))))))); const auto RefToArrayRefToElements = match( - findAll(stmt(cxxForRangeStmt( - hasLoopVariable(varDecl(hasType(nonConstReferenceType())) - .bind(NodeID<Decl>::value)), - hasRangeStmt(DeclStmtToNonRefToArray), - hasRangeInit(canResolveToExpr(equalsNode(Exp))))) - .bind("stmt")), + findFirst(stmt(cxxForRangeStmt( + hasLoopVariable( + varDecl(anyOf(hasType(nonConstReferenceType()), + hasType(nonConstPointerType()))) + .bind(NodeID<Decl>::value)), + hasRangeStmt(DeclStmtToNonRefToArray), + hasRangeInit(canResolveToExpr(Exp)))) + .bind("stmt")), Stm, Context); if (const auto *BadRangeInitFromArray = @@ -475,12 +559,12 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { hasSingleDecl(varDecl(hasType(hasUnqualifiedDesugaredType(referenceType( pointee(hasDeclaration(cxxRecordDecl(HasAnyNonConstIterator))))))))); - const auto RefToContainerBadIterators = - match(findAll(stmt(cxxForRangeStmt(allOf( - hasRangeStmt(DeclStmtToNonConstIteratorContainer), - hasRangeInit(canResolveToExpr(equalsNode(Exp)))))) - .bind("stmt")), - Stm, Context); + const auto RefToContainerBadIterators = match( + findFirst(stmt(cxxForRangeStmt(allOf( + hasRangeStmt(DeclStmtToNonConstIteratorContainer), + hasRangeInit(canResolveToExpr(Exp))))) + .bind("stmt")), + Stm, Context); if (const auto *BadIteratorsContainer = selectFirst<Stmt>("stmt", RefToContainerBadIterators)) @@ -492,48 +576,48 @@ const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) { match(findAll(cxxForRangeStmt( hasLoopVariable(varDecl(hasType(nonConstReferenceType())) .bind(NodeID<Decl>::value)), - hasRangeInit(canResolveToExpr(equalsNode(Exp))))), + hasRangeInit(canResolveToExpr(Exp)))), Stm, Context); return findDeclMutation(LoopVars); } -const Stmt *ExprMutationAnalyzer::findReferenceMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findReferenceMutation(const Expr *Exp) { // Follow non-const reference returned by `operator*()` of move-only classes. 
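// Editor's illustration (not part of the patch): with
// `std::unique_ptr<int> p;`, the write `*p = 1` goes through `operator*()`
// returning `int &` and is reported as a mutation of `p` itself.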
// These are typically smart pointers with unique ownership so we treat // mutation of pointee as mutation of the smart pointer itself. - const auto Ref = - match(findAll(cxxOperatorCallExpr( - hasOverloadedOperatorName("*"), - callee(cxxMethodDecl(ofClass(isMoveOnly()), - returns(nonConstReferenceType()))), - argumentCountIs(1), - hasArgument(0, canResolveToExpr(equalsNode(Exp)))) - .bind(NodeID<Expr>::value)), - Stm, Context); + const auto Ref = match( + findAll(cxxOperatorCallExpr( + hasOverloadedOperatorName("*"), + callee(cxxMethodDecl(ofClass(isMoveOnly()), + returns(nonConstReferenceType()))), + argumentCountIs(1), hasArgument(0, canResolveToExpr(Exp))) + .bind(NodeID<Expr>::value)), + Stm, Context); if (const Stmt *S = findExprMutation(Ref)) return S; // If 'Exp' is bound to a non-const reference, check all declRefExpr to that. const auto Refs = match( stmt(forEachDescendant( - varDecl( - hasType(nonConstReferenceType()), - hasInitializer(anyOf(canResolveToExpr(equalsNode(Exp)), - memberExpr(hasObjectExpression( - canResolveToExpr(equalsNode(Exp)))))), - hasParent(declStmt().bind("stmt")), - // Don't follow the reference in range statement, we've - // handled that separately. - unless(hasParent(declStmt(hasParent( - cxxForRangeStmt(hasRangeStmt(equalsBoundNode("stmt")))))))) + varDecl(hasType(nonConstReferenceType()), + hasInitializer(anyOf( + canResolveToExpr(Exp), + memberExpr(hasObjectExpression(canResolveToExpr(Exp))))), + hasParent(declStmt().bind("stmt")), + // Don't follow the reference in range statement, we've + // handled that separately. + unless(hasParent(declStmt(hasParent(cxxForRangeStmt( + hasRangeStmt(equalsBoundNode("stmt")))))))) .bind(NodeID<Decl>::value))), Stm, Context); return findDeclMutation(Refs); } -const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { +const Stmt * +ExprMutationAnalyzer::Analyzer::findFunctionArgMutation(const Expr *Exp) { const auto NonConstRefParam = forEachArgumentWithParam( - canResolveToExpr(equalsNode(Exp)), + canResolveToExpr(Exp), parmVarDecl(hasType(nonConstReferenceType())).bind("parm")); const auto IsInstantiated = hasDeclaration(isInstantiated()); const auto FuncDecl = hasDeclaration(functionDecl().bind("func")); @@ -569,10 +653,9 @@ const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { if (const auto *RefType = ParmType->getAs<RValueReferenceType>()) { if (!RefType->getPointeeType().getQualifiers() && RefType->getPointeeType()->getAs<TemplateTypeParmType>()) { - std::unique_ptr<FunctionParmMutationAnalyzer> &Analyzer = - FuncParmAnalyzer[Func]; - if (!Analyzer) - Analyzer.reset(new FunctionParmMutationAnalyzer(*Func, Context)); + FunctionParmMutationAnalyzer *Analyzer = + FunctionParmMutationAnalyzer::getFunctionParmMutationAnalyzer( + *Func, Context, Memorized); if (Analyzer->findMutation(Parm)) return Exp; continue; @@ -585,15 +668,17 @@ const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) { } FunctionParmMutationAnalyzer::FunctionParmMutationAnalyzer( - const FunctionDecl &Func, ASTContext &Context) - : BodyAnalyzer(*Func.getBody(), Context) { + const FunctionDecl &Func, ASTContext &Context, + ExprMutationAnalyzer::Memoized &Memorized) + : BodyAnalyzer(*Func.getBody(), Context, Memorized) { if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(&Func)) { // CXXCtorInitializer might also mutate Param but they're not part of // function body, check them eagerly here since they're typically trivial. 
for (const CXXCtorInitializer *Init : Ctor->inits()) { - ExprMutationAnalyzer InitAnalyzer(*Init->getInit(), Context); + ExprMutationAnalyzer::Analyzer InitAnalyzer(*Init->getInit(), Context, + Memorized); for (const ParmVarDecl *Parm : Ctor->parameters()) { - if (Results.find(Parm) != Results.end()) + if (Results.contains(Parm)) continue; if (const Stmt *S = InitAnalyzer.findMutation(Parm)) Results[Parm] = S; @@ -607,11 +692,14 @@ FunctionParmMutationAnalyzer::findMutation(const ParmVarDecl *Parm) { const auto Memoized = Results.find(Parm); if (Memoized != Results.end()) return Memoized->second; - + // To handle call A -> call B -> call A. Assume parameters of A is not mutated + // before analyzing parameters of A. Then when analyzing the second "call A", + // FunctionParmMutationAnalyzer can use this memoized value to avoid infinite + // recursion. + Results[Parm] = nullptr; if (const Stmt *S = BodyAnalyzer.findMutation(Parm)) return Results[Parm] = S; - - return Results[Parm] = nullptr; + return Results[Parm]; } } // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ASTOps.cpp new file mode 100644 index 000000000000..27d42a7b5085 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ASTOps.cpp @@ -0,0 +1,287 @@ +//===-- ASTOps.cc -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on AST nodes that are used in flow-sensitive analysis. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/AST/ComputeDependence.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include <cassert> +#include <iterator> +#include <vector> + +#define DEBUG_TYPE "dataflow" + +namespace clang::dataflow { + +const Expr &ignoreCFGOmittedNodes(const Expr &E) { + const Expr *Current = &E; + const Expr *Last = nullptr; + while (Current != Last) { + Last = Current; + if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { + Current = EWC->getSubExpr(); + assert(Current != nullptr); + } + if (auto *CE = dyn_cast<ConstantExpr>(Current)) { + Current = CE->getSubExpr(); + assert(Current != nullptr); + } + Current = Current->IgnoreParens(); + assert(Current != nullptr); + } + return *Current; +} + +const Stmt &ignoreCFGOmittedNodes(const Stmt &S) { + if (auto *E = dyn_cast<Expr>(&S)) + return ignoreCFGOmittedNodes(*E); + return S; +} + +// FIXME: Does not precisely handle non-virtual diamond inheritance. A single +// field decl will be modeled for all instances of the inherited field. 
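// Editor's illustration (not part of the patch) of the FIXME above: for a
// non-virtual diamond
//   struct A { int f; }; struct B : A {}; struct C : A {};
//   struct D : B, C {};
// a `D` object holds two distinct `A::f` subobjects, yet the recursion
// below inserts the single `FieldDecl` of `A::f` only once, so both copies
// end up sharing one model.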
+static void getFieldsFromClassHierarchy(QualType Type, FieldSet &Fields) { + if (Type->isIncompleteType() || Type->isDependentType() || + !Type->isRecordType()) + return; + + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) + Fields.insert(Field); + if (auto *CXXRecord = Type->getAsCXXRecordDecl()) + for (const CXXBaseSpecifier &Base : CXXRecord->bases()) + getFieldsFromClassHierarchy(Base.getType(), Fields); +} + +/// Gets the set of all fields in the type. +FieldSet getObjectFields(QualType Type) { + FieldSet Fields; + getFieldsFromClassHierarchy(Type, Fields); + return Fields; +} + +bool containsSameFields(const FieldSet &Fields, + const RecordStorageLocation::FieldToLoc &FieldLocs) { + if (Fields.size() != FieldLocs.size()) + return false; + for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) + if (!Fields.contains(cast_or_null<FieldDecl>(Field))) + return false; + return true; +} + +/// Returns the fields of a `RecordDecl` that are initialized by an +/// `InitListExpr` or `CXXParenListInitExpr`, in the order in which they appear +/// in `InitListExpr::inits()` / `CXXParenListInitExpr::getInitExprs()`. +/// `InitList->getType()` must be a record type. +template <class InitListT> +static std::vector<const FieldDecl *> +getFieldsForInitListExpr(const InitListT *InitList) { + const RecordDecl *RD = InitList->getType()->getAsRecordDecl(); + assert(RD != nullptr); + + std::vector<const FieldDecl *> Fields; + + if (InitList->getType()->isUnionType()) { + if (const FieldDecl *Field = InitList->getInitializedFieldInUnion()) + Fields.push_back(Field); + return Fields; + } + + // Unnamed bitfields are only used for padding and do not appear in + // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s + // field list, and we thus need to remove them before mapping inits to + // fields to avoid mapping inits to the wrongs fields. + llvm::copy_if( + RD->fields(), std::back_inserter(Fields), + [](const FieldDecl *Field) { return !Field->isUnnamedBitField(); }); + return Fields; +} + +RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) + : RecordInitListHelper(InitList->getType(), + getFieldsForInitListExpr(InitList), + InitList->inits()) {} + +RecordInitListHelper::RecordInitListHelper( + const CXXParenListInitExpr *ParenInitList) + : RecordInitListHelper(ParenInitList->getType(), + getFieldsForInitListExpr(ParenInitList), + ParenInitList->getInitExprs()) {} + +RecordInitListHelper::RecordInitListHelper( + QualType Ty, std::vector<const FieldDecl *> Fields, + ArrayRef<Expr *> Inits) { + auto *RD = Ty->getAsCXXRecordDecl(); + assert(RD != nullptr); + + // Unions initialized with an empty initializer list need special treatment. + // For structs/classes initialized with an empty initializer list, Clang + // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions, + // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. 
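// Editor's illustration (not part of the patch): for `union U { int a; };`
// initialized as `U u = {};`, `inits()` is empty even though `a` needs an
// implicit value-initialization, which is what gets materialized below.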
+ SmallVector<Expr *> InitsForUnion; + if (Ty->isUnionType() && Inits.empty()) { + assert(Fields.size() <= 1); + if (!Fields.empty()) { + ImplicitValueInitForUnion.emplace(Fields.front()->getType()); + InitsForUnion.push_back(&*ImplicitValueInitForUnion); + } + Inits = InitsForUnion; + } + + size_t InitIdx = 0; + + assert(Fields.size() + RD->getNumBases() == Inits.size()); + for (const CXXBaseSpecifier &Base : RD->bases()) { + assert(InitIdx < Inits.size()); + Expr *Init = Inits[InitIdx++]; + BaseInits.emplace_back(&Base, Init); + } + + assert(Fields.size() == Inits.size() - InitIdx); + for (const FieldDecl *Field : Fields) { + assert(InitIdx < Inits.size()); + Expr *Init = Inits[InitIdx++]; + FieldInits.emplace_back(Field, Init); + } +} + +static void insertIfGlobal(const Decl &D, + llvm::DenseSet<const VarDecl *> &Globals) { + if (auto *V = dyn_cast<VarDecl>(&D)) + if (V->hasGlobalStorage()) + Globals.insert(V); +} + +static void insertIfFunction(const Decl &D, + llvm::DenseSet<const FunctionDecl *> &Funcs) { + if (auto *FD = dyn_cast<FunctionDecl>(&D)) + Funcs.insert(FD); +} + +static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { + // Use getCalleeDecl instead of getMethodDecl in order to handle + // pointer-to-member calls. + const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl()); + if (!MethodDecl) + return nullptr; + auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody()); + if (!Body || Body->size() != 1) + return nullptr; + if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin())) + if (auto *Return = RS->getRetValue()) + return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts()); + return nullptr; +} + +class ReferencedDeclsVisitor + : public AnalysisASTVisitor<ReferencedDeclsVisitor> { +public: + ReferencedDeclsVisitor(ReferencedDecls &Referenced) + : Referenced(Referenced) {} + + void TraverseConstructorInits(const CXXConstructorDecl *Ctor) { + for (const CXXCtorInitializer *Init : Ctor->inits()) { + if (Init->isMemberInitializer()) { + Referenced.Fields.insert(Init->getMember()); + } else if (Init->isIndirectMemberInitializer()) { + for (const auto *I : Init->getIndirectMember()->chain()) + Referenced.Fields.insert(cast<FieldDecl>(I)); + } + + Expr *InitExpr = Init->getInit(); + + // Also collect declarations referenced in `InitExpr`. + TraverseStmt(InitExpr); + + // If this is a `CXXDefaultInitExpr`, also collect declarations referenced + // within the default expression. + if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr)) + TraverseStmt(DefaultInit->getExpr()); + } + } + + bool VisitDecl(Decl *D) { + insertIfGlobal(*D, Referenced.Globals); + insertIfFunction(*D, Referenced.Functions); + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *E) { + insertIfGlobal(*E->getDecl(), Referenced.Globals); + insertIfFunction(*E->getDecl(), Referenced.Functions); + return true; + } + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *C) { + // If this is a method that returns a member variable but does nothing else, + // model the field of the return value. + if (MemberExpr *E = getMemberForAccessor(*C)) + if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) + Referenced.Fields.insert(FD); + return true; + } + + bool VisitMemberExpr(MemberExpr *E) { + // FIXME: should we be using `E->getFoundDecl()`? 
+ const ValueDecl *VD = E->getMemberDecl(); + insertIfGlobal(*VD, Referenced.Globals); + insertIfFunction(*VD, Referenced.Functions); + if (const auto *FD = dyn_cast<FieldDecl>(VD)) + Referenced.Fields.insert(FD); + return true; + } + + bool VisitInitListExpr(InitListExpr *InitList) { + if (InitList->getType()->isRecordType()) + for (const auto *FD : getFieldsForInitListExpr(InitList)) + Referenced.Fields.insert(FD); + return true; + } + + bool VisitCXXParenListInitExpr(CXXParenListInitExpr *ParenInitList) { + if (ParenInitList->getType()->isRecordType()) + for (const auto *FD : getFieldsForInitListExpr(ParenInitList)) + Referenced.Fields.insert(FD); + return true; + } + +private: + ReferencedDecls &Referenced; +}; + +ReferencedDecls getReferencedDecls(const FunctionDecl &FD) { + ReferencedDecls Result; + ReferencedDeclsVisitor Visitor(Result); + Visitor.TraverseStmt(FD.getBody()); + if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&FD)) + Visitor.TraverseConstructorInits(CtorDecl); + + return Result; +} + +ReferencedDecls getReferencedDecls(const Stmt &S) { + ReferencedDecls Result; + ReferencedDeclsVisitor Visitor(Result); + Visitor.TraverseStmt(const_cast<Stmt *>(&S)); + return Result; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp new file mode 100644 index 000000000000..255543021a99 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp @@ -0,0 +1,183 @@ +//===- AdornedCFG.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an `AdornedCFG` class that is used by dataflow analyses +// that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Error.h" +#include <utility> + +namespace clang { +namespace dataflow { + +/// Returns a map from statements to basic blocks that contain them. +static llvm::DenseMap<const Stmt *, const CFGBlock *> +buildStmtToBasicBlockMap(const CFG &Cfg) { + llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock; + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) { + auto Stmt = Element.getAs<CFGStmt>(); + if (!Stmt) + continue; + + StmtToBlock[Stmt->getStmt()] = Block; + } + } + // Some terminator conditions don't appear as a `CFGElement` anywhere else - + // for example, this is true if the terminator condition is a `&&` or `||` + // operator. + // We associate these conditions with the block the terminator appears in, + // but only if the condition has not already appeared as a regular + // `CFGElement`. (The `insert()` below does nothing if the key already exists + // in the map.) 
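// Editor's illustration (not part of the patch): llvm::DenseMap::insert
// follows std::map semantics and never overwrites:
//   M.insert({S, B1}); // inserted
//   M.insert({S, B2}); // no-op; M.lookup(S) is still B1
// Both passes below rely on this to keep the CFGElement-based mapping.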
+ for (const CFGBlock *Block : Cfg) { + if (Block != nullptr) + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + StmtToBlock.insert({TerminatorCond, Block}); + } + // Terminator statements typically don't appear as a `CFGElement` anywhere + // else, so we want to associate them with the block that they terminate. + // However, there are some important special cases: + // - The conditional operator is a type of terminator, but it also appears + // as a regular `CFGElement`, and we want to associate it with the block + // in which it appears as a `CFGElement`. + // - The `&&` and `||` operators are types of terminators, but like the + // conditional operator, they can appear as a regular `CFGElement` or + // as a terminator condition (see above). + // We process terminators last to make sure that we only associate them with + // the block they terminate if they haven't previously occurred as a regular + // `CFGElement` or as a terminator condition. + for (const CFGBlock *Block : Cfg) { + if (Block != nullptr) + if (const Stmt *TerminatorStmt = Block->getTerminatorStmt()) + StmtToBlock.insert({TerminatorStmt, Block}); + } + return StmtToBlock; +} + +static llvm::BitVector findReachableBlocks(const CFG &Cfg) { + llvm::BitVector BlockReachable(Cfg.getNumBlockIDs(), false); + + llvm::SmallVector<const CFGBlock *> BlocksToVisit; + BlocksToVisit.push_back(&Cfg.getEntry()); + while (!BlocksToVisit.empty()) { + const CFGBlock *Block = BlocksToVisit.back(); + BlocksToVisit.pop_back(); + + if (BlockReachable[Block->getBlockID()]) + continue; + + BlockReachable[Block->getBlockID()] = true; + + for (const CFGBlock *Succ : Block->succs()) + if (Succ) + BlocksToVisit.push_back(Succ); + } + + return BlockReachable; +} + +static llvm::DenseSet<const CFGBlock *> +buildContainsExprConsumedInDifferentBlock( + const CFG &Cfg, + const llvm::DenseMap<const Stmt *, const CFGBlock *> &StmtToBlock) { + llvm::DenseSet<const CFGBlock *> Result; + + auto CheckChildExprs = [&Result, &StmtToBlock](const Stmt *S, + const CFGBlock *Block) { + for (const Stmt *Child : S->children()) { + if (!isa_and_nonnull<Expr>(Child)) + continue; + const CFGBlock *ChildBlock = StmtToBlock.lookup(Child); + if (ChildBlock != Block) + Result.insert(ChildBlock); + } + }; + + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) + if (auto S = Element.getAs<CFGStmt>()) + CheckChildExprs(S->getStmt(), Block); + + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + CheckChildExprs(TerminatorCond, Block); + } + + return Result; +} + +llvm::Expected<AdornedCFG> AdornedCFG::build(const FunctionDecl &Func) { + if (!Func.doesThisDeclarationHaveABody()) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot analyze function without a body"); + + return build(Func, *Func.getBody(), Func.getASTContext()); +} + +llvm::Expected<AdornedCFG> AdornedCFG::build(const Decl &D, Stmt &S, + ASTContext &C) { + if (D.isTemplated()) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot analyze templated declarations"); + + // The shape of certain elements of the AST can vary depending on the + // language. We currently only support C++. 
+ if (!C.getLangOpts().CPlusPlus || C.getLangOpts().ObjC) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Can only analyze C++"); + + CFG::BuildOptions Options; + Options.PruneTriviallyFalseEdges = true; + Options.AddImplicitDtors = true; + Options.AddTemporaryDtors = true; + Options.AddInitializers = true; + Options.AddCXXDefaultInitExprInCtors = true; + Options.AddLifetime = true; + + // Ensure that all sub-expressions in basic blocks are evaluated. + Options.setAllAlwaysAdd(); + + auto Cfg = CFG::buildCFG(&D, &S, &C, Options); + if (Cfg == nullptr) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "CFG::buildCFG failed"); + + llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock = + buildStmtToBasicBlockMap(*Cfg); + + llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + + llvm::DenseSet<const CFGBlock *> ContainsExprConsumedInDifferentBlock = + buildContainsExprConsumedInDifferentBlock(*Cfg, StmtToBlock); + + return AdornedCFG(D, std::move(Cfg), std::move(StmtToBlock), + std::move(BlockReachable), + std::move(ContainsExprConsumedInDifferentBlock)); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp new file mode 100644 index 000000000000..81137e8088e3 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp @@ -0,0 +1,213 @@ +//===-- Arena.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Arena.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/Support/Error.h" +#include <string> + +namespace clang::dataflow { + +static std::pair<const Formula *, const Formula *> +canonicalFormulaPair(const Formula &LHS, const Formula &RHS) { + auto Res = std::make_pair(&LHS, &RHS); + if (&RHS < &LHS) // FIXME: use a deterministic order instead + std::swap(Res.first, Res.second); + return Res; +} + +template <class Key, class ComputeFunc> +const Formula &cached(llvm::DenseMap<Key, const Formula *> &Cache, Key K, + ComputeFunc &&Compute) { + auto [It, Inserted] = Cache.try_emplace(std::forward<Key>(K)); + if (Inserted) + It->second = Compute(); + return *It->second; +} + +const Formula &Arena::makeAtomRef(Atom A) { + return cached(AtomRefs, A, [&] { + return &Formula::create(Alloc, Formula::AtomRef, {}, + static_cast<unsigned>(A)); + }); +} + +const Formula &Arena::makeAnd(const Formula &LHS, const Formula &RHS) { + return cached(Ands, canonicalFormulaPair(LHS, RHS), [&] { + if (&LHS == &RHS) + return &LHS; + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &RHS : &LHS; + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &LHS : &RHS; + + return &Formula::create(Alloc, Formula::And, {&LHS, &RHS}); + }); +} + +const Formula &Arena::makeOr(const Formula &LHS, const Formula &RHS) { + return cached(Ors, canonicalFormulaPair(LHS, RHS), [&] { + if (&LHS == &RHS) + return &LHS; + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &LHS : &RHS; + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? 
&RHS : &LHS; + + return &Formula::create(Alloc, Formula::Or, {&LHS, &RHS}); + }); +} + +const Formula &Arena::makeNot(const Formula &Val) { + return cached(Nots, &Val, [&] { + if (Val.kind() == Formula::Not) + return Val.operands()[0]; + if (Val.kind() == Formula::Literal) + return &makeLiteral(!Val.literal()); + + return &Formula::create(Alloc, Formula::Not, {&Val}); + }); +} + +const Formula &Arena::makeImplies(const Formula &LHS, const Formula &RHS) { + return cached(Implies, std::make_pair(&LHS, &RHS), [&] { + if (&LHS == &RHS) + return &makeLiteral(true); + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &RHS : &makeLiteral(true); + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &RHS : &makeNot(LHS); + + return &Formula::create(Alloc, Formula::Implies, {&LHS, &RHS}); + }); +} + +const Formula &Arena::makeEquals(const Formula &LHS, const Formula &RHS) { + return cached(Equals, canonicalFormulaPair(LHS, RHS), [&] { + if (&LHS == &RHS) + return &makeLiteral(true); + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &RHS : &makeNot(RHS); + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &LHS : &makeNot(LHS); + + return &Formula::create(Alloc, Formula::Equal, {&LHS, &RHS}); + }); +} + +IntegerValue &Arena::makeIntLiteral(llvm::APInt Value) { + auto [It, Inserted] = IntegerLiterals.try_emplace(Value, nullptr); + + if (Inserted) + It->second = &create<IntegerValue>(); + return *It->second; +} + +BoolValue &Arena::makeBoolValue(const Formula &F) { + auto [It, Inserted] = FormulaValues.try_emplace(&F); + if (Inserted) + It->second = (F.kind() == Formula::AtomRef) + ? (BoolValue *)&create<AtomicBoolValue>(F) + : &create<FormulaBoolValue>(F); + return *It->second; +} + +namespace { +const Formula *parse(Arena &A, llvm::StringRef &In) { + auto EatSpaces = [&] { In = In.ltrim(' '); }; + EatSpaces(); + + if (In.consume_front("!")) { + if (auto *Arg = parse(A, In)) + return &A.makeNot(*Arg); + return nullptr; + } + + if (In.consume_front("(")) { + auto *Arg1 = parse(A, In); + if (!Arg1) + return nullptr; + + EatSpaces(); + decltype(&Arena::makeOr) Op; + if (In.consume_front("|")) + Op = &Arena::makeOr; + else if (In.consume_front("&")) + Op = &Arena::makeAnd; + else if (In.consume_front("=>")) + Op = &Arena::makeImplies; + else if (In.consume_front("=")) + Op = &Arena::makeEquals; + else + return nullptr; + + auto *Arg2 = parse(A, In); + if (!Arg2) + return nullptr; + + EatSpaces(); + if (!In.consume_front(")")) + return nullptr; + + return &(A.*Op)(*Arg1, *Arg2); + } + + // For now, only support unnamed variables V0, V1 etc. + // FIXME: parse e.g. "X" by allocating an atom and storing a name somewhere. 
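// Editor's illustration (not part of the patch): the grammar is fully
// parenthesized with prefix `!`, so parseFormula("((V0 & V1) => !V2)")
// succeeds, while parseFormula("V0 & V1") fails -- the binary operator
// lacks the enclosing parentheses and leaves " & V1" unconsumed.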
+ if (In.consume_front("V")) { + std::underlying_type_t<Atom> At; + if (In.consumeInteger(10, At)) + return nullptr; + return &A.makeAtomRef(static_cast<Atom>(At)); + } + + if (In.consume_front("true")) + return &A.makeLiteral(true); + if (In.consume_front("false")) + return &A.makeLiteral(false); + + return nullptr; +} + +class FormulaParseError : public llvm::ErrorInfo<FormulaParseError> { + std::string Formula; + unsigned Offset; + +public: + static char ID; + FormulaParseError(llvm::StringRef Formula, unsigned Offset) + : Formula(Formula), Offset(Offset) {} + + void log(raw_ostream &OS) const override { + OS << "bad formula at offset " << Offset << "\n"; + OS << Formula << "\n"; + OS.indent(Offset) << "^"; + } + + std::error_code convertToErrorCode() const override { + return std::make_error_code(std::errc::invalid_argument); + } +}; + +char FormulaParseError::ID = 0; + +} // namespace + +llvm::Expected<const Formula &> Arena::parseFormula(llvm::StringRef In) { + llvm::StringRef Rest = In; + auto *Result = parse(*this, Rest); + if (!Result) // parse() hit something unparseable + return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size()); + Rest = Rest.ltrim(); + if (!Rest.empty()) // parse didn't consume all the input + return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size()); + return *Result; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp new file mode 100644 index 000000000000..2410ce1e7bd6 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp @@ -0,0 +1,303 @@ +//===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A representation of a boolean formula in 3-CNF. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/CNFFormula.h" +#include "llvm/ADT/DenseSet.h" + +#include <queue> + +namespace clang { +namespace dataflow { + +namespace { + +/// Applies simplifications while building up a BooleanFormula. +/// We keep track of unit clauses, which tell us variables that must be +/// true/false in any model that satisfies the overall formula. +/// Such variables can be dropped from subsequently-added clauses, which +/// may in turn yield more unit clauses or even a contradiction. +/// The total added complexity of this preprocessing is O(N) where we +/// for every clause, we do a lookup for each unit clauses. +/// The lookup is O(1) on average. This method won't catch all +/// contradictory formulas, more passes can in principle catch +/// more cases but we leave all these and the general case to the +/// proper SAT solver. +struct CNFFormulaBuilder { + // Formula should outlive CNFFormulaBuilder. + explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {} + + /// Adds the `L1 v ... v Ln` clause to the formula. Applies + /// simplifications, based on single-literal clauses. + /// + /// Requirements: + /// + /// `Li` must not be `NullLit`. + /// + /// All literals must be distinct. + void addClause(ArrayRef<Literal> Literals) { + // We generate clauses with up to 3 literals in this file. 
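// Editor's illustration (not part of the patch): once a unit clause has
// pinned `X` true, a later clause (X v !Y) is dropped entirely (it is
// already satisfied), while a later (!X v Y) is shortened to the new unit
// clause (Y).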
+ assert(!Literals.empty() && Literals.size() <= 3);
+ // Contains literals of the simplified clause.
+ llvm::SmallVector<Literal> Simplified;
+ for (auto L : Literals) {
+ assert(L != NullLit &&
+ llvm::all_of(Simplified, [L](Literal S) { return S != L; }));
+ auto X = var(L);
+ if (trueVars.contains(X)) { // X must be true
+ if (isPosLit(L))
+ return; // Omit clause `(... v X v ...)`, it is `true`.
+ else
+ continue; // Omit `!X` from `(... v !X v ...)`.
+ }
+ if (falseVars.contains(X)) { // X must be false
+ if (isNegLit(L))
+ return; // Omit clause `(... v !X v ...)`, it is `true`.
+ else
+ continue; // Omit `X` from `(... v X v ...)`.
+ }
+ Simplified.push_back(L);
+ }
+ if (Simplified.empty()) {
+ // Simplification made the clause empty, which is equivalent to `false`.
+ // We already know that this formula is unsatisfiable.
+ Formula.addClause(Simplified);
+ return;
+ }
+ if (Simplified.size() == 1) {
+ // We have a new unit clause.
+ const Literal lit = Simplified.front();
+ const Variable v = var(lit);
+ if (isPosLit(lit))
+ trueVars.insert(v);
+ else
+ falseVars.insert(v);
+ }
+ Formula.addClause(Simplified);
+ }
+
+ /// Returns true if we observed a contradiction while adding clauses.
+ /// In that case, the formula is already known to be unsatisfiable.
+ bool isKnownContradictory() { return Formula.knownContradictory(); }
+
+private:
+ CNFFormula &Formula;
+ llvm::DenseSet<Variable> trueVars;
+ llvm::DenseSet<Variable> falseVars;
+};
+
+} // namespace
+
+CNFFormula::CNFFormula(Variable LargestVar)
+ : LargestVar(LargestVar), KnownContradictory(false) {
+ Clauses.push_back(0);
+ ClauseStarts.push_back(0);
+}
+
+void CNFFormula::addClause(ArrayRef<Literal> lits) {
+ assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
+
+ if (lits.empty())
+ KnownContradictory = true;
+
+ const size_t S = Clauses.size();
+ ClauseStarts.push_back(S);
+ Clauses.insert(Clauses.end(), lits.begin(), lits.end());
+}
+
+CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
+ llvm::DenseMap<Variable, Atom> &Atomics) {
+ // The general strategy of the algorithm implemented below is to map each
+ // of the sub-formulas in `Formulas` to a unique variable and use these
+ // variables in the resulting CNF expression to avoid exponential blow up.
+ // The number of literals in the resulting formula is guaranteed to be
+ // linear in the number of sub-formulas in `Formulas`.
+
+ // Map each sub-formula in `Formulas` to a unique variable.
+ llvm::DenseMap<const Formula *, Variable> FormulaToVar;
+ // Store the variable identifier and `Atom` for each atomic boolean.
+ Variable NextVar = 1;
+ {
+ std::queue<const Formula *> UnprocessedFormulas;
+ for (const Formula *F : Formulas)
+ UnprocessedFormulas.push(F);
+ while (!UnprocessedFormulas.empty()) {
+ Variable Var = NextVar;
+ const Formula *F = UnprocessedFormulas.front();
+ UnprocessedFormulas.pop();
+
+ if (!FormulaToVar.try_emplace(F, Var).second)
+ continue;
+ ++NextVar;
+
+ for (const Formula *Op : F->operands())
+ UnprocessedFormulas.push(Op);
+ if (F->kind() == Formula::AtomRef)
+ Atomics[Var] = F->getAtom();
+ }
+ }
+
+ auto GetVar = [&FormulaToVar](const Formula *F) {
+ auto ValIt = FormulaToVar.find(F);
+ assert(ValIt != FormulaToVar.end());
+ return ValIt->second;
+ };
+
+ CNFFormula CNF(NextVar - 1);
+ std::vector<bool> ProcessedSubVals(NextVar, false);
+ CNFFormulaBuilder builder(CNF);
+
+ // Add a conjunct for each variable that represents a top-level formula in
+ // `Formulas` (the input formulas are implicitly conjoined).
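+ // For instance (hypothetical input): if `Formulas` holds the single
+ // formula `A && B`, mapped to `V1` with operands mapped to `V2` and `V3`,
+ // the loop below emits the unit clause `(V1)`, and the subsequent loop
+ // adds the defining clauses `(!V1 v V2)`, `(!V1 v V3)`, and
+ // `(V1 v !V2 v !V3)`.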
+ for (const Formula *F : Formulas) + builder.addClause(posLit(GetVar(F))); + + // Add conjuncts that represent the mapping between newly-created variables + // and their corresponding sub-formulas. + std::queue<const Formula *> UnprocessedFormulas; + for (const Formula *F : Formulas) + UnprocessedFormulas.push(F); + while (!UnprocessedFormulas.empty()) { + const Formula *F = UnprocessedFormulas.front(); + UnprocessedFormulas.pop(); + const Variable Var = GetVar(F); + + if (ProcessedSubVals[Var]) + continue; + ProcessedSubVals[Var] = true; + + switch (F->kind()) { + case Formula::AtomRef: + break; + case Formula::Literal: + CNF.addClause(F->literal() ? posLit(Var) : negLit(Var)); + break; + case Formula::And: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + } else { + // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({negLit(Var), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); + } + break; + } + case Formula::Or: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + } else { + // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + builder.addClause({posLit(Var), negLit(RHS)}); + } + break; + } + case Formula::Not: { + const Variable Operand = GetVar(F->operands()[0]); + + // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), negLit(Operand)}); + builder.addClause({posLit(Var), posLit(Operand)}); + break; + } + case Formula::Implies: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + // `X <=> (A => B)` is equivalent to + // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in + // conjunctive normal form. Below we add each of the conjuncts of + // the latter expression to the result. + builder.addClause({posLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(RHS)}); + builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); + break; + } + case Formula::Equal: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A <=> A)` is equivalent to `X` which is already in + // conjunctive normal form. 
Below we add the single conjunct `X` to the result.
+ builder.addClause(posLit(Var));
+
+ // No need to visit the sub-formulas of `F`.
+ continue;
+ }
+ // `X <=> (A <=> B)` is equivalent to
+ // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
+ // is already in conjunctive normal form. Below we add each of the
+ // conjuncts of the latter expression to the result.
+ builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
+ builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
+ builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
+ builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
+ break;
+ }
+ }
+ if (builder.isKnownContradictory()) {
+ return CNF;
+ }
+ for (const Formula *Child : F->operands())
+ UnprocessedFormulas.push(Child);
+ }
+
+ // Unit clauses that were added later were not considered for the
+ // simplification of earlier clauses. Do a final pass to find more
+ // opportunities for simplification.
+ CNFFormula FinalCNF(NextVar - 1);
+ CNFFormulaBuilder FinalBuilder(FinalCNF);
+
+ // Collect unit clauses.
+ for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
+ if (CNF.clauseSize(C) == 1) {
+ FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
+ }
+ }
+
+ // Add all clauses that were added previously, preserving the order.
+ for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
+ FinalBuilder.addClause(CNF.clauseLiterals(C));
+ if (FinalBuilder.isKnownContradictory()) {
+ break;
+ }
+ }
+ // It is possible that this final pass produced new unit clauses again,
+ // but we stop here and leave the rest to the solver algorithm.
+ return FinalCNF;
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
new file mode 100644
index 000000000000..4b86daa56d7b
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -0,0 +1,362 @@
+//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DataflowAnalysisContext class that owns objects that
+// encompass the state of a program and stores context that is used during
+// dataflow analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+static llvm::cl::opt<std::string> DataflowLog(
+ "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
+ llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
+ "log to stderr. With an arg, writes HTML logs under the "
+ "specified directory (one per analyzed function)."));
+
+namespace clang {
+namespace dataflow {
+
+FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
+ // During context-sensitive analysis, a struct may be allocated in one
+ // function, but its fields accessed in a function lower in the stack than
+ // the allocation. Since we only collect fields used in the function where
+ // the allocation occurs, we can't apply that filter when performing
+ // context-sensitive analysis. But this only applies to storage locations,
+ // since field access is not allowed to fail. In contrast, field *values*
+ // don't need this allowance, since the API allows for uninitialized fields.
+ if (Opts.ContextSensitiveOpts)
+ return getObjectFields(Type);
+
+ return llvm::set_intersection(getObjectFields(Type), ModeledFields);
+}
+
+void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
+ ModeledFields.set_union(Fields);
+}
+
+StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
+ if (!Type.isNull() && Type->isRecordType()) {
+ llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
+ for (const FieldDecl *Field : getModeledFields(Type))
+ if (Field->getType()->isReferenceType())
+ FieldLocs.insert({Field, nullptr});
+ else
+ FieldLocs.insert({Field, &createStorageLocation(
+ Field->getType().getNonReferenceType())});
+
+ RecordStorageLocation::SyntheticFieldMap SyntheticFields;
+ for (const auto &Entry : getSyntheticFields(Type))
+ SyntheticFields.insert(
+ {Entry.getKey(),
+ &createStorageLocation(Entry.getValue().getNonReferenceType())});
+
+ return createRecordStorageLocation(Type, std::move(FieldLocs),
+ std::move(SyntheticFields));
+ }
+ return arena().create<ScalarStorageLocation>(Type);
+}
+
+// Returns the keys for a given `StringMap`.
+// Can't use `StringSet` as the return type as it doesn't support `operator==`.
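+// (Used only by the assertion in `createRecordStorageLocation()` below.)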
+template <typename T>
+static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
+ return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
+}
+
+RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
+ QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
+ RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
+ assert(Type->isRecordType());
+ assert(containsSameFields(getModeledFields(Type), FieldLocs));
+ assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
+
+ RecordStorageLocationCreated = true;
+ return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
+ std::move(SyntheticFields));
+}
+
+StorageLocation &
+DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
+ if (auto *Loc = DeclToLoc.lookup(&D))
+ return *Loc;
+ auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
+ DeclToLoc[&D] = &Loc;
+ return Loc;
+}
+
+StorageLocation &
+DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
+ const Expr &CanonE = ignoreCFGOmittedNodes(E);
+
+ if (auto *Loc = ExprToLoc.lookup(&CanonE))
+ return *Loc;
+ auto &Loc = createStorageLocation(CanonE.getType());
+ ExprToLoc[&CanonE] = &Loc;
+ return Loc;
+}
+
+PointerValue &
+DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
+ auto CanonicalPointeeType =
+ PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
+ auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
+ if (Res.second) {
+ auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
+ Res.first->second = &arena().create<PointerValue>(PointeeLoc);
+ }
+ return *Res.first->second;
+}
+
+void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
+ if (Invariant == nullptr)
+ Invariant = &Constraint;
+ else
+ Invariant = &arena().makeAnd(*Invariant, Constraint);
+}
+
+void DataflowAnalysisContext::addFlowConditionConstraint(
+ Atom Token, const Formula &Constraint) {
+ auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
+ if (!Res.second) {
+ Res.first->second =
+ &arena().makeAnd(*Res.first->second, Constraint);
+ }
+}
+
+Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
+ Atom ForkToken = arena().makeFlowConditionToken();
+ FlowConditionDeps[ForkToken].insert(Token);
+ addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
+ return ForkToken;
+}
+
+Atom
+DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
+ Atom SecondToken) {
+ Atom Token = arena().makeFlowConditionToken();
+ FlowConditionDeps[Token].insert(FirstToken);
+ FlowConditionDeps[Token].insert(SecondToken);
+ addFlowConditionConstraint(Token,
+ arena().makeOr(arena().makeAtomRef(FirstToken),
+ arena().makeAtomRef(SecondToken)));
+ return Token;
+}
+
+Solver::Result DataflowAnalysisContext::querySolver(
+ llvm::SetVector<const Formula *> Constraints) {
+ return S.solve(Constraints.getArrayRef());
+}
+
+bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
+ const Formula &F) {
+ if (F.isLiteral(true))
+ return true;
+
+ // Returns true if and only if every truth assignment that satisfies the
+ // flow condition also satisfies `F`. We prove whether or not this property
+ // holds by reducing the problem to satisfiability checking. In other words,
+ // we attempt to show that assuming `F` is false makes the constraints
+ // induced by the flow condition unsatisfiable.
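+ // For example (illustrative): if `Token`'s only constraint is `A`, then
+ // proving that the flow condition implies `A || B` amounts to asking the
+ // solver for a model of `FC && !(A || B) && (FC <=> A)`; there is none,
+ // so the implication holds.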
+ llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + Constraints.insert(&arena().makeNot(F)); + addTransitiveFlowConditionConstraints(Token, Constraints); + return isUnsatisfiable(std::move(Constraints)); +} + +bool DataflowAnalysisContext::flowConditionAllows(Atom Token, + const Formula &F) { + if (F.isLiteral(false)) + return false; + + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + Constraints.insert(&F); + addTransitiveFlowConditionConstraints(Token, Constraints); + return isSatisfiable(std::move(Constraints)); +} + +bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, + const Formula &Val2) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); + return isUnsatisfiable(std::move(Constraints)); +} + +void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( + Atom Token, llvm::SetVector<const Formula *> &Constraints) { + llvm::DenseSet<Atom> AddedTokens; + std::vector<Atom> Remaining = {Token}; + + if (Invariant) + Constraints.insert(Invariant); + // Define all the flow conditions that might be referenced in constraints. + while (!Remaining.empty()) { + auto Token = Remaining.back(); + Remaining.pop_back(); + if (!AddedTokens.insert(Token).second) + continue; + + auto ConstraintsIt = FlowConditionConstraints.find(Token); + if (ConstraintsIt == FlowConditionConstraints.end()) { + Constraints.insert(&arena().makeAtomRef(Token)); + } else { + // Bind flow condition token via `iff` to its set of constraints: + // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints + Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), + *ConstraintsIt->second)); + } + + if (auto DepsIt = FlowConditionDeps.find(Token); + DepsIt != FlowConditionDeps.end()) + for (Atom A : DepsIt->second) + Remaining.push_back(A); + } +} + +static void printAtomList(const llvm::SmallVector<Atom> &Atoms, + llvm::raw_ostream &OS) { + OS << "("; + for (size_t i = 0; i < Atoms.size(); ++i) { + OS << Atoms[i]; + if (i + 1 < Atoms.size()) + OS << ", "; + } + OS << ")\n"; +} + +void DataflowAnalysisContext::dumpFlowCondition(Atom Token, + llvm::raw_ostream &OS) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + addTransitiveFlowConditionConstraints(Token, Constraints); + + OS << "Flow condition token: " << Token << "\n"; + SimplifyConstraintsInfo Info; + llvm::SetVector<const Formula *> OriginalConstraints = Constraints; + simplifyConstraints(Constraints, arena(), &Info); + if (!Constraints.empty()) { + OS << "Constraints:\n"; + for (const auto *Constraint : Constraints) { + Constraint->print(OS); + OS << "\n"; + } + } + if (!Info.TrueAtoms.empty()) { + OS << "True atoms: "; + printAtomList(Info.TrueAtoms, OS); + } + if (!Info.FalseAtoms.empty()) { + OS << "False atoms: "; + printAtomList(Info.FalseAtoms, OS); + } + if (!Info.EquivalentAtoms.empty()) { + OS << "Equivalent atoms:\n"; + for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) + printAtomList(Class, OS); + } + + OS << "\nFlow condition constraints before simplification:\n"; + for (const auto *Constraint : OriginalConstraints) { + Constraint->print(OS); + OS << "\n"; + } +} + +const AdornedCFG * +DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) { + // Canonicalize the key: + F = F->getDefinition(); + if (F == nullptr) + return nullptr; + auto It = FunctionContexts.find(F); + if (It != 
FunctionContexts.end())
+ return &It->second;
+
+ if (F->doesThisDeclarationHaveABody()) {
+ auto ACFG = AdornedCFG::build(*F);
+ // FIXME: Handle errors.
+ assert(ACFG);
+ auto Result = FunctionContexts.insert({F, std::move(*ACFG)});
+ return &Result.first->second;
+ }
+
+ return nullptr;
+}
+
+static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
+ if (DataflowLog.empty())
+ return Logger::textual(llvm::errs());
+
+ llvm::StringRef Dir = DataflowLog;
+ if (auto EC = llvm::sys::fs::create_directories(Dir))
+ llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
+ // All analysis runs within a process will log to the same directory.
+ // Share a counter so they don't all overwrite each other's 0.html.
+ // (Don't share a logger; it's not thread-safe.)
+ static std::atomic<unsigned> Counter = {0};
+ auto StreamFactory =
+ [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
+ llvm::SmallString<256> File(Dir);
+ llvm::sys::path::append(File,
+ std::to_string(Counter.fetch_add(1)) + ".html");
+ std::error_code EC;
+ auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
+ if (EC) {
+ llvm::errs() << "Failed to create log " << File << ": " << EC.message()
+ << "\n";
+ return std::make_unique<llvm::raw_null_ostream>();
+ }
+ return OS;
+ };
+ return Logger::html(std::move(StreamFactory));
+}
+
+DataflowAnalysisContext::DataflowAnalysisContext(
+ Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
+ : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
+ Opts(Opts) {
+ // If the -dataflow-log command-line flag was set, synthesize a logger.
+ // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
+ // based tools.
+ if (Opts.Log == nullptr) {
+ if (DataflowLog.getNumOccurrences() > 0) {
+ LogOwner = makeLoggerFromCommandLine();
+ this->Opts.Log = LogOwner.get();
+ // FIXME: if the flag is given a value, write an HTML log to a file.
+ } else {
+ this->Opts.Log = &Logger::null();
+ }
+ }
+}
+
+DataflowAnalysisContext::~DataflowAnalysisContext() = default;
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
new file mode 100644
index 000000000000..8d7fe1848821
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -0,0 +1,1257 @@
+//===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an Environment class that is used by dataflow analyses
+// that run over Control-Flow Graphs (CFGs) to keep track of the state of the
+// program at given program points.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <utility>
+
+#define DEBUG_TYPE "dataflow"
+
+namespace clang {
+namespace dataflow {
+
+// FIXME: convert these to parameters of the analysis or environment. Current
+// settings have been experimentally validated, but only for a particular
+// analysis.
+static constexpr int MaxCompositeValueDepth = 3;
+static constexpr int MaxCompositeValueSize = 1000;
+
+/// Returns a map consisting of key-value entries that are present in both maps.
+static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc(
+ const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1,
+ const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) {
+ llvm::DenseMap<const ValueDecl *, StorageLocation *> Result;
+ for (auto &Entry : DeclToLoc1) {
+ auto It = DeclToLoc2.find(Entry.first);
+ if (It != DeclToLoc2.end() && Entry.second == It->second)
+ Result.insert({Entry.first, Entry.second});
+ }
+ return Result;
+}
+
+// Performs a join on either `ExprToLoc` or `ExprToVal`.
+// The maps must be consistent in the sense that any entries for the same
+// expression must map to the same location / value. This is the case if we are
+// performing a join for control flow within a full-expression (which is the
+// only case when this function should be used).
+template <typename MapT> MapT joinExprMaps(const MapT &Map1, const MapT &Map2) {
+ MapT Result = Map1;
+
+ for (const auto &Entry : Map2) {
+ [[maybe_unused]] auto [It, Inserted] = Result.insert(Entry);
+ // If there was an existing entry, its value should be the same as for the
+ // entry we were trying to insert.
+ assert(It->second == Entry.second);
+ }
+
+ return Result;
+}
+
+// Whether to consider two values with an unknown relation to be equivalent.
+//
+// FIXME: this function is a hack enabling unsoundness to support
+// convergence. Once we have widening support for the reference/pointer and
+// struct built-in models, this should be unconditionally `false` (and inlined
+// as such at its call sites).
+static bool equateUnknownValues(Value::Kind K) {
+ switch (K) {
+ case Value::Kind::Integer:
+ case Value::Kind::Pointer:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool compareDistinctValues(QualType Type, Value &Val1,
+ const Environment &Env1, Value &Val2,
+ const Environment &Env2,
+ Environment::ValueModel &Model) {
+ // Note: Potentially costly, but, for booleans, we could check whether both
+ // can be proven equivalent in their respective environments.
+
+ // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
+ // and implement separate, join/widen specific handling for
+ // reference/pointers.
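+ // For example (illustrative): a custom model tracking pointer nullness
+ // might return `Same` for two pointers it has proven equal, `Different`
+ // for provably distinct ones, and `Unknown` otherwise, in which case we
+ // fall back to `equateUnknownValues` below.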
+ switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { + case ComparisonResult::Same: + return true; + case ComparisonResult::Different: + return false; + case ComparisonResult::Unknown: + return equateUnknownValues(Val1.getKind()); + } + llvm_unreachable("All cases covered in switch"); +} + +/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`, +/// respectively, of the same type `Type`. Joining generally produces a single +/// value that (soundly) approximates the two inputs, although the actual +/// meaning depends on `Model`. +static Value *joinDistinctValues(QualType Type, Value &Val1, + const Environment &Env1, Value &Val2, + const Environment &Env2, + Environment &JoinedEnv, + Environment::ValueModel &Model) { + // Join distinct boolean values preserving information about the constraints + // in the respective path conditions. + if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { + // FIXME: Checking both values should be unnecessary, since they should have + // a consistent shape. However, right now we can end up with BoolValue's in + // integer-typed variables due to our incorrect handling of + // boolean-to-integer casts (we just propagate the BoolValue to the result + // of the cast). So, a join can encounter an integer in one branch but a + // bool in the other. + // For example: + // ``` + // std::optional<bool> o; + // int x; + // if (o.has_value()) + // x = o.value(); + // ``` + auto &Expr1 = cast<BoolValue>(Val1).formula(); + auto &Expr2 = cast<BoolValue>(Val2).formula(); + auto &A = JoinedEnv.arena(); + auto &JoinedVal = A.makeAtomRef(A.makeAtom()); + JoinedEnv.assume( + A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), + A.makeEquals(JoinedVal, Expr1)), + A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), + A.makeEquals(JoinedVal, Expr2)))); + return &A.makeBoolValue(JoinedVal); + } + + Value *JoinedVal = JoinedEnv.createValue(Type); + if (JoinedVal) + Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv); + + return JoinedVal; +} + +static WidenResult widenDistinctValues(QualType Type, Value &Prev, + const Environment &PrevEnv, + Value &Current, Environment &CurrentEnv, + Environment::ValueModel &Model) { + // Boolean-model widening. + if (isa<BoolValue>(Prev) && isa<BoolValue>(Current)) { + // FIXME: Checking both values should be unnecessary, but we can currently + // end up with `BoolValue`s in integer-typed variables. See comment in + // `joinDistinctValues()` for details. + auto &PrevBool = cast<BoolValue>(Prev); + auto &CurBool = cast<BoolValue>(Current); + + if (isa<TopBoolValue>(Prev)) + // Safe to return `Prev` here, because Top is never dependent on the + // environment. + return {&Prev, LatticeEffect::Unchanged}; + + // We may need to widen to Top, but before we do so, check whether both + // values are implied to be either true or false in the current environment. + // In that case, we can simply return a literal instead. + bool TruePrev = PrevEnv.proves(PrevBool.formula()); + bool TrueCur = CurrentEnv.proves(CurBool.formula()); + if (TruePrev && TrueCur) + return {&CurrentEnv.getBoolLiteralValue(true), LatticeEffect::Unchanged}; + if (!TruePrev && !TrueCur && + PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool.formula())) && + CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) + return {&CurrentEnv.getBoolLiteralValue(false), LatticeEffect::Unchanged}; + + return {&CurrentEnv.makeTopBoolValue(), LatticeEffect::Changed}; + } + + // FIXME: Add other built-in model widening. 
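+ // (For instance, pointer values currently rely on `equateUnknownValues`
+ // below for convergence instead of a dedicated Top element analogous to
+ // `TopBoolValue`.)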
+ + // Custom-model widening. + if (auto Result = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) + return *Result; + + return {&Current, equateUnknownValues(Prev.getKind()) + ? LatticeEffect::Unchanged + : LatticeEffect::Changed}; +} + +// Returns whether the values in `Map1` and `Map2` compare equal for those +// keys that `Map1` and `Map2` have in common. +template <typename Key> +bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, + const llvm::MapVector<Key, Value *> &Map2, + const Environment &Env1, const Environment &Env2, + Environment::ValueModel &Model) { + for (auto &Entry : Map1) { + Key K = Entry.first; + assert(K != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto It = Map2.find(K); + if (It == Map2.end()) + continue; + assert(It->second != nullptr); + + if (!areEquivalentValues(*Val, *It->second) && + !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, + Model)) + return false; + } + + return true; +} + +// Perform a join on two `LocToVal` maps. +static llvm::MapVector<const StorageLocation *, Value *> +joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal, + const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2, + const Environment &Env1, const Environment &Env2, + Environment &JoinedEnv, Environment::ValueModel &Model) { + llvm::MapVector<const StorageLocation *, Value *> Result; + for (auto &Entry : LocToVal) { + const StorageLocation *Loc = Entry.first; + assert(Loc != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto It = LocToVal2.find(Loc); + if (It == LocToVal2.end()) + continue; + assert(It->second != nullptr); + + if (Value *JoinedVal = Environment::joinValues( + Loc->getType(), Val, Env1, It->second, Env2, JoinedEnv, Model)) { + Result.insert({Loc, JoinedVal}); + } + } + + return Result; +} + +// Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either +// `const StorageLocation *` or `const Expr *`. +template <typename Key> +llvm::MapVector<Key, Value *> +widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, + const llvm::MapVector<Key, Value *> &PrevMap, + Environment &CurEnv, const Environment &PrevEnv, + Environment::ValueModel &Model, LatticeEffect &Effect) { + llvm::MapVector<Key, Value *> WidenedMap; + for (auto &Entry : CurMap) { + Key K = Entry.first; + assert(K != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto PrevIt = PrevMap.find(K); + if (PrevIt == PrevMap.end()) + continue; + assert(PrevIt->second != nullptr); + + if (areEquivalentValues(*Val, *PrevIt->second)) { + WidenedMap.insert({K, Val}); + continue; + } + + auto [WidenedVal, ValEffect] = widenDistinctValues( + K->getType(), *PrevIt->second, PrevEnv, *Val, CurEnv, Model); + WidenedMap.insert({K, WidenedVal}); + if (ValEffect == LatticeEffect::Changed) + Effect = LatticeEffect::Changed; + } + + return WidenedMap; +} + +namespace { + +// Visitor that builds a map from record prvalues to result objects. +// For each result object that it encounters, it propagates the storage location +// of the result object to all record prvalues that can initialize it. +class ResultObjectVisitor : public AnalysisASTVisitor<ResultObjectVisitor> { +public: + // `ResultObjectMap` will be filled with a map from record prvalues to result + // object. If this visitor will traverse a function that returns a record by + // value, `LocForRecordReturnVal` is the location to which this record should + // be written; otherwise, it is null. 
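+ // For example, in `S f() { return S{}; }` (hypothetical code), the prvalue
+ // `S{}` initializes the function's return object, so it is mapped to
+ // `LocForRecordReturnVal`.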
+ explicit ResultObjectVisitor( + llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap, + RecordStorageLocation *LocForRecordReturnVal, + DataflowAnalysisContext &DACtx) + : ResultObjectMap(ResultObjectMap), + LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {} + + // Traverse all member and base initializers of `Ctor`. This function is not + // called by `RecursiveASTVisitor`; it should be called manually if we are + // analyzing a constructor. `ThisPointeeLoc` is the storage location that + // `this` points to. + void TraverseConstructorInits(const CXXConstructorDecl *Ctor, + RecordStorageLocation *ThisPointeeLoc) { + assert(ThisPointeeLoc != nullptr); + for (const CXXCtorInitializer *Init : Ctor->inits()) { + Expr *InitExpr = Init->getInit(); + if (FieldDecl *Field = Init->getMember(); + Field != nullptr && Field->getType()->isRecordType()) { + PropagateResultObject(InitExpr, cast<RecordStorageLocation>( + ThisPointeeLoc->getChild(*Field))); + } else if (Init->getBaseClass()) { + PropagateResultObject(InitExpr, ThisPointeeLoc); + } + + // Ensure that any result objects within `InitExpr` (e.g. temporaries) + // are also propagated to the prvalues that initialize them. + TraverseStmt(InitExpr); + + // If this is a `CXXDefaultInitExpr`, also propagate any result objects + // within the default expression. + if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr)) + TraverseStmt(DefaultInit->getExpr()); + } + } + + bool VisitVarDecl(VarDecl *VD) { + if (VD->getType()->isRecordType() && VD->hasInit()) + PropagateResultObject( + VD->getInit(), + &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*VD))); + return true; + } + + bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) { + if (MTE->getType()->isRecordType()) + PropagateResultObject( + MTE->getSubExpr(), + &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*MTE))); + return true; + } + + bool VisitReturnStmt(ReturnStmt *Return) { + Expr *RetValue = Return->getRetValue(); + if (RetValue != nullptr && RetValue->getType()->isRecordType() && + RetValue->isPRValue()) + PropagateResultObject(RetValue, LocForRecordReturnVal); + return true; + } + + bool VisitExpr(Expr *E) { + // Clang's AST can have record-type prvalues without a result object -- for + // example as full-expressions contained in a compound statement or as + // arguments of call expressions. We notice this if we get here and a + // storage location has not yet been associated with `E`. In this case, + // treat this as if it was a `MaterializeTemporaryExpr`. + if (E->isPRValue() && E->getType()->isRecordType() && + !ResultObjectMap.contains(E)) + PropagateResultObject( + E, &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*E))); + return true; + } + + void + PropagateResultObjectToRecordInitList(const RecordInitListHelper &InitList, + RecordStorageLocation *Loc) { + for (auto [Base, Init] : InitList.base_inits()) { + assert(Base->getType().getCanonicalType() == + Init->getType().getCanonicalType()); + + // Storage location for the base class is the same as that of the + // derived class because we "flatten" the object hierarchy and put all + // fields in `RecordStorageLocation` of the derived class. + PropagateResultObject(Init, Loc); + } + + for (auto [Field, Init] : InitList.field_inits()) { + // Fields of non-record type are handled in + // `TransferVisitor::VisitInitListExpr()`. 
+ if (Field->getType()->isRecordType()) + PropagateResultObject( + Init, cast<RecordStorageLocation>(Loc->getChild(*Field))); + } + } + + // Assigns `Loc` as the result object location of `E`, then propagates the + // location to all lower-level prvalues that initialize the same object as + // `E` (or one of its base classes or member variables). + void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) { + if (!E->isPRValue() || !E->getType()->isRecordType()) { + assert(false); + // Ensure we don't propagate the result object if we hit this in a + // release build. + return; + } + + ResultObjectMap[E] = Loc; + + // The following AST node kinds are "original initializers": They are the + // lowest-level AST node that initializes a given object, and nothing + // below them can initialize the same object (or part of it). + if (isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || isa<LambdaExpr>(E) || + isa<CXXDefaultArgExpr>(E) || isa<CXXStdInitializerListExpr>(E) || + isa<AtomicExpr>(E) || + // We treat `BuiltinBitCastExpr` as an "original initializer" too as + // it may not even be casting from a record type -- and even if it is, + // the two objects are in general of unrelated type. + isa<BuiltinBitCastExpr>(E)) { + return; + } + if (auto *Op = dyn_cast<BinaryOperator>(E); + Op && Op->getOpcode() == BO_Cmp) { + // Builtin `<=>` returns a `std::strong_ordering` object. + return; + } + + if (auto *InitList = dyn_cast<InitListExpr>(E)) { + if (!InitList->isSemanticForm()) + return; + if (InitList->isTransparent()) { + PropagateResultObject(InitList->getInit(0), Loc); + return; + } + + PropagateResultObjectToRecordInitList(RecordInitListHelper(InitList), + Loc); + return; + } + + if (auto *ParenInitList = dyn_cast<CXXParenListInitExpr>(E)) { + PropagateResultObjectToRecordInitList(RecordInitListHelper(ParenInitList), + Loc); + return; + } + + if (auto *Op = dyn_cast<BinaryOperator>(E); Op && Op->isCommaOp()) { + PropagateResultObject(Op->getRHS(), Loc); + return; + } + + if (auto *Cond = dyn_cast<AbstractConditionalOperator>(E)) { + PropagateResultObject(Cond->getTrueExpr(), Loc); + PropagateResultObject(Cond->getFalseExpr(), Loc); + return; + } + + if (auto *SE = dyn_cast<StmtExpr>(E)) { + PropagateResultObject(cast<Expr>(SE->getSubStmt()->body_back()), Loc); + return; + } + + if (auto *DIE = dyn_cast<CXXDefaultInitExpr>(E)) { + PropagateResultObject(DIE->getExpr(), Loc); + return; + } + + // All other expression nodes that propagate a record prvalue should have + // exactly one child. 
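+ // (Typically wrapper nodes such as `ExprWithCleanups`, `ParenExpr`, or
+ // prvalue-preserving implicit casts.)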
+ SmallVector<Stmt *, 1> Children(E->child_begin(), E->child_end()); + LLVM_DEBUG({ + if (Children.size() != 1) + E->dump(); + }); + assert(Children.size() == 1); + for (Stmt *S : Children) + PropagateResultObject(cast<Expr>(S), Loc); + } + +private: + llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap; + RecordStorageLocation *LocForRecordReturnVal; + DataflowAnalysisContext &DACtx; +}; + +} // namespace + +void Environment::initialize() { + if (InitialTargetStmt == nullptr) + return; + + if (InitialTargetFunc == nullptr) { + initFieldsGlobalsAndFuncs(getReferencedDecls(*InitialTargetStmt)); + ResultObjectMap = + std::make_shared<PrValueToResultObject>(buildResultObjectMap( + DACtx, InitialTargetStmt, getThisPointeeStorageLocation(), + /*LocForRecordReturnValue=*/nullptr)); + return; + } + + initFieldsGlobalsAndFuncs(getReferencedDecls(*InitialTargetFunc)); + + for (const auto *ParamDecl : InitialTargetFunc->parameters()) { + assert(ParamDecl != nullptr); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); + } + + if (InitialTargetFunc->getReturnType()->isRecordType()) + LocForRecordReturnVal = &cast<RecordStorageLocation>( + createStorageLocation(InitialTargetFunc->getReturnType())); + + if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(InitialTargetFunc)) { + auto *Parent = MethodDecl->getParent(); + assert(Parent != nullptr); + + if (Parent->isLambda()) { + for (const auto &Capture : Parent->captures()) { + if (Capture.capturesVariable()) { + const auto *VarDecl = Capture.getCapturedVar(); + assert(VarDecl != nullptr); + setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr)); + } else if (Capture.capturesThis()) { + if (auto *Ancestor = InitialTargetFunc->getNonClosureAncestor()) { + const auto *SurroundingMethodDecl = cast<CXXMethodDecl>(Ancestor); + QualType ThisPointeeType = + SurroundingMethodDecl->getFunctionObjectParameterType(); + setThisPointeeStorageLocation( + cast<RecordStorageLocation>(createObject(ThisPointeeType))); + } else if (auto *FieldBeingInitialized = + dyn_cast<FieldDecl>(Parent->getLambdaContextDecl())) { + // This is in a field initializer, rather than a method. + setThisPointeeStorageLocation( + cast<RecordStorageLocation>(createObject(QualType( + FieldBeingInitialized->getParent()->getTypeForDecl(), 0)))); + } else { + assert(false && "Unexpected this-capturing lambda context."); + } + } + } + } else if (MethodDecl->isImplicitObjectMemberFunction()) { + QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); + auto &ThisLoc = + cast<RecordStorageLocation>(createStorageLocation(ThisPointeeType)); + setThisPointeeStorageLocation(ThisLoc); + // Initialize fields of `*this` with values, but only if we're not + // analyzing a constructor; after all, it's the constructor's job to do + // this (and we want to be able to test that). + if (!isa<CXXConstructorDecl>(MethodDecl)) + initializeFieldsWithValues(ThisLoc); + } + } + + // We do this below the handling of `CXXMethodDecl` above so that we can + // be sure that the storage location for `this` has been set. + ResultObjectMap = + std::make_shared<PrValueToResultObject>(buildResultObjectMap( + DACtx, InitialTargetFunc, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); +} + +// FIXME: Add support for resetting globals after function calls to enable the +// implementation of sound analyses. 
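+// (A callee can write to any global it can reach, so a sound analysis would
+// have to invalidate the values of globals at every call site.)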
+
+// Adds fields, globals, and functions from `Referenced` to the environment:
+// models the fields, and creates storage locations (and, for globals, objects)
+// for declarations that aren't yet tracked.
+void Environment::initFieldsGlobalsAndFuncs(const ReferencedDecls &Referenced) {
+ // These have to be added before the lines that follow to ensure that
+ // `create*` work correctly for structs.
+ DACtx->addModeledFields(Referenced.Fields);
+
+ for (const VarDecl *D : Referenced.Globals) {
+ if (getStorageLocation(*D) != nullptr)
+ continue;
+
+ // We don't run transfer functions on the initializers of global variables,
+ // so they won't be associated with a value or storage location. We
+ // therefore intentionally don't pass an initializer to `createObject()`; in
+ // particular, this ensures that `createObject()` will initialize the fields
+ // of record-type variables with values.
+ setStorageLocation(*D, createObject(*D, nullptr));
+ }
+
+ for (const FunctionDecl *FD : Referenced.Functions) {
+ if (getStorageLocation(*FD) != nullptr)
+ continue;
+ auto &Loc = createStorageLocation(*FD);
+ setStorageLocation(*FD, Loc);
+ }
+}
+
+Environment Environment::fork() const {
+ Environment Copy(*this);
+ Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
+ return Copy;
+}
+
+bool Environment::canDescend(unsigned MaxDepth,
+ const FunctionDecl *Callee) const {
+ return CallStack.size() < MaxDepth && !llvm::is_contained(CallStack, Callee);
+}
+
+Environment Environment::pushCall(const CallExpr *Call) const {
+ Environment Env(*this);
+
+ if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
+ if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
+ if (!isa<CXXThisExpr>(Arg))
+ Env.ThisPointeeLoc =
+ cast<RecordStorageLocation>(getStorageLocation(*Arg));
+ // Otherwise (when the argument is `this`), retain the current
+ // environment's `ThisPointeeLoc`.
+ }
+ }
+
+ if (Call->getType()->isRecordType() && Call->isPRValue())
+ Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call);
+
+ Env.pushCallInternal(Call->getDirectCallee(),
+ llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
+
+ return Env;
+}
+
+Environment Environment::pushCall(const CXXConstructExpr *Call) const {
+ Environment Env(*this);
+
+ Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
+ Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call);
+
+ Env.pushCallInternal(Call->getConstructor(),
+ llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
+
+ return Env;
+}
+
+void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
+ ArrayRef<const Expr *> Args) {
+ // Canonicalize to the definition of the function. This ensures that we're
+ // putting arguments into the same `ParamVarDecl`s that the callee will later
+ // be retrieving them from.
+ assert(FuncDecl->getDefinition() != nullptr);
+ FuncDecl = FuncDecl->getDefinition();
+
+ CallStack.push_back(FuncDecl);
+
+ initFieldsGlobalsAndFuncs(getReferencedDecls(*FuncDecl));
+
+ const auto *ParamIt = FuncDecl->param_begin();
+
+ // FIXME: Parameters don't always map to arguments 1:1; examples include
+ // overloaded operators implemented as member functions, and parameter packs.
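+ // For example (illustrative): for a member `operator==` invoked as
+ // `a == b`, `Args` starts with the implicit object argument, which has no
+ // counterpart in `FuncDecl->parameters()`.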
+ for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
+ assert(ParamIt != FuncDecl->param_end());
+ const VarDecl *Param = *ParamIt;
+ setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
+ }
+
+ ResultObjectMap = std::make_shared<PrValueToResultObject>(
+ buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(),
+ LocForRecordReturnVal));
+}
+
+void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
+ // We ignore some entries of `CalleeEnv`:
+ // - `DACtx`, because it is already the same in both.
+ // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or
+ // `ThisPointeeLoc` because they don't apply to us.
+ // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the
+ // callee's local scope, so when popping that scope, we do not propagate
+ // the maps.
+ this->LocToVal = std::move(CalleeEnv.LocToVal);
+ this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
+
+ if (Call->isGLValue()) {
+ if (CalleeEnv.ReturnLoc != nullptr)
+ setStorageLocation(*Call, *CalleeEnv.ReturnLoc);
+ } else if (!Call->getType()->isVoidType()) {
+ if (CalleeEnv.ReturnVal != nullptr)
+ setValue(*Call, *CalleeEnv.ReturnVal);
+ }
+}
+
+void Environment::popCall(const CXXConstructExpr *Call,
+ const Environment &CalleeEnv) {
+ // See also comment in `popCall(const CallExpr *, const Environment &)` above.
+ this->LocToVal = std::move(CalleeEnv.LocToVal);
+ this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
+}
+
+bool Environment::equivalentTo(const Environment &Other,
+ Environment::ValueModel &Model) const {
+ assert(DACtx == Other.DACtx);
+
+ if (ReturnVal != Other.ReturnVal)
+ return false;
+
+ if (ReturnLoc != Other.ReturnLoc)
+ return false;
+
+ if (LocForRecordReturnVal != Other.LocForRecordReturnVal)
+ return false;
+
+ if (ThisPointeeLoc != Other.ThisPointeeLoc)
+ return false;
+
+ if (DeclToLoc != Other.DeclToLoc)
+ return false;
+
+ if (ExprToLoc != Other.ExprToLoc)
+ return false;
+
+ if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model))
+ return false;
+
+ if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model))
+ return false;
+
+ return true;
+}
+
+LatticeEffect Environment::widen(const Environment &PrevEnv,
+ Environment::ValueModel &Model) {
+ assert(DACtx == PrevEnv.DACtx);
+ assert(ReturnVal == PrevEnv.ReturnVal);
+ assert(ReturnLoc == PrevEnv.ReturnLoc);
+ assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal);
+ assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
+ assert(CallStack == PrevEnv.CallStack);
+ assert(ResultObjectMap == PrevEnv.ResultObjectMap);
+ assert(InitialTargetFunc == PrevEnv.InitialTargetFunc);
+ assert(InitialTargetStmt == PrevEnv.InitialTargetStmt);
+
+ auto Effect = LatticeEffect::Unchanged;
+
+ // By the API, `PrevEnv` is a previous version of the environment for the same
+ // block, so we have some guarantees about its shape. In particular, it will
+ // be the result of a join or widen operation on previous values for this
+ // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that
+ // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain
+ // this property here, we don't need to change their current values to widen.
+ assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); + assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); + assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); + + ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, + Model, Effect); + + LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, + Model, Effect); + if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || + ExprToLoc.size() != PrevEnv.ExprToLoc.size() || + ExprToVal.size() != PrevEnv.ExprToVal.size() || + LocToVal.size() != PrevEnv.LocToVal.size()) + Effect = LatticeEffect::Changed; + + return Effect; +} + +Environment Environment::join(const Environment &EnvA, const Environment &EnvB, + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior) { + assert(EnvA.DACtx == EnvB.DACtx); + assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal); + assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); + assert(EnvA.CallStack == EnvB.CallStack); + assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap); + assert(EnvA.InitialTargetFunc == EnvB.InitialTargetFunc); + assert(EnvA.InitialTargetStmt == EnvB.InitialTargetStmt); + + Environment JoinedEnv(*EnvA.DACtx); + + JoinedEnv.CallStack = EnvA.CallStack; + JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap; + JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal; + JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; + JoinedEnv.InitialTargetFunc = EnvA.InitialTargetFunc; + JoinedEnv.InitialTargetStmt = EnvA.InitialTargetStmt; + + const FunctionDecl *Func = EnvA.getCurrentFunc(); + if (!Func) { + JoinedEnv.ReturnVal = nullptr; + } else { + JoinedEnv.ReturnVal = + joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal, + EnvB, JoinedEnv, Model); + } + + if (EnvA.ReturnLoc == EnvB.ReturnLoc) + JoinedEnv.ReturnLoc = EnvA.ReturnLoc; + else + JoinedEnv.ReturnLoc = nullptr; + + JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc); + + // FIXME: update join to detect backedges and simplify the flow condition + // accordingly. + JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( + EnvA.FlowConditionToken, EnvB.FlowConditionToken); + + JoinedEnv.LocToVal = + joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); + + if (ExprBehavior == KeepExprState) { + JoinedEnv.ExprToVal = joinExprMaps(EnvA.ExprToVal, EnvB.ExprToVal); + JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); + } + + return JoinedEnv; +} + +Value *Environment::joinValues(QualType Ty, Value *Val1, + const Environment &Env1, Value *Val2, + const Environment &Env2, Environment &JoinedEnv, + Environment::ValueModel &Model) { + if (Val1 == nullptr || Val2 == nullptr) + // We can't say anything about the joined value -- even if one of the values + // is non-null, we don't want to simply propagate it, because it would be + // too specific: Because the other value is null, that means we have no + // information at all about the value (i.e. the value is unconstrained). + return nullptr; + + if (areEquivalentValues(*Val1, *Val2)) + // Arbitrarily return one of the two values. 
+ return Val1;
+
+ return joinDistinctValues(Ty, *Val1, Env1, *Val2, Env2, JoinedEnv, Model);
+}
+
+StorageLocation &Environment::createStorageLocation(QualType Type) {
+ return DACtx->createStorageLocation(Type);
+}
+
+StorageLocation &Environment::createStorageLocation(const ValueDecl &D) {
+ // Evaluated declarations are always assigned the same storage locations to
+ // ensure that the environment stabilizes across loop iterations. Storage
+ // locations for evaluated declarations are stored in the analysis context.
+ return DACtx->getStableStorageLocation(D);
+}
+
+StorageLocation &Environment::createStorageLocation(const Expr &E) {
+ // Evaluated expressions are always assigned the same storage locations to
+ // ensure that the environment stabilizes across loop iterations. Storage
+ // locations for evaluated expressions are stored in the analysis context.
+ return DACtx->getStableStorageLocation(E);
+}
+
+void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
+ assert(!DeclToLoc.contains(&D));
+ // The only kinds of declarations that may have a "variable" storage location
+ // are declarations of reference type and `BindingDecl`. For all other
+ // declarations, the storage location should be the stable storage location
+ // returned by `createStorageLocation()`.
+ assert(D.getType()->isReferenceType() || isa<BindingDecl>(D) ||
+ &Loc == &createStorageLocation(D));
+ DeclToLoc[&D] = &Loc;
+}
+
+StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
+ auto It = DeclToLoc.find(&D);
+ if (It == DeclToLoc.end())
+ return nullptr;
+
+ StorageLocation *Loc = It->second;
+
+ return Loc;
+}
+
+void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); }
+
+void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
+ // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
+ // but we still want to be able to associate a `StorageLocation` with them,
+ // so allow these as an exception.
+ assert(E.isGLValue() ||
+ E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
+ const Expr &CanonE = ignoreCFGOmittedNodes(E);
+ assert(!ExprToLoc.contains(&CanonE));
+ ExprToLoc[&CanonE] = &Loc;
+}
+
+StorageLocation *Environment::getStorageLocation(const Expr &E) const {
+ // See comment in `setStorageLocation()`.
+ assert(E.isGLValue() ||
+ E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
+ auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
+ return It == ExprToLoc.end() ? nullptr : &*It->second;
+}
+
+RecordStorageLocation &
+Environment::getResultObjectLocation(const Expr &RecordPRValue) const {
+ assert(RecordPRValue.getType()->isRecordType());
+ assert(RecordPRValue.isPRValue());
+
+ assert(ResultObjectMap != nullptr);
+ RecordStorageLocation *Loc = ResultObjectMap->lookup(&RecordPRValue);
+ assert(Loc != nullptr);
+ // In release builds, use the "stable" storage location if the map lookup
+ // failed.
+ if (Loc == nullptr) + return cast<RecordStorageLocation>( + DACtx->getStableStorageLocation(RecordPRValue)); + return *Loc; +} + +PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { + return DACtx->getOrCreateNullPointerValue(PointeeType); +} + +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type) { + llvm::DenseSet<QualType> Visited; + int CreatedValuesCount = 0; + initializeFieldsWithValues(Loc, Type, Visited, 0, CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; + } +} + +void Environment::setValue(const StorageLocation &Loc, Value &Val) { + // Records should not be associated with values. + assert(!isa<RecordStorageLocation>(Loc)); + LocToVal[&Loc] = &Val; +} + +void Environment::setValue(const Expr &E, Value &Val) { + const Expr &CanonE = ignoreCFGOmittedNodes(E); + + assert(CanonE.isPRValue()); + // Records should not be associated with values. + assert(!CanonE.getType()->isRecordType()); + ExprToVal[&CanonE] = &Val; +} + +Value *Environment::getValue(const StorageLocation &Loc) const { + // Records should not be associated with values. + assert(!isa<RecordStorageLocation>(Loc)); + return LocToVal.lookup(&Loc); +} + +Value *Environment::getValue(const ValueDecl &D) const { + auto *Loc = getStorageLocation(D); + if (Loc == nullptr) + return nullptr; + return getValue(*Loc); +} + +Value *Environment::getValue(const Expr &E) const { + // Records should not be associated with values. + assert(!E.getType()->isRecordType()); + + if (E.isPRValue()) { + auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); + return It == ExprToVal.end() ? nullptr : It->second; + } + + auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); + if (It == ExprToLoc.end()) + return nullptr; + return getValue(*It->second); +} + +Value *Environment::createValue(QualType Type) { + llvm::DenseSet<QualType> Visited; + int CreatedValuesCount = 0; + Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, + CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; + } + return Val; +} + +Value *Environment::createValueUnlessSelfReferential( + QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, + int &CreatedValuesCount) { + assert(!Type.isNull()); + assert(!Type->isReferenceType()); + assert(!Type->isRecordType()); + + // Allow unlimited fields at depth 1; only cap at deeper nesting levels. + if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || + Depth > MaxCompositeValueDepth) + return nullptr; + + if (Type->isBooleanType()) { + CreatedValuesCount++; + return &makeAtomicBoolValue(); + } + + if (Type->isIntegerType()) { + // FIXME: consider instead `return nullptr`, given that we do nothing useful + // with integers, and so distinguishing them serves no purpose, but could + // prevent convergence. 
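+ // (Each `IntegerValue` created here is a fresh, distinct value, so equal
+ // integers produced on two paths still compare as distinct and rely on
+ // `equateUnknownValues` for convergence.)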
+ CreatedValuesCount++; + return &arena().create<IntegerValue>(); + } + + if (Type->isPointerType()) { + CreatedValuesCount++; + QualType PointeeType = Type->getPointeeType(); + StorageLocation &PointeeLoc = + createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); + + return &arena().create<PointerValue>(PointeeLoc); + } + + return nullptr; +} + +StorageLocation & +Environment::createLocAndMaybeValue(QualType Ty, + llvm::DenseSet<QualType> &Visited, + int Depth, int &CreatedValuesCount) { + if (!Visited.insert(Ty.getCanonicalType()).second) + return createStorageLocation(Ty.getNonReferenceType()); + auto EraseVisited = llvm::make_scope_exit( + [&Visited, Ty] { Visited.erase(Ty.getCanonicalType()); }); + + Ty = Ty.getNonReferenceType(); + + if (Ty->isRecordType()) { + auto &Loc = cast<RecordStorageLocation>(createStorageLocation(Ty)); + initializeFieldsWithValues(Loc, Ty, Visited, Depth, CreatedValuesCount); + return Loc; + } + + StorageLocation &Loc = createStorageLocation(Ty); + + if (Value *Val = createValueUnlessSelfReferential(Ty, Visited, Depth, + CreatedValuesCount)) + setValue(Loc, *Val); + + return Loc; +} + +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type, + llvm::DenseSet<QualType> &Visited, + int Depth, + int &CreatedValuesCount) { + auto initField = [&](QualType FieldType, StorageLocation &FieldLoc) { + if (FieldType->isRecordType()) { + auto &FieldRecordLoc = cast<RecordStorageLocation>(FieldLoc); + initializeFieldsWithValues(FieldRecordLoc, FieldRecordLoc.getType(), + Visited, Depth + 1, CreatedValuesCount); + } else { + if (getValue(FieldLoc) != nullptr) + return; + if (!Visited.insert(FieldType.getCanonicalType()).second) + return; + if (Value *Val = createValueUnlessSelfReferential( + FieldType, Visited, Depth + 1, CreatedValuesCount)) + setValue(FieldLoc, *Val); + Visited.erase(FieldType.getCanonicalType()); + } + }; + + for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { + assert(Field != nullptr); + QualType FieldType = Field->getType(); + + if (FieldType->isReferenceType()) { + Loc.setChild(*Field, + &createLocAndMaybeValue(FieldType, Visited, Depth + 1, + CreatedValuesCount)); + } else { + StorageLocation *FieldLoc = Loc.getChild(*Field); + assert(FieldLoc != nullptr); + initField(FieldType, *FieldLoc); + } + } + for (const auto &[FieldName, FieldType] : DACtx->getSyntheticFields(Type)) { + // Synthetic fields cannot have reference type, so we don't need to deal + // with this case. + assert(!FieldType->isReferenceType()); + initField(FieldType, Loc.getSyntheticField(FieldName)); + } +} + +StorageLocation &Environment::createObjectInternal(const ValueDecl *D, + QualType Ty, + const Expr *InitExpr) { + if (Ty->isReferenceType()) { + // Although variables of reference type always need to be initialized, it + // can happen that we can't see the initializer, so `InitExpr` may still + // be null. + if (InitExpr) { + if (auto *InitExprLoc = getStorageLocation(*InitExpr)) + return *InitExprLoc; + } + + // Even though we have an initializer, we might not get an + // InitExprLoc, for example if the InitExpr is a CallExpr for which we + // don't have a function body. In this case, we just invent a storage + // location and value -- it's the best we can do. + return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); + } + + StorageLocation &Loc = + D ? 
createStorageLocation(*D) : createStorageLocation(Ty); + + if (Ty->isRecordType()) { + auto &RecordLoc = cast<RecordStorageLocation>(Loc); + if (!InitExpr) + initializeFieldsWithValues(RecordLoc); + } else { + Value *Val = nullptr; + if (InitExpr) + // In the (few) cases where an expression is intentionally + // "uninterpreted", `InitExpr` is not associated with a value. There are + // two ways to handle this situation: propagate the status, so that + // uninterpreted initializers result in uninterpreted variables, or + // provide a default value. We choose the latter so that later refinements + // of the variable can be used for reasoning about the surrounding code. + // For this reason, we let this case be handled by the `createValue()` + // call below. + // + // FIXME. If and when we interpret all language cases, change this to + // assert that `InitExpr` is interpreted, rather than supplying a + // default value (assuming we don't update the environment API to return + // references). + Val = getValue(*InitExpr); + if (!Val) + Val = createValue(Ty); + if (Val) + setValue(Loc, *Val); + } + + return Loc; +} + +void Environment::assume(const Formula &F) { + DACtx->addFlowConditionConstraint(FlowConditionToken, F); +} + +bool Environment::proves(const Formula &F) const { + return DACtx->flowConditionImplies(FlowConditionToken, F); +} + +bool Environment::allows(const Formula &F) const { + return DACtx->flowConditionAllows(FlowConditionToken, F); +} + +void Environment::dump(raw_ostream &OS) const { + llvm::DenseMap<const StorageLocation *, std::string> LocToName; + if (LocForRecordReturnVal != nullptr) + LocToName[LocForRecordReturnVal] = "(returned record)"; + if (ThisPointeeLoc != nullptr) + LocToName[ThisPointeeLoc] = "this"; + + OS << "DeclToLoc:\n"; + for (auto [D, L] : DeclToLoc) { + auto Iter = LocToName.insert({L, D->getNameAsString()}).first; + OS << " [" << Iter->second << ", " << L << "]\n"; + } + OS << "ExprToLoc:\n"; + for (auto [E, L] : ExprToLoc) + OS << " [" << E << ", " << L << "]\n"; + + OS << "ExprToVal:\n"; + for (auto [E, V] : ExprToVal) + OS << " [" << E << ", " << V << ": " << *V << "]\n"; + + OS << "LocToVal:\n"; + for (auto [L, V] : LocToVal) { + OS << " [" << L; + if (auto Iter = LocToName.find(L); Iter != LocToName.end()) + OS << " (" << Iter->second << ")"; + OS << ", " << V << ": " << *V << "]\n"; + } + + if (const FunctionDecl *Func = getCurrentFunc()) { + if (Func->getReturnType()->isReferenceType()) { + OS << "ReturnLoc: " << ReturnLoc; + if (auto Iter = LocToName.find(ReturnLoc); Iter != LocToName.end()) + OS << " (" << Iter->second << ")"; + OS << "\n"; + } else if (Func->getReturnType()->isRecordType() || + isa<CXXConstructorDecl>(Func)) { + OS << "LocForRecordReturnVal: " << LocForRecordReturnVal << "\n"; + } else if (!Func->getReturnType()->isVoidType()) { + if (ReturnVal == nullptr) + OS << "ReturnVal: nullptr\n"; + else + OS << "ReturnVal: " << *ReturnVal << "\n"; + } + + if (isa<CXXMethodDecl>(Func)) { + OS << "ThisPointeeLoc: " << ThisPointeeLoc << "\n"; + } + } + + OS << "\n"; + DACtx->dumpFlowCondition(FlowConditionToken, OS); +} + +void Environment::dump() const { dump(llvm::dbgs()); } + +Environment::PrValueToResultObject Environment::buildResultObjectMap( + DataflowAnalysisContext *DACtx, const FunctionDecl *FuncDecl, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal) { + assert(FuncDecl->doesThisDeclarationHaveABody()); + + PrValueToResultObject Map = buildResultObjectMap( + DACtx, FuncDecl->getBody(), 
ThisPointeeLoc, LocForRecordReturnVal); + + ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); + if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(FuncDecl)) + Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc); + + return Map; +} + +Environment::PrValueToResultObject Environment::buildResultObjectMap( + DataflowAnalysisContext *DACtx, Stmt *S, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal) { + PrValueToResultObject Map; + ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); + Visitor.TraverseStmt(S); + return Map; +} + +RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, + const Environment &Env) { + Expr *ImplicitObject = MCE.getImplicitObjectArgument(); + if (ImplicitObject == nullptr) + return nullptr; + if (ImplicitObject->getType()->isPointerType()) { + if (auto *Val = Env.get<PointerValue>(*ImplicitObject)) + return &cast<RecordStorageLocation>(Val->getPointeeLoc()); + return nullptr; + } + return cast_or_null<RecordStorageLocation>( + Env.getStorageLocation(*ImplicitObject)); +} + +RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, + const Environment &Env) { + Expr *Base = ME.getBase(); + if (Base == nullptr) + return nullptr; + if (ME.isArrow()) { + if (auto *Val = Env.get<PointerValue>(*Base)) + return &cast<RecordStorageLocation>(Val->getPointeeLoc()); + return nullptr; + } + return Env.get<RecordStorageLocation>(*Base); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp new file mode 100644 index 000000000000..d40aab7a7f10 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp @@ -0,0 +1,77 @@ +//===- DebugSupport.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions which generate more readable forms of data +// structures used in the dataflow analyses, for debugging purposes. 
+// +//===----------------------------------------------------------------------===// + +#include <utility> + +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Solver.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace dataflow { + +llvm::StringRef debugString(Value::Kind Kind) { + switch (Kind) { + case Value::Kind::Integer: + return "Integer"; + case Value::Kind::Pointer: + return "Pointer"; + case Value::Kind::AtomicBool: + return "AtomicBool"; + case Value::Kind::TopBool: + return "TopBool"; + case Value::Kind::FormulaBool: + return "FormulaBool"; + } + llvm_unreachable("Unhandled value kind"); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + Solver::Result::Assignment Assignment) { + switch (Assignment) { + case Solver::Result::Assignment::AssignedFalse: + return OS << "False"; + case Solver::Result::Assignment::AssignedTrue: + return OS << "True"; + } + llvm_unreachable("Booleans can only be assigned true/false"); +} + +llvm::StringRef debugString(Solver::Result::Status Status) { + switch (Status) { + case Solver::Result::Status::Satisfiable: + return "Satisfiable"; + case Solver::Result::Status::Unsatisfiable: + return "Unsatisfiable"; + case Solver::Result::Status::TimedOut: + return "TimedOut"; + } + llvm_unreachable("Unhandled SAT check result status"); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Solver::Result &R) { + OS << debugString(R.getStatus()) << "\n"; + if (auto Solution = R.getSolution()) { + std::vector<std::pair<Atom, Solver::Result::Assignment>> Sorted = { + Solution->begin(), Solution->end()}; + llvm::sort(Sorted); + for (const auto &Entry : Sorted) + OS << Entry.first << " = " << Entry.second << "\n"; + } + return OS; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp new file mode 100644 index 000000000000..ef7d23ff6c56 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp @@ -0,0 +1,94 @@ +//===- Formula.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <type_traits> + +namespace clang::dataflow { + +const Formula &Formula::create(llvm::BumpPtrAllocator &Alloc, Kind K, + ArrayRef<const Formula *> Operands, + unsigned Value) { + assert(Operands.size() == numOperands(K)); + if (Value != 0) // Currently, formulas have values or operands, not both. + assert(numOperands(K) == 0); + void *Mem = Alloc.Allocate(sizeof(Formula) + + Operands.size() * sizeof(Operands.front()), + alignof(Formula)); + Formula *Result = new (Mem) Formula(); + Result->FormulaKind = K; + Result->Value = Value; + // Operands are stored as `const Formula *`s after the formula itself. + // We don't need to construct an object as pointers are trivial types. 
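+  // For example, creating an `And` node over formulas X and Y allocates
+  // sizeof(Formula) + 2 * sizeof(const Formula *) bytes, with the two operand
+  // pointers placed immediately after the node itself.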
+ // Formula is alignas(const Formula *), so alignment is satisfied. + llvm::copy(Operands, reinterpret_cast<const Formula **>(Result + 1)); + return *Result; +} + +static llvm::StringLiteral sigil(Formula::Kind K) { + switch (K) { + case Formula::AtomRef: + case Formula::Literal: + return ""; + case Formula::Not: + return "!"; + case Formula::And: + return " & "; + case Formula::Or: + return " | "; + case Formula::Implies: + return " => "; + case Formula::Equal: + return " = "; + } + llvm_unreachable("unhandled formula kind"); +} + +void Formula::print(llvm::raw_ostream &OS, const AtomNames *Names) const { + if (Names && kind() == AtomRef) + if (auto It = Names->find(getAtom()); It != Names->end()) { + OS << It->second; + return; + } + + switch (numOperands(kind())) { + case 0: + switch (kind()) { + case AtomRef: + OS << getAtom(); + break; + case Literal: + OS << (literal() ? "true" : "false"); + break; + default: + llvm_unreachable("unhandled formula kind"); + } + break; + case 1: + OS << sigil(kind()); + operands()[0]->print(OS, Names); + break; + case 2: + OS << '('; + operands()[0]->print(OS, Names); + OS << sigil(kind()); + operands()[1]->print(OS, Names); + OS << ')'; + break; + default: + llvm_unreachable("unhandled formula arity"); + } +} + +} // namespace clang::dataflow
\ No newline at end of file
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
new file mode 100644
index 000000000000..a36cb41a63df
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
@@ -0,0 +1,584 @@
+//===-- HTMLLogger.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HTML logger. Given a directory dir/, we write
+// dir/0.html for the first analysis, etc.
+// These files contain a visualization that allows inspecting the CFG and the
+// state of the analysis at each point.
+// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc. are
+// embedded so each output file is self-contained.
+//
+// VIEWS
+//
+// The timeline and function view are always shown. These allow selecting basic
+// blocks, statements within them, and processing iterations (BBs are visited
+// multiple times when e.g. loops are involved).
+// These are written directly into the HTML body.
+//
+// There are also listings of particular basic blocks, and dumps of the state
+// at particular analysis points (e.g. BB2 iteration 3 statement 2).
+// These are only shown when the relevant BB/analysis point is *selected*.
+//
+// DATA AND TEMPLATES
+//
+// The HTML proper is mostly static.
+// The analysis data is in a JSON object HTMLLoggerData which is embedded as
+// a <script> in the <head>.
+// This gets rendered into DOM by a simple template processor which substitutes
+// the data into <template> tags embedded in the HTML. (see inflate() in JS).
+//
+// SELECTION
+//
+// This is the only real interactive mechanism.
+//
+// At any given time, there are several named selections, e.g.:
+//   bb: B2 (basic block 2 is selected)
+//   elt: B2.4 (statement 4 is selected)
+//   iter: B2:1 (iteration 1 of the basic block is selected)
+//   hover: B3 (hovering over basic block 3)
+//
+// The selection is updated by mouse events: hover by moving the mouse and
+// others by clicking. Elements that are click targets generally have
+// attributes (id or data-foo) that define what they should select.
+// See watchSelection() in JS for the exact logic.
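+// (Concretely, the selection is a plain JS object, e.g.
+//   {bb: "B2", elt: "B2.4", iter: "B2:1", hover: "B3"},
+// with one entry per named selection above.)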
+// +// When the "bb" selection is set to "B2": +// - sections <section data-selection="bb"> get shown +// - templates under such sections get re-rendered +// - elements with class/id "B2" get class "bb-select" +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +// Defines assets: HTMLLogger_{html_js,css} +#include "HTMLLogger.inc" + +namespace clang::dataflow { +namespace { + +// Render a graphviz graph specification to SVG using the `dot` tool. +llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); + +using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; + +// Recursively dumps Values/StorageLocations as JSON +class ModelDumper { +public: + ModelDumper(llvm::json::OStream &JOS, const Environment &Env) + : JOS(JOS), Env(Env) {} + + void dump(Value &V) { + JOS.attribute("value_id", llvm::to_string(&V)); + if (!Visited.insert(&V).second) + return; + + JOS.attribute("kind", debugString(V.getKind())); + + switch (V.getKind()) { + case Value::Kind::Integer: + case Value::Kind::TopBool: + case Value::Kind::AtomicBool: + case Value::Kind::FormulaBool: + break; + case Value::Kind::Pointer: + JOS.attributeObject( + "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); + break; + } + + for (const auto& Prop : V.properties()) + JOS.attributeObject(("p:" + Prop.first()).str(), + [&] { dump(*Prop.second); }); + + // Running the SAT solver is expensive, but knowing which booleans are + // guaranteed true/false here is valuable and hard to determine by hand. + if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { + JOS.attribute("formula", llvm::to_string(B->formula())); + JOS.attribute("truth", Env.proves(B->formula()) ? "true" + : Env.proves(Env.arena().makeNot(B->formula())) + ? 
"false" + : "unknown"); + } + } + void dump(const StorageLocation &L) { + JOS.attribute("location", llvm::to_string(&L)); + if (!Visited.insert(&L).second) + return; + + JOS.attribute("type", L.getType().getAsString()); + if (!L.getType()->isRecordType()) + if (auto *V = Env.getValue(L)) + dump(*V); + + if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) { + for (const auto &Child : RLoc->children()) + JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { + if (Child.second) + if (Value *Val = Env.getValue(*Child.second)) + dump(*Val); + }); + + for (const auto &SyntheticField : RLoc->synthetic_fields()) + JOS.attributeObject(("sf:" + SyntheticField.first()).str(), + [&] { dump(*SyntheticField.second); }); + } + } + + llvm::DenseSet<const void*> Visited; + llvm::json::OStream &JOS; + const Environment &Env; +}; + +class HTMLLogger : public Logger { + struct Iteration { + const CFGBlock *Block; + unsigned Iter; + bool PostVisit; + bool Converged; + }; + + StreamFactory Streams; + std::unique_ptr<llvm::raw_ostream> OS; + std::string JSON; + llvm::raw_string_ostream JStringStream{JSON}; + llvm::json::OStream JOS{JStringStream, /*Indent=*/2}; + + const AdornedCFG *ACFG; + // Timeline of iterations of CFG block visitation. + std::vector<Iteration> Iters; + // Indexes in `Iters` of the iterations for each block. + llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters; + // For a given block ID, did the block converge (on the last iteration)? + llvm::BitVector BlockConverged; + // The messages logged in the current context but not yet written. + std::string ContextLogs; + // The number of elements we have visited within the current CFG block. + unsigned ElementIndex; + +public: + explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} + void beginAnalysis(const AdornedCFG &ACFG, + TypeErasedDataflowAnalysis &A) override { + OS = Streams(); + this->ACFG = &ACFG; + *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; + + BlockConverged.resize(ACFG.getCFG().getNumBlockIDs()); + + const auto &D = ACFG.getDecl(); + const auto &SM = A.getASTContext().getSourceManager(); + *OS << "<title>"; + if (const auto *ND = dyn_cast<NamedDecl>(&D)) + *OS << ND->getNameAsString() << " at "; + *OS << SM.getFilename(D.getLocation()) << ":" + << SM.getSpellingLineNumber(D.getLocation()); + *OS << "</title>\n"; + + *OS << "<style>" << HTMLLogger_css << "</style>\n"; + *OS << "<script>" << HTMLLogger_js << "</script>\n"; + + writeCode(); + JOS.objectBegin(); + JOS.attributeBegin("states"); + JOS.objectBegin(); + } + // Between beginAnalysis() and endAnalysis() we write all the states for + // particular analysis points into the `timeline` array. 
+ void endAnalysis() override { + JOS.objectEnd(); + JOS.attributeEnd(); + + JOS.attributeArray("timeline", [&] { + for (const auto &E : Iters) { + JOS.object([&] { + JOS.attribute("block", blockID(E.Block->getBlockID())); + JOS.attribute("iter", E.Iter); + JOS.attribute("post_visit", E.PostVisit); + JOS.attribute("converged", E.Converged); + }); + } + }); + JOS.attributeObject("cfg", [&] { + for (const auto &E : BlockIters) + writeBlock(*E.first, E.second); + }); + + JOS.objectEnd(); + + writeCFG(); + + *OS << "<script>var HTMLLoggerData = \n"; + *OS << JSON; + *OS << ";\n</script>\n"; + *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; + } + + void enterBlock(const CFGBlock &B, bool PostVisit) override { + llvm::SmallVector<size_t> &BIter = BlockIters[&B]; + unsigned IterNum = BIter.size() + 1; + BIter.push_back(Iters.size()); + Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); + if (!PostVisit) + BlockConverged[B.getBlockID()] = false; + ElementIndex = 0; + } + void enterElement(const CFGElement &E) override { + ++ElementIndex; + } + + static std::string blockID(unsigned Block) { + return llvm::formatv("B{0}", Block); + } + static std::string eltID(unsigned Block, unsigned Element) { + return llvm::formatv("B{0}.{1}", Block, Element); + } + static std::string iterID(unsigned Block, unsigned Iter) { + return llvm::formatv("B{0}:{1}", Block, Iter); + } + static std::string elementIterID(unsigned Block, unsigned Iter, + unsigned Element) { + return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); + } + + // Write the analysis state associated with a particular analysis point. + // FIXME: this dump is fairly opaque. We should show: + // - values associated with the current Stmt + // - values associated with its children + // - meaningful names for values + // - which boolean values are implied true/false by the flow condition + void recordState(TypeErasedDataflowAnalysisState &State) override { + unsigned Block = Iters.back().Block->getBlockID(); + unsigned Iter = Iters.back().Iter; + bool PostVisit = Iters.back().PostVisit; + JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { + JOS.attribute("block", blockID(Block)); + JOS.attribute("iter", Iter); + JOS.attribute("post_visit", PostVisit); + JOS.attribute("element", ElementIndex); + + // If this state immediately follows an Expr, show its built-in model. + if (ElementIndex > 0) { + auto S = + Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>(); + if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) { + if (E->isPRValue()) { + if (!E->getType()->isRecordType()) + if (auto *V = State.Env.getValue(*E)) + JOS.attributeObject( + "value", [&] { ModelDumper(JOS, State.Env).dump(*V); }); + } else { + if (auto *Loc = State.Env.getStorageLocation(*E)) + JOS.attributeObject( + "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); }); + } + } + } + if (!ContextLogs.empty()) { + JOS.attribute("logs", ContextLogs); + ContextLogs.clear(); + } + { + std::string BuiltinLattice; + llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); + State.Env.dump(BuiltinLatticeS); + JOS.attribute("builtinLattice", BuiltinLattice); + } + }); + } + void blockConverged() override { + Iters.back().Converged = true; + BlockConverged[Iters.back().Block->getBlockID()] = true; + } + + void logText(llvm::StringRef S) override { + ContextLogs.append(S.begin(), S.end()); + ContextLogs.push_back('\n'); + } + +private: + // Write the CFG block details. 
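+  // The entry for one block looks roughly like (illustrative):
+  //   "B2": {"iters": [{"iter": 1, "post_visit": false, "converged": true}],
+  //          "elements": ["x = y", "return x"]}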
+  // Currently this is just the list of elements in execution order.
+  // FIXME: an AST dump would be a useful view, too.
+  void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
+    JOS.attributeObject(blockID(B.getBlockID()), [&] {
+      JOS.attributeArray("iters", [&] {
+        for (size_t IterIdx : ItersForB) {
+          const Iteration &Iter = Iters[IterIdx];
+          JOS.object([&] {
+            JOS.attribute("iter", Iter.Iter);
+            JOS.attribute("post_visit", Iter.PostVisit);
+            JOS.attribute("converged", Iter.Converged);
+          });
+        }
+      });
+      JOS.attributeArray("elements", [&] {
+        for (const auto &Elt : B.Elements) {
+          std::string Dump;
+          llvm::raw_string_ostream DumpS(Dump);
+          Elt.dumpToStream(DumpS);
+          JOS.value(Dump);
+        }
+      });
+    });
+  }
+
+  // Write the code of the function being examined.
+  // We want to overlay the code with <span>s that mark which BB particular
+  // tokens are associated with, and even which BB element (so that clicking
+  // can select the right element).
+  void writeCode() {
+    const auto &AST = ACFG->getDecl().getASTContext();
+    bool Invalid = false;
+
+    // Extract the source code from the original file.
+    // Pretty-printing from the AST would probably be nicer (no macros or
+    // indentation to worry about), but we need the boundaries of particular
+    // AST nodes and the printer doesn't provide this.
+    auto Range = clang::Lexer::makeFileCharRange(
+        CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()),
+        AST.getSourceManager(), AST.getLangOpts());
+    if (Range.isInvalid())
+      return;
+    llvm::StringRef Code = clang::Lexer::getSourceText(
+        Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
+    if (Invalid)
+      return;
+
+    // TokenInfo stores the BB and set of elements that a token is part of.
+    struct TokenInfo {
+      enum : unsigned { Missing = static_cast<unsigned>(-1) };
+
+      // The basic block this is part of.
+      // This is the BB of the stmt with the smallest containing range.
+      unsigned BB = Missing;
+      unsigned BBPriority = 0;
+      // The most specific stmt this is part of (smallest range).
+      unsigned Elt = Missing;
+      unsigned EltPriority = 0;
+      // All stmts this is part of.
+      SmallVector<unsigned> Elts;
+
+      // Mark this token as being part of BB.Elt.
+      // RangeLen is the character length of the element's range, used to
+      // distinguish inner vs outer statements.
+      // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
+      // However "a" has a smaller range, so is more specific. Clicking on the
+      // token "a" should select the stmt "a".
+      void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
+        // A worse BB (larger range) => ignore.
+        if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
+          return;
+        if (BB != this->BB) {
+          this->BB = BB;
+          Elts.clear();
+          BBPriority = RangeLen;
+        }
+        BBPriority = std::min(BBPriority, RangeLen);
+        Elts.push_back(Elt);
+        if (this->Elt == Missing || EltPriority > RangeLen) {
+          this->Elt = Elt;
+          EltPriority = RangeLen;
+        }
+      }
+      bool operator==(const TokenInfo &Other) const {
+        return std::tie(BB, Elt, Elts) ==
+               std::tie(Other.BB, Other.Elt, Other.Elts);
+      }
+      // Write the attributes for the <span> on this token.
+      void write(llvm::raw_ostream &OS) const {
+        OS << "class='c";
+        if (BB != Missing)
+          OS << " " << blockID(BB);
+        for (unsigned Elt : Elts)
+          OS << " " << eltID(BB, Elt);
+        OS << "'";
+
+        if (Elt != Missing)
+          OS << " data-elt='" << eltID(BB, Elt) << "'";
+        if (BB != Missing)
+          OS << " data-bb='" << blockID(BB) << "'";
+      }
+    };
+
+    // Construct one TokenInfo per character in a flat array.
+    // This is inefficient (chars in a token all have the same info) but simple.
+    std::vector<TokenInfo> State(Code.size());
+    for (const auto *Block : ACFG->getCFG()) {
+      unsigned EltIndex = 0;
+      for (const auto& Elt : *Block) {
+        ++EltIndex;
+        if (const auto S = Elt.getAs<CFGStmt>()) {
+          auto EltRange = clang::Lexer::makeFileCharRange(
+              CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
+              AST.getSourceManager(), AST.getLangOpts());
+          if (EltRange.isInvalid())
+            continue;
+          // Skip elements that lie (partly) outside the printed range.
+          if (EltRange.getBegin() < Range.getBegin() ||
+              EltRange.getBegin() >= Range.getEnd() ||
+              EltRange.getEnd() < Range.getBegin() ||
+              EltRange.getEnd() >= Range.getEnd())
+            continue;
+          unsigned Off = EltRange.getBegin().getRawEncoding() -
+                         Range.getBegin().getRawEncoding();
+          unsigned Len = EltRange.getEnd().getRawEncoding() -
+                         EltRange.getBegin().getRawEncoding();
+          for (unsigned I = 0; I < Len; ++I)
+            State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
+        }
+      }
+    }
+
+    // Finally, write the code with the correct <span>s.
+    unsigned Line =
+        AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
+    *OS << "<template data-copy='code'>\n";
+    *OS << "<code class='filename'>";
+    llvm::printHTMLEscaped(
+        llvm::sys::path::filename(
+            AST.getSourceManager().getFilename(Range.getBegin())),
+        *OS);
+    *OS << "</code>";
+    *OS << "<code class='line' data-line='" << Line++ << "'>";
+    for (unsigned I = 0; I < Code.size(); ++I) {
+      // Don't actually write a <span> around each character, only break spans
+      // when the TokenInfo changes.
+      bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
+      bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
+      if (NeedOpen) {
+        *OS << "<span ";
+        State[I].write(*OS);
+        *OS << ">";
+      }
+      if (Code[I] == '\n')
+        *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
+      else
+        llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
+      if (NeedClose) *OS << "</span>";
+    }
+    *OS << "</code>\n";
+    *OS << "</template>";
+  }
+
+  // Write the CFG diagram, a graph of basic blocks.
+  // Laying out graphs is hard, so we construct a graphviz description and shell
+  // out to `dot` to turn it into an SVG.
+  void writeCFG() {
+    *OS << "<template data-copy='cfg'>\n";
+    if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG())))
+      *OS << *SVG;
+    else
+      *OS << "Can't draw CFG: " << toString(SVG.takeError());
+    *OS << "</template>\n";
+  }
+
+  // Produce a graphviz description of a CFG.
+  std::string buildCFGDot(const clang::CFG &CFG) {
+    std::string Graph;
+    llvm::raw_string_ostream GraphS(Graph);
+    // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
+    GraphS << R"(digraph {
+  tooltip=" "
+  node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
+  edge[tooltip = " "]
+)";
+    for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) {
+      std::string Name = blockID(I);
+      // Rightwards arrow, vertical line
+      const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c";
+      if (BlockConverged[I])
+        Name += ConvergenceMarker;
+      GraphS << "  " << blockID(I) << " [id=" << blockID(I) << " label=\""
+             << Name << "\"]\n";
+    }
+    for (const auto *Block : CFG) {
+      for (const auto &Succ : Block->succs()) {
+        if (Succ.getReachableBlock())
+          GraphS << "  " << blockID(Block->getBlockID()) << " -> "
+                 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
+      }
+    }
+    GraphS << "}\n";
+    return Graph;
+  }
+};
+
+// Nothing interesting here, just subprocess/temp-file plumbing.
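+// The effective pipeline is `dot -Tsvg < input.dot > output.svg`, with the
+// GRAPHVIZ_DOT environment variable (if set) overriding which binary runs.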
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { + std::string DotPath; + if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) + DotPath = FromEnv; + else { + auto FromPath = llvm::sys::findProgramByName("dot"); + if (!FromPath) + return llvm::createStringError(FromPath.getError(), + "'dot' not found on PATH"); + DotPath = FromPath.get(); + } + + // Create input and output files for `dot` subprocess. + // (We create the output file as empty, to reserve the temp filename). + llvm::SmallString<256> Input, Output; + int InputFD; + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, + Input)) + return llvm::createStringError(EC, "failed to create `dot` temp input"); + llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; + auto DeleteInput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) + return llvm::createStringError(EC, "failed to create `dot` temp output"); + auto DeleteOutput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); + + std::vector<std::optional<llvm::StringRef>> Redirects = { + Input, Output, + /*stderr=*/std::nullopt}; + std::string ErrMsg; + int Code = llvm::sys::ExecuteAndWait( + DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); + if (!ErrMsg.empty()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed: " + ErrMsg); + if (Code != 0) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed (" + llvm::Twine(Code) + ")"); + + auto Buf = llvm::MemoryBuffer::getFile(Output); + if (!Buf) + return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); + + // Output has <?xml> prefix we don't want. Skip to <svg> tag. + llvm::StringRef Result = Buf.get()->getBuffer(); + auto Pos = Result.find("<svg"); + if (Pos == llvm::StringRef::npos) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Can't find <svg> tag in `dot` output"); + return Result.substr(Pos).str(); +} + +} // namespace + +std::unique_ptr<Logger> +Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { + return std::make_unique<HTMLLogger>(std::move(Streams)); +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css new file mode 100644 index 000000000000..e25270430efc --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css @@ -0,0 +1,169 @@ +/*===-- HTMLLogger.css ----------------------------------------------------=== +* +* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +* See https://llvm.org/LICENSE.txt for license information. 
+* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +* +*===----------------------------------------------------------------------===*/ +html { font-family: sans-serif; } +body { margin: 0; display: flex; justify-content: left; } +body > * { box-sizing: border-box; } +body > section { + border: 1px solid black; + min-width: 20em; + overflow: auto; + max-height: 100vh; +} +section header { + background-color: #008; + color: white; + font-weight: bold; + font-size: large; + padding-right: 0.5em; +} +section h2 { + font-size: medium; + margin-bottom: 0.5em; + padding-top: 0.5em; + border-top: 1px solid #aaa; +} +#timeline { + min-width: max-content; + counter-reset: entry_counter; +} +#timeline .entry .counter::before { + counter-increment: entry_counter; + content: counter(entry_counter) ":"; +} +#timeline .entry .counter { + display: inline-block; + min-width: 2em; /* Enough space for two digits and a colon */ + text-align: right; +} +#timeline .entry.hover { + background-color: #aaa; +} +#timeline .entry.iter-select { + background-color: #aac; +} + +#bb-elements { + font-family: monospace; + font-size: x-small; + border-collapse: collapse; +} +#bb-elements td:nth-child(1) { + text-align: right; + width: 4em; + border-right: 1px solid #008; + padding: 0.3em 0.5em; + + font-weight: bold; + color: #888; +}; +#bb-elements tr.hover { + background-color: #abc; +} +#bb-elements tr.elt-select { + background-color: #acf; +} +#iterations { + display: flex; +} +#iterations .chooser { + flex-grow: 1; + text-align: center; + padding-left: 0.2em; +} +#iterations .chooser :last-child { + padding-right: 0.2em; +} +#iterations .chooser:not(.iter-select).hover { + background-color: #ddd; +} +#iterations .iter-select { + font-weight: bold; +} +#iterations .chooser:not(.iter-select) { + text-decoration: underline; + color: blue; + cursor: pointer; + background-color: #ccc; +} + +code.filename { + font-weight: bold; + color: black; + background-color: #ccc; + display: block; + text-align: center; +} +code.line { + display: block; + white-space: pre; +} +code.line:before { /* line numbers */ + content: attr(data-line); + display: inline-block; + width: 2em; + text-align: right; + padding-right: 2px; + background-color: #ccc; + border-right: 1px solid #888; + margin-right: 8px; +} +code.line:has(.bb-select):before { + border-right: 4px solid black; + margin-right: 5px; +} +.c.hover, .bb.hover { + filter: saturate(200%) brightness(90%); +} +.c.elt-select { + box-shadow: inset 0 -4px 2px -2px #a00; +} +.bb.bb-select polygon { + stroke-width: 4px; + filter: brightness(70%) saturate(150%); +} +.bb { user-select: none; } +.bb polygon { fill: white; } +#cfg { + position: relative; + margin-left: 0.5em; +} + +.value { + border: 1px solid #888; + font-size: x-small; + flex-grow: 1; +} +.value > summary { + background-color: #ace; + display: flex; + cursor: pointer; +} +.value > summary::before { + content: '\25ba'; /* Black Right-Pointing Pointer */ + margin-right: 0.5em; + font-size: 0.9em; +} +.value[open] > summary::before { + content: '\25bc'; /* Black Down-Pointing Triangle */ +} +.value > summary > .location { + margin-left: auto; +} +.value .address { + font-size: xx-small; + font-family: monospace; + color: #888; +} +.value .property { + display: flex; + margin-top: 0.5em; +} +.value .property .key { + font-weight: bold; + min-width: 5em; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html new file 
mode 100644
index 000000000000..be173e8b2854
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html
@@ -0,0 +1,119 @@
+<!doctype html>
+<html>
+<!-- HTMLLogger.html ---------------------------------------------------
+
+ Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ See https://llvm.org/LICENSE.txt for license information.
+ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===------------------------------------------------------------------------>
+
+<head>
+<?INJECT?>
+
+<template id="value-template">
+  <details class="value" open>
+    <summary>
+      <span>{{v.kind}}
+        <template data-if="v.value_id"><span class="address">#{{v.value_id}}</span></template>
+      </span>
+      <template data-if="v.location">
+        <span class="location">{{v.type}} <span class="address">@{{v.location}}</span></span>
+      </template>
+    </summary>
+    <template
+        data-for="kv in Object.entries(v)"
+        data-if="['kind', 'value_id', 'type', 'location'].indexOf(kv[0]) < 0">
+      <div class="property"><span class="key">{{kv[0]}}</span>
+        <template data-if="typeof(kv[1]) != 'object'">{{kv[1]}}</template>
+        <template data-if="typeof(kv[1]) == 'object'" data-let="v = kv[1]">
+          <template data-use="value-template"></template>
+        </template>
+      </div>
+    </template>
+  </details>
+</template>
+
+</head>
+
+<body>
+
+<section id="timeline" data-selection="">
+<header>Timeline</header>
+<template data-for="entry in timeline">
+  <div id="{{entry.block}}:{{entry.iter}}" data-bb="{{entry.block}}" class="entry">
+    <span class="counter"></span>
+    {{entry.block}}
+    <template data-if="entry.post_visit">(post-visit)</template>
+    <template data-if="!entry.post_visit">({{entry.iter}})</template>
+    <template data-if="entry.converged"> →|<!--Rightwards arrow, vertical line--></template>
+  </div>
+</template>
+</section>
+
+<section id="function" data-selection="">
+<header>Function</header>
+<div id="code"></div>
+<div id="cfg"></div>
+</section>
+
+<section id="block" data-selection="bb">
+<header><template>Block {{selection.bb}}</template></header>
+<div id="iterations">
+  <template data-for="iter in cfg[selection.bb].iters">
+    <a class="chooser {{selection.bb}}:{{iter.iter}}" data-iter="{{selection.bb}}:{{iter.iter}}">
+      <template data-if="iter.post_visit">Post-visit</template>
+      <template data-if="!iter.post_visit">{{iter.iter}}</template>
+      <template data-if="iter.converged"> →|<!--Rightwards arrow, vertical line--></template>
+    </a>
+  </template>
+</div>
+<table id="bb-elements">
+<template>
+  <tr id="{{selection.bb}}.0">
+    <td class="{{selection.bb}}">{{selection.bb}}.0</td>
+    <td>(initial state)</td>
+  </tr>
+</template>
+<template data-for="elt in cfg[selection.bb].elements">
+  <tr id="{{selection.bb}}.{{elt_index+1}}">
+    <td class="{{selection.bb}}">{{selection.bb}}.{{elt_index+1}}</td>
+    <td>{{elt}}</td>
+  </tr>
+</template>
+</table>
+</section>
+
+<section id="element" data-selection="iter,elt">
+<template data-let="state = states[selection.iter + '_' + selection.elt]">
+<header>
+  <template data-if="state.element == 0">{{state.block}} initial state</template>
+  <template data-if="state.element != 0">Element {{selection.elt}}</template>
+  <template data-if="state.post_visit"> (post-visit)</template>
+  <template data-if="!state.post_visit"> (iteration {{state.iter}})</template>
+</header>
+<template data-if="state.value" data-let="v = state.value">
+  <h2>Value</h2>
+  <template data-use="value-template"></template>
+</template>
+<template data-if="state.logs">
+  <h2>Logs</h2>
+  <pre>{{state.logs}}</pre>
+</template>
+<h2>Built-in lattice</h2>
+<pre>{{state.builtinLattice}}</pre>
+</template>
+</section>
+
+<script>
+addBBColors(Object.keys(HTMLLoggerData.cfg).length);
+watchSelection(HTMLLoggerData);
+updateSelection({}, HTMLLoggerData);
+// Copy code and cfg from <template>s into the body.
+for (tmpl of document.querySelectorAll('template[data-copy]'))
+  document.getElementById(tmpl.dataset.copy).replaceChildren(
+      ...tmpl.content.cloneNode(/*deep=*/true).childNodes);
+</script>
+
+</body>
+</html>
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js
new file mode 100644
index 000000000000..6e04bc00f663
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js
@@ -0,0 +1,219 @@
+//===-- HTMLLogger.js -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Based on selected objects, hide/show sections & populate data from templates.
+//
+// For example, if the selection is {bb="BB4", elt="BB4.6", iter="BB4:2"}:
+//  - show the "block" and "element" sections
+//  - re-render templates within these sections (if selection changed)
+//  - apply "bb-select" to items with class/id "BB4", etc.
+let selection = {};
+function updateSelection(changes, data) {
+  Object.assign(selection, changes);
+
+  data = Object.create(data);
+  data.selection = selection;
+  for (root of document.querySelectorAll('[data-selection]'))
+    updateSection(root, data);
+
+  for (var k in changes)
+    applyClassIf(k + '-select', classSelector(changes[k]));
+}
+
+// Given <section data-selection="x,y">:
+//  - hide section if selections x or y are null
+//  - re-render templates if x or y have changed
+function updateSection(root, data) {
+  let changed = root.selection == null;
+  root.selection ||= {};
+  for (key of root.dataset.selection.split(',')) {
+    if (!key) continue;
+    if (data.selection[key] != root.selection[key]) {
+      root.selection[key] = data.selection[key];
+      changed = true;
+    }
+    if (data.selection[key] == null) {
+      root.hidden = true;
+      return;
+    }
+  }
+  if (changed) {
+    root.hidden = false;
+    for (tmpl of root.getElementsByTagName('template'))
+      reinflate(tmpl, data);
+  }
+}
+
+// Expands template `tmpl` based on input `data`:
+//  - interpolates {{expressions}} in text and attributes
+//  - <template> tags can modify expansion: if, for etc.
+// Outputs to `parent` element, inserting before `next`.
+function inflate(tmpl, data, parent, next) {
+  // We use eval() as our expression language in templates!
+  // The templates are static and trusted.
+  let evalExpr = (expr, data) => eval('with (data) { ' + expr + ' }');
+  let interpolate = (str, data) =>
+      str.replace(/\{\{(.*?)\}\}/g, (_, expr) => evalExpr(expr, data));
+  // Anything other than <template> tag: copy, interpolate, recursively inflate.
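+  // e.g. inflating the node <span>{{entry.block}}</span> against data
+  // {entry: {block: 'B2'}} inserts <span>B2</span> into `parent`.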
+ if (tmpl.nodeName != 'TEMPLATE') { + let clone = tmpl.cloneNode(); + clone.inflated = true; + if (clone instanceof Text) + clone.textContent = interpolate(clone.textContent, data); + if (clone instanceof Element) { + for (attr of clone.attributes) + attr.value = interpolate(attr.value, data); + for (c of tmpl.childNodes) + inflate(c, data, clone, /*next=*/null); + } + return parent.insertBefore(clone, next); + } + // data-use="xyz": use <template id="xyz"> instead. (Allows recursion.) + if ('use' in tmpl.dataset) + return inflate(document.getElementById(tmpl.dataset.use), data, parent, next); + // <template> tag handling. Base case: recursively inflate. + function handle(data) { + for (c of tmpl.content.childNodes) + inflate(c, data, parent, next); + } + // Directives on <template> tags modify behavior. + const directives = { + // data-for="x in expr": expr is enumerable, bind x to each in turn + 'for': (nameInExpr, data, proceed) => { + let [name, expr] = nameInExpr.split(' in '); + let newData = Object.create(data); + let index = 0; + for (val of evalExpr(expr, data) || []) { + newData[name] = val; + newData[name + '_index'] = index++; + proceed(newData); + } + }, + // data-if="expr": only include contents if expression is truthy + 'if': (expr, data, proceed) => { if (evalExpr(expr, data)) proceed(data); }, + // data-let="x = expr": bind x to value of expr + 'let': (nameEqExpr, data, proceed) => { + let [name, expr] = nameEqExpr.split(' = '); + let newData = Object.create(data); + newData[name] = evalExpr(expr, data); + proceed(newData); + }, + } + // Compose directive handlers on top of the base handler. + for (let [dir, value] of Object.entries(tmpl.dataset).reverse()) { + if (dir in directives) { + let proceed = handle; + handle = (data) => directives[dir](value, data, proceed); + } + } + handle(data); +} +// Expand a template, after first removing any prior expansion of it. +function reinflate(tmpl, data) { + // Clear previously rendered template contents. + while (tmpl.nextSibling && tmpl.nextSibling.inflated) + tmpl.parentNode.removeChild(tmpl.nextSibling); + inflate(tmpl, data, tmpl.parentNode, tmpl.nextSibling); +} + +// Handle a mouse event on a region containing selectable items. +// This might end up changing the hover state or the selection state. +// +// targetSelector describes what target HTML element is selectable. +// targetToID specifies how to determine the selection from it: +// hover: a function from target to the class name to highlight +// bb: a function from target to the basic-block name to select (BB4) +// elt: a function from target to the CFG element name to select (BB4.5) +// iter: a function from target to the BB iteration to select (BB4:2) +// If an entry is missing, the selection is unmodified. +// If an entry is null, the selection is always cleared. +function mouseEventHandler(event, targetSelector, targetToID, data) { + var target = event.type == "mouseout" ? null : event.target.closest(targetSelector); + let selTarget = k => (target && targetToID[k]) ? 
targetToID[k](target) : null; + if (event.type == "click") { + let newSel = {}; + for (var k in targetToID) { + if (k == 'hover') continue; + let t = selTarget(k); + newSel[k] = t; + } + updateSelection(newSel, data); + } else if ("hover" in targetToID) { + applyClassIf("hover", classSelector(selTarget("hover"))); + } +} +function watch(rootSelector, targetSelector, targetToID, data) { + var root = document.querySelector(rootSelector); + for (event of ['mouseout', 'mousemove', 'click']) + root.addEventListener(event, e => mouseEventHandler(e, targetSelector, targetToID, data)); +} +function watchSelection(data) { + let lastIter = (bb) => `${bb}:${data.cfg[bb].iters}`; + watch('#code', '.c', { + hover: e => e.dataset.elt, + bb: e => e.dataset.bb, + elt: e => e.dataset.elt, + // If we're already viewing an iteration of this BB, stick with the same. + iter: e => (selection.iter && selection.bb == e.dataset.bb) ? selection.iter : lastIter(e.dataset.bb), + }, data); + watch('#cfg', '.bb', { + hover: e => e.id, + bb: e => e.id, + elt: e => e.id + ".0", + iter: e => lastIter(e.id), + }, data); + watch('#timeline', '.entry', { + hover: e => [e.id, e.dataset.bb], + bb: e => e.dataset.bb, + elt: e => e.dataset.bb + ".0", + iter: e => e.id, + }, data); + watch('#bb-elements', 'tr', { + hover: e => e.id, + elt: e => e.id, + }, data); + watch('#iterations', '.chooser', { + hover: e => e.dataset.iter, + iter: e => e.dataset.iter, + }, data); + updateSelection({}, data); +} +function applyClassIf(cls, query) { + document.querySelectorAll('.' + cls).forEach(elt => elt.classList.remove(cls)); + document.querySelectorAll(query).forEach(elt => elt.classList.add(cls)); +} +// Turns a class name into a CSS selector matching it, with some wrinkles: +// - we treat id="foo" just like class="foo" to avoid repetition in the HTML +// - cls can be an array of strings, we match them all +function classSelector(cls) { + if (cls == null) return null; + if (Array.isArray(cls)) return cls.map(classSelector).join(', '); + var escaped = cls.replace('.', '\\.').replace(':', '\\:'); + // don't require id="foo" class="foo" + return '.' + escaped + ", #" + escaped; +} + +// Add a stylesheet defining colors for n basic blocks. +function addBBColors(n) { + let sheet = new CSSStyleSheet(); + // hex values to subtract from fff to get a base color + options = [0x001, 0x010, 0x011, 0x100, 0x101, 0x110, 0x111]; + function color(hex) { + return "#" + hex.toString(16).padStart(3, "0"); + } + function add(selector, property, hex) { + sheet.insertRule(`${selector} { ${property}: ${color(hex)}; }`) + } + for (var i = 0; i < n; ++i) { + let opt = options[i%options.length]; + add(`.B${i}`, 'background-color', 0xfff - 2*opt); + add(`#B${i} polygon`, 'fill', 0xfff - 2*opt); + add(`#B${i} polygon`, 'stroke', 0x888 - 4*opt); + } + document.adoptedStyleSheets.push(sheet); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp new file mode 100644 index 000000000000..8f40768171c9 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp @@ -0,0 +1,111 @@ +//===-- Logger.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/AdornedCFG.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/Support/WithColor.h"
+
+namespace clang::dataflow {
+
+Logger &Logger::null() {
+  struct NullLogger final : Logger {};
+  static auto *Instance = new NullLogger();
+  return *Instance;
+}
+
+namespace {
+struct TextualLogger final : Logger {
+  llvm::raw_ostream &OS;
+  const CFG *CurrentCFG;
+  const CFGBlock *CurrentBlock;
+  const CFGElement *CurrentElement;
+  unsigned CurrentElementIndex;
+  bool ShowColors;
+  llvm::DenseMap<const CFGBlock *, unsigned> VisitCount;
+  TypeErasedDataflowAnalysis *CurrentAnalysis;
+
+  TextualLogger(llvm::raw_ostream &OS)
+      : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {}
+
+  virtual void beginAnalysis(const AdornedCFG &ACFG,
+                             TypeErasedDataflowAnalysis &Analysis) override {
+    {
+      llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+      OS << "=== Beginning data flow analysis ===\n";
+    }
+    auto &D = ACFG.getDecl();
+    D.print(OS);
+    OS << "\n";
+    D.dump(OS);
+    CurrentCFG = &ACFG.getCFG();
+    CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors);
+    CurrentAnalysis = &Analysis;
+  }
+  virtual void endAnalysis() override {
+    llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+    unsigned Blocks = 0, Steps = 0;
+    for (const auto &E : VisitCount) {
+      ++Blocks;
+      Steps += E.second;
+    }
+    OS << "=== Finished analysis: " << Blocks << " blocks in " << Steps
+       << " total steps ===\n";
+  }
+  virtual void enterBlock(const CFGBlock &Block, bool PostVisit) override {
+    unsigned Count = ++VisitCount[&Block];
+    {
+      llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+      OS << "=== Entering block B" << Block.getBlockID();
+      if (PostVisit)
+        OS << " (post-visit)";
+      else
+        OS << " (iteration " << Count << ")";
+      OS << " ===\n";
+    }
+    Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(),
+                ShowColors);
+    CurrentBlock = &Block;
+    CurrentElement = nullptr;
+    CurrentElementIndex = 0;
+  }
+  virtual void enterElement(const CFGElement &Element) override {
+    ++CurrentElementIndex;
+    CurrentElement = &Element;
+    {
+      llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN,
+                                /*Bold=*/true);
+      OS << "Processing element B" << CurrentBlock->getBlockID() << "."
+         << CurrentElementIndex << ": ";
+      Element.dumpToStream(OS);
+    }
+  }
+  void recordState(TypeErasedDataflowAnalysisState &State) override {
+    {
+      llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN,
+                                /*Bold=*/true);
+      OS << "Computed state for B" << CurrentBlock->getBlockID() << "."
+         << CurrentElementIndex << ":\n";
+    }
+    // FIXME: currently the environment dump is verbose and unenlightening.
+    // FIXME: dump the user-defined lattice, too.
+    State.Env.dump(OS);
+    OS << "\n";
+  }
+  void blockConverged() override {
+    OS << "B" << CurrentBlock->getBlockID() << " has converged!\n";
+  }
+  virtual void logText(llvm::StringRef S) override { OS << S << "\n"; }
+};
+} // namespace
+
+std::unique_ptr<Logger> Logger::textual(llvm::raw_ostream &OS) {
+  return std::make_unique<TextualLogger>(OS);
+}
+
+} // namespace clang::dataflow
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
new file mode 100644
index 000000000000..5ac71e1d6bf6
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
@@ -0,0 +1,71 @@
+//===-- ChromiumCheckModel.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Models/ChromiumCheckModel.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "llvm/ADT/DenseSet.h"
+
+namespace clang {
+namespace dataflow {
+
+/// Determines whether `D` is one of the methods used to implement Chromium's
+/// `CHECK` macros. Populates `CheckDecls`, if empty.
+bool isCheckLikeMethod(llvm::SmallDenseSet<const CXXMethodDecl *> &CheckDecls,
+                       const CXXMethodDecl &D) {
+  // All of the methods of interest are static, so avoid any lookup for
+  // non-static methods (the common case).
+  if (!D.isStatic())
+    return false;
+
+  if (CheckDecls.empty()) {
+    // Attempt to initialize `CheckDecls` with the methods in class
+    // `CheckError`.
+    const CXXRecordDecl *ParentClass = D.getParent();
+    if (ParentClass == nullptr || !ParentClass->getDeclName().isIdentifier() ||
+        ParentClass->getName() != "CheckError")
+      return false;
+
+    // Check whether namespace is "logging".
+    const auto *N =
+        dyn_cast_or_null<NamespaceDecl>(ParentClass->getDeclContext());
+    if (N == nullptr || !N->getDeclName().isIdentifier() ||
+        N->getName() != "logging")
+      return false;
+
+    // Check whether "logging" is a top-level namespace.
+    if (N->getParent() == nullptr || !N->getParent()->isTranslationUnit())
+      return false;
+
+    for (const CXXMethodDecl *M : ParentClass->methods())
+      if (M->getDeclName().isIdentifier() && M->getName().ends_with("Check"))
+        CheckDecls.insert(M);
+  }
+
+  return CheckDecls.contains(&D);
+}
+
+bool ChromiumCheckModel::transfer(const CFGElement &Element, Environment &Env) {
+  auto CS = Element.getAs<CFGStmt>();
+  if (!CS)
+    return false;
+  auto Stmt = CS->getStmt();
+  if (const auto *Call = dyn_cast<CallExpr>(Stmt)) {
+    // `getDirectCallee()` can return null for indirect calls, so use the
+    // null-tolerant cast.
+    if (const auto *M =
+            dyn_cast_or_null<CXXMethodDecl>(Call->getDirectCallee())) {
+      if (isCheckLikeMethod(CheckDecls, *M)) {
+        // Mark this branch as unreachable.
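+        // Assuming a literal `false` makes this path's flow condition
+        // unsatisfiable, so code dominated by a failed CHECK is treated as
+        // unreachable by later analyses.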
+ Env.assume(Env.arena().makeLiteral(false)); + return true; + } + } + } + return false; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp new file mode 100644 index 000000000000..0707aa662e4c --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -0,0 +1,944 @@ +//===-- UncheckedOptionalAccessModel.cpp ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a dataflow analysis that detects unsafe uses of optional +// values. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersMacros.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <memory> +#include <optional> +#include <utility> + +namespace clang { +namespace dataflow { + +static bool isTopLevelNamespaceWithName(const NamespaceDecl &NS, + llvm::StringRef Name) { + return NS.getDeclName().isIdentifier() && NS.getName() == Name && + NS.getParent() != nullptr && NS.getParent()->isTranslationUnit(); +} + +static bool hasOptionalClassName(const CXXRecordDecl &RD) { + if (!RD.getDeclName().isIdentifier()) + return false; + + if (RD.getName() == "optional") { + if (const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext())) + return N->isStdNamespace() || isTopLevelNamespaceWithName(*N, "absl"); + return false; + } + + if (RD.getName() == "Optional") { + // Check whether namespace is "::base" or "::folly". 
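+    // Taken together, the name checks in this function accept
+    // `std::optional`, `absl::optional`, `base::Optional`, and
+    // `folly::Optional` only; the same class name in any other namespace
+    // (say, a hypothetical `mylib::optional`) or in a nested namespace
+    // (e.g. `foo::absl::optional`) is rejected.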
+ const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext()); + return N != nullptr && (isTopLevelNamespaceWithName(*N, "base") || + isTopLevelNamespaceWithName(*N, "folly")); + } + + return false; +} + +static const CXXRecordDecl *getOptionalBaseClass(const CXXRecordDecl *RD) { + if (RD == nullptr) + return nullptr; + if (hasOptionalClassName(*RD)) + return RD; + + if (!RD->hasDefinition()) + return nullptr; + + for (const CXXBaseSpecifier &Base : RD->bases()) + if (const CXXRecordDecl *BaseClass = + getOptionalBaseClass(Base.getType()->getAsCXXRecordDecl())) + return BaseClass; + + return nullptr; +} + +namespace { + +using namespace ::clang::ast_matchers; +using LatticeTransferState = TransferState<NoopLattice>; + +AST_MATCHER(CXXRecordDecl, optionalClass) { return hasOptionalClassName(Node); } + +AST_MATCHER(CXXRecordDecl, optionalOrDerivedClass) { + return getOptionalBaseClass(&Node) != nullptr; +} + +auto desugarsToOptionalType() { + return hasUnqualifiedDesugaredType( + recordType(hasDeclaration(cxxRecordDecl(optionalClass())))); +} + +auto desugarsToOptionalOrDerivedType() { + return hasUnqualifiedDesugaredType( + recordType(hasDeclaration(cxxRecordDecl(optionalOrDerivedClass())))); +} + +auto hasOptionalType() { return hasType(desugarsToOptionalType()); } + +/// Matches any of the spellings of the optional types and sugar, aliases, +/// derived classes, etc. +auto hasOptionalOrDerivedType() { + return hasType(desugarsToOptionalOrDerivedType()); +} + +QualType getPublicType(const Expr *E) { + auto *Cast = dyn_cast<ImplicitCastExpr>(E->IgnoreParens()); + if (Cast == nullptr || Cast->getCastKind() != CK_UncheckedDerivedToBase) { + QualType Ty = E->getType(); + if (Ty->isPointerType()) + return Ty->getPointeeType(); + return Ty; + } + + // Is the derived type that we're casting from the type of `*this`? In this + // special case, we can upcast to the base class even if the base is + // non-public. + bool CastingFromThis = isa<CXXThisExpr>(Cast->getSubExpr()); + + // Find the least-derived type in the path (i.e. the last entry in the list) + // that we can access. + const CXXBaseSpecifier *PublicBase = nullptr; + for (const CXXBaseSpecifier *Base : Cast->path()) { + if (Base->getAccessSpecifier() != AS_public && !CastingFromThis) + break; + PublicBase = Base; + CastingFromThis = false; + } + + if (PublicBase != nullptr) + return PublicBase->getType(); + + // We didn't find any public type that we could cast to. There may be more + // casts in `getSubExpr()`, so recurse. (If there aren't any more casts, this + // will return the type of `getSubExpr()`.) + return getPublicType(Cast->getSubExpr()); +} + +// Returns the least-derived type for the receiver of `MCE` that +// `MCE.getImplicitObjectArgument()->IgnoreParentImpCasts()` can be downcast to. +// Effectively, we upcast until we reach a non-public base class, unless that +// base is a base of `*this`. +// +// This is needed to correctly match methods called on types derived from +// `std::optional`. +// +// Say we have a `struct Derived : public std::optional<int> {} d;` For a call +// `d.has_value()`, the `getImplicitObjectArgument()` looks like this: +// +// ImplicitCastExpr 'const std::__optional_storage_base<int>' lvalue +// | <UncheckedDerivedToBase (optional -> __optional_storage_base)> +// `-DeclRefExpr 'Derived' lvalue Var 'd' 'Derived' +// +// The type of the implicit object argument is `__optional_storage_base` +// (since this is the internal type that `has_value()` is declared on). 
If we +// call `IgnoreParenImpCasts()` on the implicit object argument, we get the +// `DeclRefExpr`, which has type `Derived`. Neither of these types is +// `optional`, and hence neither is sufficient for querying whether we are +// calling a method on `optional`. +// +// Instead, starting with the most derived type, we need to follow the chain of +// casts +QualType getPublicReceiverType(const CXXMemberCallExpr &MCE) { + return getPublicType(MCE.getImplicitObjectArgument()); +} + +AST_MATCHER_P(CXXMemberCallExpr, publicReceiverType, + ast_matchers::internal::Matcher<QualType>, InnerMatcher) { + return InnerMatcher.matches(getPublicReceiverType(Node), Finder, Builder); +} + +auto isOptionalMemberCallWithNameMatcher( + ast_matchers::internal::Matcher<NamedDecl> matcher, + const std::optional<StatementMatcher> &Ignorable = std::nullopt) { + return cxxMemberCallExpr(Ignorable ? on(expr(unless(*Ignorable))) + : anything(), + publicReceiverType(desugarsToOptionalType()), + callee(cxxMethodDecl(matcher))); +} + +auto isOptionalOperatorCallWithName( + llvm::StringRef operator_name, + const std::optional<StatementMatcher> &Ignorable = std::nullopt) { + return cxxOperatorCallExpr( + hasOverloadedOperatorName(operator_name), + callee(cxxMethodDecl(ofClass(optionalClass()))), + Ignorable ? callExpr(unless(hasArgument(0, *Ignorable))) : callExpr()); +} + +auto isMakeOptionalCall() { + return callExpr(callee(functionDecl(hasAnyName( + "std::make_optional", "base::make_optional", + "absl::make_optional", "folly::make_optional"))), + hasOptionalType()); +} + +auto nulloptTypeDecl() { + return namedDecl(hasAnyName("std::nullopt_t", "absl::nullopt_t", + "base::nullopt_t", "folly::None")); +} + +auto hasNulloptType() { return hasType(nulloptTypeDecl()); } + +auto inPlaceClass() { + return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t", + "base::in_place_t", "folly::in_place_t")); +} + +auto isOptionalNulloptConstructor() { + return cxxConstructExpr( + hasDeclaration(cxxConstructorDecl(parameterCountIs(1), + hasParameter(0, hasNulloptType()))), + hasOptionalOrDerivedType()); +} + +auto isOptionalInPlaceConstructor() { + return cxxConstructExpr(hasArgument(0, hasType(inPlaceClass())), + hasOptionalOrDerivedType()); +} + +auto isOptionalValueOrConversionConstructor() { + return cxxConstructExpr( + unless(hasDeclaration( + cxxConstructorDecl(anyOf(isCopyConstructor(), isMoveConstructor())))), + argumentCountIs(1), hasArgument(0, unless(hasNulloptType())), + hasOptionalOrDerivedType()); +} + +auto isOptionalValueOrConversionAssignment() { + return cxxOperatorCallExpr( + hasOverloadedOperatorName("="), + callee(cxxMethodDecl(ofClass(optionalOrDerivedClass()))), + unless(hasDeclaration(cxxMethodDecl( + anyOf(isCopyAssignmentOperator(), isMoveAssignmentOperator())))), + argumentCountIs(2), hasArgument(1, unless(hasNulloptType()))); +} + +auto isOptionalNulloptAssignment() { + return cxxOperatorCallExpr( + hasOverloadedOperatorName("="), + callee(cxxMethodDecl(ofClass(optionalOrDerivedClass()))), + argumentCountIs(2), hasArgument(1, hasNulloptType())); +} + +auto isStdSwapCall() { + return callExpr(callee(functionDecl(hasName("std::swap"))), + argumentCountIs(2), + hasArgument(0, hasOptionalOrDerivedType()), + hasArgument(1, hasOptionalOrDerivedType())); +} + +auto isStdForwardCall() { + return callExpr(callee(functionDecl(hasName("std::forward"))), + argumentCountIs(1), + hasArgument(0, hasOptionalOrDerivedType())); +} + +constexpr llvm::StringLiteral ValueOrCallID = "ValueOrCall"; + +auto 
isValueOrStringEmptyCall() { + // `opt.value_or("").empty()` + return cxxMemberCallExpr( + callee(cxxMethodDecl(hasName("empty"))), + onImplicitObjectArgument(ignoringImplicit( + cxxMemberCallExpr(on(expr(unless(cxxThisExpr()))), + callee(cxxMethodDecl(hasName("value_or"), + ofClass(optionalClass()))), + hasArgument(0, stringLiteral(hasSize(0)))) + .bind(ValueOrCallID)))); +} + +auto isValueOrNotEqX() { + auto ComparesToSame = [](ast_matchers::internal::Matcher<Stmt> Arg) { + return hasOperands( + ignoringImplicit( + cxxMemberCallExpr(on(expr(unless(cxxThisExpr()))), + callee(cxxMethodDecl(hasName("value_or"), + ofClass(optionalClass()))), + hasArgument(0, Arg)) + .bind(ValueOrCallID)), + ignoringImplicit(Arg)); + }; + + // `opt.value_or(X) != X`, for X is `nullptr`, `""`, or `0`. Ideally, we'd + // support this pattern for any expression, but the AST does not have a + // generic expression comparison facility, so we specialize to common cases + // seen in practice. FIXME: define a matcher that compares values across + // nodes, which would let us generalize this to any `X`. + return binaryOperation(hasOperatorName("!="), + anyOf(ComparesToSame(cxxNullPtrLiteralExpr()), + ComparesToSame(stringLiteral(hasSize(0))), + ComparesToSame(integerLiteral(equals(0))))); +} + +auto isCallReturningOptional() { + return callExpr(hasType(qualType( + anyOf(desugarsToOptionalOrDerivedType(), + referenceType(pointee(desugarsToOptionalOrDerivedType())))))); +} + +template <typename L, typename R> +auto isComparisonOperatorCall(L lhs_arg_matcher, R rhs_arg_matcher) { + return cxxOperatorCallExpr( + anyOf(hasOverloadedOperatorName("=="), hasOverloadedOperatorName("!=")), + argumentCountIs(2), hasArgument(0, lhs_arg_matcher), + hasArgument(1, rhs_arg_matcher)); +} + +/// Ensures that `Expr` is mapped to a `BoolValue` and returns its formula. +const Formula &forceBoolValue(Environment &Env, const Expr &Expr) { + auto *Value = Env.get<BoolValue>(Expr); + if (Value != nullptr) + return Value->formula(); + + Value = &Env.makeAtomicBoolValue(); + Env.setValue(Expr, *Value); + return Value->formula(); +} + +StorageLocation &locForHasValue(const RecordStorageLocation &OptionalLoc) { + return OptionalLoc.getSyntheticField("has_value"); +} + +StorageLocation &locForValue(const RecordStorageLocation &OptionalLoc) { + return OptionalLoc.getSyntheticField("value"); +} + +/// Sets `HasValueVal` as the symbolic value that represents the "has_value" +/// property of the optional at `OptionalLoc`. +void setHasValue(RecordStorageLocation &OptionalLoc, BoolValue &HasValueVal, + Environment &Env) { + Env.setValue(locForHasValue(OptionalLoc), HasValueVal); +} + +/// Returns the symbolic value that represents the "has_value" property of the +/// optional at `OptionalLoc`. Returns null if `OptionalLoc` is null. +BoolValue *getHasValue(Environment &Env, RecordStorageLocation *OptionalLoc) { + if (OptionalLoc == nullptr) + return nullptr; + StorageLocation &HasValueLoc = locForHasValue(*OptionalLoc); + auto *HasValueVal = Env.get<BoolValue>(HasValueLoc); + if (HasValueVal == nullptr) { + HasValueVal = &Env.makeAtomicBoolValue(); + Env.setValue(HasValueLoc, *HasValueVal); + } + return HasValueVal; +} + +QualType valueTypeFromOptionalDecl(const CXXRecordDecl &RD) { + auto &CTSD = cast<ClassTemplateSpecializationDecl>(RD); + return CTSD.getTemplateArgs()[0].getAsType(); +} + +/// Returns the number of optional wrappers in `Type`. +/// +/// For example, if `Type` is `optional<optional<int>>`, the result of this +/// function will be 2. 
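+///
+/// Further illustrations: `optional<int>` yields 1 and `int` yields 0, and a
+/// class derived from `optional<optional<int>>` yields 2, because the
+/// optional base class is located via `getOptionalBaseClass` before
+/// unwrapping.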
+int countOptionalWrappers(const ASTContext &ASTCtx, QualType Type) { + const CXXRecordDecl *Optional = + getOptionalBaseClass(Type->getAsCXXRecordDecl()); + if (Optional == nullptr) + return 0; + return 1 + countOptionalWrappers( + ASTCtx, + valueTypeFromOptionalDecl(*Optional).getDesugaredType(ASTCtx)); +} + +StorageLocation *getLocBehindPossiblePointer(const Expr &E, + const Environment &Env) { + if (E.isPRValue()) { + if (auto *PointerVal = dyn_cast_or_null<PointerValue>(Env.getValue(E))) + return &PointerVal->getPointeeLoc(); + return nullptr; + } + return Env.getStorageLocation(E); +} + +void transferUnwrapCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalLoc = cast_or_null<RecordStorageLocation>( + getLocBehindPossiblePointer(*ObjectExpr, State.Env))) { + if (State.Env.getStorageLocation(*UnwrapExpr) == nullptr) + State.Env.setStorageLocation(*UnwrapExpr, locForValue(*OptionalLoc)); + } +} + +void transferArrowOpCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalLoc = cast_or_null<RecordStorageLocation>( + getLocBehindPossiblePointer(*ObjectExpr, State.Env))) + State.Env.setValue( + *UnwrapExpr, State.Env.create<PointerValue>(locForValue(*OptionalLoc))); +} + +void transferMakeOptionalCall(const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + setHasValue(State.Env.getResultObjectLocation(*E), + State.Env.getBoolLiteralValue(true), State.Env); +} + +void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (auto *HasValueVal = getHasValue( + State.Env, getImplicitObjectLocation(*CallExpr, State.Env))) { + State.Env.setValue(*CallExpr, *HasValueVal); + } +} + +/// `ModelPred` builds a logical formula relating the predicate in +/// `ValueOrPredExpr` to the optional's `has_value` property. +void transferValueOrImpl( + const clang::Expr *ValueOrPredExpr, const MatchFinder::MatchResult &Result, + LatticeTransferState &State, + const Formula &(*ModelPred)(Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal)) { + auto &Env = State.Env; + + const auto *MCE = + Result.Nodes.getNodeAs<clang::CXXMemberCallExpr>(ValueOrCallID); + + auto *HasValueVal = + getHasValue(State.Env, getImplicitObjectLocation(*MCE, State.Env)); + if (HasValueVal == nullptr) + return; + + Env.assume(ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr), + HasValueVal->formula())); +} + +void transferValueOrStringEmptyCall(const clang::Expr *ComparisonExpr, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + return transferValueOrImpl(ComparisonExpr, Result, State, + [](Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal) -> const Formula & { + auto &A = Env.arena(); + // If the result is *not* empty, then we know the + // optional must have been holding a value. If + // `ExprVal` is true, though, we don't learn + // anything definite about `has_value`, so we + // don't add any corresponding implications to + // the flow condition. 
+                               return A.makeImplies(A.makeNot(ExprVal),
+                                                    HasValueVal);
+                             });
+}
+
+void transferValueOrNotEqX(const Expr *ComparisonExpr,
+                           const MatchFinder::MatchResult &Result,
+                           LatticeTransferState &State) {
+  transferValueOrImpl(ComparisonExpr, Result, State,
+                      [](Environment &Env, const Formula &ExprVal,
+                         const Formula &HasValueVal) -> const Formula & {
+                        auto &A = Env.arena();
+                        // We know that if `(opt.value_or(X) != X)` then
+                        // `opt.hasValue()`, even without knowing further
+                        // details about the contents of `opt`.
+                        return A.makeImplies(ExprVal, HasValueVal);
+                      });
+}
+
+void transferCallReturningOptional(const CallExpr *E,
+                                   const MatchFinder::MatchResult &Result,
+                                   LatticeTransferState &State) {
+  RecordStorageLocation *Loc = nullptr;
+  if (E->isPRValue()) {
+    Loc = &State.Env.getResultObjectLocation(*E);
+  } else {
+    Loc = State.Env.get<RecordStorageLocation>(*E);
+    if (Loc == nullptr) {
+      Loc = &cast<RecordStorageLocation>(State.Env.createStorageLocation(*E));
+      State.Env.setStorageLocation(*E, *Loc);
+    }
+  }
+
+  if (State.Env.getValue(locForHasValue(*Loc)) != nullptr)
+    return;
+
+  setHasValue(*Loc, State.Env.makeAtomicBoolValue(), State.Env);
+}
+
+void constructOptionalValue(const Expr &E, Environment &Env,
+                            BoolValue &HasValueVal) {
+  RecordStorageLocation &Loc = Env.getResultObjectLocation(E);
+  setHasValue(Loc, HasValueVal, Env);
+}
+
+/// Returns a symbolic value for the "has_value" property of an `optional<T>`
+/// value that is constructed/assigned from a value of type `U` or
+/// `optional<U>`, where `T` is constructible from `U`.
+BoolValue &valueOrConversionHasValue(QualType DestType, const Expr &E,
+                                     const MatchFinder::MatchResult &MatchRes,
+                                     LatticeTransferState &State) {
+  const int DestTypeOptionalWrappersCount =
+      countOptionalWrappers(*MatchRes.Context, DestType);
+  const int ArgTypeOptionalWrappersCount = countOptionalWrappers(
+      *MatchRes.Context, E.getType().getNonReferenceType());
+
+  // Is this a constructor of the form `template <class U> optional(U &&)` /
+  // assignment of the form `template <class U> optional& operator=(U &&)`
+  // (where `T` is assignable / constructible from `U`)?
+  // We recognize this because the number of optionals in the optional being
+  // assigned to is different from the function argument type.
+  if (DestTypeOptionalWrappersCount != ArgTypeOptionalWrappersCount)
+    return State.Env.getBoolLiteralValue(true);
+
+  // Otherwise, this must be a constructor of the form
+  // `template <class U> optional(optional<U> &&)` / assignment of the form
+  // `template <class U> optional& operator=(optional<U> &&)`
+  // (where, again, `T` is assignable / constructible from `U`).
+  auto *Loc = State.Env.get<RecordStorageLocation>(E);
+  if (auto *HasValueVal = getHasValue(State.Env, Loc))
+    return *HasValueVal;
+  return State.Env.makeAtomicBoolValue();
+}
+
+void transferValueOrConversionConstructor(
+    const CXXConstructExpr *E, const MatchFinder::MatchResult &MatchRes,
+    LatticeTransferState &State) {
+  assert(E->getNumArgs() > 0);
+
+  constructOptionalValue(
+      *E, State.Env,
+      valueOrConversionHasValue(
+          E->getConstructor()->getThisType()->getPointeeType(), *E->getArg(0),
+          MatchRes, State));
+}
+
+void transferAssignment(const CXXOperatorCallExpr *E, BoolValue &HasValueVal,
+                        LatticeTransferState &State) {
+  assert(E->getNumArgs() > 0);
+
+  if (auto *Loc = State.Env.get<RecordStorageLocation>(*E->getArg(0))) {
+    setHasValue(*Loc, HasValueVal, State.Env);
+
+    // Assign a storage location for the whole expression.
+ State.Env.setStorageLocation(*E, *Loc); + } +} + +void transferValueOrConversionAssignment( + const CXXOperatorCallExpr *E, const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + assert(E->getNumArgs() > 1); + transferAssignment( + E, + valueOrConversionHasValue(E->getArg(0)->getType().getNonReferenceType(), + *E->getArg(1), MatchRes, State), + State); +} + +void transferNulloptAssignment(const CXXOperatorCallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferAssignment(E, State.Env.getBoolLiteralValue(false), State); +} + +void transferSwap(RecordStorageLocation *Loc1, RecordStorageLocation *Loc2, + Environment &Env) { + // We account for cases where one or both of the optionals are not modeled, + // either lacking associated storage locations, or lacking values associated + // to such storage locations. + + if (Loc1 == nullptr) { + if (Loc2 != nullptr) + setHasValue(*Loc2, Env.makeAtomicBoolValue(), Env); + return; + } + if (Loc2 == nullptr) { + setHasValue(*Loc1, Env.makeAtomicBoolValue(), Env); + return; + } + + // Both expressions have locations, though they may not have corresponding + // values. In that case, we create a fresh value at this point. Note that if + // two branches both do this, they will not share the value, but it at least + // allows for local reasoning about the value. To avoid the above, we would + // need *lazy* value allocation. + // FIXME: allocate values lazily, instead of just creating a fresh value. + BoolValue *BoolVal1 = getHasValue(Env, Loc1); + if (BoolVal1 == nullptr) + BoolVal1 = &Env.makeAtomicBoolValue(); + + BoolValue *BoolVal2 = getHasValue(Env, Loc2); + if (BoolVal2 == nullptr) + BoolVal2 = &Env.makeAtomicBoolValue(); + + setHasValue(*Loc1, *BoolVal2, Env); + setHasValue(*Loc2, *BoolVal1, Env); +} + +void transferSwapCall(const CXXMemberCallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 1); + auto *OtherLoc = State.Env.get<RecordStorageLocation>(*E->getArg(0)); + transferSwap(getImplicitObjectLocation(*E, State.Env), OtherLoc, State.Env); +} + +void transferStdSwapCall(const CallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 2); + auto *Arg0Loc = State.Env.get<RecordStorageLocation>(*E->getArg(0)); + auto *Arg1Loc = State.Env.get<RecordStorageLocation>(*E->getArg(1)); + transferSwap(Arg0Loc, Arg1Loc, State.Env); +} + +void transferStdForwardCall(const CallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 1); + + if (auto *Loc = State.Env.getStorageLocation(*E->getArg(0))) + State.Env.setStorageLocation(*E, *Loc); +} + +const Formula &evaluateEquality(Arena &A, const Formula &EqVal, + const Formula &LHS, const Formula &RHS) { + // Logically, an optional<T> object is composed of two values - a `has_value` + // bit and a value of type T. Equality of optional objects compares both + // values. Therefore, merely comparing the `has_value` bits isn't sufficient: + // when two optional objects are engaged, the equality of their respective + // values of type T matters. Since we only track the `has_value` bits, we + // can't make any conclusions about equality when we know that two optional + // objects are engaged. + // + // We express this as two facts about the equality: + // a) EqVal => (LHS & RHS) v (!RHS & !LHS) + // If they are equal, then either both are set or both are unset. 
+ // b) (!LHS & !RHS) => EqVal + // If neither is set, then they are equal. + // We rewrite b) as !EqVal => (LHS v RHS), for a more compact formula. + return A.makeAnd( + A.makeImplies(EqVal, A.makeOr(A.makeAnd(LHS, RHS), + A.makeAnd(A.makeNot(LHS), A.makeNot(RHS)))), + A.makeImplies(A.makeNot(EqVal), A.makeOr(LHS, RHS))); +} + +void transferOptionalAndOptionalCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + Environment &Env = State.Env; + auto &A = Env.arena(); + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + auto *Arg0Loc = Env.get<RecordStorageLocation>(*CmpExpr->getArg(0)); + if (auto *LHasVal = getHasValue(Env, Arg0Loc)) { + auto *Arg1Loc = Env.get<RecordStorageLocation>(*CmpExpr->getArg(1)); + if (auto *RHasVal = getHasValue(Env, Arg1Loc)) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &A.makeNot(*CmpValue); + Env.assume(evaluateEquality(A, *CmpValue, LHasVal->formula(), + RHasVal->formula())); + } + } +} + +void transferOptionalAndValueCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const clang::Expr *E, Environment &Env) { + auto &A = Env.arena(); + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + auto *Loc = Env.get<RecordStorageLocation>(*E); + if (auto *HasVal = getHasValue(Env, Loc)) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &A.makeNot(*CmpValue); + Env.assume( + evaluateEquality(A, *CmpValue, HasVal->formula(), A.makeLiteral(true))); + } +} + +void transferOptionalAndNulloptCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const clang::Expr *E, Environment &Env) { + auto &A = Env.arena(); + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + auto *Loc = Env.get<RecordStorageLocation>(*E); + if (auto *HasVal = getHasValue(Env, Loc)) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &A.makeNot(*CmpValue); + Env.assume(evaluateEquality(A, *CmpValue, HasVal->formula(), + A.makeLiteral(false))); + } +} + +std::optional<StatementMatcher> +ignorableOptional(const UncheckedOptionalAccessModelOptions &Options) { + if (Options.IgnoreSmartPointerDereference) { + auto SmartPtrUse = expr(ignoringParenImpCasts(cxxOperatorCallExpr( + anyOf(hasOverloadedOperatorName("->"), hasOverloadedOperatorName("*")), + unless(hasArgument(0, expr(hasOptionalType())))))); + return expr( + anyOf(SmartPtrUse, memberExpr(hasObjectExpression(SmartPtrUse)))); + } + return std::nullopt; +} + +StatementMatcher +valueCall(const std::optional<StatementMatcher> &IgnorableOptional) { + return isOptionalMemberCallWithNameMatcher(hasName("value"), + IgnorableOptional); +} + +StatementMatcher +valueOperatorCall(const std::optional<StatementMatcher> &IgnorableOptional) { + return expr(anyOf(isOptionalOperatorCallWithName("*", IgnorableOptional), + isOptionalOperatorCallWithName("->", IgnorableOptional))); +} + +auto buildTransferMatchSwitch() { + // FIXME: Evaluate the efficiency of matchers. If using matchers results in a + // lot of duplicated work (e.g. string comparisons), consider providing APIs + // that avoid it through memoization. 
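As an aside, fact (a) above is what lets engagement knowledge flow across a comparison once the operands are known equal. A hypothetical snippet that becomes provably safe under this model (a sketch, not a test from the patch):

#include <optional>

void g(std::optional<int> A, std::optional<int> B) {
  if (A == B && A.has_value())
    B.value(); // safe: EqVal together with LHS implies RHS by fact (a)
}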
+ return CFGMatchSwitchBuilder<LatticeTransferState>() + // make_optional + .CaseOfCFGStmt<CallExpr>(isMakeOptionalCall(), transferMakeOptionalCall) + + // optional::optional (in place) + .CaseOfCFGStmt<CXXConstructExpr>( + isOptionalInPlaceConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(true)); + }) + // optional::optional(nullopt_t) + .CaseOfCFGStmt<CXXConstructExpr>( + isOptionalNulloptConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(false)); + }) + // optional::optional (value/conversion) + .CaseOfCFGStmt<CXXConstructExpr>(isOptionalValueOrConversionConstructor(), + transferValueOrConversionConstructor) + + // optional::operator= + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isOptionalValueOrConversionAssignment(), + transferValueOrConversionAssignment) + .CaseOfCFGStmt<CXXOperatorCallExpr>(isOptionalNulloptAssignment(), + transferNulloptAssignment) + + // optional::value + .CaseOfCFGStmt<CXXMemberCallExpr>( + valueCall(std::nullopt), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferUnwrapCall(E, E->getImplicitObjectArgument(), State); + }) + + // optional::operator* + .CaseOfCFGStmt<CallExpr>(isOptionalOperatorCallWithName("*"), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferUnwrapCall(E, E->getArg(0), State); + }) + + // optional::operator-> + .CaseOfCFGStmt<CallExpr>(isOptionalOperatorCallWithName("->"), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferArrowOpCall(E, E->getArg(0), State); + }) + + // optional::has_value, optional::hasValue + // Of the supported optionals only folly::Optional uses hasValue, but this + // will also pass for other types + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher( + hasAnyName("has_value", "hasValue")), + transferOptionalHasValueCall) + + // optional::operator bool + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("operator bool")), + transferOptionalHasValueCall) + + // optional::emplace + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("emplace")), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (RecordStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + setHasValue(*Loc, State.Env.getBoolLiteralValue(true), State.Env); + } + }) + + // optional::reset + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("reset")), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (RecordStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + setHasValue(*Loc, State.Env.getBoolLiteralValue(false), + State.Env); + } + }) + + // optional::swap + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("swap")), + transferSwapCall) + + // std::swap + .CaseOfCFGStmt<CallExpr>(isStdSwapCall(), transferStdSwapCall) + + // std::forward + .CaseOfCFGStmt<CallExpr>(isStdForwardCall(), transferStdForwardCall) + + // opt.value_or("").empty() + .CaseOfCFGStmt<Expr>(isValueOrStringEmptyCall(), + transferValueOrStringEmptyCall) + + // opt.value_or(X) != X + 
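+      // (e.g. `if (Opt.value_or(0) != 0) use(*Opt);`)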
.CaseOfCFGStmt<Expr>(isValueOrNotEqX(), transferValueOrNotEqX) + + // Comparisons (==, !=): + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasOptionalType(), hasOptionalType()), + transferOptionalAndOptionalCmp) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasOptionalType(), hasNulloptType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndNulloptCmp(Cmp, Cmp->getArg(0), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasNulloptType(), hasOptionalType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndNulloptCmp(Cmp, Cmp->getArg(1), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall( + hasOptionalType(), + unless(anyOf(hasOptionalType(), hasNulloptType()))), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndValueCmp(Cmp, Cmp->getArg(0), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall( + unless(anyOf(hasOptionalType(), hasNulloptType())), + hasOptionalType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndValueCmp(Cmp, Cmp->getArg(1), State.Env); + }) + + // returns optional + .CaseOfCFGStmt<CallExpr>(isCallReturningOptional(), + transferCallReturningOptional) + + .Build(); +} + +llvm::SmallVector<SourceLocation> diagnoseUnwrapCall(const Expr *ObjectExpr, + const Environment &Env) { + if (auto *OptionalLoc = cast_or_null<RecordStorageLocation>( + getLocBehindPossiblePointer(*ObjectExpr, Env))) { + auto *Prop = Env.getValue(locForHasValue(*OptionalLoc)); + if (auto *HasValueVal = cast_or_null<BoolValue>(Prop)) { + if (Env.proves(HasValueVal->formula())) + return {}; + } + } + + // Record that this unwrap is *not* provably safe. + // FIXME: include either the name of the optional (if applicable) or a source + // range of the access for easier interpretation of the result. + return {ObjectExpr->getBeginLoc()}; +} + +auto buildDiagnoseMatchSwitch( + const UncheckedOptionalAccessModelOptions &Options) { + // FIXME: Evaluate the efficiency of matchers. If using matchers results in a + // lot of duplicated work (e.g. string comparisons), consider providing APIs + // that avoid it through memoization. 
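For concreteness, a minimal sketch of the two situations `diagnoseUnwrapCall` distinguishes (hypothetical client code, not part of the patch):

#include <optional>

void h(std::optional<int> Opt) {
  Opt.value(); // flagged: nothing on this path proves has_value
  if (Opt.has_value())
    Opt.value(); // not flagged: the flow condition proves has_value
}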
+ auto IgnorableOptional = ignorableOptional(Options); + return CFGMatchSwitchBuilder<const Environment, + llvm::SmallVector<SourceLocation>>() + // optional::value + .CaseOfCFGStmt<CXXMemberCallExpr>( + valueCall(IgnorableOptional), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E->getImplicitObjectArgument(), Env); + }) + + // optional::operator*, optional::operator-> + .CaseOfCFGStmt<CallExpr>(valueOperatorCall(IgnorableOptional), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E->getArg(0), Env); + }) + .Build(); +} + +} // namespace + +ast_matchers::DeclarationMatcher +UncheckedOptionalAccessModel::optionalClassDecl() { + return cxxRecordDecl(optionalClass()); +} + +UncheckedOptionalAccessModel::UncheckedOptionalAccessModel(ASTContext &Ctx, + Environment &Env) + : DataflowAnalysis<UncheckedOptionalAccessModel, NoopLattice>(Ctx), + TransferMatchSwitch(buildTransferMatchSwitch()) { + Env.getDataflowAnalysisContext().setSyntheticFieldCallback( + [&Ctx](QualType Ty) -> llvm::StringMap<QualType> { + const CXXRecordDecl *Optional = + getOptionalBaseClass(Ty->getAsCXXRecordDecl()); + if (Optional == nullptr) + return {}; + return {{"value", valueTypeFromOptionalDecl(*Optional)}, + {"has_value", Ctx.BoolTy}}; + }); +} + +void UncheckedOptionalAccessModel::transfer(const CFGElement &Elt, + NoopLattice &L, Environment &Env) { + LatticeTransferState State(L, Env); + TransferMatchSwitch(Elt, getASTContext(), State); +} + +UncheckedOptionalAccessDiagnoser::UncheckedOptionalAccessDiagnoser( + UncheckedOptionalAccessModelOptions Options) + : DiagnoseMatchSwitch(buildDiagnoseMatchSwitch(Options)) {} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp new file mode 100644 index 000000000000..b8401230a83d --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp @@ -0,0 +1,133 @@ +//===-- RecordOps.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on records (structs, classes, and unions). 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/RecordOps.h" + +#define DEBUG_TYPE "dataflow" + +namespace clang::dataflow { + +static void copyField(const ValueDecl &Field, StorageLocation *SrcFieldLoc, + StorageLocation *DstFieldLoc, RecordStorageLocation &Dst, + Environment &Env) { + assert(Field.getType()->isReferenceType() || + (SrcFieldLoc != nullptr && DstFieldLoc != nullptr)); + + if (Field.getType()->isRecordType()) { + copyRecord(cast<RecordStorageLocation>(*SrcFieldLoc), + cast<RecordStorageLocation>(*DstFieldLoc), Env); + } else if (Field.getType()->isReferenceType()) { + Dst.setChild(Field, SrcFieldLoc); + } else { + if (Value *Val = Env.getValue(*SrcFieldLoc)) + Env.setValue(*DstFieldLoc, *Val); + else + Env.clearValue(*DstFieldLoc); + } +} + +static void copySyntheticField(QualType FieldType, StorageLocation &SrcFieldLoc, + StorageLocation &DstFieldLoc, Environment &Env) { + if (FieldType->isRecordType()) { + copyRecord(cast<RecordStorageLocation>(SrcFieldLoc), + cast<RecordStorageLocation>(DstFieldLoc), Env); + } else { + if (Value *Val = Env.getValue(SrcFieldLoc)) + Env.setValue(DstFieldLoc, *Val); + else + Env.clearValue(DstFieldLoc); + } +} + +void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, + Environment &Env) { + auto SrcType = Src.getType().getCanonicalType().getUnqualifiedType(); + auto DstType = Dst.getType().getCanonicalType().getUnqualifiedType(); + + auto SrcDecl = SrcType->getAsCXXRecordDecl(); + auto DstDecl = DstType->getAsCXXRecordDecl(); + + [[maybe_unused]] bool compatibleTypes = + SrcType == DstType || + (SrcDecl != nullptr && DstDecl != nullptr && + (SrcDecl->isDerivedFrom(DstDecl) || DstDecl->isDerivedFrom(SrcDecl))); + + LLVM_DEBUG({ + if (!compatibleTypes) { + llvm::dbgs() << "Source type " << Src.getType() << "\n"; + llvm::dbgs() << "Destination type " << Dst.getType() << "\n"; + } + }); + assert(compatibleTypes); + + if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr && + SrcDecl->isDerivedFrom(DstDecl))) { + for (auto [Field, DstFieldLoc] : Dst.children()) + copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env); + for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields()) + copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), + *DstFieldLoc, Env); + } else { + for (auto [Field, SrcFieldLoc] : Src.children()) + copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); + for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) + copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, + Dst.getSyntheticField(Name), Env); + } +} + +bool recordsEqual(const RecordStorageLocation &Loc1, const Environment &Env1, + const RecordStorageLocation &Loc2, const Environment &Env2) { + LLVM_DEBUG({ + if (Loc2.getType().getCanonicalType().getUnqualifiedType() != + Loc1.getType().getCanonicalType().getUnqualifiedType()) { + llvm::dbgs() << "Loc1 type " << Loc1.getType() << "\n"; + llvm::dbgs() << "Loc2 type " << Loc2.getType() << "\n"; + } + }); + assert(Loc2.getType().getCanonicalType().getUnqualifiedType() == + Loc1.getType().getCanonicalType().getUnqualifiedType()); + + for (auto [Field, FieldLoc1] : Loc1.children()) { + StorageLocation *FieldLoc2 = Loc2.getChild(*Field); + + assert(Field->getType()->isReferenceType() || + (FieldLoc1 != nullptr && FieldLoc2 != nullptr)); + + if (Field->getType()->isRecordType()) { + if (!recordsEqual(cast<RecordStorageLocation>(*FieldLoc1), Env1, + 
cast<RecordStorageLocation>(*FieldLoc2), Env2)) + return false; + } else if (Field->getType()->isReferenceType()) { + if (FieldLoc1 != FieldLoc2) + return false; + } else if (Env1.getValue(*FieldLoc1) != Env2.getValue(*FieldLoc2)) { + return false; + } + } + + for (const auto &[Name, SynthFieldLoc1] : Loc1.synthetic_fields()) { + if (SynthFieldLoc1->getType()->isRecordType()) { + if (!recordsEqual( + *cast<RecordStorageLocation>(SynthFieldLoc1), Env1, + cast<RecordStorageLocation>(Loc2.getSyntheticField(Name)), Env2)) + return false; + } else if (Env1.getValue(*SynthFieldLoc1) != + Env2.getValue(Loc2.getSyntheticField(Name))) { + return false; + } + } + + return true; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/SimplifyConstraints.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/SimplifyConstraints.cpp new file mode 100644 index 000000000000..cc20202768b9 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/SimplifyConstraints.cpp @@ -0,0 +1,180 @@ +//===-- SimplifyConstraints.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" +#include "llvm/ADT/EquivalenceClasses.h" + +namespace clang { +namespace dataflow { + +// Substitutes all occurrences of a given atom in `F` by a given formula and +// returns the resulting formula. +static const Formula & +substitute(const Formula &F, + const llvm::DenseMap<Atom, const Formula *> &Substitutions, + Arena &arena) { + switch (F.kind()) { + case Formula::AtomRef: + if (auto iter = Substitutions.find(F.getAtom()); + iter != Substitutions.end()) + return *iter->second; + return F; + case Formula::Literal: + return F; + case Formula::Not: + return arena.makeNot(substitute(*F.operands()[0], Substitutions, arena)); + case Formula::And: + return arena.makeAnd(substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + case Formula::Or: + return arena.makeOr(substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + case Formula::Implies: + return arena.makeImplies( + substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + case Formula::Equal: + return arena.makeEquals(substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + } + llvm_unreachable("Unknown formula kind"); +} + +// Returns the result of replacing atoms in `Atoms` with the leader of their +// equivalence class in `EquivalentAtoms`. +// Atoms that don't have an equivalence class in `EquivalentAtoms` are inserted +// into it as single-member equivalence classes. +static llvm::DenseSet<Atom> +projectToLeaders(const llvm::DenseSet<Atom> &Atoms, + llvm::EquivalenceClasses<Atom> &EquivalentAtoms) { + llvm::DenseSet<Atom> Result; + + for (Atom Atom : Atoms) + Result.insert(EquivalentAtoms.getOrInsertLeaderValue(Atom)); + + return Result; +} + +// Returns the atoms in the equivalence class for the leader identified by +// `LeaderIt`. 
+static llvm::SmallVector<Atom> +atomsInEquivalenceClass(const llvm::EquivalenceClasses<Atom> &EquivalentAtoms, + llvm::EquivalenceClasses<Atom>::iterator LeaderIt) { + llvm::SmallVector<Atom> Result; + for (auto MemberIt = EquivalentAtoms.member_begin(LeaderIt); + MemberIt != EquivalentAtoms.member_end(); ++MemberIt) + Result.push_back(*MemberIt); + return Result; +} + +void simplifyConstraints(llvm::SetVector<const Formula *> &Constraints, + Arena &arena, SimplifyConstraintsInfo *Info) { + auto contradiction = [&]() { + Constraints.clear(); + Constraints.insert(&arena.makeLiteral(false)); + }; + + llvm::EquivalenceClasses<Atom> EquivalentAtoms; + llvm::DenseSet<Atom> TrueAtoms; + llvm::DenseSet<Atom> FalseAtoms; + + while (true) { + for (const auto *Constraint : Constraints) { + switch (Constraint->kind()) { + case Formula::AtomRef: + TrueAtoms.insert(Constraint->getAtom()); + break; + case Formula::Not: + if (Constraint->operands()[0]->kind() == Formula::AtomRef) + FalseAtoms.insert(Constraint->operands()[0]->getAtom()); + break; + case Formula::Equal: { + ArrayRef<const Formula *> operands = Constraint->operands(); + if (operands[0]->kind() == Formula::AtomRef && + operands[1]->kind() == Formula::AtomRef) { + EquivalentAtoms.unionSets(operands[0]->getAtom(), + operands[1]->getAtom()); + } + break; + } + default: + break; + } + } + + TrueAtoms = projectToLeaders(TrueAtoms, EquivalentAtoms); + FalseAtoms = projectToLeaders(FalseAtoms, EquivalentAtoms); + + llvm::DenseMap<Atom, const Formula *> Substitutions; + for (auto It = EquivalentAtoms.begin(); It != EquivalentAtoms.end(); ++It) { + Atom TheAtom = It->getData(); + Atom Leader = EquivalentAtoms.getLeaderValue(TheAtom); + if (TrueAtoms.contains(Leader)) { + if (FalseAtoms.contains(Leader)) { + contradiction(); + return; + } + Substitutions.insert({TheAtom, &arena.makeLiteral(true)}); + } else if (FalseAtoms.contains(Leader)) { + Substitutions.insert({TheAtom, &arena.makeLiteral(false)}); + } else if (TheAtom != Leader) { + Substitutions.insert({TheAtom, &arena.makeAtomRef(Leader)}); + } + } + + llvm::SetVector<const Formula *> NewConstraints; + for (const auto *Constraint : Constraints) { + const Formula &NewConstraint = + substitute(*Constraint, Substitutions, arena); + if (NewConstraint.isLiteral(true)) + continue; + if (NewConstraint.isLiteral(false)) { + contradiction(); + return; + } + if (NewConstraint.kind() == Formula::And) { + NewConstraints.insert(NewConstraint.operands()[0]); + NewConstraints.insert(NewConstraint.operands()[1]); + continue; + } + NewConstraints.insert(&NewConstraint); + } + + if (NewConstraints == Constraints) + break; + Constraints = std::move(NewConstraints); + } + + if (Info) { + for (auto It = EquivalentAtoms.begin(), End = EquivalentAtoms.end(); + It != End; ++It) { + if (!It->isLeader()) + continue; + Atom At = *EquivalentAtoms.findLeader(It); + if (TrueAtoms.contains(At) || FalseAtoms.contains(At)) + continue; + llvm::SmallVector<Atom> Atoms = + atomsInEquivalenceClass(EquivalentAtoms, It); + if (Atoms.size() == 1) + continue; + std::sort(Atoms.begin(), Atoms.end()); + Info->EquivalentAtoms.push_back(std::move(Atoms)); + } + for (Atom At : TrueAtoms) + Info->TrueAtoms.append(atomsInEquivalenceClass( + EquivalentAtoms, EquivalentAtoms.findValue(At))); + std::sort(Info->TrueAtoms.begin(), Info->TrueAtoms.end()); + for (Atom At : FalseAtoms) + Info->FalseAtoms.append(atomsInEquivalenceClass( + EquivalentAtoms, EquivalentAtoms.findValue(At))); + std::sort(Info->FalseAtoms.begin(), 
Info->FalseAtoms.end()); + } +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp new file mode 100644 index 000000000000..3c896d373a21 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -0,0 +1,897 @@ +//===-- Transfer.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines transfer functions that evaluate program statements and +// update an environment accordingly. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Transfer.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/NoopAnalysis.h" +#include "clang/Analysis/FlowSensitive/RecordOps.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/OperatorKinds.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include <assert.h> +#include <cassert> + +#define DEBUG_TYPE "dataflow" + +namespace clang { +namespace dataflow { + +const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const { + auto BlockIt = ACFG.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); + if (BlockIt == ACFG.getStmtToBlock().end()) { + assert(false); + // Return null to avoid dereferencing the end iterator in non-assert builds. + return nullptr; + } + if (!ACFG.isBlockReachable(*BlockIt->getSecond())) + return nullptr; + if (BlockIt->getSecond()->getBlockID() == CurBlockID) + return &CurState.Env; + const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; + if (!(State)) + return nullptr; + return &State->Env; +} + +static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS, + Environment &Env) { + Value *LHSValue = Env.getValue(LHS); + Value *RHSValue = Env.getValue(RHS); + + if (LHSValue == RHSValue) + return Env.getBoolLiteralValue(true); + + if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue)) + if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue)) + return Env.makeIff(*LHSBool, *RHSBool); + + if (auto *LHSPtr = dyn_cast_or_null<PointerValue>(LHSValue)) + if (auto *RHSPtr = dyn_cast_or_null<PointerValue>(RHSValue)) + // If the storage locations are the same, the pointers definitely compare + // the same. If the storage locations are different, they may still alias, + // so we fall through to the case below that returns an atom. 
+      if (&LHSPtr->getPointeeLoc() == &RHSPtr->getPointeeLoc())
+        return Env.getBoolLiteralValue(true);
+
+  return Env.makeAtomicBoolValue();
+}
+
+static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
+  if (auto *Top = llvm::dyn_cast<TopBoolValue>(&V)) {
+    auto &A = Env.getDataflowAnalysisContext().arena();
+    return A.makeBoolValue(A.makeAtomRef(Top->getAtom()));
+  }
+  return V;
+}
+
+// Unpacks the value (if any) associated with `E` and updates `E` to the new
+// value, if any unpacking occurred. Also, does the lvalue-to-rvalue
+// conversion, by skipping past the reference.
+static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) {
+  auto *Loc = Env.getStorageLocation(E);
+  if (Loc == nullptr)
+    return nullptr;
+  auto *Val = Env.getValue(*Loc);
+
+  auto *B = dyn_cast_or_null<BoolValue>(Val);
+  if (B == nullptr)
+    return Val;
+
+  auto &UnpackedVal = unpackValue(*B, Env);
+  if (&UnpackedVal == Val)
+    return Val;
+  Env.setValue(*Loc, UnpackedVal);
+  return &UnpackedVal;
+}
+
+static void propagateValue(const Expr &From, const Expr &To,
+                           Environment &Env) {
+  if (From.getType()->isRecordType())
+    return;
+  if (auto *Val = Env.getValue(From))
+    Env.setValue(To, *Val);
+}
+
+static void propagateStorageLocation(const Expr &From, const Expr &To,
+                                     Environment &Env) {
+  if (auto *Loc = Env.getStorageLocation(From))
+    Env.setStorageLocation(To, *Loc);
+}
+
+// Propagates the value or storage location of `From` to `To` in cases where
+// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff
+// `From` is a glvalue.
+static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
+                                            Environment &Env) {
+  assert(From.isGLValue() == To.isGLValue());
+  if (From.isGLValue())
+    propagateStorageLocation(From, To, Env);
+  else
+    propagateValue(From, To, Env);
+}
+
+namespace {
+
+class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
+public:
+  TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
+                  Environment::ValueModel &Model)
+      : StmtToEnv(StmtToEnv), Env(Env), Model(Model) {}
+
+  void VisitBinaryOperator(const BinaryOperator *S) {
+    const Expr *LHS = S->getLHS();
+    assert(LHS != nullptr);
+
+    const Expr *RHS = S->getRHS();
+    assert(RHS != nullptr);
+
+    // Do compound assignments up-front, as there are so many of them and we
+    // don't want to list all of them in the switch statement below.
+    // To avoid generating unnecessary values, we don't create a new value but
+    // instead leave it to the specific analysis to do this if desired.
+    if (S->isCompoundAssignmentOp())
+      propagateStorageLocation(*S->getLHS(), *S, Env);
+
+    switch (S->getOpcode()) {
+    case BO_Assign: {
+      auto *LHSLoc = Env.getStorageLocation(*LHS);
+      if (LHSLoc == nullptr)
+        break;
+
+      auto *RHSVal = Env.getValue(*RHS);
+      if (RHSVal == nullptr)
+        break;
+
+      // Assign a value to the storage location of the left-hand side.
+      Env.setValue(*LHSLoc, *RHSVal);
+
+      // Assign a storage location for the whole expression.
+      Env.setStorageLocation(*S, *LHSLoc);
+      break;
+    }
+    case BO_LAnd:
+    case BO_LOr: {
+      BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS);
+      BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS);
+
+      if (S->getOpcode() == BO_LAnd)
+        Env.setValue(*S, Env.makeAnd(LHSVal, RHSVal));
+      else
+        Env.setValue(*S, Env.makeOr(LHSVal, RHSVal));
+      break;
+    }
+    case BO_NE:
+    case BO_EQ: {
+      auto &LHSEqRHSValue = evaluateBooleanEquality(*LHS, *RHS, Env);
+      Env.setValue(*S, S->getOpcode() == BO_EQ ? LHSEqRHSValue
+                                               : Env.makeNot(LHSEqRHSValue));
+      break;
+    }
+    case BO_Comma: {
+      propagateValueOrStorageLocation(*RHS, *S, Env);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  void VisitDeclRefExpr(const DeclRefExpr *S) {
+    const ValueDecl *VD = S->getDecl();
+    assert(VD != nullptr);
+
+    // Some `DeclRefExpr`s aren't glvalues, so we can't associate them with a
+    // `StorageLocation`, and there's also no sensible `Value` that we can
+    // assign to them. Examples:
+    // - Non-static member variables
+    // - Non-static member functions
+    // Note: Member operators are an exception to this, but apparently only
+    // if the `DeclRefExpr` is used within the callee of a
+    // `CXXOperatorCallExpr`. In other cases, for example when applying the
+    // address-of operator, the `DeclRefExpr` is a prvalue.
+    if (!S->isGLValue())
+      return;
+
+    auto *DeclLoc = Env.getStorageLocation(*VD);
+    if (DeclLoc == nullptr)
+      return;
+
+    Env.setStorageLocation(*S, *DeclLoc);
+  }
+
+  void VisitDeclStmt(const DeclStmt *S) {
+    // Group decls are converted into single decls in the CFG, so the cast
+    // below is safe.
+    const auto &D = *cast<VarDecl>(S->getSingleDecl());
+
+    ProcessVarDecl(D);
+  }
+
+  void ProcessVarDecl(const VarDecl &D) {
+    // Static local vars are already initialized in `Environment`.
+    if (D.hasGlobalStorage())
+      return;
+
+    // If this is the holding variable for a `BindingDecl`, we may already
+    // have a storage location set up -- so check. (See also explanation below
+    // where we process the `BindingDecl`.)
+    if (D.getType()->isReferenceType() && Env.getStorageLocation(D) != nullptr)
+      return;
+
+    assert(Env.getStorageLocation(D) == nullptr);
+
+    Env.setStorageLocation(D, Env.createObject(D));
+
+    // `DecompositionDecl` must be handled after we've interpreted the loc
+    // itself, because the binding expression refers back to the
+    // `DecompositionDecl` (even though it has no written name).
+    if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) {
+      // If VarDecl is a DecompositionDecl, evaluate each of its bindings. This
+      // needs to be evaluated after initializing the values in the storage for
+      // VarDecl, as the bindings refer to them.
+      // FIXME: Add support for ArraySubscriptExpr.
+      // FIXME: Consider adding AST nodes used in BindingDecls to the CFG.
+      for (const auto *B : Decomp->bindings()) {
+        if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) {
+          auto *DE = dyn_cast_or_null<DeclRefExpr>(ME->getBase());
+          if (DE == nullptr)
+            continue;
+
+          // ME and its base haven't been visited because they aren't included
+          // in the statements of the CFG basic block.
+          VisitDeclRefExpr(DE);
+          VisitMemberExpr(ME);
+
+          if (auto *Loc = Env.getStorageLocation(*ME))
+            Env.setStorageLocation(*B, *Loc);
+        } else if (auto *VD = B->getHoldingVar()) {
+          // Holding vars are used to back the `BindingDecl`s of tuple-like
+          // types. The holding var declarations appear after the
+          // `DecompositionDecl`, so we have to explicitly process them here
+          // to know their storage location. They will be processed a second
+          // time when we visit their `VarDecl`s, so we have code that protects
+          // against this above.
+          ProcessVarDecl(*VD);
+          auto *VDLoc = Env.getStorageLocation(*VD);
+          assert(VDLoc != nullptr);
+          Env.setStorageLocation(*B, *VDLoc);
+        }
+      }
+    }
+  }
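The two binding shapes handled above correspond to source like the following sketch (`Pair` and `makeTuple` are hypothetical):

#include <tuple>

struct Pair { int A; int B; };
std::tuple<int, int> makeTuple();

void bindings(Pair P) {
  auto [X, Y] = P;           // bindings resolved through MemberExprs
  auto [T, U] = makeTuple(); // tuple-like: bindings backed by holding vars
}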
+
+  void VisitImplicitCastExpr(const ImplicitCastExpr *S) {
+    const Expr *SubExpr = S->getSubExpr();
+    assert(SubExpr != nullptr);
+
+    switch (S->getCastKind()) {
+    case CK_IntegralToBoolean: {
+      // This cast creates a new, boolean value from the integral value. We
+      // model that with a fresh value in the environment, unless it's already
+      // a boolean.
+      if (auto *SubExprVal =
+              dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr)))
+        Env.setValue(*S, *SubExprVal);
+      else
+        // FIXME: If integer modeling is added, then update this code to create
+        // the boolean based on the integer model.
+        Env.setValue(*S, Env.makeAtomicBoolValue());
+      break;
+    }
+
+    case CK_LValueToRValue: {
+      // When an L-value is used as an R-value, it may result in sharing, so we
+      // need to unpack any nested `Top`s.
+      auto *SubExprVal = maybeUnpackLValueExpr(*SubExpr, Env);
+      if (SubExprVal == nullptr)
+        break;
+
+      Env.setValue(*S, *SubExprVal);
+      break;
+    }
+
+    case CK_IntegralCast:
+      // FIXME: This cast creates a new integral value from the
+      // subexpression. But, because we don't model integers, we don't
+      // distinguish between this new value and the underlying one. If integer
+      // modeling is added, then update this code to create a fresh location
+      // and value.
+    case CK_UncheckedDerivedToBase:
+    case CK_ConstructorConversion:
+    case CK_UserDefinedConversion:
+      // FIXME: Add tests that exercise CK_UncheckedDerivedToBase,
+      // CK_ConstructorConversion, and CK_UserDefinedConversion.
+    case CK_NoOp: {
+      // FIXME: Consider making `Environment::getStorageLocation` skip noop
+      // expressions (this and other similar expressions in the file) instead
+      // of assigning them storage locations.
+      propagateValueOrStorageLocation(*SubExpr, *S, Env);
+      break;
+    }
+    case CK_NullToPointer: {
+      auto &NullPointerVal =
+          Env.getOrCreateNullPointerValue(S->getType()->getPointeeType());
+      Env.setValue(*S, NullPointerVal);
+      break;
+    }
+    case CK_NullToMemberPointer:
+      // FIXME: Implement pointers to members. For now, don't associate a value
+      // with this expression.
+      break;
+    case CK_FunctionToPointerDecay: {
+      StorageLocation *PointeeLoc = Env.getStorageLocation(*SubExpr);
+      if (PointeeLoc == nullptr)
+        break;
+
+      Env.setValue(*S, Env.create<PointerValue>(*PointeeLoc));
+      break;
+    }
+    case CK_BuiltinFnToFnPtr:
+      // Despite its name, the result type of `BuiltinFnToFnPtr` is a function,
+      // not a function pointer. In addition, builtin functions can only be
+      // called directly; it is not legal to take their address. We therefore
+      // don't need to create a value or storage location for them.
+      break;
+    default:
+      break;
+    }
+  }
+
+  void VisitUnaryOperator(const UnaryOperator *S) {
+    const Expr *SubExpr = S->getSubExpr();
+    assert(SubExpr != nullptr);
+
+    switch (S->getOpcode()) {
+    case UO_Deref: {
+      const auto *SubExprVal = Env.get<PointerValue>(*SubExpr);
+      if (SubExprVal == nullptr)
+        break;
+
+      Env.setStorageLocation(*S, SubExprVal->getPointeeLoc());
+      break;
+    }
+    case UO_AddrOf: {
+      // FIXME: Model pointers to members.
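+      // (A member pointer such as `&Foo::Bar` takes the early exit below; a
+      // plain `&LocalVar` falls through and receives a `PointerValue`.)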
+ if (S->getType()->isMemberPointerType()) + break; + + if (StorageLocation *PointeeLoc = Env.getStorageLocation(*SubExpr)) + Env.setValue(*S, Env.create<PointerValue>(*PointeeLoc)); + break; + } + case UO_LNot: { + auto *SubExprVal = dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr)); + if (SubExprVal == nullptr) + break; + + Env.setValue(*S, Env.makeNot(*SubExprVal)); + break; + } + case UO_PreInc: + case UO_PreDec: + // Propagate the storage location and clear out any value associated with + // it (to represent the fact that the value has definitely changed). + // To avoid generating unnecessary values, we leave it to the specific + // analysis to create a new value if desired. + propagateStorageLocation(*S->getSubExpr(), *S, Env); + if (StorageLocation *Loc = Env.getStorageLocation(*S->getSubExpr())) + Env.clearValue(*Loc); + break; + case UO_PostInc: + case UO_PostDec: + // Propagate the old value, then clear out any value associated with the + // storage location (to represent the fact that the value has definitely + // changed). See above for rationale. + propagateValue(*S->getSubExpr(), *S, Env); + if (StorageLocation *Loc = Env.getStorageLocation(*S->getSubExpr())) + Env.clearValue(*Loc); + break; + default: + break; + } + } + + void VisitCXXThisExpr(const CXXThisExpr *S) { + auto *ThisPointeeLoc = Env.getThisPointeeStorageLocation(); + if (ThisPointeeLoc == nullptr) + // Unions are not supported yet, and will not have a location for the + // `this` expression's pointee. + return; + + Env.setValue(*S, Env.create<PointerValue>(*ThisPointeeLoc)); + } + + void VisitCXXNewExpr(const CXXNewExpr *S) { + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(*S, *Val); + } + + void VisitCXXDeleteExpr(const CXXDeleteExpr *S) { + // Empty method. + // We consciously don't do anything on deletes. Diagnosing double deletes + // (for example) should be done by a specific analysis, not by the + // framework. + } + + void VisitReturnStmt(const ReturnStmt *S) { + if (!Env.getDataflowAnalysisContext().getOptions().ContextSensitiveOpts) + return; + + auto *Ret = S->getRetValue(); + if (Ret == nullptr) + return; + + if (Ret->isPRValue()) { + if (Ret->getType()->isRecordType()) + return; + + auto *Val = Env.getValue(*Ret); + if (Val == nullptr) + return; + + // FIXME: Model NRVO. + Env.setReturnValue(Val); + } else { + auto *Loc = Env.getStorageLocation(*Ret); + if (Loc == nullptr) + return; + + // FIXME: Model NRVO. + Env.setReturnStorageLocation(Loc); + } + } + + void VisitMemberExpr(const MemberExpr *S) { + ValueDecl *Member = S->getMemberDecl(); + assert(Member != nullptr); + + // FIXME: Consider assigning pointer values to function member expressions. + if (Member->isFunctionOrFunctionTemplate()) + return; + + // FIXME: if/when we add support for modeling enums, use that support here. 
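+    //
+    // In the common case handled below, the member's storage location is a
+    // child of the base object's `RecordStorageLocation`. Illustrative
+    // sketch (hypothetical user code):
+    //
+    //   struct Rec { int F; };
+    //   Rec R;
+    //   R.F; // location of `R.F` is `BaseLoc->getChild(F)`, where
+    //        // `BaseLoc` is the location of `R`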
+    if (isa<EnumConstantDecl>(Member))
+      return;
+
+    if (auto *D = dyn_cast<VarDecl>(Member)) {
+      if (D->hasGlobalStorage()) {
+        auto *VarDeclLoc = Env.getStorageLocation(*D);
+        if (VarDeclLoc == nullptr)
+          return;
+
+        Env.setStorageLocation(*S, *VarDeclLoc);
+        return;
+      }
+    }
+
+    RecordStorageLocation *BaseLoc = getBaseObjectLocation(*S, Env);
+    if (BaseLoc == nullptr)
+      return;
+
+    auto *MemberLoc = BaseLoc->getChild(*Member);
+    if (MemberLoc == nullptr)
+      return;
+    Env.setStorageLocation(*S, *MemberLoc);
+  }
+
+  void VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *S) {
+    const Expr *ArgExpr = S->getExpr();
+    assert(ArgExpr != nullptr);
+    propagateValueOrStorageLocation(*ArgExpr, *S, Env);
+
+    if (S->isPRValue() && S->getType()->isRecordType()) {
+      auto &Loc = Env.getResultObjectLocation(*S);
+      Env.initializeFieldsWithValues(Loc);
+    }
+  }
+
+  void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) {
+    const Expr *InitExpr = S->getExpr();
+    assert(InitExpr != nullptr);
+
+    // If this is a prvalue of record type, the handler for `*InitExpr` (if one
+    // exists) will initialize the result object; there is no value to
+    // propagate here.
+    if (S->getType()->isRecordType() && S->isPRValue())
+      return;
+
+    propagateValueOrStorageLocation(*InitExpr, *S, Env);
+  }
+
+  void VisitCXXConstructExpr(const CXXConstructExpr *S) {
+    const CXXConstructorDecl *ConstructorDecl = S->getConstructor();
+    assert(ConstructorDecl != nullptr);
+
+    // `CXXConstructExpr` can have array type if default-initializing an array
+    // of records. We don't handle this specifically beyond potentially
+    // inlining the call.
+    if (!S->getType()->isRecordType()) {
+      transferInlineCall(S, ConstructorDecl);
+      return;
+    }
+
+    RecordStorageLocation &Loc = Env.getResultObjectLocation(*S);
+
+    if (ConstructorDecl->isCopyOrMoveConstructor()) {
+      // It is permissible for a copy/move constructor to have additional
+      // parameters as long as they have default arguments defined for them.
+      assert(S->getNumArgs() != 0);
+
+      const Expr *Arg = S->getArg(0);
+      assert(Arg != nullptr);
+
+      auto *ArgLoc = Env.get<RecordStorageLocation>(*Arg);
+      if (ArgLoc == nullptr)
+        return;
+
+      // Even if the copy/move constructor call is elidable, we choose to copy
+      // the record in all cases (which isn't wrong, just potentially not
+      // optimal).
+      copyRecord(*ArgLoc, Loc, Env);
+      return;
+    }
+
+    Env.initializeFieldsWithValues(Loc, S->getType());
+
+    transferInlineCall(S, ConstructorDecl);
+  }
+
+  void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) {
+    if (S->getOperator() == OO_Equal) {
+      assert(S->getNumArgs() == 2);
+
+      const Expr *Arg0 = S->getArg(0);
+      assert(Arg0 != nullptr);
+
+      const Expr *Arg1 = S->getArg(1);
+      assert(Arg1 != nullptr);
+
+      // Evaluate only copy and move assignment operators.
+      const auto *Method =
+          dyn_cast_or_null<CXXMethodDecl>(S->getDirectCallee());
+      if (!Method)
+        return;
+      if (!Method->isCopyAssignmentOperator() &&
+          !Method->isMoveAssignmentOperator())
+        return;
+
+      RecordStorageLocation *LocSrc = nullptr;
+      if (Arg1->isPRValue()) {
+        LocSrc = &Env.getResultObjectLocation(*Arg1);
+      } else {
+        LocSrc = Env.get<RecordStorageLocation>(*Arg1);
+      }
+      auto *LocDst = Env.get<RecordStorageLocation>(*Arg0);
+
+      if (LocSrc == nullptr || LocDst == nullptr)
+        return;
+
+      copyRecord(*LocSrc, *LocDst, Env);
+
+      // The assignment operator can have an arbitrary return type. We model
+      // the return value only if the return type is the same as or a base
+      // class of the destination type.
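+      //
+      // Illustrative sketch (hypothetical user code): for the canonical
+      //
+      //   struct S { S &operator=(const S &); };
+      //   S A, B;
+      //   A = B; // glvalue result aliases the location of `A`
+      //
+      // the expression `A = B` is associated below with `LocDst`. An
+      // assignment operator with an unrelated return type is left unmodeled
+      // by the checks that follow.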
+ if (S->getType().getCanonicalType().getUnqualifiedType() != + LocDst->getType().getCanonicalType().getUnqualifiedType()) { + auto ReturnDecl = S->getType()->getAsCXXRecordDecl(); + auto DstDecl = LocDst->getType()->getAsCXXRecordDecl(); + if (ReturnDecl == nullptr || DstDecl == nullptr) + return; + if (!DstDecl->isDerivedFrom(ReturnDecl)) + return; + } + + if (S->isGLValue()) + Env.setStorageLocation(*S, *LocDst); + else + copyRecord(*LocDst, Env.getResultObjectLocation(*S), Env); + + return; + } + + // `CXXOperatorCallExpr` can be a prvalue. Call `VisitCallExpr`() to + // initialize the prvalue's fields with values. + VisitCallExpr(S); + } + + void VisitCXXRewrittenBinaryOperator(const CXXRewrittenBinaryOperator *RBO) { + propagateValue(*RBO->getSemanticForm(), *RBO, Env); + } + + void VisitCallExpr(const CallExpr *S) { + // Of clang's builtins, only `__builtin_expect` is handled explicitly, since + // others (like trap, debugtrap, and unreachable) are handled by CFG + // construction. + if (S->isCallToStdMove()) { + assert(S->getNumArgs() == 1); + + const Expr *Arg = S->getArg(0); + assert(Arg != nullptr); + + auto *ArgLoc = Env.getStorageLocation(*Arg); + if (ArgLoc == nullptr) + return; + + Env.setStorageLocation(*S, *ArgLoc); + } else if (S->getDirectCallee() != nullptr && + S->getDirectCallee()->getBuiltinID() == + Builtin::BI__builtin_expect) { + assert(S->getNumArgs() > 0); + assert(S->getArg(0) != nullptr); + auto *ArgVal = Env.getValue(*S->getArg(0)); + if (ArgVal == nullptr) + return; + Env.setValue(*S, *ArgVal); + } else if (const FunctionDecl *F = S->getDirectCallee()) { + transferInlineCall(S, F); + + // If this call produces a prvalue of record type, initialize its fields + // with values. + if (S->getType()->isRecordType() && S->isPRValue()) { + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + Env.initializeFieldsWithValues(Loc); + } + } + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + StorageLocation &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + + if (SubExpr->getType()->isRecordType()) + // Nothing else left to do -- we initialized the record when transferring + // `SubExpr`. + return; + + if (Value *SubExprVal = Env.getValue(*SubExpr)) + Env.setValue(Loc, *SubExprVal); + } + + void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + propagateValue(*SubExpr, *S, Env); + } + + void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) { + if (S->getCastKind() == CK_NoOp) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + propagateValueOrStorageLocation(*SubExpr, *S, Env); + } + } + + void VisitConditionalOperator(const ConditionalOperator *S) { + const Environment *TrueEnv = StmtToEnv.getEnvironment(*S->getTrueExpr()); + const Environment *FalseEnv = StmtToEnv.getEnvironment(*S->getFalseExpr()); + + if (TrueEnv == nullptr || FalseEnv == nullptr) { + // If the true or false branch is dead, we may not have an environment for + // it. We could handle this specifically by forwarding the value or + // location of the live branch, but this case is rare enough that this + // probably isn't worth the additional complexity. 
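+    //
+    // Illustrative sketch (hypothetical user code) of how a branch can be
+    // dead here:
+    //
+    //   int X = true ? 1 : 0; // with trivially-false-edge pruning, the
+    //                         // `0` branch has no environment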
+ return; + } + + if (S->isGLValue()) { + StorageLocation *TrueLoc = TrueEnv->getStorageLocation(*S->getTrueExpr()); + StorageLocation *FalseLoc = + FalseEnv->getStorageLocation(*S->getFalseExpr()); + if (TrueLoc == FalseLoc && TrueLoc != nullptr) + Env.setStorageLocation(*S, *TrueLoc); + } else if (!S->getType()->isRecordType()) { + // The conditional operator can evaluate to either of the values of the + // two branches. To model this, join these two values together to yield + // the result of the conditional operator. + // Note: Most joins happen in `computeBlockInputState()`, but this case is + // different: + // - `computeBlockInputState()` (which in turn calls `Environment::join()` + // joins values associated with the _same_ expression or storage + // location, then associates the joined value with that expression or + // storage location. This join has nothing to do with transfer -- + // instead, it joins together the results of performing transfer on two + // different blocks. + // - Here, we join values associated with _different_ expressions (the + // true and false branch), then associate the joined value with a third + // expression (the conditional operator itself). This join is what it + // means to perform transfer on the conditional operator. + if (Value *Val = Environment::joinValues( + S->getType(), TrueEnv->getValue(*S->getTrueExpr()), *TrueEnv, + FalseEnv->getValue(*S->getFalseExpr()), *FalseEnv, Env, Model)) + Env.setValue(*S, *Val); + } + } + + void VisitInitListExpr(const InitListExpr *S) { + QualType Type = S->getType(); + + if (!Type->isRecordType()) { + // Until array initialization is implemented, we skip arrays and don't + // need to care about cases where `getNumInits() > 1`. + if (!Type->isArrayType() && S->getNumInits() == 1) + propagateValueOrStorageLocation(*S->getInit(0), *S, Env); + return; + } + + // If the initializer list is transparent, there's nothing to do. + if (S->isSemanticForm() && S->isTransparent()) + return; + + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + + // Initialization of base classes and fields of record type happens when we + // visit the nested `CXXConstructExpr` or `InitListExpr` for that base class + // or field. We therefore only need to deal with fields of non-record type + // here. + + RecordInitListHelper InitListHelper(S); + + for (auto [Field, Init] : InitListHelper.field_inits()) { + if (Field->getType()->isRecordType()) + continue; + if (Field->getType()->isReferenceType()) { + assert(Field->getType().getCanonicalType()->getPointeeType() == + Init->getType().getCanonicalType()); + Loc.setChild(*Field, &Env.createObject(Field->getType(), Init)); + continue; + } + assert(Field->getType().getCanonicalType().getUnqualifiedType() == + Init->getType().getCanonicalType().getUnqualifiedType()); + StorageLocation *FieldLoc = Loc.getChild(*Field); + // Locations for non-reference fields must always be non-null. 
+ assert(FieldLoc != nullptr); + Value *Val = Env.getValue(*Init); + if (Val == nullptr && isa<ImplicitValueInitExpr>(Init) && + Init->getType()->isPointerType()) + Val = + &Env.getOrCreateNullPointerValue(Init->getType()->getPointeeType()); + if (Val == nullptr) + Val = Env.createValue(Field->getType()); + if (Val != nullptr) + Env.setValue(*FieldLoc, *Val); + } + + for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { + QualType FieldType = FieldLoc->getType(); + if (FieldType->isRecordType()) { + Env.initializeFieldsWithValues(*cast<RecordStorageLocation>(FieldLoc)); + } else { + if (Value *Val = Env.createValue(FieldType)) + Env.setValue(*FieldLoc, *Val); + } + } + + // FIXME: Implement array initialization. + } + + void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) { + Env.setValue(*S, Env.getBoolLiteralValue(S->getValue())); + } + + void VisitIntegerLiteral(const IntegerLiteral *S) { + Env.setValue(*S, Env.getIntLiteralValue(S->getValue())); + } + + void VisitParenExpr(const ParenExpr *S) { + // The CFG does not contain `ParenExpr` as top-level statements in basic + // blocks, however manual traversal to sub-expressions may encounter them. + // Redirect to the sub-expression. + auto *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + Visit(SubExpr); + } + + void VisitExprWithCleanups(const ExprWithCleanups *S) { + // The CFG does not contain `ExprWithCleanups` as top-level statements in + // basic blocks, however manual traversal to sub-expressions may encounter + // them. Redirect to the sub-expression. + auto *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + Visit(SubExpr); + } + +private: + /// Returns the value for the sub-expression `SubExpr` of a logic operator. + BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) { + // `SubExpr` and its parent logic operator might be part of different basic + // blocks. We try to access the value that is assigned to `SubExpr` in the + // corresponding environment. + if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) + if (auto *Val = + dyn_cast_or_null<BoolValue>(SubExprEnv->getValue(SubExpr))) + return *Val; + + // The sub-expression may lie within a basic block that isn't reachable, + // even if we need it to evaluate the current (reachable) expression + // (see https://discourse.llvm.org/t/70775). In this case, visit `SubExpr` + // within the current environment and then try to get the value that gets + // assigned to it. + if (Env.getValue(SubExpr) == nullptr) + Visit(&SubExpr); + if (auto *Val = dyn_cast_or_null<BoolValue>(Env.getValue(SubExpr))) + return *Val; + + // If the value of `SubExpr` is still unknown, we create a fresh symbolic + // boolean value for it. + return Env.makeAtomicBoolValue(); + } + + // If context sensitivity is enabled, try to analyze the body of the callee + // `F` of `S`. The type `E` must be either `CallExpr` or `CXXConstructExpr`. + template <typename E> + void transferInlineCall(const E *S, const FunctionDecl *F) { + const auto &Options = Env.getDataflowAnalysisContext().getOptions(); + if (!(Options.ContextSensitiveOpts && + Env.canDescend(Options.ContextSensitiveOpts->Depth, F))) + return; + + const AdornedCFG *ACFG = Env.getDataflowAnalysisContext().getAdornedCFG(F); + if (!ACFG) + return; + + // FIXME: We don't support context-sensitive analysis of recursion, so + // we should return early here if `F` is the same as the `FunctionDecl` + // holding `S` itself. 
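+    //
+    // For reference, callers opt in to this inlining through the analysis
+    // options; a rough sketch (using the option fields referenced above):
+    //
+    //   DataflowAnalysisContext::Options Opts;
+    //   Opts.ContextSensitiveOpts = ContextSensitiveOptions{/*Depth=*/2};
+    //   // `canDescend` above then bounds inlining by call-stack depth.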
+ + auto ExitBlock = ACFG->getCFG().getExit().getBlockID(); + + auto CalleeEnv = Env.pushCall(S); + + // FIXME: Use the same analysis as the caller for the callee. Note, + // though, that doing so would require support for changing the analysis's + // ASTContext. + auto Analysis = NoopAnalysis(ACFG->getDecl().getASTContext(), + DataflowAnalysisOptions{Options}); + + auto BlockToOutputState = + dataflow::runDataflowAnalysis(*ACFG, Analysis, CalleeEnv); + assert(BlockToOutputState); + assert(ExitBlock < BlockToOutputState->size()); + + auto &ExitState = (*BlockToOutputState)[ExitBlock]; + assert(ExitState); + + Env.popCall(S, ExitState->Env); + } + + const StmtToEnvMap &StmtToEnv; + Environment &Env; + Environment::ValueModel &Model; +}; + +} // namespace + +void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env, + Environment::ValueModel &Model) { + TransferVisitor(StmtToEnv, Env, Model).Visit(&S); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp new file mode 100644 index 000000000000..200682faafd6 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -0,0 +1,571 @@ +//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines type-erased base types and functions for building dataflow +// analyses that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include <optional> +#include <system_error> +#include <utility> +#include <vector> + +#include "clang/AST/ASTDumper.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Analysis/FlowSensitive/RecordOps.h" +#include "clang/Analysis/FlowSensitive/Transfer.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" + +#define DEBUG_TYPE "clang-dataflow" + +namespace clang { +namespace dataflow { + +/// Returns the index of `Block` in the successors of `Pred`. +static int blockIndexInPredecessor(const CFGBlock &Pred, + const CFGBlock &Block) { + auto BlockPos = llvm::find_if( + Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { + return Succ && Succ->getBlockID() == Block.getBlockID(); + }); + return BlockPos - Pred.succ_begin(); +} + +// A "backedge" node is a block introduced in the CFG exclusively to indicate a +// loop backedge. They are exactly identified by the presence of a non-null +// pointer to the entry block of the loop condition. 
Note that this is not +// necessarily the block with the loop statement as terminator, because +// short-circuit operators will result in multiple blocks encoding the loop +// condition, only one of which will contain the loop statement as terminator. +static bool isBackedgeNode(const CFGBlock &B) { + return B.getLoopTarget() != nullptr; +} + +namespace { + +/// Extracts the terminator's condition expression. +class TerminatorVisitor + : public ConstStmtVisitor<TerminatorVisitor, const Expr *> { +public: + TerminatorVisitor() = default; + const Expr *VisitIfStmt(const IfStmt *S) { return S->getCond(); } + const Expr *VisitWhileStmt(const WhileStmt *S) { return S->getCond(); } + const Expr *VisitDoStmt(const DoStmt *S) { return S->getCond(); } + const Expr *VisitForStmt(const ForStmt *S) { return S->getCond(); } + const Expr *VisitCXXForRangeStmt(const CXXForRangeStmt *) { + // Don't do anything special for CXXForRangeStmt, because the condition + // (being implicitly generated) isn't visible from the loop body. + return nullptr; + } + const Expr *VisitBinaryOperator(const BinaryOperator *S) { + assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); + return S->getLHS(); + } + const Expr *VisitConditionalOperator(const ConditionalOperator *S) { + return S->getCond(); + } +}; + +/// Holds data structures required for running dataflow analysis. +struct AnalysisContext { + AnalysisContext(const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv, + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> + BlockStates) + : ACFG(ACFG), Analysis(Analysis), InitEnv(InitEnv), + Log(*InitEnv.getDataflowAnalysisContext().getOptions().Log), + BlockStates(BlockStates) { + Log.beginAnalysis(ACFG, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } + + /// Contains the CFG being analyzed. + const AdornedCFG &ACFG; + /// The analysis to be run. + TypeErasedDataflowAnalysis &Analysis; + /// Initial state to start the analysis. + const Environment &InitEnv; + Logger &Log; + /// Stores the state of a CFG block if it has been evaluated by the analysis. + /// The indices correspond to the block IDs. + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates; +}; + +class PrettyStackTraceAnalysis : public llvm::PrettyStackTraceEntry { +public: + PrettyStackTraceAnalysis(const AdornedCFG &ACFG, const char *Message) + : ACFG(ACFG), Message(Message) {} + + void print(raw_ostream &OS) const override { + OS << Message << "\n"; + OS << "Decl:\n"; + ACFG.getDecl().dump(OS); + OS << "CFG:\n"; + ACFG.getCFG().print(OS, LangOptions(), false); + } + +private: + const AdornedCFG &ACFG; + const char *Message; +}; + +class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { +public: + PrettyStackTraceCFGElement(const CFGElement &Element, int BlockIdx, + int ElementIdx, const char *Message) + : Element(Element), BlockIdx(BlockIdx), ElementIdx(ElementIdx), + Message(Message) {} + + void print(raw_ostream &OS) const override { + OS << Message << ": Element [B" << BlockIdx << "." << ElementIdx << "]\n"; + if (auto Stmt = Element.getAs<CFGStmt>()) { + OS << "Stmt:\n"; + ASTDumper Dumper(OS, false); + Dumper.Visit(Stmt->getStmt()); + } + } + +private: + const CFGElement ∈ + int BlockIdx; + int ElementIdx; + const char *Message; +}; + +// Builds a joined TypeErasedDataflowAnalysisState from 0 or more sources, +// each of which may be owned (built as part of the join) or external (a +// reference to an Environment that will outlive the builder). 
+// Avoids unnecessary copies of the environment.
+class JoinedStateBuilder {
+  AnalysisContext &AC;
+  Environment::ExprJoinBehavior JoinBehavior;
+  std::vector<const TypeErasedDataflowAnalysisState *> All;
+  std::deque<TypeErasedDataflowAnalysisState> Owned;
+
+  TypeErasedDataflowAnalysisState
+  join(const TypeErasedDataflowAnalysisState &L,
+       const TypeErasedDataflowAnalysisState &R) {
+    return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice),
+            Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)};
+  }
+
+public:
+  JoinedStateBuilder(AnalysisContext &AC,
+                     Environment::ExprJoinBehavior JoinBehavior)
+      : AC(AC), JoinBehavior(JoinBehavior) {}
+
+  void addOwned(TypeErasedDataflowAnalysisState State) {
+    Owned.push_back(std::move(State));
+    All.push_back(&Owned.back());
+  }
+  void addUnowned(const TypeErasedDataflowAnalysisState &State) {
+    All.push_back(&State);
+  }
+  TypeErasedDataflowAnalysisState take() && {
+    if (All.empty())
+      // FIXME: Consider passing `Block` to Analysis.typeErasedInitialElement
+      // to enable building analyses like computation of dominators that
+      // initialize the state of each basic block differently.
+      return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()};
+    if (All.size() == 1)
+      // Join the environment with itself so that we discard expression state
+      // if desired.
+      // FIXME: We could consider writing special-case code for this that only
+      // does the discarding, but it's not clear if this is worth it.
+      return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env,
+                                                 AC.Analysis, JoinBehavior)};
+
+    auto Result = join(*All[0], *All[1]);
+    for (unsigned I = 2; I < All.size(); ++I)
+      Result = join(Result, *All[I]);
+    return Result;
+  }
+};
+} // namespace
+
+static const Expr *getTerminatorCondition(const Stmt *TerminatorStmt) {
+  return TerminatorStmt == nullptr ? nullptr
+                                   : TerminatorVisitor().Visit(TerminatorStmt);
+}
+
+/// Computes the input state for a given basic block by joining the output
+/// states of its predecessors.
+///
+/// Requirements:
+///
+/// All predecessors of `Block` except those with loop back edges must have
+/// already been transferred. States in `AC.BlockStates` that are set to
+/// `std::nullopt` represent basic blocks that are not evaluated yet.
+static TypeErasedDataflowAnalysisState
+computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
+  std::vector<const CFGBlock *> Preds(Block.pred_begin(), Block.pred_end());
+  if (Block.getTerminator().isTemporaryDtorsBranch()) {
+    // This handles a special case where the code that produced the CFG
+    // includes a conditional operator with a branch that constructs a
+    // temporary and calls a destructor annotated as noreturn. The CFG models
+    // this as follows:
+    //
+    // B1 (contains the condition of the conditional operator) - succs: B2, B3
+    // B2 (contains code that does not call a noreturn destructor) - succs: B4
+    // B3 (contains code that calls a noreturn destructor) - succs: B4
+    // B4 (has temporary destructor terminator) - succs: B5, B6
+    // B5 (noreturn block that is associated with the noreturn destructor call)
+    // B6 (contains code that follows the conditional operator statement)
+    //
+    // The first successor (B5 above) of a basic block with a temporary
+    // destructor terminator (B4 above) is the block that evaluates the
+    // destructor. If that block has a noreturn element then the predecessor
+    // block that constructed the temporary object (B3 above) is effectively a
+    // noreturn block and its state should not be used as input for the state
+    // of the block that has a temporary destructor terminator (B4 above). This
+    // holds regardless of which branch of the ternary operator calls the
+    // noreturn destructor. However, it doesn't handle cases where a nested
+    // ternary operator includes a branch that contains a noreturn destructor
+    // call.
+    //
+    // See `NoreturnDestructorTest` for concrete examples.
+    if (Block.succ_begin()->getReachableBlock() != nullptr &&
+        Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
+      auto &StmtToBlock = AC.ACFG.getStmtToBlock();
+      auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
+      assert(StmtBlock != StmtToBlock.end());
+      llvm::erase(Preds, StmtBlock->getSecond());
+    }
+  }
+
+  // If any of the predecessor blocks contains an expression consumed in a
+  // different block, we need to keep expression state.
+  // Note that in this case, we keep expression state for all predecessors,
+  // rather than only those predecessors that actually contain an expression
+  // consumed in a different block. While this is potentially suboptimal, it's
+  // actually likely, if we have control flow within a full expression, that
+  // all predecessors have expression state consumed in a different block.
+  Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState;
+  for (const CFGBlock *Pred : Preds) {
+    if (Pred && AC.ACFG.containsExprConsumedInDifferentBlock(*Pred)) {
+      JoinBehavior = Environment::KeepExprState;
+      break;
+    }
+  }
+
+  JoinedStateBuilder Builder(AC, JoinBehavior);
+  for (const CFGBlock *Pred : Preds) {
+    // Skip if the `Block` is unreachable or control flow cannot get past it.
+    if (!Pred || Pred->hasNoReturnElement())
+      continue;
+
+    // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
+    // loop back edge to `Block`.
+    const std::optional<TypeErasedDataflowAnalysisState> &MaybePredState =
+        AC.BlockStates[Pred->getBlockID()];
+    if (!MaybePredState)
+      continue;
+
+    const TypeErasedDataflowAnalysisState &PredState = *MaybePredState;
+    const Expr *Cond = getTerminatorCondition(Pred->getTerminatorStmt());
+    if (Cond == nullptr) {
+      Builder.addUnowned(PredState);
+      continue;
+    }
+
+    bool BranchVal = blockIndexInPredecessor(*Pred, Block) == 0;
+
+    // `transferBranch` may need to mutate the environment to describe the
+    // dynamic effect of the terminator for a given branch. Copy now.
+    TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
+    if (AC.Analysis.builtinOptions()) {
+      auto *CondVal = Copy.Env.get<BoolValue>(*Cond);
+      // In transferCFGBlock(), we ensure that we always have a `Value`
+      // for the terminator condition, so assert this. We consciously
+      // assert ourselves instead of asserting via `cast()` so that we get
+      // a more meaningful line number if the assertion fails.
+      assert(CondVal != nullptr);
+      BoolValue *AssertedVal =
+          BranchVal ? CondVal : &Copy.Env.makeNot(*CondVal);
+      Copy.Env.assume(AssertedVal->formula());
+    }
+    AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice,
+                                         Copy.Env);
+    Builder.addOwned(std::move(Copy));
+  }
+  return std::move(Builder).take();
+}
+
+/// Built-in transfer function for `CFGStmt`.
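+///
+/// For example, for a `CFGStmt` wrapping the (hypothetical) expression `!A`,
+/// this dispatches through `transfer()` to
+/// `TransferVisitor::VisitUnaryOperator` (the `UO_LNot` case above), which
+/// records the negated `BoolValue` for the expression in `InputState.Env`.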
+static void
+builtinTransferStatement(unsigned CurBlockID, const CFGStmt &Elt,
+                         TypeErasedDataflowAnalysisState &InputState,
+                         AnalysisContext &AC) {
+  const Stmt *S = Elt.getStmt();
+  assert(S != nullptr);
+  transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, CurBlockID, InputState), *S,
+           InputState.Env, AC.Analysis);
+}
+
+/// Built-in transfer function for `CFGInitializer`.
+static void
+builtinTransferInitializer(const CFGInitializer &Elt,
+                           TypeErasedDataflowAnalysisState &InputState) {
+  const CXXCtorInitializer *Init = Elt.getInitializer();
+  assert(Init != nullptr);
+
+  auto &Env = InputState.Env;
+  auto &ThisLoc = *Env.getThisPointeeStorageLocation();
+
+  if (!Init->isAnyMemberInitializer())
+    // FIXME: Handle base initialization
+    return;
+
+  auto *InitExpr = Init->getInit();
+  assert(InitExpr != nullptr);
+
+  const FieldDecl *Member = nullptr;
+  RecordStorageLocation *ParentLoc = &ThisLoc;
+  StorageLocation *MemberLoc = nullptr;
+  if (Init->isMemberInitializer()) {
+    Member = Init->getMember();
+    MemberLoc = ThisLoc.getChild(*Member);
+  } else {
+    IndirectFieldDecl *IndirectField = Init->getIndirectMember();
+    assert(IndirectField != nullptr);
+    MemberLoc = &ThisLoc;
+    for (const auto *I : IndirectField->chain()) {
+      Member = cast<FieldDecl>(I);
+      ParentLoc = cast<RecordStorageLocation>(MemberLoc);
+      MemberLoc = ParentLoc->getChild(*Member);
+    }
+  }
+  assert(Member != nullptr);
+
+  // FIXME: Instead of these case distinctions, we would ideally want to be
+  // able to simply use `Environment::createObject()` here, the same way that
+  // we do this in `TransferVisitor::VisitInitListExpr()`. However, this would
+  // require us to be able to build a list of fields that we then use to
+  // initialize a `RecordStorageLocation` -- and the problem is that, when we
+  // get here, the `RecordStorageLocation` already exists. We should explore
+  // if there's anything that we can do to change this.
+  if (Member->getType()->isReferenceType()) {
+    auto *InitExprLoc = Env.getStorageLocation(*InitExpr);
+    if (InitExprLoc == nullptr)
+      return;
+
+    ParentLoc->setChild(*Member, InitExprLoc);
+    // Record-type initializers construct themselves directly into the result
+    // object, so there is no need to handle them here.
+  } else if (!Member->getType()->isRecordType()) {
+    assert(MemberLoc != nullptr);
+    if (auto *InitExprVal = Env.getValue(*InitExpr))
+      Env.setValue(*MemberLoc, *InitExprVal);
+  }
+}
+
+static void builtinTransfer(unsigned CurBlockID, const CFGElement &Elt,
+                            TypeErasedDataflowAnalysisState &State,
+                            AnalysisContext &AC) {
+  switch (Elt.getKind()) {
+  case CFGElement::Statement:
+    builtinTransferStatement(CurBlockID, Elt.castAs<CFGStmt>(), State, AC);
+    break;
+  case CFGElement::Initializer:
+    builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State);
+    break;
+  case CFGElement::LifetimeEnds:
+    // Removing declarations when their lifetime ends serves two purposes:
+    // - Eliminate unnecessary clutter from `Environment::DeclToLoc`
+    // - Allow us to assert that, when joining two `Environment`s, the two
+    //   `DeclToLoc` maps never contain entries that map the same declaration
+    //   to different storage locations.
+    if (const ValueDecl *VD = Elt.castAs<CFGLifetimeEnds>().getVarDecl())
+      State.Env.removeDecl(*VD);
+    break;
+  default:
+    // FIXME: Evaluate other kinds of `CFGElement`
+    break;
+  }
+}
+
+/// Transfers `State` by evaluating each element in the `Block` based on the
+/// `AC.Analysis` specified.
+/// +/// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set +/// by the analysis) will be applied to the element before evaluation by the +/// user-specified analysis. +/// `PostVisitCFG` (if provided) will be applied to the element after evaluation +/// by the user-specified analysis. +static TypeErasedDataflowAnalysisState +transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, + const CFGEltCallbacksTypeErased &PostAnalysisCallbacks = {}) { + AC.Log.enterBlock(Block, PostAnalysisCallbacks.Before != nullptr || + PostAnalysisCallbacks.After != nullptr); + auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); + int ElementIdx = 1; + for (const auto &Element : Block) { + PrettyStackTraceCFGElement CrashInfo(Element, Block.getBlockID(), + ElementIdx++, "transferCFGBlock"); + + AC.Log.enterElement(Element); + + if (PostAnalysisCallbacks.Before) { + PostAnalysisCallbacks.Before(Element, State); + } + + // Built-in analysis + if (AC.Analysis.builtinOptions()) { + builtinTransfer(Block.getBlockID(), Element, State, AC); + } + + // User-provided analysis + AC.Analysis.transferTypeErased(Element, State.Lattice, State.Env); + + if (PostAnalysisCallbacks.After) { + PostAnalysisCallbacks.After(Element, State); + } + + AC.Log.recordState(State); + } + + // If we have a terminator, evaluate its condition. + // This `Expr` may not appear as a `CFGElement` anywhere else, and it's + // important that we evaluate it here (rather than while processing the + // terminator) so that we put the corresponding value in the right + // environment. + if (const Expr *TerminatorCond = + dyn_cast_or_null<Expr>(Block.getTerminatorCondition())) { + if (State.Env.getValue(*TerminatorCond) == nullptr) + // FIXME: This only runs the builtin transfer, not the analysis-specific + // transfer. Fixing this isn't trivial, as the analysis-specific transfer + // takes a `CFGElement` as input, but some expressions only show up as a + // terminator condition, but not as a `CFGElement`. The condition of an if + // statement is one such example. + transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, Block.getBlockID(), State), + *TerminatorCond, State.Env, AC.Analysis); + + // If the transfer function didn't produce a value, create an atom so that + // we have *some* value for the condition expression. This ensures that + // when we extend the flow condition, it actually changes. + if (State.Env.getValue(*TerminatorCond) == nullptr) + State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue()); + AC.Log.recordState(State); + } + + return State; +} + +llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>> +runTypeErasedDataflowAnalysis( + const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv, + const CFGEltCallbacksTypeErased &PostAnalysisCallbacks, + std::int32_t MaxBlockVisits) { + PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis"); + + std::optional<Environment> MaybeStartingEnv; + if (InitEnv.callStackSize() == 0) { + MaybeStartingEnv = InitEnv.fork(); + MaybeStartingEnv->initialize(); + } + const Environment &StartingEnv = + MaybeStartingEnv ? *MaybeStartingEnv : InitEnv; + + const clang::CFG &CFG = ACFG.getCFG(); + PostOrderCFGView POV(&CFG); + ForwardDataflowWorklist Worklist(CFG, &POV); + + std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates( + CFG.size()); + + // The entry basic block doesn't contain statements so it can be skipped. 
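+  //
+  // Caller's-eye sketch (hypothetical analysis; the typed
+  // `runDataflowAnalysis` wrapper bottoms out in this function):
+  //
+  //   auto States = runTypeErasedDataflowAnalysis(
+  //       ACFG, Analysis, InitEnv, /*PostAnalysisCallbacks=*/{},
+  //       /*MaxBlockVisits=*/20'000);
+  //   if (!States) { /* e.g. "maximum number of blocks processed" */ }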
+ const CFGBlock &Entry = CFG.getEntry(); + BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), + StartingEnv.fork()}; + Worklist.enqueueSuccessors(&Entry); + + AnalysisContext AC(ACFG, Analysis, StartingEnv, BlockStates); + std::int32_t BlockVisits = 0; + while (const CFGBlock *Block = Worklist.dequeue()) { + LLVM_DEBUG(llvm::dbgs() + << "Processing Block " << Block->getBlockID() << "\n"); + if (++BlockVisits > MaxBlockVisits) { + return llvm::createStringError(std::errc::timed_out, + "maximum number of blocks processed"); + } + + const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState = + BlockStates[Block->getBlockID()]; + TypeErasedDataflowAnalysisState NewBlockState = + transferCFGBlock(*Block, AC); + LLVM_DEBUG({ + llvm::errs() << "New Env:\n"; + NewBlockState.Env.dump(); + }); + + if (OldBlockState) { + LLVM_DEBUG({ + llvm::errs() << "Old Env:\n"; + OldBlockState->Env.dump(); + }); + if (isBackedgeNode(*Block)) { + LatticeJoinEffect Effect1 = Analysis.widenTypeErased( + NewBlockState.Lattice, OldBlockState->Lattice); + LatticeJoinEffect Effect2 = + NewBlockState.Env.widen(OldBlockState->Env, Analysis); + if (Effect1 == LatticeJoinEffect::Unchanged && + Effect2 == LatticeJoinEffect::Unchanged) { + // The state of `Block` didn't change from widening so there's no need + // to revisit its successors. + AC.Log.blockConverged(); + continue; + } + } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, + NewBlockState.Lattice) && + OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { + // The state of `Block` didn't change after transfer so there's no need + // to revisit its successors. + AC.Log.blockConverged(); + continue; + } + } + + BlockStates[Block->getBlockID()] = std::move(NewBlockState); + + // Do not add unreachable successor blocks to `Worklist`. + if (Block->hasNoReturnElement()) + continue; + + Worklist.enqueueSuccessors(Block); + } + // FIXME: Consider evaluating unreachable basic blocks (those that have a + // state set to `std::nullopt` at this point) to also analyze dead code. + + if (PostAnalysisCallbacks.Before || PostAnalysisCallbacks.After) { + for (const CFGBlock *Block : ACFG.getCFG()) { + // Skip blocks that were not evaluated. + if (!BlockStates[Block->getBlockID()]) + continue; + transferCFGBlock(*Block, AC, PostAnalysisCallbacks); + } + } + + return std::move(BlockStates); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp new file mode 100644 index 000000000000..d70e5a82ea23 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp @@ -0,0 +1,60 @@ +//===-- Value.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines support functions for the `Value` type. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "llvm/Support/Casting.h" + +namespace clang { +namespace dataflow { + +static bool areEquivalentIndirectionValues(const Value &Val1, + const Value &Val2) { + if (auto *IndVal1 = dyn_cast<PointerValue>(&Val1)) { + auto *IndVal2 = cast<PointerValue>(&Val2); + return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc(); + } + return false; +} + +bool areEquivalentValues(const Value &Val1, const Value &Val2) { + if (&Val1 == &Val2) + return true; + if (Val1.getKind() != Val2.getKind()) + return false; + // If values are distinct and have properties, we don't consider them equal, + // leaving equality up to the user model. + if (!Val1.properties().empty() || !Val2.properties().empty()) + return false; + if (isa<TopBoolValue>(&Val1)) + return true; + return areEquivalentIndirectionValues(Val1, Val2); +} + +raw_ostream &operator<<(raw_ostream &OS, const Value &Val) { + switch (Val.getKind()) { + case Value::Kind::Integer: + return OS << "Integer(@" << &Val << ")"; + case Value::Kind::Pointer: + return OS << "Pointer(" << &cast<PointerValue>(Val).getPointeeLoc() << ")"; + case Value::Kind::TopBool: + return OS << "TopBool(" << cast<TopBoolValue>(Val).getAtom() << ")"; + case Value::Kind::AtomicBool: + return OS << "AtomicBool(" << cast<AtomicBoolValue>(Val).getAtom() << ")"; + case Value::Kind::FormulaBool: + return OS << "FormulaBool(" << cast<FormulaBoolValue>(Val).formula() << ")"; + } + llvm_unreachable("Unknown clang::dataflow::Value::Kind enum"); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp new file mode 100644 index 000000000000..a39f0e0b29ad --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -0,0 +1,418 @@ +//===- WatchedLiteralsSolver.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a SAT solver implementation that can be used by dataflow +// analyses. +// +//===----------------------------------------------------------------------===// + +#include <cassert> +#include <vector> + +#include "clang/Analysis/FlowSensitive/CNFFormula.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Solver.h" +#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" + + +namespace clang { +namespace dataflow { + +namespace { + +class WatchedLiteralsSolverImpl { + /// Stores the variable identifier and Atom for atomic booleans in the + /// formula. + llvm::DenseMap<Variable, Atom> Atomics; + + /// A boolean formula in conjunctive normal form that the solver will attempt + /// to prove satisfiable. The formula will be modified in the process. 
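+  ///
+  /// Illustrative example: `buildCNF` turns a formula such as `A && (B || C)`
+  /// into the clause set {(A), (B v C)} (modulo the equisatisfiable
+  /// transformation it applies), and the solver below then searches for a
+  /// satisfying assignment of the clause variables.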
+ CNFFormula CNF; + + /// Maps literals (indices of the vector) to clause identifiers (elements of + /// the vector) that watch the respective literals. + /// + /// For a given clause, its watched literal is always its first literal in + /// `Clauses`. This invariant is maintained when watched literals change. + std::vector<ClauseID> WatchedHead; + + /// Maps clause identifiers (elements of the vector) to identifiers of other + /// clauses that watch the same literals, forming a set of linked lists. + /// + /// The element at index 0 stands for the identifier of the clause that + /// follows the null clause. It is set to 0 and isn't used. Identifiers of + /// clauses in the formula start from the element at index 1. + std::vector<ClauseID> NextWatched; + + /// The search for a satisfying assignment of the variables in `Formula` will + /// proceed in levels, starting from 1 and going up to `Formula.LargestVar` + /// (inclusive). The current level is stored in `Level`. At each level the + /// solver will assign a value to an unassigned variable. If this leads to a + /// consistent partial assignment, `Level` will be incremented. Otherwise, if + /// it results in a conflict, the solver will backtrack by decrementing + /// `Level` until it reaches the most recent level where a decision was made. + size_t Level = 0; + + /// Maps levels (indices of the vector) to variables (elements of the vector) + /// that are assigned values at the respective levels. + /// + /// The element at index 0 isn't used. Variables start from the element at + /// index 1. + std::vector<Variable> LevelVars; + + /// State of the solver at a particular level. + enum class State : uint8_t { + /// Indicates that the solver made a decision. + Decision = 0, + + /// Indicates that the solver made a forced move. + Forced = 1, + }; + + /// State of the solver at a particular level. It keeps track of previous + /// decisions that the solver can refer to when backtracking. + /// + /// The element at index 0 isn't used. States start from the element at index + /// 1. + std::vector<State> LevelStates; + + enum class Assignment : int8_t { + Unassigned = -1, + AssignedFalse = 0, + AssignedTrue = 1 + }; + + /// Maps variables (indices of the vector) to their assignments (elements of + /// the vector). + /// + /// The element at index 0 isn't used. Variable assignments start from the + /// element at index 1. + std::vector<Assignment> VarAssignments; + + /// A set of unassigned variables that appear in watched literals in + /// `Formula`. The vector is guaranteed to contain unique elements. + std::vector<Variable> ActiveVars; + +public: + explicit WatchedLiteralsSolverImpl( + const llvm::ArrayRef<const Formula *> &Vals) + // `Atomics` needs to be initialized first so that we can use it as an + // output argument of `buildCNF()`. + : Atomics(), CNF(buildCNF(Vals, Atomics)), + LevelVars(CNF.largestVar() + 1), LevelStates(CNF.largestVar() + 1) { + assert(!Vals.empty()); + + // Skip initialization if the formula is known to be contradictory. + if (CNF.knownContradictory()) + return; + + // Initialize `NextWatched` and `WatchedHead`. + NextWatched.push_back(0); + const size_t NumLiterals = 2 * CNF.largestVar() + 1; + WatchedHead.resize(NumLiterals + 1, 0); + for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { + // Designate the first literal as the "watched" literal of the clause. 
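+      //
+      // Illustrative example: if clauses 2 and 5 both start with literal `L`,
+      // this loop leaves `WatchedHead[L] == 5` and `NextWatched[5] == 2`,
+      // i.e. the clauses watching `L` form a singly linked list threaded
+      // through `NextWatched`, with `NextWatched[2] == NullClause`.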
+      Literal FirstLit = CNF.clauseLiterals(C).front();
+      NextWatched.push_back(WatchedHead[FirstLit]);
+      WatchedHead[FirstLit] = C;
+    }
+
+    // Initialize the state at the root level to a decision so that in
+    // `reverseForcedMoves` we don't have to check that `Level >= 0` on each
+    // iteration.
+    LevelStates[0] = State::Decision;
+
+    // Initialize all variables as unassigned.
+    VarAssignments.resize(CNF.largestVar() + 1, Assignment::Unassigned);
+
+    // Initialize the active variables.
+    for (Variable Var = CNF.largestVar(); Var != NullVar; --Var) {
+      if (isWatched(posLit(Var)) || isWatched(negLit(Var)))
+        ActiveVars.push_back(Var);
+    }
+  }
+
+  // Returns the `Result` and the number of iterations "remaining" from
+  // `MaxIterations` (that is, `MaxIterations` - iterations in this call).
+  std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && {
+    if (CNF.knownContradictory()) {
+      // Short-cut the solving process. We already found out at CNF
+      // construction time that the formula is unsatisfiable.
+      return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations);
+    }
+    size_t I = 0;
+    while (I < ActiveVars.size()) {
+      if (MaxIterations == 0)
+        return std::make_pair(Solver::Result::TimedOut(), 0);
+      --MaxIterations;
+
+      // Assert that the following invariants hold:
+      // 1. All active variables are unassigned.
+      // 2. All active variables form watched literals.
+      // 3. Unassigned variables that form watched literals are active.
+      // FIXME: Consider replacing these with test cases that fail if any of
+      // the invariants is broken. That might not be easy due to the
+      // transformations performed by `buildCNF`.
+      assert(activeVarsAreUnassigned());
+      assert(activeVarsFormWatchedLiterals());
+      assert(unassignedVarsFormingWatchedLiteralsAreActive());
+
+      const Variable ActiveVar = ActiveVars[I];
+
+      // Look for unit clauses that contain the active variable.
+      const bool unitPosLit = watchedByUnitClause(posLit(ActiveVar));
+      const bool unitNegLit = watchedByUnitClause(negLit(ActiveVar));
+      if (unitPosLit && unitNegLit) {
+        // We found a conflict!
+
+        // Backtrack and rewind the `Level` until the most recent non-forced
+        // assignment.
+        reverseForcedMoves();
+
+        // If the root level is reached, then all possible assignments lead to
+        // a conflict.
+        if (Level == 0)
+          return std::make_pair(Solver::Result::Unsatisfiable(),
+                                MaxIterations);
+
+        // Otherwise, take the other branch at the most recent level where a
+        // decision was made.
+        LevelStates[Level] = State::Forced;
+        const Variable Var = LevelVars[Level];
+        VarAssignments[Var] = VarAssignments[Var] == Assignment::AssignedTrue
+                                  ? Assignment::AssignedFalse
+                                  : Assignment::AssignedTrue;
+
+        updateWatchedLiterals();
+      } else if (unitPosLit || unitNegLit) {
+        // We found a unit clause! The value of its unassigned variable is
+        // forced.
+        ++Level;
+
+        LevelVars[Level] = ActiveVar;
+        LevelStates[Level] = State::Forced;
+        VarAssignments[ActiveVar] =
+            unitPosLit ? Assignment::AssignedTrue : Assignment::AssignedFalse;
+
+        // Remove the variable that was just assigned from the set of active
+        // variables.
+        if (I + 1 < ActiveVars.size()) {
+          // Replace the variable that was just assigned with the last active
+          // variable for efficient removal.
+          ActiveVars[I] = ActiveVars.back();
+        } else {
+          // This was the last active variable. Repeat the process from the
+          // beginning.
+ I = 0; + } + ActiveVars.pop_back(); + + updateWatchedLiterals(); + } else if (I + 1 == ActiveVars.size()) { + // There are no remaining unit clauses in the formula! Make a decision + // for one of the active variables at the current level. + ++Level; + + LevelVars[Level] = ActiveVar; + LevelStates[Level] = State::Decision; + VarAssignments[ActiveVar] = decideAssignment(ActiveVar); + + // Remove the variable that was just assigned from the set of active + // variables. + ActiveVars.pop_back(); + + updateWatchedLiterals(); + + // This was the last active variable. Repeat the process from the + // beginning. + I = 0; + } else { + ++I; + } + } + return std::make_pair(Solver::Result::Satisfiable(buildSolution()), + MaxIterations); + } + +private: + /// Returns a satisfying truth assignment to the atoms in the boolean formula. + llvm::DenseMap<Atom, Solver::Result::Assignment> buildSolution() { + llvm::DenseMap<Atom, Solver::Result::Assignment> Solution; + for (auto &Atomic : Atomics) { + // A variable may have a definite true/false assignment, or it may be + // unassigned indicating its truth value does not affect the result of + // the formula. Unassigned variables are assigned to true as a default. + Solution[Atomic.second] = + VarAssignments[Atomic.first] == Assignment::AssignedFalse + ? Solver::Result::Assignment::AssignedFalse + : Solver::Result::Assignment::AssignedTrue; + } + return Solution; + } + + /// Reverses forced moves until the most recent level where a decision was + /// made on the assignment of a variable. + void reverseForcedMoves() { + for (; LevelStates[Level] == State::Forced; --Level) { + const Variable Var = LevelVars[Level]; + + VarAssignments[Var] = Assignment::Unassigned; + + // If the variable that we pass through is watched then we add it to the + // active variables. + if (isWatched(posLit(Var)) || isWatched(negLit(Var))) + ActiveVars.push_back(Var); + } + } + + /// Updates watched literals that are affected by a variable assignment. + void updateWatchedLiterals() { + const Variable Var = LevelVars[Level]; + + // Update the watched literals of clauses that currently watch the literal + // that falsifies `Var`. + const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue + ? negLit(Var) + : posLit(Var); + ClauseID FalseLitWatcher = WatchedHead[FalseLit]; + WatchedHead[FalseLit] = NullClause; + while (FalseLitWatcher != NullClause) { + const ClauseID NextFalseLitWatcher = NextWatched[FalseLitWatcher]; + + // Pick the first non-false literal as the new watched literal. + const CNFFormula::Iterator FalseLitWatcherStart = + CNF.startOfClause(FalseLitWatcher); + CNFFormula::Iterator NewWatchedLitIter = FalseLitWatcherStart.next(); + while (isCurrentlyFalse(*NewWatchedLitIter)) + ++NewWatchedLitIter; + const Literal NewWatchedLit = *NewWatchedLitIter; + const Variable NewWatchedLitVar = var(NewWatchedLit); + + // Swap the old watched literal for the new one in `FalseLitWatcher` to + // maintain the invariant that the watched literal is at the beginning of + // the clause. + *NewWatchedLitIter = FalseLit; + *FalseLitWatcherStart = NewWatchedLit; + + // If the new watched literal isn't watched by any other clause and its + // variable isn't assigned we need to add it to the active variables. 
+ if (!isWatched(NewWatchedLit) && !isWatched(notLit(NewWatchedLit)) && + VarAssignments[NewWatchedLitVar] == Assignment::Unassigned) + ActiveVars.push_back(NewWatchedLitVar); + + NextWatched[FalseLitWatcher] = WatchedHead[NewWatchedLit]; + WatchedHead[NewWatchedLit] = FalseLitWatcher; + + // Go to the next clause that watches `FalseLit`. + FalseLitWatcher = NextFalseLitWatcher; + } + } + + /// Returns true if and only if one of the clauses that watch `Lit` is a unit + /// clause. + bool watchedByUnitClause(Literal Lit) const { + for (ClauseID LitWatcher = WatchedHead[Lit]; LitWatcher != NullClause; + LitWatcher = NextWatched[LitWatcher]) { + llvm::ArrayRef<Literal> Clause = CNF.clauseLiterals(LitWatcher); + + // Assert the invariant that the watched literal is always the first one + // in the clause. + // FIXME: Consider replacing this with a test case that fails if the + // invariant is broken by `updateWatchedLiterals`. That might not be easy + // due to the transformations performed by `buildCNF`. + assert(Clause.front() == Lit); + + if (isUnit(Clause)) + return true; + } + return false; + } + + /// Returns true if and only if `Clause` is a unit clause. + bool isUnit(llvm::ArrayRef<Literal> Clause) const { + return llvm::all_of(Clause.drop_front(), + [this](Literal L) { return isCurrentlyFalse(L); }); + } + + /// Returns true if and only if `Lit` evaluates to `false` in the current + /// partial assignment. + bool isCurrentlyFalse(Literal Lit) const { + return static_cast<int8_t>(VarAssignments[var(Lit)]) == + static_cast<int8_t>(Lit & 1); + } + + /// Returns true if and only if `Lit` is watched by a clause in `Formula`. + bool isWatched(Literal Lit) const { return WatchedHead[Lit] != NullClause; } + + /// Returns an assignment for an unassigned variable. + Assignment decideAssignment(Variable Var) const { + return !isWatched(posLit(Var)) || isWatched(negLit(Var)) + ? Assignment::AssignedFalse + : Assignment::AssignedTrue; + } + + /// Returns a set of all watched literals. + llvm::DenseSet<Literal> watchedLiterals() const { + llvm::DenseSet<Literal> WatchedLiterals; + for (Literal Lit = 2; Lit < WatchedHead.size(); Lit++) { + if (WatchedHead[Lit] == NullClause) + continue; + WatchedLiterals.insert(Lit); + } + return WatchedLiterals; + } + + /// Returns true if and only if all active variables are unassigned. + bool activeVarsAreUnassigned() const { + return llvm::all_of(ActiveVars, [this](Variable Var) { + return VarAssignments[Var] == Assignment::Unassigned; + }); + } + + /// Returns true if and only if all active variables form watched literals. + bool activeVarsFormWatchedLiterals() const { + const llvm::DenseSet<Literal> WatchedLiterals = watchedLiterals(); + return llvm::all_of(ActiveVars, [&WatchedLiterals](Variable Var) { + return WatchedLiterals.contains(posLit(Var)) || + WatchedLiterals.contains(negLit(Var)); + }); + } + + /// Returns true if and only if all unassigned variables that are forming + /// watched literals are active. 
+ bool unassignedVarsFormingWatchedLiteralsAreActive() const { + const llvm::DenseSet<Variable> ActiveVarsSet(ActiveVars.begin(), + ActiveVars.end()); + for (Literal Lit : watchedLiterals()) { + const Variable Var = var(Lit); + if (VarAssignments[Var] != Assignment::Unassigned) + continue; + if (ActiveVarsSet.contains(Var)) + continue; + return false; + } + return true; + } +}; + +} // namespace + +Solver::Result +WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) { + if (Vals.empty()) + return Solver::Result::Satisfiable({{}}); + auto [Res, Iterations] = WatchedLiteralsSolverImpl(Vals).solve(MaxIterations); + MaxIterations = Iterations; + return Res; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/IntervalPartition.cpp b/contrib/llvm-project/clang/lib/Analysis/IntervalPartition.cpp new file mode 100644 index 000000000000..5f06606ec132 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/IntervalPartition.cpp @@ -0,0 +1,241 @@ +//===- IntervalPartition.cpp - CFG Partitioning into Intervals --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functionality for partitioning a CFG into intervals. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/IntervalPartition.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include <optional> +#include <queue> +#include <vector> + +namespace clang { + +// Intermediate data used in constructing a CFGIntervalNode. +template <typename Node> struct BuildResult { + // Use a vector to maintain the insertion order. Given the expected small + // number of nodes, vector should be sufficiently efficient. Elements must not + // be null. + std::vector<const Node *> Nodes; + // Elements must not be null. + llvm::SmallDenseSet<const Node *> Successors; +}; + +namespace internal { +static unsigned getID(const CFGBlock &B) { return B.getBlockID(); } +static unsigned getID(const CFGIntervalNode &I) { return I.ID; } + +// `Node` must be one of `CFGBlock` or `CFGIntervalNode`. +template <typename Node> +BuildResult<Node> buildInterval(llvm::BitVector &Partitioned, + const Node *Header) { + assert(Header != nullptr); + BuildResult<Node> Interval; + Interval.Nodes.push_back(Header); + Partitioned.set(getID(*Header)); + + // FIXME: Compare performance against using RPO to consider nodes, rather than + // following successors. + // + // Elements must not be null. Duplicates are prevented using `Workset`, below. + std::queue<const Node *> Worklist; + llvm::BitVector Workset(Partitioned.size(), false); + for (const Node *S : Header->succs()) + if (S != nullptr) + if (auto SID = getID(*S); !Partitioned.test(SID)) { + // Successors are unique, so we don't test against `Workset` before + // adding to `Worklist`. + Worklist.push(S); + Workset.set(SID); + } + + // Contains successors of blocks in the interval that couldn't be added to the + // interval on their first encounter. This occurs when they have a predecessor + // that is either definitively outside the interval or hasn't been considered + // yet. In the latter case, we'll revisit the block through some other path + // from the interval. 
At the end of processing the worklist, we filter out any
+  // that ended up in the interval to produce the output set of interval
+  // successors. Elements are never null.
+  std::vector<const Node *> MaybeSuccessors;
+
+  while (!Worklist.empty()) {
+    const auto *B = Worklist.front();
+    auto ID = getID(*B);
+    Worklist.pop();
+    Workset.reset(ID);
+
+    // Check whether all predecessors are in the interval, in which case `B`
+    // is included as well.
+    bool AllInInterval = llvm::all_of(B->preds(), [&](const Node *P) {
+      return llvm::is_contained(Interval.Nodes, P);
+    });
+    if (AllInInterval) {
+      Interval.Nodes.push_back(B);
+      Partitioned.set(ID);
+      for (const Node *S : B->succs())
+        if (S != nullptr)
+          if (auto SID = getID(*S);
+              !Partitioned.test(SID) && !Workset.test(SID)) {
+            Worklist.push(S);
+            Workset.set(SID);
+          }
+    } else {
+      MaybeSuccessors.push_back(B);
+    }
+  }
+
+  // Any block successors not in the current interval are interval successors.
+  for (const Node *B : MaybeSuccessors)
+    if (!llvm::is_contained(Interval.Nodes, B))
+      Interval.Successors.insert(B);
+
+  return Interval;
+}
+
+template <typename Node>
+void fillIntervalNode(CFGIntervalGraph &Graph,
+                      std::vector<CFGIntervalNode *> &Index,
+                      std::queue<const Node *> &Successors,
+                      llvm::BitVector &Partitioned, const Node *Header) {
+  BuildResult<Node> Result = buildInterval(Partitioned, Header);
+  for (const auto *S : Result.Successors)
+    Successors.push(S);
+
+  CFGIntervalNode &Interval = Graph.emplace_back(Graph.size());
+
+  // Index the nodes of the new interval. The index maps nodes from the input
+  // graph (specifically, `Result.Nodes`) to identifiers of nodes in the output
+  // graph. In this case, the new interval has identifier `ID` so all of its
+  // nodes (`Result.Nodes`) map to `ID`.
+  for (const auto *N : Result.Nodes) {
+    assert(N != nullptr);
+    assert(getID(*N) < Index.size());
+    Index[getID(*N)] = &Interval;
+  }
+
+  if constexpr (std::is_same_v<std::decay_t<Node>, CFGBlock>)
+    Interval.Nodes = std::move(Result.Nodes);
+  else {
+    std::vector<const CFGBlock *> Nodes;
+    // Flatten the sub-vectors into a single list.
+    size_t Count = 0;
+    for (auto &N : Result.Nodes)
+      Count += N->Nodes.size();
+    Nodes.reserve(Count);
+    for (auto &N : Result.Nodes)
+      Nodes.insert(Nodes.end(), N->Nodes.begin(), N->Nodes.end());
+    Interval.Nodes = std::move(Nodes);
+  }
+}
+
+template <typename Node>
+CFGIntervalGraph partitionIntoIntervalsImpl(unsigned NumBlockIDs,
+                                            const Node *EntryBlock) {
+  assert(EntryBlock != nullptr);
+  CFGIntervalGraph Graph;
+  // `Index` maps all of the nodes of the input graph to the interval to which
+  // they are assigned in the output graph. The values (interval pointers) are
+  // never null.
+  std::vector<CFGIntervalNode *> Index(NumBlockIDs, nullptr);
+
+  // Lists header nodes (from the input graph) and their associated
+  // interval. Since header nodes can vary in type and are only needed within
+  // this function, we record them separately from `CFGIntervalNode`. This
+  // choice lets us express `CFGIntervalNode` without using a variant.
+ std::vector<std::pair<const Node *, CFGIntervalNode *>> Intervals; + llvm::BitVector Partitioned(NumBlockIDs, false); + std::queue<const Node *> Successors; + + fillIntervalNode(Graph, Index, Successors, Partitioned, EntryBlock); + Intervals.emplace_back(EntryBlock, &Graph.back()); + + while (!Successors.empty()) { + const auto *B = Successors.front(); + Successors.pop(); + assert(B != nullptr); + if (Partitioned.test(getID(*B))) + continue; + + // B has not been partitioned, but it has a predecessor that has. Create a + // new interval from `B`. + fillIntervalNode(Graph, Index, Successors, Partitioned, B); + Intervals.emplace_back(B, &Graph.back()); + } + + // Go back and patch up all the Intervals -- the successors and predecessors. + for (auto [H, N] : Intervals) { + // Map input-graph predecessors to output-graph nodes and mark those as + // predecessors of `N`. Then, mark `N` as a successor of said predecessor. + for (const Node *P : H->preds()) { + if (P == nullptr) + continue; + + assert(getID(*P) < NumBlockIDs); + CFGIntervalNode *Pred = Index[getID(*P)]; + if (Pred == nullptr) + // Unreachable node. + continue; + if (Pred != N // Not a backedge. + && N->Predecessors.insert(Pred).second) + // Note: given the guard above, which guarantees we only ever insert + // unique elements, we could use a simple list (like `vector`) for + // `Successors`, rather than a set. + Pred->Successors.insert(N); + } + } + + return Graph; +} + +std::vector<const CFGBlock *> buildInterval(const CFGBlock *Header) { + llvm::BitVector Partitioned(Header->getParent()->getNumBlockIDs(), false); + return buildInterval(Partitioned, Header).Nodes; +} + +CFGIntervalGraph partitionIntoIntervals(const CFG &Cfg) { + return partitionIntoIntervalsImpl(Cfg.getNumBlockIDs(), &Cfg.getEntry()); +} + +CFGIntervalGraph partitionIntoIntervals(const CFGIntervalGraph &Graph) { + return partitionIntoIntervalsImpl(Graph.size(), &Graph[0]); +} +} // namespace internal + +std::optional<std::vector<const CFGBlock *>> getIntervalWTO(const CFG &Cfg) { + // Backing storage for the allocated nodes in each graph. + unsigned PrevSize = Cfg.size(); + if (PrevSize == 0) + return {}; + internal::CFGIntervalGraph Graph = internal::partitionIntoIntervals(Cfg); + unsigned Size = Graph.size(); + while (Size > 1 && Size < PrevSize) { + PrevSize = Graph.size(); + Graph = internal::partitionIntoIntervals(Graph); + Size = Graph.size(); + } + if (Size > 1) + // Not reducible. 
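+    // (An irreducible CFG never collapses to a single interval, so no weak
+    // topological ordering can be derived from the partition.)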
+    return std::nullopt;
+
+  assert(Size != 0);
+  return std::move(Graph[0].Nodes);
+}
+
+WTOCompare::WTOCompare(const WeakTopologicalOrdering &WTO) {
+  if (WTO.empty())
+    return;
+  auto N = WTO[0]->getParent()->getNumBlockIDs();
+  BlockOrder.resize(N, 0);
+  for (unsigned I = 0, S = WTO.size(); I < S; ++I)
+    BlockOrder[WTO[I]->getBlockID()] = I + 1;
+}
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp b/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp
index 94816747668d..4d56e774b76a 100644
--- a/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp
+++ b/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/Path.h"
 
 #include <functional>
+#include <optional>
 #include <sstream>
 #include <string>
 
@@ -121,7 +122,7 @@ static std::string GetEnclosingDeclContextSignature(const Decl *D) {
   return "";
 }
 
-static StringRef GetNthLineOfFile(llvm::Optional<llvm::MemoryBufferRef> Buffer,
+static StringRef GetNthLineOfFile(std::optional<llvm::MemoryBufferRef> Buffer,
                                   int Line) {
   if (!Buffer)
     return "";
@@ -146,7 +147,7 @@ static std::string NormalizeLine(const SourceManager &SM, const FullSourceLoc &L
     col++;
   SourceLocation StartOfLine =
       SM.translateLineCol(SM.getFileID(L), L.getExpansionLineNumber(), col);
-  Optional<llvm::MemoryBufferRef> Buffer =
+  std::optional<llvm::MemoryBufferRef> Buffer =
       SM.getBufferOrNone(SM.getFileID(StartOfLine), StartOfLine);
   if (!Buffer)
     return {};
diff --git a/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp b/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp
index 8cdc4cc5bd61..6d03dd05ca3d 100644
--- a/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp
+++ b/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <optional>
 #include <vector>
 
 using namespace clang;
@@ -72,6 +73,11 @@ bool LiveVariables::LivenessValues::isLive(const VarDecl *D) const {
     bool alive = false;
     for (const BindingDecl *BD : DD->bindings())
       alive |= liveBindings.contains(BD);
+
+    // Note: the only known case where this condition is necessary is when a
+    // binding to a tuple-like structure is created. The HoldingVar
+    // initializers have a DeclRefExpr to the DecompositionDecl.
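+    // (For example, in 'auto [a, b] = std::pair{1, 2};' each binding has a
+    // holding variable whose initializer calls get<i> on the invented
+    // decomposition variable, i.e. on the DecompositionDecl.)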
+    alive |= liveDecls.contains(DD);
     return alive;
   }
   return liveDecls.contains(D);
 }
@@ -325,6 +331,11 @@ static bool writeShouldKill(const VarDecl *VD) {
 }
 
 void TransferFunctions::VisitBinaryOperator(BinaryOperator *B) {
+  if (LV.killAtAssign && B->getOpcode() == BO_Assign) {
+    if (const auto *DR = dyn_cast<DeclRefExpr>(B->getLHS()->IgnoreParens())) {
+      LV.inAssignment[DR] = 1;
+    }
+  }
   if (B->isAssignmentOp()) {
     if (!LV.killAtAssign)
       return;
@@ -338,8 +349,12 @@ void TransferFunctions::VisitBinaryOperator(BinaryOperator *B) {
 
       if (const BindingDecl* BD = dyn_cast<BindingDecl>(D)) {
         Killed = !BD->getType()->isReferenceType();
-        if (Killed)
+        if (Killed) {
+          if (const auto *HV = BD->getHoldingVar())
+            val.liveDecls = LV.DSetFact.remove(val.liveDecls, HV);
+
           val.liveBindings = LV.BSetFact.remove(val.liveBindings, BD);
+        }
       } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
         Killed = writeShouldKill(VD);
         if (Killed)
@@ -366,8 +381,12 @@ void TransferFunctions::VisitDeclRefExpr(DeclRefExpr *DR) {
   const Decl* D = DR->getDecl();
   bool InAssignment = LV.inAssignment[DR];
   if (const auto *BD = dyn_cast<BindingDecl>(D)) {
-    if (!InAssignment)
+    if (!InAssignment) {
+      if (const auto *HV = BD->getHoldingVar())
+        val.liveDecls = LV.DSetFact.add(val.liveDecls, HV);
+
       val.liveBindings = LV.BSetFact.add(val.liveBindings, BD);
+    }
   } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
     if (!InAssignment && !isAlwaysAlive(VD))
       val.liveDecls = LV.DSetFact.add(val.liveDecls, VD);
@@ -377,8 +396,16 @@ void TransferFunctions::VisitDeclStmt(DeclStmt *DS) {
   for (const auto *DI : DS->decls()) {
     if (const auto *DD = dyn_cast<DecompositionDecl>(DI)) {
-      for (const auto *BD : DD->bindings())
+      for (const auto *BD : DD->bindings()) {
+        if (const auto *HV = BD->getHoldingVar())
+          val.liveDecls = LV.DSetFact.remove(val.liveDecls, HV);
+
         val.liveBindings = LV.BSetFact.remove(val.liveBindings, BD);
+      }
+
+      // When a binding to a tuple-like structure is created, the HoldingVar
+      // initializers have a DeclRefExpr to the DecompositionDecl.
+      val.liveDecls = LV.DSetFact.remove(val.liveDecls, DD);
     } else if (const auto *VD = dyn_cast<VarDecl>(DI)) {
       if (!isAlwaysAlive(VD))
         val.liveDecls = LV.DSetFact.remove(val.liveDecls, VD);
@@ -464,7 +491,7 @@ LiveVariablesImpl::runOnBlock(const CFGBlock *block,
        ei = block->rend(); it != ei; ++it) {
     const CFGElement &elem = *it;
 
-    if (Optional<CFGAutomaticObjDtor> Dtor =
+    if (std::optional<CFGAutomaticObjDtor> Dtor =
             elem.getAs<CFGAutomaticObjDtor>()) {
       val.liveDecls = DSetFact.add(val.liveDecls, Dtor->getVarDecl());
       continue;
     }
@@ -513,29 +540,8 @@ LiveVariables::computeLiveness(AnalysisDeclContext &AC, bool killAtAssign) {
   llvm::BitVector everAnalyzedBlock(cfg->getNumBlockIDs());
 
   // FIXME: we should enqueue using post order.
-  for (CFG::const_iterator it = cfg->begin(), ei = cfg->end(); it != ei; ++it) {
-    const CFGBlock *block = *it;
-    worklist.enqueueBlock(block);
-
-    // FIXME: Scan for DeclRefExprs using in the LHS of an assignment.
-    // We need to do this because we lack context in the reverse analysis
-    // to determine if a DeclRefExpr appears in such a context, and thus
-    // doesn't constitute a "use".
- if (killAtAssign) - for (CFGBlock::const_iterator bi = block->begin(), be = block->end(); - bi != be; ++bi) { - if (Optional<CFGStmt> cs = bi->getAs<CFGStmt>()) { - const Stmt* stmt = cs->getStmt(); - if (const auto *BO = dyn_cast<BinaryOperator>(stmt)) { - if (BO->getOpcode() == BO_Assign) { - if (const auto *DR = - dyn_cast<DeclRefExpr>(BO->getLHS()->IgnoreParens())) { - LV->inAssignment[DR] = 1; - } - } - } - } - } + for (const CFGBlock *B : cfg->nodes()) { + worklist.enqueueBlock(B); } while (const CFGBlock *block = worklist.dequeue()) { diff --git a/contrib/llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp b/contrib/llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp new file mode 100644 index 000000000000..b212b7f24579 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp @@ -0,0 +1,232 @@ +//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/MacroExpansionContext.h" +#include "llvm/Support/Debug.h" +#include <optional> + +#define DEBUG_TYPE "macro-expansion-context" + +static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, + clang::Token Tok); + +namespace clang { +namespace detail { +class MacroExpansionRangeRecorder : public PPCallbacks { + const Preprocessor &PP; + SourceManager &SM; + MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; + +public: + explicit MacroExpansionRangeRecorder( + const Preprocessor &PP, SourceManager &SM, + MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) + : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} + + void MacroExpands(const Token &MacroName, const MacroDefinition &MD, + SourceRange Range, const MacroArgs *Args) override { + // Ignore annotation tokens like: _Pragma("pack(push, 1)") + if (MacroName.getIdentifierInfo()->getName() == "_Pragma") + return; + + SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation()); + assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); + + const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { + // If the range is empty, use the length of the macro. + if (Range.getBegin() == Range.getEnd()) + return SM.getExpansionLoc( + MacroName.getLocation().getLocWithOffset(MacroName.getLength())); + + // Include the last character. + return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1); + }(); + + (void)PP; + LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '"; + dumpTokenInto(PP, llvm::dbgs(), MacroName); + llvm::dbgs() + << "' with length " << MacroName.getLength() << " at "; + MacroNameBegin.print(llvm::dbgs(), SM); + llvm::dbgs() << ", expansion end at "; + ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); + + // If the expansion range is empty, use the identifier of the macro as a + // range. 
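+    // The begin location is kept from the first expansion recorded at this
+    // spot; only the end location is widened below when a later expansion
+    // reaches further.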
+ MacroExpansionContext::ExpansionRangeMap::iterator It; + bool Inserted; + std::tie(It, Inserted) = + ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd); + if (Inserted) { + LLVM_DEBUG(llvm::dbgs() << "maps "; + It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to "; + It->getSecond().print(llvm::dbgs(), SM); + llvm::dbgs() << '\n';); + } else { + if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) { + It->getSecond() = ExpansionEnd; + LLVM_DEBUG( + llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM); + llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM); + llvm::dbgs() << '\n';); + } + } + } +}; +} // namespace detail +} // namespace clang + +using namespace clang; + +MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) + : LangOpts(LangOpts) {} + +void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { + PP = &NewPP; + SM = &NewPP.getSourceManager(); + + // Make sure that the Preprocessor does not outlive the MacroExpansionContext. + PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>( + *PP, *SM, ExpansionRanges)); + // Same applies here. + PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); +} + +std::optional<StringRef> +MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { + if (MacroExpansionLoc.isMacroID()) + return std::nullopt; + + // If there was no macro expansion at that location, return std::nullopt. + if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end()) + return std::nullopt; + + // There was macro expansion, but resulted in no tokens, return empty string. + const auto It = ExpandedTokens.find_as(MacroExpansionLoc); + if (It == ExpandedTokens.end()) + return StringRef{""}; + + // Otherwise we have the actual token sequence as string. 
+ return It->getSecond().str(); +} + +std::optional<StringRef> +MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { + if (MacroExpansionLoc.isMacroID()) + return std::nullopt; + + const auto It = ExpansionRanges.find_as(MacroExpansionLoc); + if (It == ExpansionRanges.end()) + return std::nullopt; + + assert(It->getFirst() != It->getSecond() && + "Every macro expansion must cover a non-empty range."); + + return Lexer::getSourceText( + CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM, + LangOpts); +} + +void MacroExpansionContext::dumpExpansionRanges() const { + dumpExpansionRangesToStream(llvm::dbgs()); +} +void MacroExpansionContext::dumpExpandedTexts() const { + dumpExpandedTextsToStream(llvm::dbgs()); +} + +void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { + std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; + LocalExpansionRanges.reserve(ExpansionRanges.size()); + for (const auto &Record : ExpansionRanges) + LocalExpansionRanges.emplace_back( + std::make_pair(Record.getFirst(), Record.getSecond())); + llvm::sort(LocalExpansionRanges); + + OS << "\n=============== ExpansionRanges ===============\n"; + for (const auto &Record : LocalExpansionRanges) { + OS << "> "; + Record.first.print(OS, *SM); + OS << ", "; + Record.second.print(OS, *SM); + OS << '\n'; + } +} + +void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { + std::vector<std::pair<SourceLocation, MacroExpansionText>> + LocalExpandedTokens; + LocalExpandedTokens.reserve(ExpandedTokens.size()); + for (const auto &Record : ExpandedTokens) + LocalExpandedTokens.emplace_back( + std::make_pair(Record.getFirst(), Record.getSecond())); + llvm::sort(LocalExpandedTokens); + + OS << "\n=============== ExpandedTokens ===============\n"; + for (const auto &Record : LocalExpandedTokens) { + OS << "> "; + Record.first.print(OS, *SM); + OS << " -> '" << Record.second << "'\n"; + } +} + +static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { + assert(Tok.isNot(tok::raw_identifier)); + + // Ignore annotation tokens like: _Pragma("pack(push, 1)") + if (Tok.isAnnotation()) + return; + + if (IdentifierInfo *II = Tok.getIdentifierInfo()) { + // FIXME: For now, we don't respect whitespaces between macro expanded + // tokens. We just emit a space after every identifier to produce a valid + // code for `int a ;` like expansions. + // ^-^-- Space after the 'int' and 'a' identifiers. + OS << II->getName() << ' '; + } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { + OS << StringRef(Tok.getLiteralData(), Tok.getLength()); + } else { + char Tmp[256]; + if (Tok.getLength() < sizeof(Tmp)) { + const char *TokPtr = Tmp; + // FIXME: Might use a different overload for cleaner callsite. + unsigned Len = PP.getSpelling(Tok, TokPtr); + OS.write(TokPtr, Len); + } else { + OS << "<too long token>"; + } + } +} + +void MacroExpansionContext::onTokenLexed(const Token &Tok) { + SourceLocation SLoc = Tok.getLocation(); + if (SLoc.isFileID()) + return; + + LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '"; + dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at "; + SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); + + // Remove spelling location. 
+ SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc); + + MacroExpansionText TokenAsString; + llvm::raw_svector_ostream OS(TokenAsString); + + // FIXME: Prepend newlines and space to produce the exact same output as the + // preprocessor would for this token. + + dumpTokenInto(*PP, OS, Tok); + + ExpansionMap::iterator It; + bool Inserted; + std::tie(It, Inserted) = + ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString)); + if (!Inserted) + It->getSecond().append(TokenAsString); +} + diff --git a/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp b/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp index fe1edb496859..9e651c29e085 100644 --- a/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp @@ -17,7 +17,8 @@ using namespace clang; -static bool isSubclass(const ObjCInterfaceDecl *Class, IdentifierInfo *II) { +static bool isSubclass(const ObjCInterfaceDecl *Class, + const IdentifierInfo *II) { if (!Class) return false; if (Class->getIdentifier() == II) @@ -30,7 +31,7 @@ ObjCNoReturn::ObjCNoReturn(ASTContext &C) NSExceptionII(&C.Idents.get("NSException")) { // Generate selectors. - SmallVector<IdentifierInfo*, 3> II; + SmallVector<const IdentifierInfo *, 3> II; // raise:format: II.push_back(&C.Idents.get("raise")); @@ -54,12 +55,9 @@ bool ObjCNoReturn::isImplicitNoReturn(const ObjCMessageExpr *ME) { } if (const ObjCInterfaceDecl *ID = ME->getReceiverInterface()) { - if (isSubclass(ID, NSExceptionII)) { - for (unsigned i = 0; i < NUM_RAISE_SELECTORS; ++i) { - if (S == NSExceptionInstanceRaiseSelectors[i]) - return true; - } - } + if (isSubclass(ID, NSExceptionII) && + llvm::is_contained(NSExceptionInstanceRaiseSelectors, S)) + return true; } return false; diff --git a/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp b/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp index b42f47fb68c5..35472e705cfd 100644 --- a/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp @@ -32,8 +32,6 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -45,18 +43,14 @@ #include <cassert> #include <cstring> #include <memory> +#include <optional> #include <utility> #include <vector> using namespace clang; using namespace ento; -static StringRef StripTrailingDots(StringRef s) { - for (StringRef::size_type i = s.size(); i != 0; --i) - if (s[i - 1] != '.') - return s.substr(0, i); - return {}; -} +static StringRef StripTrailingDots(StringRef s) { return s.rtrim('.'); } PathDiagnosticPiece::PathDiagnosticPiece(StringRef s, Kind k, DisplayHint hint) @@ -121,14 +115,17 @@ PathDiagnostic::PathDiagnostic( StringRef CheckerName, const Decl *declWithIssue, StringRef bugtype, StringRef verboseDesc, StringRef shortDesc, StringRef category, PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique, + const Decl *AnalysisEntryPoint, std::unique_ptr<FilesToLineNumsMap> ExecutedLines) : CheckerName(CheckerName), DeclWithIssue(declWithIssue), BugType(StripTrailingDots(bugtype)), VerboseDesc(StripTrailingDots(verboseDesc)), ShortDesc(StripTrailingDots(shortDesc)), Category(StripTrailingDots(category)), UniqueingLoc(LocationToUnique), - UniqueingDecl(DeclToUnique), ExecutedLines(std::move(ExecutedLines)), - 
path(pathImpl) {} + UniqueingDecl(DeclToUnique), AnalysisEntryPoint(AnalysisEntryPoint), + ExecutedLines(std::move(ExecutedLines)), path(pathImpl) { + assert(AnalysisEntryPoint); +} void PathDiagnosticConsumer::anchor() {} @@ -226,9 +223,10 @@ void PathDiagnosticConsumer::HandlePathDiagnostic( Diags.InsertNode(D.release()); } -static Optional<bool> comparePath(const PathPieces &X, const PathPieces &Y); +static std::optional<bool> comparePath(const PathPieces &X, + const PathPieces &Y); -static Optional<bool> +static std::optional<bool> compareControlFlow(const PathDiagnosticControlFlowPiece &X, const PathDiagnosticControlFlowPiece &Y) { FullSourceLoc XSL = X.getStartLocation().asLocation(); @@ -239,16 +237,16 @@ compareControlFlow(const PathDiagnosticControlFlowPiece &X, FullSourceLoc YEL = Y.getEndLocation().asLocation(); if (XEL != YEL) return XEL.isBeforeInTranslationUnitThan(YEL); - return None; + return std::nullopt; } -static Optional<bool> compareMacro(const PathDiagnosticMacroPiece &X, - const PathDiagnosticMacroPiece &Y) { +static std::optional<bool> compareMacro(const PathDiagnosticMacroPiece &X, + const PathDiagnosticMacroPiece &Y) { return comparePath(X.subPieces, Y.subPieces); } -static Optional<bool> compareCall(const PathDiagnosticCallPiece &X, - const PathDiagnosticCallPiece &Y) { +static std::optional<bool> compareCall(const PathDiagnosticCallPiece &X, + const PathDiagnosticCallPiece &Y) { FullSourceLoc X_CEL = X.callEnter.asLocation(); FullSourceLoc Y_CEL = Y.callEnter.asLocation(); if (X_CEL != Y_CEL) @@ -264,8 +262,8 @@ static Optional<bool> compareCall(const PathDiagnosticCallPiece &X, return comparePath(X.path, Y.path); } -static Optional<bool> comparePiece(const PathDiagnosticPiece &X, - const PathDiagnosticPiece &Y) { +static std::optional<bool> comparePiece(const PathDiagnosticPiece &X, + const PathDiagnosticPiece &Y) { if (X.getKind() != Y.getKind()) return X.getKind() < Y.getKind(); @@ -305,25 +303,24 @@ static Optional<bool> comparePiece(const PathDiagnosticPiece &X, case PathDiagnosticPiece::Event: case PathDiagnosticPiece::Note: case PathDiagnosticPiece::PopUp: - return None; + return std::nullopt; } llvm_unreachable("all cases handled"); } -static Optional<bool> comparePath(const PathPieces &X, const PathPieces &Y) { +static std::optional<bool> comparePath(const PathPieces &X, + const PathPieces &Y) { if (X.size() != Y.size()) return X.size() < Y.size(); PathPieces::const_iterator X_I = X.begin(), X_end = X.end(); PathPieces::const_iterator Y_I = Y.begin(), Y_end = Y.end(); - for ( ; X_I != X_end && Y_I != Y_end; ++X_I, ++Y_I) { - Optional<bool> b = comparePiece(**X_I, **Y_I); - if (b.hasValue()) - return b.getValue(); - } + for (; X_I != X_end && Y_I != Y_end; ++X_I, ++Y_I) + if (std::optional<bool> b = comparePiece(**X_I, **Y_I)) + return *b; - return None; + return std::nullopt; } static bool compareCrossTUSourceLocs(FullSourceLoc XL, FullSourceLoc YL) { @@ -337,13 +334,15 @@ static bool compareCrossTUSourceLocs(FullSourceLoc XL, FullSourceLoc YL) { std::pair<bool, bool> InSameTU = SM.isInTheSameTranslationUnit(XOffs, YOffs); if (InSameTU.first) return XL.isBeforeInTranslationUnitThan(YL); - const FileEntry *XFE = SM.getFileEntryForID(XL.getSpellingLoc().getFileID()); - const FileEntry *YFE = SM.getFileEntryForID(YL.getSpellingLoc().getFileID()); + OptionalFileEntryRef XFE = + SM.getFileEntryRefForID(XL.getSpellingLoc().getFileID()); + OptionalFileEntryRef YFE = + SM.getFileEntryRefForID(YL.getSpellingLoc().getFileID()); if (!XFE || !YFE) return XFE && 
!YFE; int NameCmp = XFE->getName().compare(YFE->getName()); if (NameCmp != 0) - return NameCmp == -1; + return NameCmp < 0; // Last resort: Compare raw file IDs that are possibly expansions. return XL.getFileID() < YL.getFileID(); } @@ -365,9 +364,10 @@ static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) { return X.getVerboseDescription() < Y.getVerboseDescription(); if (X.getShortDescription() != Y.getShortDescription()) return X.getShortDescription() < Y.getShortDescription(); - auto CompareDecls = [&XL](const Decl *D1, const Decl *D2) -> Optional<bool> { + auto CompareDecls = [&XL](const Decl *D1, + const Decl *D2) -> std::optional<bool> { if (D1 == D2) - return None; + return std::nullopt; if (!D1) return true; if (!D2) @@ -379,7 +379,7 @@ static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) { return compareCrossTUSourceLocs(FullSourceLoc(D1L, SM), FullSourceLoc(D2L, SM)); } - return None; + return std::nullopt; }; if (auto Result = CompareDecls(X.getDeclWithIssue(), Y.getDeclWithIssue())) return *Result; @@ -395,9 +395,7 @@ static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) { if (*XI != *YI) return (*XI) < (*YI); } - Optional<bool> b = comparePath(X.path, Y.path); - assert(b.hasValue()); - return b.getValue(); + return *comparePath(X.path, Y.path); } void PathDiagnosticConsumer::FlushDiagnostics( @@ -434,8 +432,8 @@ void PathDiagnosticConsumer::FlushDiagnostics( } PathDiagnosticConsumer::FilesMade::~FilesMade() { - for (PDFileEntry &Entry : Set) - Entry.~PDFileEntry(); + for (auto It = Set.begin(); It != Set.end();) + (It++)->~PDFileEntry(); } void PathDiagnosticConsumer::FilesMade::addDiagnostic(const PathDiagnostic &PD, @@ -567,6 +565,7 @@ getLocationForCaller(const StackFrameContext *SFC, } case CFGElement::ScopeBegin: case CFGElement::ScopeEnd: + case CFGElement::CleanupFunction: llvm_unreachable("not yet implemented!"); case CFGElement::LifetimeEnds: case CFGElement::LoopExit: @@ -586,6 +585,7 @@ PathDiagnosticLocation PathDiagnosticLocation::createBegin(const Stmt *S, const SourceManager &SM, LocationOrAnalysisDeclContext LAC) { + assert(S && "Statement cannot be null"); return PathDiagnosticLocation(getValidSourceLocation(S, LAC), SM, SingleLocK); } @@ -665,7 +665,7 @@ PathDiagnosticLocation PathDiagnosticLocation::create(const ProgramPoint& P, const SourceManager &SMng) { const Stmt* S = nullptr; - if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) { + if (std::optional<BlockEdge> BE = P.getAs<BlockEdge>()) { const CFGBlock *BSrc = BE->getSrc(); if (BSrc->getTerminator().isVirtualBaseBranch()) { // TODO: VirtualBaseBranches should also appear for destructors. 
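The comparators above use std::optional<bool> as a three-way result: an
engaged value is the final ordering decision, while std::nullopt means "equal
so far, keep comparing". A minimal, self-contained sketch of this idiom (toy
element type and names; not the PathDiagnostic API):

    #include <cstddef>
    #include <optional>
    #include <vector>

    // Decide the order from one element, or return std::nullopt if this
    // element does not distinguish the two sequences.
    static std::optional<bool> compareElem(int X, int Y) {
      if (X != Y)
        return X < Y;
      return std::nullopt;
    }

    static bool compareSeq(const std::vector<int> &X,
                           const std::vector<int> &Y) {
      if (X.size() != Y.size())
        return X.size() < Y.size();
      for (std::size_t I = 0, E = X.size(); I != E; ++I)
        if (std::optional<bool> B = compareElem(X[I], Y[I]))
          return *B; // The first difference decides.
      return false; // Fully equal, hence not "less than".
    }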
@@ -685,22 +685,23 @@ PathDiagnosticLocation::create(const ProgramPoint& P, P.getLocationContext()->getDecl(), SMng); } } - } else if (Optional<StmtPoint> SP = P.getAs<StmtPoint>()) { + } else if (std::optional<StmtPoint> SP = P.getAs<StmtPoint>()) { S = SP->getStmt(); if (P.getAs<PostStmtPurgeDeadSymbols>()) return PathDiagnosticLocation::createEnd(S, SMng, P.getLocationContext()); - } else if (Optional<PostInitializer> PIP = P.getAs<PostInitializer>()) { + } else if (std::optional<PostInitializer> PIP = P.getAs<PostInitializer>()) { return PathDiagnosticLocation(PIP->getInitializer()->getSourceLocation(), SMng); - } else if (Optional<PreImplicitCall> PIC = P.getAs<PreImplicitCall>()) { + } else if (std::optional<PreImplicitCall> PIC = P.getAs<PreImplicitCall>()) { return PathDiagnosticLocation(PIC->getLocation(), SMng); - } else if (Optional<PostImplicitCall> PIE = P.getAs<PostImplicitCall>()) { + } else if (std::optional<PostImplicitCall> PIE = + P.getAs<PostImplicitCall>()) { return PathDiagnosticLocation(PIE->getLocation(), SMng); - } else if (Optional<CallEnter> CE = P.getAs<CallEnter>()) { + } else if (std::optional<CallEnter> CE = P.getAs<CallEnter>()) { return getLocationForCaller(CE->getCalleeContext(), CE->getLocationContext(), SMng); - } else if (Optional<CallExitEnd> CEE = P.getAs<CallExitEnd>()) { + } else if (std::optional<CallExitEnd> CEE = P.getAs<CallExitEnd>()) { return getLocationForCaller(CEE->getCalleeContext(), CEE->getLocationContext(), SMng); @@ -710,8 +711,8 @@ PathDiagnosticLocation::create(const ProgramPoint& P, CEB->getLocationContext()); return PathDiagnosticLocation( CEB->getLocationContext()->getDecl()->getSourceRange().getEnd(), SMng); - } else if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) { - if (Optional<CFGElement> BlockFront = BE->getFirstElement()) { + } else if (std::optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) { + if (std::optional<CFGElement> BlockFront = BE->getFirstElement()) { if (auto StmtElt = BlockFront->getAs<CFGStmt>()) { return PathDiagnosticLocation(StmtElt->getStmt()->getBeginLoc(), SMng); } else if (auto NewAllocElt = BlockFront->getAs<CFGNewAllocator>()) { @@ -723,7 +724,8 @@ PathDiagnosticLocation::create(const ProgramPoint& P, return PathDiagnosticLocation( BE->getBlock()->getTerminatorStmt()->getBeginLoc(), SMng); - } else if (Optional<FunctionExitPoint> FE = P.getAs<FunctionExitPoint>()) { + } else if (std::optional<FunctionExitPoint> FE = + P.getAs<FunctionExitPoint>()) { return PathDiagnosticLocation(FE->getStmt(), SMng, FE->getLocationContext()); } else { @@ -898,7 +900,7 @@ static void describeTemplateParameter(raw_ostream &Out, if (TArg.getKind() == TemplateArgument::ArgKind::Pack) { describeTemplateParameters(Out, TArg.getPackAsArray(), LO); } else { - TArg.print(PrintingPolicy(LO), Out); + TArg.print(PrintingPolicy(LO), Out, /*IncludeType*/ true); } } diff --git a/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp b/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp index 221d137dadb8..acbe1470b389 100644 --- a/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/ReachableCode.h" +#include "clang/AST/Attr.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ParentMap.h" +#include "clang/AST/RecursiveASTVisitor.h" #include 
"clang/AST/StmtCXX.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" @@ -24,6 +26,7 @@ #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" +#include <optional> using namespace clang; @@ -73,7 +76,7 @@ static bool isBuiltinAssumeFalse(const CFGBlock *B, const Stmt *S, // (e.g. a CFGBlock containing only a goto). return false; } - if (Optional<CFGStmt> CS = B->back().getAs<CFGStmt>()) { + if (std::optional<CFGStmt> CS = B->back().getAs<CFGStmt>()) { if (const auto *CE = dyn_cast<CallExpr>(CS->getStmt())) { return CE->getCallee()->IgnoreCasts() == S && CE->isBuiltinAssumeFalse(C); } @@ -87,10 +90,8 @@ static bool isDeadReturn(const CFGBlock *B, const Stmt *S) { // block, or may be in a subsequent block because of destructors. const CFGBlock *Current = B; while (true) { - for (CFGBlock::const_reverse_iterator I = Current->rbegin(), - E = Current->rend(); - I != E; ++I) { - if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) { + for (const CFGElement &CE : llvm::reverse(*Current)) { + if (std::optional<CFGStmt> CS = CE.getAs<CFGStmt>()) { if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) { if (RS == S) return true; @@ -220,14 +221,15 @@ static bool isConfigurationValue(const Stmt *S, return isConfigurationValue(cast<DeclRefExpr>(S)->getDecl(), PP); case Stmt::ObjCBoolLiteralExprClass: IgnoreYES_NO = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Stmt::CXXBoolLiteralExprClass: case Stmt::IntegerLiteralClass: { const Expr *E = cast<Expr>(S); if (IncludeIntegers) { if (SilenceableCondVal && !SilenceableCondVal->getBegin().isValid()) *SilenceableCondVal = E->getSourceRange(); - return WrappedInParens || isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO); + return WrappedInParens || + isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO); } return false; } @@ -300,6 +302,12 @@ static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B, if (isa<BinaryOperator>(Term)) { return isConfigurationValue(Term, PP); } + // Do not treat constexpr if statement successors as unreachable in warnings + // since the point of these statements is to determine branches at compile + // time. + if (const auto *IS = dyn_cast<IfStmt>(Term); + IS != nullptr && IS->isConstexpr()) + return true; } const Stmt *Cond = B->getTerminatorCondition(/* stripParens */ false); @@ -334,7 +342,7 @@ static unsigned scanFromBlock(const CFGBlock *Start, // This allows us to potentially uncover some "always unreachable" code // within the "sometimes unreachable" code. // Look at the successors and mark then reachable. - Optional<bool> TreatAllSuccessorsAsReachable; + std::optional<bool> TreatAllSuccessorsAsReachable; if (!IncludeSometimesUnreachableEdges) TreatAllSuccessorsAsReachable = false; @@ -346,13 +354,13 @@ static unsigned scanFromBlock(const CFGBlock *Start, if (!UB) break; - if (!TreatAllSuccessorsAsReachable.hasValue()) { + if (!TreatAllSuccessorsAsReachable) { assert(PP); TreatAllSuccessorsAsReachable = shouldTreatSuccessorsAsReachable(item, *PP); } - if (TreatAllSuccessorsAsReachable.getValue()) { + if (*TreatAllSuccessorsAsReachable) { B = UB; break; } @@ -446,26 +454,68 @@ bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) { return isDeadRoot; } -static bool isValidDeadStmt(const Stmt *S) { +// Check if the given `DeadStmt` is a coroutine statement and is a substmt of +// the coroutine statement. `Block` is the CFGBlock containing the `DeadStmt`. 
+static bool isInCoroutineStmt(const Stmt *DeadStmt, const CFGBlock *Block) {
+  // The coroutine statement, co_return, co_await, or co_yield.
+  const Stmt *CoroStmt = nullptr;
+  // Find the first coroutine statement after the DeadStmt in the block.
+  bool AfterDeadStmt = false;
+  for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I != E;
+       ++I)
+    if (std::optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
+      const Stmt *S = CS->getStmt();
+      if (S == DeadStmt)
+        AfterDeadStmt = true;
+      if (AfterDeadStmt &&
+          // For simplicity, we only check simple coroutine statements.
+          (llvm::isa<CoreturnStmt>(S) || llvm::isa<CoroutineSuspendExpr>(S))) {
+        CoroStmt = S;
+        break;
+      }
+    }
+  if (!CoroStmt)
+    return false;
+  struct Checker : RecursiveASTVisitor<Checker> {
+    const Stmt *DeadStmt;
+    bool CoroutineSubStmt = false;
+    Checker(const Stmt *S) : DeadStmt(S) {}
+    bool VisitStmt(const Stmt *S) {
+      if (S == DeadStmt)
+        CoroutineSubStmt = true;
+      return true;
+    }
+    // Statements captured in the CFG can be implicit.
+    bool shouldVisitImplicitCode() const { return true; }
+  };
+  Checker checker(DeadStmt);
+  checker.TraverseStmt(const_cast<Stmt *>(CoroStmt));
+  return checker.CoroutineSubStmt;
+}
+
+static bool isValidDeadStmt(const Stmt *S, const clang::CFGBlock *Block) {
   if (S->getBeginLoc().isInvalid())
     return false;
   if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S))
     return BO->getOpcode() != BO_Comma;
-  return true;
+  // Coroutine statements are never considered dead statements, because
+  // removing one may change the function's semantics if it is the only
+  // coroutine statement of the coroutine.
+  return !isInCoroutineStmt(S, Block);
 }
 
 const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
   for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I)
-    if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
+    if (std::optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
       const Stmt *S = CS->getStmt();
-      if (isValidDeadStmt(S))
+      if (isValidDeadStmt(S, Block))
         return S;
     }
 
   CFGTerminator T = Block->getTerminator();
   if (T.isStmtBranch()) {
     const Stmt *S = T.getStmt();
-    if (S && isValidDeadStmt(S))
+    if (S && isValidDeadStmt(S, Block))
       return S;
   }
 
@@ -530,12 +580,11 @@ unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start,
   // earliest location.
if (!DeferredLocs.empty()) { llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp); - for (DeferredLocsTy::iterator I = DeferredLocs.begin(), - E = DeferredLocs.end(); I != E; ++I) { - const CFGBlock *Block = I->first; + for (const auto &I : DeferredLocs) { + const CFGBlock *Block = I.first; if (Reachable[Block->getBlockID()]) continue; - reportDeadCode(Block, I->second, CB); + reportDeadCode(Block, I.second, CB); count += scanMaybeReachableFromBlock(Block, PP, Reachable); } } @@ -624,6 +673,10 @@ void DeadCodeScan::reportDeadCode(const CFGBlock *B, UK = reachable_code::UK_Return; } + const auto *AS = dyn_cast<AttributedStmt>(S); + bool HasFallThroughAttr = + AS && hasSpecificAttr<FallThroughAttr>(AS->getAttrs()); + SourceRange SilenceableCondVal; if (UK == reachable_code::UK_Other) { @@ -640,8 +693,9 @@ void DeadCodeScan::reportDeadCode(const CFGBlock *B, R2 = Inc->getSourceRange(); } - CB.HandleUnreachable(reachable_code::UK_Loop_Increment, - Loc, SourceRange(), SourceRange(Loc, Loc), R2); + CB.HandleUnreachable(reachable_code::UK_Loop_Increment, Loc, + SourceRange(), SourceRange(Loc, Loc), R2, + HasFallThroughAttr); return; } @@ -660,7 +714,7 @@ void DeadCodeScan::reportDeadCode(const CFGBlock *B, SourceRange R1, R2; SourceLocation Loc = GetUnreachableLoc(S, R1, R2); - CB.HandleUnreachable(UK, Loc, SilenceableCondVal, R1, R2); + CB.HandleUnreachable(UK, Loc, SilenceableCondVal, R1, R2, HasFallThroughAttr); } //===----------------------------------------------------------------------===// @@ -694,18 +748,15 @@ void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP, // If there aren't explicit EH edges, we should include the 'try' dispatch // blocks as roots. if (!AC.getCFGBuildOptions().AddEHEdges) { - for (CFG::try_block_iterator I = cfg->try_blocks_begin(), - E = cfg->try_blocks_end() ; I != E; ++I) { - numReachable += scanMaybeReachableFromBlock(*I, PP, reachable); - } + for (const CFGBlock *B : cfg->try_blocks()) + numReachable += scanMaybeReachableFromBlock(B, PP, reachable); if (numReachable == cfg->getNumBlockIDs()) return; } // There are some unreachable blocks. We need to find the root blocks that // contain code that should be considered unreachable. - for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) { - const CFGBlock *block = *I; + for (const CFGBlock *block : *cfg) { // A block may have been marked reachable during this loop. if (reachable[block->getBlockID()]) continue; diff --git a/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp b/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp index 9f45a8efe546..8d279d969b61 100644 --- a/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp @@ -19,6 +19,7 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/ParentMap.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include <optional> using namespace clang; using namespace ento; @@ -32,7 +33,7 @@ constexpr static bool isOneOf() { /// rest of varargs. template <class T, class P, class... 
ToCompare> constexpr static bool isOneOf() { - return std::is_same<T, P>::value || isOneOf<T, ToCompare...>(); + return std::is_same_v<T, P> || isOneOf<T, ToCompare...>(); } namespace { @@ -65,13 +66,13 @@ struct GeneralizedConsumedAttr { } template <class T> -Optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, - QualType QT) { +std::optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, + QualType QT) { ObjKind K; if (isOneOf<T, CFConsumedAttr, CFReturnsRetainedAttr, CFReturnsNotRetainedAttr>()) { if (!TrackObjCAndCFObjects) - return None; + return std::nullopt; K = ObjKind::CF; } else if (isOneOf<T, NSConsumedAttr, NSConsumesSelfAttr, @@ -79,19 +80,19 @@ Optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, NSReturnsNotRetainedAttr, NSConsumesSelfAttr>()) { if (!TrackObjCAndCFObjects) - return None; + return std::nullopt; if (isOneOf<T, NSReturnsRetainedAttr, NSReturnsAutoreleasedAttr, NSReturnsNotRetainedAttr>() && !cocoa::isCocoaObjectRef(QT)) - return None; + return std::nullopt; K = ObjKind::ObjC; } else if (isOneOf<T, OSConsumedAttr, OSConsumesThisAttr, OSReturnsNotRetainedAttr, OSReturnsRetainedAttr, OSReturnsRetainedOnZeroAttr, OSReturnsRetainedOnNonZeroAttr>()) { if (!TrackOSObjects) - return None; + return std::nullopt; K = ObjKind::OS; } else if (isOneOf<T, GeneralizedReturnsNotRetainedAttr, GeneralizedReturnsRetainedAttr, @@ -102,12 +103,12 @@ Optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, } if (D->hasAttr<T>()) return K; - return None; + return std::nullopt; } template <class T1, class T2, class... Others> -Optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, - QualType QT) { +std::optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, + QualType QT) { if (auto Out = hasAnyEnabledAttrOf<T1>(D, QT)) return Out; return hasAnyEnabledAttrOf<T2, Others...>(D, QT); @@ -145,16 +146,20 @@ static bool isSubclass(const Decl *D, return !(match(SubclassM, *D, D->getASTContext()).empty()); } -static bool isOSObjectSubclass(const Decl *D) { - // OSSymbols are particular OSObjects that are allocated globally - // and therefore aren't really refcounted, so we ignore them. 
- return D && isSubclass(D, "OSMetaClassBase") && !isSubclass(D, "OSSymbol"); +static bool isExactClass(const Decl *D, StringRef ClassName) { + using namespace ast_matchers; + DeclarationMatcher sameClassM = + cxxRecordDecl(hasName(std::string(ClassName))); + return !(match(sameClassM, *D, D->getASTContext()).empty()); } -static bool isOSObjectDynamicCast(StringRef S) { - return S == "safeMetaCast"; +static bool isOSObjectSubclass(const Decl *D) { + return D && isSubclass(D, "OSMetaClassBase") && + !isExactClass(D, "OSMetaClass"); } +static bool isOSObjectDynamicCast(StringRef S) { return S == "safeMetaCast"; } + static bool isOSObjectRequiredCast(StringRef S) { return S == "requiredMetaCast"; } @@ -169,7 +174,7 @@ static bool isOSObjectPtr(QualType QT) { } static bool isISLObjectRef(QualType Ty) { - return StringRef(Ty.getAsString()).startswith("isl_"); + return StringRef(Ty.getAsString()).starts_with("isl_"); } static bool isOSIteratorSubclass(const Decl *D) { @@ -185,20 +190,22 @@ static bool hasRCAnnotation(const Decl *D, StringRef rcAnnotation) { } static bool isRetain(const FunctionDecl *FD, StringRef FName) { - return FName.startswith_lower("retain") || FName.endswith_lower("retain"); + return FName.starts_with_insensitive("retain") || + FName.ends_with_insensitive("retain"); } static bool isRelease(const FunctionDecl *FD, StringRef FName) { - return FName.startswith_lower("release") || FName.endswith_lower("release"); + return FName.starts_with_insensitive("release") || + FName.ends_with_insensitive("release"); } static bool isAutorelease(const FunctionDecl *FD, StringRef FName) { - return FName.startswith_lower("autorelease") || - FName.endswith_lower("autorelease"); + return FName.starts_with_insensitive("autorelease") || + FName.ends_with_insensitive("autorelease"); } static bool isMakeCollectable(StringRef FName) { - return FName.contains_lower("MakeCollectable"); + return FName.contains_insensitive("MakeCollectable"); } /// A function is OSObject related if it is declared on a subclass @@ -248,13 +255,13 @@ RetainSummaryManager::getSummaryForOSObject(const FunctionDecl *FD, // TODO: Add support for the slightly common *Matching(table) idiom. // Cf. IOService::nameMatching() etc. - these function have an unusual // contract of returning at +0 or +1 depending on their last argument. - if (FName.endswith("Matching")) { + if (FName.ends_with("Matching")) { return getPersistentStopSummary(); } // All objects returned with functions *not* starting with 'get', // or iterators, are returned at +1. - if ((!FName.startswith("get") && !FName.startswith("Get")) || + if ((!FName.starts_with("get") && !FName.starts_with("Get")) || isOSIteratorSubclass(PD)) { return getOSSummaryCreateRule(FD); } else { @@ -294,8 +301,9 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject( std::string RetTyName = RetTy.getAsString(); if (FName == "pthread_create" || FName == "pthread_setspecific") { - // Part of: <rdar://problem/7299394> and <rdar://problem/11282706>. - // This will be addressed better with IPA. + // It's not uncommon to pass a tracked object into the thread + // as 'void *arg', and then release it inside the thread. + // FIXME: We could build a much more precise model for these functions. 
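+    // (E.g., pthread_create(&T, nullptr, WorkerFn, RetainedArg), where
+    // WorkerFn releases RetainedArg before returning.)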
return getPersistentStopSummary(); } else if(FName == "NSMakeCollectable") { // Handle: id NSMakeCollectable(CFTypeRef) @@ -304,7 +312,8 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject( : getPersistentStopSummary(); } else if (FName == "CMBufferQueueDequeueAndRetain" || FName == "CMBufferQueueDequeueIfDataReadyAndRetain") { - // Part of: <rdar://problem/39390714>. + // These API functions are known to NOT act as a CFRetain wrapper. + // They simply make a new object owned by the caller. return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), ScratchArgs, ArgEffect(DoNothing), @@ -317,40 +326,39 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject( FName == "IOServiceNameMatching" || FName == "IORegistryEntryIDMatching" || FName == "IOOpenFirmwarePathMatching"))) { - // Part of <rdar://problem/6961230>. (IOKit) - // This should be addressed using a API table. + // Yes, these IOKit functions return CF objects. + // They also violate the CF naming convention. return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), ScratchArgs, ArgEffect(DoNothing), ArgEffect(DoNothing)); } else if (FName == "IOServiceGetMatchingService" || FName == "IOServiceGetMatchingServices") { - // FIXES: <rdar://problem/6326900> - // This should be addressed using a API table. This strcmp is also - // a little gross, but there is no need to super optimize here. + // These IOKit functions accept CF objects as arguments. + // They also consume them without an appropriate annotation. ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(DecRef, ObjKind::CF)); return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs, ArgEffect(DoNothing), ArgEffect(DoNothing)); } else if (FName == "IOServiceAddNotification" || FName == "IOServiceAddMatchingNotification") { - // Part of <rdar://problem/6961230>. (IOKit) - // This should be addressed using a API table. + // More IOKit functions suddenly accepting (and even more suddenly, + // consuming) CF objects. ScratchArgs = AF.add(ScratchArgs, 2, ArgEffect(DecRef, ObjKind::CF)); return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs, ArgEffect(DoNothing), ArgEffect(DoNothing)); } else if (FName == "CVPixelBufferCreateWithBytes") { - // FIXES: <rdar://problem/7283567> // Eventually this can be improved by recognizing that the pixel // buffer passed to CVPixelBufferCreateWithBytes is released via // a callback and doing full IPA to make sure this is done correctly. - // FIXME: This function has an out parameter that returns an + // Note that it's passed as a 'void *', so it's hard to annotate. + // FIXME: This function also has an out parameter that returns an // allocated object. ScratchArgs = AF.add(ScratchArgs, 7, ArgEffect(StopTracking)); return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs, ArgEffect(DoNothing), ArgEffect(DoNothing)); } else if (FName == "CGBitmapContextCreateWithData") { - // FIXES: <rdar://problem/7358899> + // This is similar to the CVPixelBufferCreateWithBytes situation above. // Eventually this can be improved by recognizing that 'releaseInfo' // passed to CGBitmapContextCreateWithData is released via // a callback and doing full IPA to make sure this is done correctly. 
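The ScratchArgs manipulations in this file follow LLVM's persistent-map idiom:
an llvm::ImmutableMap is never mutated in place; Factory::add returns a fresh
map that shares structure with its predecessor. A minimal sketch of that idiom
(toy key/value types, not the analyzer's ArgEffects map):

    #include "llvm/ADT/ImmutableMap.h"
    #include <cassert>

    int main() {
      llvm::ImmutableMap<unsigned, int>::Factory F;
      llvm::ImmutableMap<unsigned, int> Args = F.getEmptyMap();
      // Each add yields a new map; earlier values remain valid and shared.
      Args = F.add(Args, /*ArgIdx=*/1, /*Effect=*/7);
      Args = F.add(Args, /*ArgIdx=*/2, /*Effect=*/9);
      assert(Args.lookup(1) && *Args.lookup(1) == 7);
      return 0;
    }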
@@ -358,17 +366,14 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject(
     return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), ScratchArgs,
                                 ArgEffect(DoNothing), ArgEffect(DoNothing));
   } else if (FName == "CVPixelBufferCreateWithPlanarBytes") {
-    // FIXES: <rdar://problem/7283567>
-    // Eventually this can be improved by recognizing that the pixel
-    // buffer passed to CVPixelBufferCreateWithPlanarBytes is released
-    // via a callback and doing full IPA to make sure this is done
-    // correctly.
+    // Same as CVPixelBufferCreateWithBytes, just more arguments.
     ScratchArgs = AF.add(ScratchArgs, 12, ArgEffect(StopTracking));
     return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs,
                                 ArgEffect(DoNothing), ArgEffect(DoNothing));
-  } else if (FName == "VTCompressionSessionEncodeFrame") {
-    // The context argument passed to VTCompressionSessionEncodeFrame()
+  } else if (FName == "VTCompressionSessionEncodeFrame" ||
+             FName == "VTCompressionSessionEncodeMultiImageFrame") {
+    // The context argument passed to VTCompressionSessionEncodeFrame() et al.
     // is passed to the callback specified when creating the session
     // (e.g. with VTCompressionSessionCreate()) which can release it.
     // To account for this possibility, conservatively stop tracking
@@ -379,22 +384,19 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject(
                                 ArgEffect(DoNothing), ArgEffect(DoNothing));
   } else if (FName == "dispatch_set_context" ||
              FName == "xpc_connection_set_context") {
-    // <rdar://problem/11059275> - The analyzer currently doesn't have
-    // a good way to reason about the finalizer function for libdispatch.
+    // The analyzer currently doesn't have a good way to reason about
+    // dispatch_set_finalizer_f() which typically cleans up the context.
     // If we pass a context object that is memory managed, stop tracking it.
-    // <rdar://problem/13783514> - Same problem, but for XPC.
-    // FIXME: this hack should possibly go away once we can handle
-    // libdispatch and XPC finalizers.
+    // Same with xpc_connection_set_finalizer_f().
     ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(StopTracking));
     return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs,
                                 ArgEffect(DoNothing), ArgEffect(DoNothing));
-  } else if (FName.startswith("NSLog")) {
+  } else if (FName.starts_with("NSLog")) {
    return getDoNothingSummary();
-  } else if (FName.startswith("NS") &&
-             (FName.find("Insert") != StringRef::npos)) {
-    // Whitelist NSXXInsertXX, for example NSMapInsertIfAbsent, since they can
-    // be deallocated by NSMapRemove. (radar://11152419)
+  } else if (FName.starts_with("NS") && FName.contains("Insert")) {
+    // Allowlist NSXXInsertXX, for example NSMapInsertIfAbsent, since they can
+    // be deallocated by NSMapRemove.
     ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(StopTracking));
     ScratchArgs = AF.add(ScratchArgs, 2, ArgEffect(StopTracking));
     return getPersistentSummary(RetEffect::MakeNoRet(),
@@ -451,9 +453,9 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject(
 
   // Check for release functions, the only kind of functions that we care
   // about that don't return a pointer type.
-  if (FName.startswith("CG") || FName.startswith("CF")) {
+  if (FName.starts_with("CG") || FName.starts_with("CF")) {
     // Test for 'CGCF'.
-    FName = FName.substr(FName.startswith("CGCF") ? 4 : 2);
+    FName = FName.substr(FName.starts_with("CGCF") ?
4 : 2); if (isRelease(FD, FName)) return getUnarySummary(FT, DecRef); @@ -713,13 +715,13 @@ bool RetainSummaryManager::isTrustedReferenceCountImplementation( return hasRCAnnotation(FD, "rc_ownership_trusted_implementation"); } -Optional<RetainSummaryManager::BehaviorSummary> +std::optional<RetainSummaryManager::BehaviorSummary> RetainSummaryManager::canEval(const CallExpr *CE, const FunctionDecl *FD, bool &hasTrustedImplementationAnnotation) { IdentifierInfo *II = FD->getIdentifier(); if (!II) - return None; + return std::nullopt; StringRef FName = II->getName(); FName = FName.substr(FName.find_first_not_of('_')); @@ -734,9 +736,9 @@ RetainSummaryManager::canEval(const CallExpr *CE, const FunctionDecl *FD, // It's okay to be a little sloppy here. if (FName == "CMBufferQueueDequeueAndRetain" || FName == "CMBufferQueueDequeueIfDataReadyAndRetain") { - // Part of: <rdar://problem/39390714>. - // These are not retain. They just return something and retain it. - return None; + // These API functions are known to NOT act as a CFRetain wrapper. + // They simply make a new object owned by the caller. + return std::nullopt; } if (CE->getNumArgs() == 1 && (cocoa::isRefType(ResultTy, "CF", FName) || @@ -776,7 +778,7 @@ RetainSummaryManager::canEval(const CallExpr *CE, const FunctionDecl *FD, return BehaviorSummary::NoOp; } - return None; + return std::nullopt; } const RetainSummary * @@ -786,7 +788,7 @@ RetainSummaryManager::getUnarySummary(const FunctionType* FT, // Unary functions have no arg effects by definition. ArgEffects ScratchArgs(AF.getEmptyMap()); - // Sanity check that this is *really* a unary function. This can + // Verify that this is *really* a unary function. This can // happen if people do weird things. const FunctionProtoType* FTP = dyn_cast<FunctionProtoType>(FT); if (!FTP || FTP->getNumParams() != 1) @@ -859,7 +861,7 @@ RetainSummaryManager::getCFSummaryGetRule(const FunctionDecl *FD) { // Summary creation for Selectors. //===----------------------------------------------------------------------===// -Optional<RetEffect> +std::optional<RetEffect> RetainSummaryManager::getRetEffectFromAnnotations(QualType RetTy, const Decl *D) { if (hasAnyEnabledAttrOf<NSReturnsRetainedAttr>(D, RetTy)) @@ -880,14 +882,14 @@ RetainSummaryManager::getRetEffectFromAnnotations(QualType RetTy, if (auto RE = getRetEffectFromAnnotations(RetTy, PD)) return RE; - return None; + return std::nullopt; } -/// \return Whether the chain of typedefs starting from {@code QT} -/// has a typedef with a given name {@code Name}. +/// \return Whether the chain of typedefs starting from @c QT +/// has a typedef with a given name @c Name. 
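+/// For example, a value whose type is the 'CFStringRef' typedef (or further
+/// type sugar over it) satisfies hasTypedefNamed(QT, "CFStringRef").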
static bool hasTypedefNamed(QualType QT, StringRef Name) { - while (auto *T = dyn_cast<TypedefType>(QT)) { + while (auto *T = QT->getAs<TypedefType>()) { const auto &Context = T->getDecl()->getASTContext(); if (T->getDecl()->getIdentifier() == &Context.Idents.get(Name)) return true; @@ -985,7 +987,7 @@ RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ, applyParamAnnotationEffect(*pi, parm_idx, FD, Template); QualType RetTy = FD->getReturnType(); - if (Optional<RetEffect> RetE = getRetEffectFromAnnotations(RetTy, FD)) + if (std::optional<RetEffect> RetE = getRetEffectFromAnnotations(RetTy, FD)) Template->setRetEffect(*RetE); if (hasAnyEnabledAttrOf<OSConsumesThisAttr>(FD, RetTy)) @@ -1012,7 +1014,7 @@ RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ, applyParamAnnotationEffect(*pi, parm_idx, MD, Template); QualType RetTy = MD->getReturnType(); - if (Optional<RetEffect> RetE = getRetEffectFromAnnotations(RetTy, MD)) + if (std::optional<RetEffect> RetE = getRetEffectFromAnnotations(RetTy, MD)) Template->setRetEffect(*RetE); } @@ -1096,7 +1098,7 @@ RetainSummaryManager::getStandardMethodSummary(const ObjCMethodDecl *MD, if (S.isKeywordSelector()) { for (unsigned i = 0, e = S.getNumArgs(); i != e; ++i) { StringRef Slot = S.getNameForSlot(i); - if (Slot.substr(Slot.size() - 8).equals_lower("delegate")) { + if (Slot.ends_with_insensitive("delegate")) { if (ResultEff == ObjCInitRetE) ResultEff = RetEffect::MakeNoRetHard(); else @@ -1237,8 +1239,6 @@ void RetainSummaryManager::InitializeMethodSummaries() { // FIXME: For now we opt for false negatives with NSWindow, as these objects // self-own themselves. However, they only do this once they are displayed. // Thus, we need to track an NSWindow's display status. - // This is tracked in <rdar://problem/6062711>. - // See also http://llvm.org/bugs/show_bug.cgi?id=3714. const RetainSummary *NoTrackYet = getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs, ArgEffect(StopTracking), ArgEffect(StopTracking)); @@ -1253,7 +1253,6 @@ void RetainSummaryManager::InitializeMethodSummaries() { // For NSNull, objects returned by +null are singletons that ignore // retain/release semantics. Just don't track them. - // <rdar://problem/12858915> addClassMethSummary("NSNull", "null", NoTrackYet); // Don't track allocated autorelease pools, as it is okay to prematurely diff --git a/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp b/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp index 21583e92c72d..e25b843c9bf8 100644 --- a/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp @@ -40,8 +40,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -54,6 +52,7 @@ #include <functional> #include <iterator> #include <memory> +#include <optional> #include <string> #include <type_traits> #include <utility> @@ -75,7 +74,7 @@ static void warnInvalidLock(ThreadSafetyHandler &Handler, // FIXME: add a note about the attribute location in MutexExp or D if (Loc.isValid()) - Handler.handleInvalidLockExp(Kind, Loc); + Handler.handleInvalidLockExp(Loc); } namespace { @@ -86,11 +85,9 @@ class CapExprSet : public SmallVector<CapabilityExpr, 4> { public: /// Push M onto list, but discard duplicates. 
void push_back_nodup(const CapabilityExpr &CapE) { - iterator It = std::find_if(begin(), end(), - [=](const CapabilityExpr &CapE2) { - return CapE.equals(CapE2); - }); - if (It == end()) + if (llvm::none_of(*this, [=](const CapabilityExpr &CapE2) { + return CapE.equals(CapE2); + })) push_back(CapE); } }; @@ -105,44 +102,49 @@ class FactSet; /// /// FIXME: this analysis does not currently support re-entrant locking. class FactEntry : public CapabilityExpr { +public: + /// Where a fact comes from. + enum SourceKind { + Acquired, ///< The fact has been directly acquired. + Asserted, ///< The fact has been asserted to be held. + Declared, ///< The fact is assumed to be held by callers. + Managed, ///< The fact has been acquired through a scoped capability. + }; + private: /// Exclusive or shared. - LockKind LKind; + LockKind LKind : 8; + + // How it was acquired. + SourceKind Source : 8; /// Where it was acquired. SourceLocation AcquireLoc; - /// True if the lock was asserted. - bool Asserted; - - /// True if the lock was declared. - bool Declared; - public: FactEntry(const CapabilityExpr &CE, LockKind LK, SourceLocation Loc, - bool Asrt, bool Declrd = false) - : CapabilityExpr(CE), LKind(LK), AcquireLoc(Loc), Asserted(Asrt), - Declared(Declrd) {} + SourceKind Src) + : CapabilityExpr(CE), LKind(LK), Source(Src), AcquireLoc(Loc) {} virtual ~FactEntry() = default; LockKind kind() const { return LKind; } SourceLocation loc() const { return AcquireLoc; } - bool asserted() const { return Asserted; } - bool declared() const { return Declared; } - void setDeclared(bool D) { Declared = D; } + bool asserted() const { return Source == Asserted; } + bool declared() const { return Source == Declared; } + bool managed() const { return Source == Managed; } virtual void handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan, SourceLocation JoinLoc, LockErrorKind LEK, ThreadSafetyHandler &Handler) const = 0; virtual void handleLock(FactSet &FSet, FactManager &FactMan, - const FactEntry &entry, ThreadSafetyHandler &Handler, - StringRef DiagKind) const = 0; + const FactEntry &entry, + ThreadSafetyHandler &Handler) const = 0; virtual void handleUnlock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, SourceLocation UnlockLoc, - bool FullyRemove, ThreadSafetyHandler &Handler, - StringRef DiagKind) const = 0; + bool FullyRemove, + ThreadSafetyHandler &Handler) const = 0; // Return true if LKind >= LK, where exclusive > shared bool isAtLeast(LockKind LK) const { @@ -400,7 +402,7 @@ public: // The map with which Exp should be interpreted. 
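FactEntry above folds the old Asserted/Declared booleans (plus the Managed flag that previously lived on LockableFactEntry) into a single four-way SourceKind, packed alongside the LockKind in 8-bit bitfields. A standalone model of that layout, assuming nothing beyond the enumerators shown in the hunk:

    enum LockKind : unsigned { LK_Shared, LK_Exclusive, LK_Generic };
    enum SourceKind : unsigned {
      Acquired, // directly acquired
      Asserted, // assumed to be held via an assertion
      Declared, // required from callers
      Managed,  // held through a scoped capability
    };

    struct FactHeader {
      LockKind Kind : 8;     // exclusive or shared
      SourceKind Source : 8; // how the fact was introduced
    };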
Context Ctx; - bool isReference() { return !Exp; } + bool isReference() const { return !Exp; } private: // Create ordinary variable definition @@ -415,7 +417,6 @@ public: private: Context::Factory ContextFactory; std::vector<VarDefinition> VarDefinitions; - std::vector<unsigned> CtxIndices; std::vector<std::pair<const Stmt *, Context>> SavedContexts; public: @@ -501,9 +502,8 @@ public: for (Context::iterator I = C.begin(), E = C.end(); I != E; ++I) { const NamedDecl *D = I.getKey(); D->printName(llvm::errs()); - const unsigned *i = C.lookup(D); llvm::errs() << " -> "; - dumpVarDefinitionName(*i); + dumpVarDefinitionName(I.getData()); llvm::errs() << "\n"; } } @@ -728,8 +728,6 @@ void LocalVariableMap::traverseCFG(CFG *CFGraph, std::vector<CFGBlockInfo> &BlockInfo) { PostOrderCFGView::CFGBlockSet VisitedBlocks(CFGraph); - CtxIndices.resize(CFGraph->getNumBlockIDs()); - for (const auto *CurrBlock : *SortedGraph) { unsigned CurrBlockID = CurrBlock->getBlockID(); CFGBlockInfo *CurrBlockInfo = &BlockInfo[CurrBlockID]; @@ -821,7 +819,7 @@ static void findBlockLocations(CFG *CFGraph, for (CFGBlock::const_reverse_iterator BI = CurrBlock->rbegin(), BE = CurrBlock->rend(); BI != BE; ++BI) { // FIXME: Handle other CFGElement kinds. - if (Optional<CFGStmt> CS = BI->getAs<CFGStmt>()) { + if (std::optional<CFGStmt> CS = BI->getAs<CFGStmt>()) { CurrBlockInfo->ExitLoc = CS->getStmt()->getBeginLoc(); break; } @@ -833,7 +831,7 @@ static void findBlockLocations(CFG *CFGraph, // of the first statement in the block. for (const auto &BI : *CurrBlock) { // FIXME: Handle other CFGElement kinds. - if (Optional<CFGStmt> CS = BI.getAs<CFGStmt>()) { + if (std::optional<CFGStmt> CS = BI.getAs<CFGStmt>()) { CurrBlockInfo->EntryLoc = CS->getStmt()->getBeginLoc(); break; } @@ -844,6 +842,11 @@ static void findBlockLocations(CFG *CFGraph, // location. CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc = BlockInfo[(*CurrBlock->pred_begin())->getBlockID()].ExitLoc; + } else if (CurrBlock->succ_size() == 1 && *CurrBlock->succ_begin()) { + // The block is empty, and has a single successor. Use its entry + // location. 
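The dump() fix above replaces a second lookup of the key just visited with the iterator's own payload. The same micro-pattern in plain C++, for illustration only:

    #include <cstdio>
    #include <map>

    int main() {
      std::map<int, unsigned> Defs{{1, 10}, {2, 20}};
      for (const auto &[Name, Def] : Defs)
        std::printf("%d -> %u\n", Name, Def); // not *Defs.find(Name)
      return 0;
    }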
+ CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc = + BlockInfo[(*CurrBlock->succ_begin())->getBlockID()].EntryLoc; } } } @@ -851,35 +854,31 @@ static void findBlockLocations(CFG *CFGraph, namespace { class LockableFactEntry : public FactEntry { -private: - /// managed by ScopedLockable object - bool Managed; - public: LockableFactEntry(const CapabilityExpr &CE, LockKind LK, SourceLocation Loc, - bool Mng = false, bool Asrt = false) - : FactEntry(CE, LK, Loc, Asrt), Managed(Mng) {} + SourceKind Src = Acquired) + : FactEntry(CE, LK, Loc, Src) {} void handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan, SourceLocation JoinLoc, LockErrorKind LEK, ThreadSafetyHandler &Handler) const override { - if (!Managed && !asserted() && !negative() && !isUniversal()) { - Handler.handleMutexHeldEndOfScope("mutex", toString(), loc(), JoinLoc, + if (!asserted() && !negative() && !isUniversal()) { + Handler.handleMutexHeldEndOfScope(getKind(), toString(), loc(), JoinLoc, LEK); } } void handleLock(FactSet &FSet, FactManager &FactMan, const FactEntry &entry, - ThreadSafetyHandler &Handler, - StringRef DiagKind) const override { - Handler.handleDoubleLock(DiagKind, entry.toString(), loc(), entry.loc()); + ThreadSafetyHandler &Handler) const override { + Handler.handleDoubleLock(entry.getKind(), entry.toString(), loc(), + entry.loc()); } void handleUnlock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, SourceLocation UnlockLoc, - bool FullyRemove, ThreadSafetyHandler &Handler, - StringRef DiagKind) const override { + bool FullyRemove, + ThreadSafetyHandler &Handler) const override { FSet.removeLock(FactMan, Cp); if (!Cp.negative()) { FSet.addLock(FactMan, std::make_unique<LockableFactEntry>( @@ -896,25 +895,27 @@ private: UCK_ReleasedExclusive, ///< Exclusive capability that was released. 
}; - using UnderlyingCapability = - llvm::PointerIntPair<const til::SExpr *, 2, UnderlyingCapabilityKind>; + struct UnderlyingCapability { + CapabilityExpr Cap; + UnderlyingCapabilityKind Kind; + }; - SmallVector<UnderlyingCapability, 4> UnderlyingMutexes; + SmallVector<UnderlyingCapability, 2> UnderlyingMutexes; public: ScopedLockableFactEntry(const CapabilityExpr &CE, SourceLocation Loc) - : FactEntry(CE, LK_Exclusive, Loc, false) {} + : FactEntry(CE, LK_Exclusive, Loc, Acquired) {} void addLock(const CapabilityExpr &M) { - UnderlyingMutexes.emplace_back(M.sexpr(), UCK_Acquired); + UnderlyingMutexes.push_back(UnderlyingCapability{M, UCK_Acquired}); } void addExclusiveUnlock(const CapabilityExpr &M) { - UnderlyingMutexes.emplace_back(M.sexpr(), UCK_ReleasedExclusive); + UnderlyingMutexes.push_back(UnderlyingCapability{M, UCK_ReleasedExclusive}); } void addSharedUnlock(const CapabilityExpr &M) { - UnderlyingMutexes.emplace_back(M.sexpr(), UCK_ReleasedShared); + UnderlyingMutexes.push_back(UnderlyingCapability{M, UCK_ReleasedShared}); } void @@ -922,51 +923,45 @@ public: SourceLocation JoinLoc, LockErrorKind LEK, ThreadSafetyHandler &Handler) const override { for (const auto &UnderlyingMutex : UnderlyingMutexes) { - const auto *Entry = FSet.findLock( - FactMan, CapabilityExpr(UnderlyingMutex.getPointer(), false)); - if ((UnderlyingMutex.getInt() == UCK_Acquired && Entry) || - (UnderlyingMutex.getInt() != UCK_Acquired && !Entry)) { + const auto *Entry = FSet.findLock(FactMan, UnderlyingMutex.Cap); + if ((UnderlyingMutex.Kind == UCK_Acquired && Entry) || + (UnderlyingMutex.Kind != UCK_Acquired && !Entry)) { // If this scoped lock manages another mutex, and if the underlying // mutex is still/not held, then warn about the underlying mutex. - Handler.handleMutexHeldEndOfScope( - "mutex", sx::toString(UnderlyingMutex.getPointer()), loc(), JoinLoc, - LEK); + Handler.handleMutexHeldEndOfScope(UnderlyingMutex.Cap.getKind(), + UnderlyingMutex.Cap.toString(), loc(), + JoinLoc, LEK); } } } void handleLock(FactSet &FSet, FactManager &FactMan, const FactEntry &entry, - ThreadSafetyHandler &Handler, - StringRef DiagKind) const override { + ThreadSafetyHandler &Handler) const override { for (const auto &UnderlyingMutex : UnderlyingMutexes) { - CapabilityExpr UnderCp(UnderlyingMutex.getPointer(), false); - - if (UnderlyingMutex.getInt() == UCK_Acquired) - lock(FSet, FactMan, UnderCp, entry.kind(), entry.loc(), &Handler, - DiagKind); + if (UnderlyingMutex.Kind == UCK_Acquired) + lock(FSet, FactMan, UnderlyingMutex.Cap, entry.kind(), entry.loc(), + &Handler); else - unlock(FSet, FactMan, UnderCp, entry.loc(), &Handler, DiagKind); + unlock(FSet, FactMan, UnderlyingMutex.Cap, entry.loc(), &Handler); } } void handleUnlock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, SourceLocation UnlockLoc, - bool FullyRemove, ThreadSafetyHandler &Handler, - StringRef DiagKind) const override { + bool FullyRemove, + ThreadSafetyHandler &Handler) const override { assert(!Cp.negative() && "Managing object cannot be negative."); for (const auto &UnderlyingMutex : UnderlyingMutexes) { - CapabilityExpr UnderCp(UnderlyingMutex.getPointer(), false); - // Remove/lock the underlying mutex if it exists/is still unlocked; warn // on double unlocking/locking if we're not destroying the scoped object. ThreadSafetyHandler *TSHandler = FullyRemove ? 
nullptr : &Handler; - if (UnderlyingMutex.getInt() == UCK_Acquired) { - unlock(FSet, FactMan, UnderCp, UnlockLoc, TSHandler, DiagKind); + if (UnderlyingMutex.Kind == UCK_Acquired) { + unlock(FSet, FactMan, UnderlyingMutex.Cap, UnlockLoc, TSHandler); } else { - LockKind kind = UnderlyingMutex.getInt() == UCK_ReleasedShared + LockKind kind = UnderlyingMutex.Kind == UCK_ReleasedShared ? LK_Shared : LK_Exclusive; - lock(FSet, FactMan, UnderCp, kind, UnlockLoc, TSHandler, DiagKind); + lock(FSet, FactMan, UnderlyingMutex.Cap, kind, UnlockLoc, TSHandler); } } if (FullyRemove) @@ -975,21 +970,21 @@ public: private: void lock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, - LockKind kind, SourceLocation loc, ThreadSafetyHandler *Handler, - StringRef DiagKind) const { + LockKind kind, SourceLocation loc, + ThreadSafetyHandler *Handler) const { if (const FactEntry *Fact = FSet.findLock(FactMan, Cp)) { if (Handler) - Handler->handleDoubleLock(DiagKind, Cp.toString(), Fact->loc(), loc); + Handler->handleDoubleLock(Cp.getKind(), Cp.toString(), Fact->loc(), + loc); } else { FSet.removeLock(FactMan, !Cp); FSet.addLock(FactMan, - std::make_unique<LockableFactEntry>(Cp, kind, loc)); + std::make_unique<LockableFactEntry>(Cp, kind, loc, Managed)); } } void unlock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, - SourceLocation loc, ThreadSafetyHandler *Handler, - StringRef DiagKind) const { + SourceLocation loc, ThreadSafetyHandler *Handler) const { if (FSet.findLock(FactMan, Cp)) { FSet.removeLock(FactMan, Cp); FSet.addLock(FactMan, std::make_unique<LockableFactEntry>( @@ -998,7 +993,7 @@ private: SourceLocation PrevLoc; if (const FactEntry *Neg = FSet.findLock(FactMan, !Cp)) PrevLoc = Neg->loc(); - Handler->handleUnmatchedUnlock(DiagKind, Cp.toString(), loc, PrevLoc); + Handler->handleUnmatchedUnlock(Cp.getKind(), Cp.toString(), loc, PrevLoc); } } }; @@ -1013,8 +1008,10 @@ class ThreadSafetyAnalyzer { threadSafety::SExprBuilder SxBuilder; ThreadSafetyHandler &Handler; - const CXXMethodDecl *CurrentMethod; + const FunctionDecl *CurrentFunction; LocalVariableMap LocalVarMap; + // Maps constructed objects to `this` placeholder prior to initialization. 
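UnderlyingCapability above now stores a full CapabilityExpr rather than squeezing a til::SExpr* and a kind into a PointerIntPair, so the scoped entry can report each underlying capability's kind in diagnostics. The user-level shape this machinery models is the RAII guard from Clang's thread-safety annotations; a self-contained sketch with the attribute spellings written out (later sketches below reuse this Mutex and these macros):

    #define CAPABILITY(x)     __attribute__((capability(x)))
    #define SCOPED_CAPABILITY __attribute__((scoped_lockable))
    #define ACQUIRE(...)      __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...)      __attribute__((release_capability(__VA_ARGS__)))

    struct CAPABILITY("mutex") Mutex {
      void Lock() ACQUIRE();
      void Unlock() RELEASE();
    };

    struct SCOPED_CAPABILITY MutexLocker {
      MutexLocker(Mutex *Mu) ACQUIRE(Mu); // recorded via addLock(...)
      ~MutexLocker() RELEASE();           // releases what it manages
    };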
+ llvm::SmallDenseMap<const Expr *, til::LiteralPtr *> ConstructedObjects; FactManager FactMan; std::vector<CFGBlockInfo> BlockInfo; @@ -1027,14 +1024,13 @@ public: bool inCurrentScope(const CapabilityExpr &CapE); void addLock(FactSet &FSet, std::unique_ptr<FactEntry> Entry, - StringRef DiagKind, bool ReqAttr = false); + bool ReqAttr = false); void removeLock(FactSet &FSet, const CapabilityExpr &CapE, - SourceLocation UnlockLoc, bool FullyRemove, LockKind Kind, - StringRef DiagKind); + SourceLocation UnlockLoc, bool FullyRemove, LockKind Kind); template <typename AttrType> void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, - const NamedDecl *D, VarDecl *SelfDecl = nullptr); + const NamedDecl *D, til::SExpr *Self = nullptr); template <class AttrType> void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, @@ -1049,18 +1045,31 @@ public: const CFGBlock* PredBlock, const CFGBlock *CurrBlock); - void intersectAndWarn(FactSet &FSet1, const FactSet &FSet2, - SourceLocation JoinLoc, - LockErrorKind LEK1, LockErrorKind LEK2, - bool Modify=true); + bool join(const FactEntry &a, const FactEntry &b, bool CanModify); + + void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, + SourceLocation JoinLoc, LockErrorKind EntryLEK, + LockErrorKind ExitLEK); - void intersectAndWarn(FactSet &FSet1, const FactSet &FSet2, - SourceLocation JoinLoc, LockErrorKind LEK1, - bool Modify=true) { - intersectAndWarn(FSet1, FSet2, JoinLoc, LEK1, LEK1, Modify); + void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, + SourceLocation JoinLoc, LockErrorKind LEK) { + intersectAndWarn(EntrySet, ExitSet, JoinLoc, LEK, LEK); } void runAnalysis(AnalysisDeclContext &AC); + + void warnIfMutexNotHeld(const FactSet &FSet, const NamedDecl *D, + const Expr *Exp, AccessKind AK, Expr *MutexExp, + ProtectedOperationKind POK, til::LiteralPtr *Self, + SourceLocation Loc); + void warnIfMutexHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, + Expr *MutexExp, til::LiteralPtr *Self, + SourceLocation Loc); + + void checkAccess(const FactSet &FSet, const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK); + void checkPtAccess(const FactSet &FSet, const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK); }; } // namespace @@ -1168,7 +1177,7 @@ void BeforeSet::checkBeforeAfter(const ValueDecl* StartVd, } // Transitively search other before sets, and warn on cycles. if (traverse(Vdb)) { - if (CycMap.find(Vd) == CycMap.end()) { + if (!CycMap.contains(Vd)) { CycMap.insert(std::make_pair(Vd, true)); StringRef L1 = Vd->getName(); Analyzer.Handler.handleBeforeAfterCycle(L1, Vd->getLocation()); @@ -1218,53 +1227,6 @@ public: } // namespace -static StringRef ClassifyDiagnostic(const CapabilityAttr *A) { - return A->getName(); -} - -static StringRef ClassifyDiagnostic(QualType VDT) { - // We need to look at the declaration of the type of the value to determine - // which it is. The type should either be a record or a typedef, or a pointer - // or reference thereof. 
- if (const auto *RT = VDT->getAs<RecordType>()) { - if (const auto *RD = RT->getDecl()) - if (const auto *CA = RD->getAttr<CapabilityAttr>()) - return ClassifyDiagnostic(CA); - } else if (const auto *TT = VDT->getAs<TypedefType>()) { - if (const auto *TD = TT->getDecl()) - if (const auto *CA = TD->getAttr<CapabilityAttr>()) - return ClassifyDiagnostic(CA); - } else if (VDT->isPointerType() || VDT->isReferenceType()) - return ClassifyDiagnostic(VDT->getPointeeType()); - - return "mutex"; -} - -static StringRef ClassifyDiagnostic(const ValueDecl *VD) { - assert(VD && "No ValueDecl passed"); - - // The ValueDecl is the declaration of a mutex or role (hopefully). - return ClassifyDiagnostic(VD->getType()); -} - -template <typename AttrTy> -static std::enable_if_t<!has_arg_iterator_range<AttrTy>::value, StringRef> -ClassifyDiagnostic(const AttrTy *A) { - if (const ValueDecl *VD = getValueDecl(A->getArg())) - return ClassifyDiagnostic(VD); - return "mutex"; -} - -template <typename AttrTy> -static std::enable_if_t<has_arg_iterator_range<AttrTy>::value, StringRef> -ClassifyDiagnostic(const AttrTy *A) { - for (const auto *Arg : A->args()) { - if (const ValueDecl *VD = getValueDecl(Arg)) - return ClassifyDiagnostic(VD); - } - return "mutex"; -} - bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { const threadSafety::til::SExpr *SExp = CapE.sexpr(); assert(SExp && "Null expressions should be ignored"); @@ -1272,7 +1234,7 @@ bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { if (const auto *LP = dyn_cast<til::LiteralPtr>(SExp)) { const ValueDecl *VD = LP->clangDecl(); // Variables defined in a function are always inaccessible. - if (!VD->isDefinedOutsideFunctionOrMethod()) + if (!VD || !VD->isDefinedOutsideFunctionOrMethod()) return false; // For now we consider static class members to be inaccessible. if (isa<CXXRecordDecl>(VD->getDeclContext())) @@ -1283,10 +1245,10 @@ bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { // Members are in scope from methods of the same class. if (const auto *P = dyn_cast<til::Project>(SExp)) { - if (!CurrentMethod) + if (!isa_and_nonnull<CXXMethodDecl>(CurrentFunction)) return false; const ValueDecl *VD = P->clangDecl(); - return VD->getDeclContext() == CurrentMethod->getDeclContext(); + return VD->getDeclContext() == CurrentFunction->getDeclContext(); } return false; @@ -1296,7 +1258,7 @@ bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { /// \param ReqAttr -- true if this is part of an initial Requires attribute. void ThreadSafetyAnalyzer::addLock(FactSet &FSet, std::unique_ptr<FactEntry> Entry, - StringRef DiagKind, bool ReqAttr) { + bool ReqAttr) { if (Entry->shouldIgnore()) return; @@ -1309,7 +1271,7 @@ void ThreadSafetyAnalyzer::addLock(FactSet &FSet, } else { if (inCurrentScope(*Entry) && !Entry->asserted()) - Handler.handleNegativeNotHeld(DiagKind, Entry->toString(), + Handler.handleNegativeNotHeld(Entry->getKind(), Entry->toString(), NegC.toString(), Entry->loc()); } } @@ -1318,13 +1280,13 @@ void ThreadSafetyAnalyzer::addLock(FactSet &FSet, if (Handler.issueBetaWarnings() && !Entry->asserted() && !Entry->declared()) { GlobalBeforeSet->checkBeforeAfter(Entry->valueDecl(), FSet, *this, - Entry->loc(), DiagKind); + Entry->loc(), Entry->getKind()); } // FIXME: Don't always warn when we have support for reentrant locks. 
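The deleted ClassifyDiagnostic overloads above existed only to re-derive the diagnostic noun ("mutex", "role", ...) from attributes; after this change the noun travels with each CapabilityExpr via getKind(). The noun originates in user code, e.g. (CAPABILITY macro as in the sketch above):

    struct CAPABILITY("mutex") SpinLock {};
    struct CAPABILITY("role") NetworkRole {};
    // Diagnostics now render as "mutex 'mu' ..." or "role 'net' ..."
    // keyed off the capability's kind string, not a re-derivation.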
if (const FactEntry *Cp = FSet.findLock(FactMan, *Entry)) { if (!Entry->asserted()) - Cp->handleLock(FSet, FactMan, *Entry, Handler, DiagKind); + Cp->handleLock(FSet, FactMan, *Entry, Handler); } else { FSet.addLock(FactMan, std::move(Entry)); } @@ -1334,8 +1296,7 @@ void ThreadSafetyAnalyzer::addLock(FactSet &FSet, /// \param UnlockLoc The source location of the unlock (only used in error msg) void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, SourceLocation UnlockLoc, - bool FullyRemove, LockKind ReceivedKind, - StringRef DiagKind) { + bool FullyRemove, LockKind ReceivedKind) { if (Cp.shouldIgnore()) return; @@ -1344,19 +1305,19 @@ void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, SourceLocation PrevLoc; if (const FactEntry *Neg = FSet.findLock(FactMan, !Cp)) PrevLoc = Neg->loc(); - Handler.handleUnmatchedUnlock(DiagKind, Cp.toString(), UnlockLoc, PrevLoc); + Handler.handleUnmatchedUnlock(Cp.getKind(), Cp.toString(), UnlockLoc, + PrevLoc); return; } // Generic lock removal doesn't care about lock kind mismatches, but // otherwise diagnose when the lock kinds are mismatched. if (ReceivedKind != LK_Generic && LDat->kind() != ReceivedKind) { - Handler.handleIncorrectUnlockKind(DiagKind, Cp.toString(), LDat->kind(), + Handler.handleIncorrectUnlockKind(Cp.getKind(), Cp.toString(), LDat->kind(), ReceivedKind, LDat->loc(), UnlockLoc); } - LDat->handleUnlock(FSet, FactMan, Cp, UnlockLoc, FullyRemove, Handler, - DiagKind); + LDat->handleUnlock(FSet, FactMan, Cp, UnlockLoc, FullyRemove, Handler); } /// Extract the list of mutexIDs from the attribute on an expression, @@ -1364,13 +1325,13 @@ void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, template <typename AttrType> void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, const NamedDecl *D, - VarDecl *SelfDecl) { + til::SExpr *Self) { if (Attr->args_size() == 0) { // The mutex held is the "this" object. - CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, SelfDecl); + CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, Self); if (Cp.isInvalid()) { - warnInvalidLock(Handler, nullptr, D, Exp, ClassifyDiagnostic(Attr)); - return; + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + return; } //else if (!Cp.shouldIgnore()) @@ -1379,10 +1340,10 @@ void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, } for (const auto *Arg : Attr->args()) { - CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, SelfDecl); + CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, Self); if (Cp.isInvalid()) { - warnInvalidLock(Handler, nullptr, D, Exp, ClassifyDiagnostic(Attr)); - continue; + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + continue; } //else if (!Cp.shouldIgnore()) @@ -1521,7 +1482,6 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, bool Negate = false; const CFGBlockInfo *PredBlockInfo = &BlockInfo[PredBlock->getBlockID()]; const LocalVarContext &LVarCtx = PredBlockInfo->ExitContext; - StringRef CapDiagKind = "mutex"; const auto *Exp = getTrylockCallExpr(Cond, LVarCtx, Negate); if (!Exp) @@ -1542,21 +1502,18 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, getMutexIDs(A->isShared() ? 
SharedLocksToAdd : ExclusiveLocksToAdd, A, Exp, FunDecl, PredBlock, CurrBlock, A->getSuccessValue(), Negate); - CapDiagKind = ClassifyDiagnostic(A); break; }; case attr::ExclusiveTrylockFunction: { const auto *A = cast<ExclusiveTrylockFunctionAttr>(Attr); - getMutexIDs(ExclusiveLocksToAdd, A, Exp, FunDecl, - PredBlock, CurrBlock, A->getSuccessValue(), Negate); - CapDiagKind = ClassifyDiagnostic(A); + getMutexIDs(ExclusiveLocksToAdd, A, Exp, FunDecl, PredBlock, CurrBlock, + A->getSuccessValue(), Negate); break; } case attr::SharedTrylockFunction: { const auto *A = cast<SharedTrylockFunctionAttr>(Attr); - getMutexIDs(SharedLocksToAdd, A, Exp, FunDecl, - PredBlock, CurrBlock, A->getSuccessValue(), Negate); - CapDiagKind = ClassifyDiagnostic(A); + getMutexIDs(SharedLocksToAdd, A, Exp, FunDecl, PredBlock, CurrBlock, + A->getSuccessValue(), Negate); break; } default: @@ -1568,12 +1525,10 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, SourceLocation Loc = Exp->getExprLoc(); for (const auto &ExclusiveLockToAdd : ExclusiveLocksToAdd) addLock(Result, std::make_unique<LockableFactEntry>(ExclusiveLockToAdd, - LK_Exclusive, Loc), - CapDiagKind); + LK_Exclusive, Loc)); for (const auto &SharedLockToAdd : SharedLocksToAdd) addLock(Result, std::make_unique<LockableFactEntry>(SharedLockToAdd, - LK_Shared, Loc), - CapDiagKind); + LK_Shared, Loc)); } namespace { @@ -1588,31 +1543,36 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { ThreadSafetyAnalyzer *Analyzer; FactSet FSet; + // The fact set for the function on exit. + const FactSet &FunctionExitFSet; LocalVariableMap::Context LVarCtx; unsigned CtxIndex; // helper functions - void warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, AccessKind AK, - Expr *MutexExp, ProtectedOperationKind POK, - StringRef DiagKind, SourceLocation Loc); - void warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, Expr *MutexExp, - StringRef DiagKind); void checkAccess(const Expr *Exp, AccessKind AK, - ProtectedOperationKind POK = POK_VarAccess); + ProtectedOperationKind POK = POK_VarAccess) { + Analyzer->checkAccess(FSet, Exp, AK, POK); + } void checkPtAccess(const Expr *Exp, AccessKind AK, - ProtectedOperationKind POK = POK_VarAccess); + ProtectedOperationKind POK = POK_VarAccess) { + Analyzer->checkPtAccess(FSet, Exp, AK, POK); + } - void handleCall(const Expr *Exp, const NamedDecl *D, VarDecl *VD = nullptr); + void handleCall(const Expr *Exp, const NamedDecl *D, + til::LiteralPtr *Self = nullptr, + SourceLocation Loc = SourceLocation()); void examineArguments(const FunctionDecl *FD, CallExpr::const_arg_iterator ArgBegin, CallExpr::const_arg_iterator ArgEnd, bool SkipFirstParam = false); public: - BuildLockset(ThreadSafetyAnalyzer *Anlzr, CFGBlockInfo &Info) + BuildLockset(ThreadSafetyAnalyzer *Anlzr, CFGBlockInfo &Info, + const FactSet &FunctionExitFSet) : ConstStmtVisitor<BuildLockset>(), Analyzer(Anlzr), FSet(Info.EntrySet), - LVarCtx(Info.EntryContext), CtxIndex(Info.EntryIndex) {} + FunctionExitFSet(FunctionExitFSet), LVarCtx(Info.EntryContext), + CtxIndex(Info.EntryIndex) {} void VisitUnaryOperator(const UnaryOperator *UO); void VisitBinaryOperator(const BinaryOperator *BO); @@ -1620,21 +1580,22 @@ public: void VisitCallExpr(const CallExpr *Exp); void VisitCXXConstructExpr(const CXXConstructExpr *Exp); void VisitDeclStmt(const DeclStmt *S); + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Exp); + void VisitReturnStmt(const ReturnStmt *S); }; } // namespace /// Warn if the LSet does not contain a lock sufficient to 
protect access /// of at least the passed in AccessKind. -void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, - AccessKind AK, Expr *MutexExp, - ProtectedOperationKind POK, - StringRef DiagKind, SourceLocation Loc) { +void ThreadSafetyAnalyzer::warnIfMutexNotHeld( + const FactSet &FSet, const NamedDecl *D, const Expr *Exp, AccessKind AK, + Expr *MutexExp, ProtectedOperationKind POK, til::LiteralPtr *Self, + SourceLocation Loc) { LockKind LK = getLockKindFromAccessKind(AK); - - CapabilityExpr Cp = Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp); + CapabilityExpr Cp = SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); if (Cp.isInvalid()) { - warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, DiagKind); + warnInvalidLock(Handler, MutexExp, D, Exp, Cp.getKind()); return; } else if (Cp.shouldIgnore()) { return; @@ -1642,66 +1603,67 @@ void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, if (Cp.negative()) { // Negative capabilities act like locks excluded - const FactEntry *LDat = FSet.findLock(Analyzer->FactMan, !Cp); + const FactEntry *LDat = FSet.findLock(FactMan, !Cp); if (LDat) { - Analyzer->Handler.handleFunExcludesLock( - DiagKind, D->getNameAsString(), (!Cp).toString(), Loc); + Handler.handleFunExcludesLock(Cp.getKind(), D->getNameAsString(), + (!Cp).toString(), Loc); return; } // If this does not refer to a negative capability in the same class, // then stop here. - if (!Analyzer->inCurrentScope(Cp)) + if (!inCurrentScope(Cp)) return; // Otherwise the negative requirement must be propagated to the caller. - LDat = FSet.findLock(Analyzer->FactMan, Cp); + LDat = FSet.findLock(FactMan, Cp); if (!LDat) { - Analyzer->Handler.handleNegativeNotHeld(D, Cp.toString(), Loc); + Handler.handleNegativeNotHeld(D, Cp.toString(), Loc); } return; } - const FactEntry *LDat = FSet.findLockUniv(Analyzer->FactMan, Cp); + const FactEntry *LDat = FSet.findLockUniv(FactMan, Cp); bool NoError = true; if (!LDat) { // No exact match found. Look for a partial match. - LDat = FSet.findPartialMatch(Analyzer->FactMan, Cp); + LDat = FSet.findPartialMatch(FactMan, Cp); if (LDat) { // Warn that there's no precise match. std::string PartMatchStr = LDat->toString(); StringRef PartMatchName(PartMatchStr); - Analyzer->Handler.handleMutexNotHeld(DiagKind, D, POK, Cp.toString(), - LK, Loc, &PartMatchName); + Handler.handleMutexNotHeld(Cp.getKind(), D, POK, Cp.toString(), LK, Loc, + &PartMatchName); } else { // Warn that there's no match at all. - Analyzer->Handler.handleMutexNotHeld(DiagKind, D, POK, Cp.toString(), - LK, Loc); + Handler.handleMutexNotHeld(Cp.getKind(), D, POK, Cp.toString(), LK, Loc); } NoError = false; } // Make sure the mutex we found is the right kind. if (NoError && LDat && !LDat->isAtLeast(LK)) { - Analyzer->Handler.handleMutexNotHeld(DiagKind, D, POK, Cp.toString(), - LK, Loc); + Handler.handleMutexNotHeld(Cp.getKind(), D, POK, Cp.toString(), LK, Loc); } } /// Warn if the LSet contains the given lock. 
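warnIfMutexNotHeld above moves onto ThreadSafetyAnalyzer (taking an explicit fact set and an optional Self), and keeps the negative-capability logic: a requirement on '!mu' is a locks-excluded fact that must be provable in scope. In user code, compiled with -Wthread-safety-negative (Mutex as in the scoped-capability sketch above):

    #define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))

    Mutex mu;

    void compute() REQUIRES(!mu); // caller must *not* hold 'mu'

    void caller() {
      mu.Lock();
      // compute();  // would warn: calling while 'mu' is held
      mu.Unlock();
      compute();     // fine once '!mu' can be established
    }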
-void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, - Expr *MutexExp, StringRef DiagKind) { - CapabilityExpr Cp = Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp); +void ThreadSafetyAnalyzer::warnIfMutexHeld(const FactSet &FSet, + const NamedDecl *D, const Expr *Exp, + Expr *MutexExp, + til::LiteralPtr *Self, + SourceLocation Loc) { + CapabilityExpr Cp = SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); if (Cp.isInvalid()) { - warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, DiagKind); + warnInvalidLock(Handler, MutexExp, D, Exp, Cp.getKind()); return; } else if (Cp.shouldIgnore()) { return; } - const FactEntry *LDat = FSet.findLock(Analyzer->FactMan, Cp); + const FactEntry *LDat = FSet.findLock(FactMan, Cp); if (LDat) { - Analyzer->Handler.handleFunExcludesLock( - DiagKind, D->getNameAsString(), Cp.toString(), Exp->getExprLoc()); + Handler.handleFunExcludesLock(Cp.getKind(), D->getNameAsString(), + Cp.toString(), Loc); } } @@ -1710,8 +1672,9 @@ void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, /// marked with guarded_by, we must ensure the appropriate mutexes are held. /// Similarly, we check if the access is to an expression that dereferences /// a pointer marked with pt_guarded_by. -void BuildLockset::checkAccess(const Expr *Exp, AccessKind AK, - ProtectedOperationKind POK) { +void ThreadSafetyAnalyzer::checkAccess(const FactSet &FSet, const Expr *Exp, + AccessKind AK, + ProtectedOperationKind POK) { Exp = Exp->IgnoreImplicit()->IgnoreParenCasts(); SourceLocation Loc = Exp->getExprLoc(); @@ -1735,39 +1698,50 @@ void BuildLockset::checkAccess(const Expr *Exp, AccessKind AK, if (const auto *UO = dyn_cast<UnaryOperator>(Exp)) { // For dereferences if (UO->getOpcode() == UO_Deref) - checkPtAccess(UO->getSubExpr(), AK, POK); + checkPtAccess(FSet, UO->getSubExpr(), AK, POK); return; } + if (const auto *BO = dyn_cast<BinaryOperator>(Exp)) { + switch (BO->getOpcode()) { + case BO_PtrMemD: // .* + return checkAccess(FSet, BO->getLHS(), AK, POK); + case BO_PtrMemI: // ->* + return checkPtAccess(FSet, BO->getLHS(), AK, POK); + default: + return; + } + } + if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Exp)) { - checkPtAccess(AE->getLHS(), AK, POK); + checkPtAccess(FSet, AE->getLHS(), AK, POK); return; } if (const auto *ME = dyn_cast<MemberExpr>(Exp)) { if (ME->isArrow()) - checkPtAccess(ME->getBase(), AK, POK); + checkPtAccess(FSet, ME->getBase(), AK, POK); else - checkAccess(ME->getBase(), AK, POK); + checkAccess(FSet, ME->getBase(), AK, POK); } const ValueDecl *D = getValueDecl(Exp); if (!D || !D->hasAttrs()) return; - if (D->hasAttr<GuardedVarAttr>() && FSet.isEmpty(Analyzer->FactMan)) { - Analyzer->Handler.handleNoMutexHeld("mutex", D, POK, AK, Loc); + if (D->hasAttr<GuardedVarAttr>() && FSet.isEmpty(FactMan)) { + Handler.handleNoMutexHeld(D, POK, AK, Loc); } for (const auto *I : D->specific_attrs<GuardedByAttr>()) - warnIfMutexNotHeld(D, Exp, AK, I->getArg(), POK, - ClassifyDiagnostic(I), Loc); + warnIfMutexNotHeld(FSet, D, Exp, AK, I->getArg(), POK, nullptr, Loc); } /// Checks pt_guarded_by and pt_guarded_var attributes. /// POK is the same operationKind that was passed to checkAccess. 
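checkAccess above gains BO_PtrMemD and BO_PtrMemI cases, so the object operand of pointer-to-member accesses is now checked like any other guarded access. A sketch (Mutex as before; both accesses warn unless 'mu' is held):

    #define GUARDED_BY(x)    __attribute__((guarded_by(x)))
    #define PT_GUARDED_BY(x) __attribute__((pt_guarded_by(x)))

    struct Data { int Field; };
    Mutex mu;
    Data d        GUARDED_BY(mu);
    Data *pd      PT_GUARDED_BY(mu);
    int Data::*pm = &Data::Field;

    int direct()   { return d.*pm; }   // .*  -> checkAccess(d, ...)
    int indirect() { return pd->*pm; } // ->* -> checkPtAccess(pd, ...)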
-void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, - ProtectedOperationKind POK) { +void ThreadSafetyAnalyzer::checkPtAccess(const FactSet &FSet, const Expr *Exp, + AccessKind AK, + ProtectedOperationKind POK) { while (true) { if (const auto *PE = dyn_cast<ParenExpr>(Exp)) { Exp = PE->getSubExpr(); @@ -1777,7 +1751,7 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, if (CE->getCastKind() == CK_ArrayToPointerDecay) { // If it's an actual array, and not a pointer, then it's elements // are protected by GUARDED_BY, not PT_GUARDED_BY; - checkAccess(CE->getSubExpr(), AK, POK); + checkAccess(FSet, CE->getSubExpr(), AK, POK); return; } Exp = CE->getSubExpr(); @@ -1789,18 +1763,19 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, // Pass by reference warnings are under a different flag. ProtectedOperationKind PtPOK = POK_VarDereference; if (POK == POK_PassByRef) PtPOK = POK_PtPassByRef; + if (POK == POK_ReturnByRef) + PtPOK = POK_PtReturnByRef; const ValueDecl *D = getValueDecl(Exp); if (!D || !D->hasAttrs()) return; - if (D->hasAttr<PtGuardedVarAttr>() && FSet.isEmpty(Analyzer->FactMan)) - Analyzer->Handler.handleNoMutexHeld("mutex", D, PtPOK, AK, - Exp->getExprLoc()); + if (D->hasAttr<PtGuardedVarAttr>() && FSet.isEmpty(FactMan)) + Handler.handleNoMutexHeld(D, PtPOK, AK, Exp->getExprLoc()); for (auto const *I : D->specific_attrs<PtGuardedByAttr>()) - warnIfMutexNotHeld(D, Exp, AK, I->getArg(), PtPOK, - ClassifyDiagnostic(I), Exp->getExprLoc()); + warnIfMutexNotHeld(FSet, D, Exp, AK, I->getArg(), PtPOK, nullptr, + Exp->getExprLoc()); } /// Process a function call, method call, constructor call, @@ -1813,22 +1788,36 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, /// and check that the appropriate locks are held. Non-const method calls with /// the same signature as const method calls can be also treated as reads. /// +/// \param Exp The call expression. +/// \param D The callee declaration. +/// \param Self If \p Exp = nullptr, the implicit this argument or the argument +/// of an implicitly called cleanup function. +/// \param Loc If \p Exp = nullptr, the location. 
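The retained CK_ArrayToPointerDecay branch above is why elements of a real array stay under GUARDED_BY while only genuine pointers get PT_GUARDED_BY semantics. Reusing the macros from the previous sketch:

    Mutex mu;
    int Arr[4] GUARDED_BY(mu);
    int *Ptr   PT_GUARDED_BY(mu);

    void touch() {
      Arr[0] = 1; // decays, but still checked against guarded_by(mu)
      *Ptr = 1;   // dereference, checked against pt_guarded_by(mu)
    }             // both warn here unless 'mu' is held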
void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, - VarDecl *VD) { - SourceLocation Loc = Exp->getExprLoc(); + til::LiteralPtr *Self, SourceLocation Loc) { CapExprSet ExclusiveLocksToAdd, SharedLocksToAdd; CapExprSet ExclusiveLocksToRemove, SharedLocksToRemove, GenericLocksToRemove; CapExprSet ScopedReqsAndExcludes; - StringRef CapDiagKind = "mutex"; // Figure out if we're constructing an object of scoped lockable class - bool isScopedVar = false; - if (VD) { - if (const auto *CD = dyn_cast<const CXXConstructorDecl>(D)) { - const CXXRecordDecl* PD = CD->getParent(); - if (PD && PD->hasAttr<ScopedLockableAttr>()) - isScopedVar = true; + CapabilityExpr Scp; + if (Exp) { + assert(!Self); + const auto *TagT = Exp->getType()->getAs<TagType>(); + if (TagT && Exp->isPRValue()) { + std::pair<til::LiteralPtr *, StringRef> Placeholder = + Analyzer->SxBuilder.createThisPlaceholder(Exp); + [[maybe_unused]] auto inserted = + Analyzer->ConstructedObjects.insert({Exp, Placeholder.first}); + assert(inserted.second && "Are we visiting the same expression again?"); + if (isa<CXXConstructExpr>(Exp)) + Self = Placeholder.first; + if (TagT->getDecl()->hasAttr<ScopedLockableAttr>()) + Scp = CapabilityExpr(Placeholder.first, Placeholder.second, false); } + + assert(Loc.isInvalid()); + Loc = Exp->getExprLoc(); } for(const Attr *At : D->attrs()) { @@ -1839,9 +1828,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, const auto *A = cast<AcquireCapabilityAttr>(At); Analyzer->getMutexIDs(A->isShared() ? SharedLocksToAdd : ExclusiveLocksToAdd, - A, Exp, D, VD); - - CapDiagKind = ClassifyDiagnostic(A); + A, Exp, D, Self); break; } @@ -1852,38 +1839,34 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, const auto *A = cast<AssertExclusiveLockAttr>(At); CapExprSet AssertLocks; - Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); for (const auto &AssertLock : AssertLocks) - Analyzer->addLock(FSet, - std::make_unique<LockableFactEntry>( - AssertLock, LK_Exclusive, Loc, false, true), - ClassifyDiagnostic(A)); + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( + AssertLock, LK_Exclusive, Loc, FactEntry::Asserted)); break; } case attr::AssertSharedLock: { const auto *A = cast<AssertSharedLockAttr>(At); CapExprSet AssertLocks; - Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); for (const auto &AssertLock : AssertLocks) - Analyzer->addLock(FSet, - std::make_unique<LockableFactEntry>( - AssertLock, LK_Shared, Loc, false, true), - ClassifyDiagnostic(A)); + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( + AssertLock, LK_Shared, Loc, FactEntry::Asserted)); break; } case attr::AssertCapability: { const auto *A = cast<AssertCapabilityAttr>(At); CapExprSet AssertLocks; - Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); for (const auto &AssertLock : AssertLocks) - Analyzer->addLock(FSet, - std::make_unique<LockableFactEntry>( - AssertLock, - A->isShared() ? LK_Shared : LK_Exclusive, Loc, - false, true), - ClassifyDiagnostic(A)); + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>( + AssertLock, + A->isShared() ? 
LK_Shared : LK_Exclusive, + Loc, FactEntry::Asserted)); break; } @@ -1892,25 +1875,23 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, case attr::ReleaseCapability: { const auto *A = cast<ReleaseCapabilityAttr>(At); if (A->isGeneric()) - Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, VD); + Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, Self); else if (A->isShared()) - Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, VD); + Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, Self); else - Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, VD); - - CapDiagKind = ClassifyDiagnostic(A); + Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, Self); break; } case attr::RequiresCapability: { const auto *A = cast<RequiresCapabilityAttr>(At); for (auto *Arg : A->args()) { - warnIfMutexNotHeld(D, Exp, A->isShared() ? AK_Read : AK_Written, Arg, - POK_FunctionCall, ClassifyDiagnostic(A), - Exp->getExprLoc()); + Analyzer->warnIfMutexNotHeld(FSet, D, Exp, + A->isShared() ? AK_Read : AK_Written, + Arg, POK_FunctionCall, Self, Loc); // use for adopting a lock - if (isScopedVar) - Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, VD); + if (!Scp.shouldIgnore()) + Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); } break; } @@ -1918,10 +1899,10 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, case attr::LocksExcluded: { const auto *A = cast<LocksExcludedAttr>(At); for (auto *Arg : A->args()) { - warnIfMutexHeld(D, Exp, Arg, ClassifyDiagnostic(A)); + Analyzer->warnIfMutexHeld(FSet, D, Exp, Arg, Self, Loc); // use for deferring a lock - if (isScopedVar) - Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, VD); + if (!Scp.shouldIgnore()) + Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); } break; } @@ -1936,31 +1917,25 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, // FIXME -- should only fully remove if the attribute refers to 'this'. bool Dtor = isa<CXXDestructorDecl>(D); for (const auto &M : ExclusiveLocksToRemove) - Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Exclusive, CapDiagKind); + Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Exclusive); for (const auto &M : SharedLocksToRemove) - Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Shared, CapDiagKind); + Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Shared); for (const auto &M : GenericLocksToRemove) - Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Generic, CapDiagKind); + Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Generic); // Add locks. + FactEntry::SourceKind Source = + !Scp.shouldIgnore() ? FactEntry::Managed : FactEntry::Acquired; for (const auto &M : ExclusiveLocksToAdd) - Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>( - M, LK_Exclusive, Loc, isScopedVar), - CapDiagKind); + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>(M, LK_Exclusive, + Loc, Source)); for (const auto &M : SharedLocksToAdd) - Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>( - M, LK_Shared, Loc, isScopedVar), - CapDiagKind); + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>(M, LK_Shared, Loc, Source)); - if (isScopedVar) { + if (!Scp.shouldIgnore()) { // Add the managing object as a dummy mutex, mapped to the underlying mutex. - SourceLocation MLoc = VD->getLocation(); - DeclRefExpr DRE(VD->getASTContext(), VD, false, VD->getType(), VK_LValue, - VD->getLocation()); - // FIXME: does this store a pointer to DRE? 
- CapabilityExpr Scp = Analyzer->SxBuilder.translateAttrExpr(&DRE, nullptr); - - auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, MLoc); + auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, Loc); for (const auto &M : ExclusiveLocksToAdd) ScopedEntry->addLock(M); for (const auto &M : SharedLocksToAdd) @@ -1971,7 +1946,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, ScopedEntry->addExclusiveUnlock(M); for (const auto &M : SharedLocksToRemove) ScopedEntry->addSharedUnlock(M); - Analyzer->addLock(FSet, std::move(ScopedEntry), CapDiagKind); + Analyzer->addLock(FSet, std::move(ScopedEntry)); } } @@ -2051,37 +2026,44 @@ void BuildLockset::VisitCallExpr(const CallExpr *Exp) { if (ME && MD) { if (ME->isArrow()) { - if (MD->isConst()) - checkPtAccess(CE->getImplicitObjectArgument(), AK_Read); - else // FIXME -- should be AK_Written - checkPtAccess(CE->getImplicitObjectArgument(), AK_Read); + // Should perhaps be AK_Written if !MD->isConst(). + checkPtAccess(CE->getImplicitObjectArgument(), AK_Read); } else { - if (MD->isConst()) - checkAccess(CE->getImplicitObjectArgument(), AK_Read); - else // FIXME -- should be AK_Written - checkAccess(CE->getImplicitObjectArgument(), AK_Read); + // Should perhaps be AK_Written if !MD->isConst(). + checkAccess(CE->getImplicitObjectArgument(), AK_Read); } } examineArguments(CE->getDirectCallee(), CE->arg_begin(), CE->arg_end()); } else if (const auto *OE = dyn_cast<CXXOperatorCallExpr>(Exp)) { - auto OEop = OE->getOperator(); + OverloadedOperatorKind OEop = OE->getOperator(); switch (OEop) { - case OO_Equal: { - const Expr *Target = OE->getArg(0); - const Expr *Source = OE->getArg(1); - checkAccess(Target, AK_Written); - checkAccess(Source, AK_Read); + case OO_Equal: + case OO_PlusEqual: + case OO_MinusEqual: + case OO_StarEqual: + case OO_SlashEqual: + case OO_PercentEqual: + case OO_CaretEqual: + case OO_AmpEqual: + case OO_PipeEqual: + case OO_LessLessEqual: + case OO_GreaterGreaterEqual: + checkAccess(OE->getArg(1), AK_Read); + [[fallthrough]]; + case OO_PlusPlus: + case OO_MinusMinus: + checkAccess(OE->getArg(0), AK_Written); break; - } case OO_Star: + case OO_ArrowStar: case OO_Arrow: case OO_Subscript: if (!(OEop == OO_Star && OE->getNumArgs() > 1)) { // Grrr. operator* can be multiplication... checkPtAccess(OE->getArg(0), AK_Read); } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: { // TODO: get rid of this, and rely on pass-by-ref instead. const Expr *Obj = OE->getArg(0); @@ -2113,33 +2095,21 @@ void BuildLockset::VisitCXXConstructExpr(const CXXConstructExpr *Exp) { } else { examineArguments(D, Exp->arg_begin(), Exp->arg_end()); } + if (D && D->hasAttrs()) + handleCall(Exp, D); } -static CXXConstructorDecl * -findConstructorForByValueReturn(const CXXRecordDecl *RD) { - // Prefer a move constructor over a copy constructor. If there's more than - // one copy constructor or more than one move constructor, we arbitrarily - // pick the first declared such constructor rather than trying to guess which - // one is more appropriate. 
- CXXConstructorDecl *CopyCtor = nullptr; - for (auto *Ctor : RD->ctors()) { - if (Ctor->isDeleted()) - continue; - if (Ctor->isMoveConstructor()) - return Ctor; - if (!CopyCtor && Ctor->isCopyConstructor()) - CopyCtor = Ctor; - } - return CopyCtor; -} - -static Expr *buildFakeCtorCall(CXXConstructorDecl *CD, ArrayRef<Expr *> Args, - SourceLocation Loc) { - ASTContext &Ctx = CD->getASTContext(); - return CXXConstructExpr::Create(Ctx, Ctx.getRecordType(CD->getParent()), Loc, - CD, true, Args, false, false, false, false, - CXXConstructExpr::CK_Complete, - SourceRange(Loc, Loc)); +static const Expr *UnpackConstruction(const Expr *E) { + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_NoOp) + E = CE->getSubExpr()->IgnoreParens(); + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_ConstructorConversion || + CE->getCastKind() == CK_UserDefinedConversion) + E = CE->getSubExpr(); + if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E)) + E = BTE->getSubExpr(); + return E; } void BuildLockset::VisitDeclStmt(const DeclStmt *S) { @@ -2148,7 +2118,7 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { for (auto *D : S->getDeclGroup()) { if (auto *VD = dyn_cast_or_null<VarDecl>(D)) { - Expr *E = VD->getInit(); + const Expr *E = VD->getInit(); if (!E) continue; E = E->IgnoreParens(); @@ -2156,37 +2126,74 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { // handle constructors that involve temporaries if (auto *EWC = dyn_cast<ExprWithCleanups>(E)) E = EWC->getSubExpr()->IgnoreParens(); - if (auto *CE = dyn_cast<CastExpr>(E)) - if (CE->getCastKind() == CK_NoOp || - CE->getCastKind() == CK_ConstructorConversion || - CE->getCastKind() == CK_UserDefinedConversion) - E = CE->getSubExpr()->IgnoreParens(); - if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E)) - E = BTE->getSubExpr()->IgnoreParens(); - - if (const auto *CE = dyn_cast<CXXConstructExpr>(E)) { - const auto *CtorD = dyn_cast_or_null<NamedDecl>(CE->getConstructor()); - if (!CtorD || !CtorD->hasAttrs()) - continue; - handleCall(E, CtorD, VD); - } else if (isa<CallExpr>(E) && E->isRValue()) { - // If the object is initialized by a function call that returns a - // scoped lockable by value, use the attributes on the copy or move - // constructor to figure out what effect that should have on the - // lockset. - // FIXME: Is this really the best way to handle this situation? 
- auto *RD = E->getType()->getAsCXXRecordDecl(); - if (!RD || !RD->hasAttr<ScopedLockableAttr>()) - continue; - CXXConstructorDecl *CtorD = findConstructorForByValueReturn(RD); - if (!CtorD || !CtorD->hasAttrs()) - continue; - handleCall(buildFakeCtorCall(CtorD, {E}, E->getBeginLoc()), CtorD, VD); + E = UnpackConstruction(E); + + if (auto Object = Analyzer->ConstructedObjects.find(E); + Object != Analyzer->ConstructedObjects.end()) { + Object->second->setClangDecl(VD); + Analyzer->ConstructedObjects.erase(Object); } } } } +void BuildLockset::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *Exp) { + if (const ValueDecl *ExtD = Exp->getExtendingDecl()) { + if (auto Object = Analyzer->ConstructedObjects.find( + UnpackConstruction(Exp->getSubExpr())); + Object != Analyzer->ConstructedObjects.end()) { + Object->second->setClangDecl(ExtD); + Analyzer->ConstructedObjects.erase(Object); + } + } +} + +void BuildLockset::VisitReturnStmt(const ReturnStmt *S) { + if (Analyzer->CurrentFunction == nullptr) + return; + const Expr *RetVal = S->getRetValue(); + if (!RetVal) + return; + + // If returning by reference, check that the function requires the appropriate + // capabilities. + const QualType ReturnType = + Analyzer->CurrentFunction->getReturnType().getCanonicalType(); + if (ReturnType->isLValueReferenceType()) { + Analyzer->checkAccess( + FunctionExitFSet, RetVal, + ReturnType->getPointeeType().isConstQualified() ? AK_Read : AK_Written, + POK_ReturnByRef); + } +} + +/// Given two facts merging on a join point, possibly warn and decide whether to +/// keep or replace. +/// +/// \param CanModify Whether we can replace \p A by \p B. +/// \return false if we should keep \p A, true if we should take \p B. +bool ThreadSafetyAnalyzer::join(const FactEntry &A, const FactEntry &B, + bool CanModify) { + if (A.kind() != B.kind()) { + // For managed capabilities, the destructor should unlock in the right mode + // anyway. For asserted capabilities no unlocking is needed. + if ((A.managed() || A.asserted()) && (B.managed() || B.asserted())) { + // The shared capability subsumes the exclusive capability, if possible. + bool ShouldTakeB = B.kind() == LK_Shared; + if (CanModify || !ShouldTakeB) + return ShouldTakeB; + } + Handler.handleExclusiveAndShared(B.getKind(), B.toString(), B.loc(), + A.loc()); + // Take the exclusive capability to reduce further warnings. + return CanModify && B.kind() == LK_Exclusive; + } else { + // The non-asserted capability is the one we want to track. + return CanModify && A.asserted() && !B.asserted(); + } +} + /// Compute the intersection of two locksets and issue warnings for any /// locks in the symmetric difference. /// @@ -2196,55 +2203,44 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { /// are the same. In the event of a difference, we use the intersection of these /// two locksets at the start of D. /// -/// \param FSet1 The first lockset. -/// \param FSet2 The second lockset. +/// \param EntrySet A lockset for entry into a (possibly new) block. +/// \param ExitSet The lockset on exiting a preceding block. /// \param JoinLoc The location of the join point for error reporting -/// \param LEK1 The error message to report if a mutex is missing from LSet1 -/// \param LEK2 The error message to report if a mutex is missing from Lset2 -void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &FSet1, - const FactSet &FSet2, +/// \param EntryLEK The warning if a mutex is missing from \p EntrySet. 
+/// \param ExitLEK The warning if a mutex is missing from \p ExitSet. +void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet, + const FactSet &ExitSet, SourceLocation JoinLoc, - LockErrorKind LEK1, - LockErrorKind LEK2, - bool Modify) { - FactSet FSet1Orig = FSet1; - - // Find locks in FSet2 that conflict or are not in FSet1, and warn. - for (const auto &Fact : FSet2) { - const FactEntry *LDat1 = nullptr; - const FactEntry *LDat2 = &FactMan[Fact]; - FactSet::iterator Iter1 = FSet1.findLockIter(FactMan, *LDat2); - if (Iter1 != FSet1.end()) LDat1 = &FactMan[*Iter1]; - - if (LDat1) { - if (LDat1->kind() != LDat2->kind()) { - Handler.handleExclusiveAndShared("mutex", LDat2->toString(), - LDat2->loc(), LDat1->loc()); - if (Modify && LDat1->kind() != LK_Exclusive) { - // Take the exclusive lock, which is the one in FSet2. - *Iter1 = Fact; - } - } - else if (Modify && LDat1->asserted() && !LDat2->asserted()) { - // The non-asserted lock in FSet2 is the one we want to track. - *Iter1 = Fact; - } - } else { - LDat2->handleRemovalFromIntersection(FSet2, FactMan, JoinLoc, LEK1, - Handler); + LockErrorKind EntryLEK, + LockErrorKind ExitLEK) { + FactSet EntrySetOrig = EntrySet; + + // Find locks in ExitSet that conflict or are not in EntrySet, and warn. + for (const auto &Fact : ExitSet) { + const FactEntry &ExitFact = FactMan[Fact]; + + FactSet::iterator EntryIt = EntrySet.findLockIter(FactMan, ExitFact); + if (EntryIt != EntrySet.end()) { + if (join(FactMan[*EntryIt], ExitFact, + EntryLEK != LEK_LockedSomeLoopIterations)) + *EntryIt = Fact; + } else if (!ExitFact.managed()) { + ExitFact.handleRemovalFromIntersection(ExitSet, FactMan, JoinLoc, + EntryLEK, Handler); } } - // Find locks in FSet1 that are not in FSet2, and remove them. - for (const auto &Fact : FSet1Orig) { - const FactEntry *LDat1 = &FactMan[Fact]; - const FactEntry *LDat2 = FSet2.findLock(FactMan, *LDat1); + // Find locks in EntrySet that are not in ExitSet, and remove them. 
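VisitReturnStmt above checks the returned lvalue against the function's expected exit set (POK_ReturnByRef, or POK_PtReturnByRef through a pt_guarded_by pointer), using AK_Read for const references and AK_Written otherwise. The case it catches, with the macros from the earlier sketches:

    #define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))

    class Counter {
      Mutex Mu;
      int N GUARDED_BY(Mu);

    public:
      int &ref() REQUIRES(Mu) { return N; } // fine: caller holds Mu
      int &bad() { return N; } // warns: guarded 'N' escapes by reference
    };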
+ for (const auto &Fact : EntrySetOrig) { + const FactEntry *EntryFact = &FactMan[Fact]; + const FactEntry *ExitFact = ExitSet.findLock(FactMan, *EntryFact); - if (!LDat2) { - LDat1->handleRemovalFromIntersection(FSet1Orig, FactMan, JoinLoc, LEK2, - Handler); - if (Modify) - FSet1.removeLock(FactMan, *LDat1); + if (!ExitFact) { + if (!EntryFact->managed() || ExitLEK == LEK_LockedSomeLoopIterations) + EntryFact->handleRemovalFromIntersection(EntrySetOrig, FactMan, JoinLoc, + ExitLEK, Handler); + if (ExitLEK == LEK_LockedSomePredecessors) + EntrySet.removeLock(FactMan, *EntryFact); } } } @@ -2257,7 +2253,7 @@ static bool neverReturns(const CFGBlock *B) { return false; CFGElement Last = B->back(); - if (Optional<CFGStmt> S = Last.getAs<CFGStmt>()) { + if (std::optional<CFGStmt> S = Last.getAs<CFGStmt>()) { if (isa<CXXThrowExpr>(S->getStmt())) return true; } @@ -2281,8 +2277,7 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { CFG *CFGraph = walker.getGraph(); const NamedDecl *D = walker.getDecl(); - const auto *CurrentFunction = dyn_cast<FunctionDecl>(D); - CurrentMethod = dyn_cast<CXXMethodDecl>(D); + CurrentFunction = dyn_cast<FunctionDecl>(D); if (D->hasAttr<NoThreadSafetyAnalysisAttr>()) return; @@ -2307,8 +2302,11 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { const PostOrderCFGView *SortedGraph = walker.getSortedGraph(); PostOrderCFGView::CFGBlockSet VisitedBlocks(CFGraph); + CFGBlockInfo &Initial = BlockInfo[CFGraph->getEntry().getBlockID()]; + CFGBlockInfo &Final = BlockInfo[CFGraph->getExit().getBlockID()]; + // Mark entry block as reachable - BlockInfo[CFGraph->getEntry().getBlockID()].Reachable = true; + Initial.Reachable = true; // Compute SSA names for local variables LocalVarMap.traverseCFG(CFGraph, SortedGraph, BlockInfo); @@ -2324,12 +2322,11 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { // to initial lockset. Also turn off checking for lock and unlock functions. // FIXME: is there a more intelligent way to check lock/unlock functions? if (!SortedGraph->empty() && D->hasAttrs()) { - const CFGBlock *FirstBlock = *SortedGraph->begin(); - FactSet &InitialLockset = BlockInfo[FirstBlock->getBlockID()].EntrySet; + assert(*SortedGraph->begin() == &CFGraph->getEntry()); + FactSet &InitialLockset = Initial.EntrySet; CapExprSet ExclusiveLocksToAdd; CapExprSet SharedLocksToAdd; - StringRef CapDiagKind = "mutex"; SourceLocation Loc = D->getLocation(); for (const auto *Attr : D->attrs()) { @@ -2337,7 +2334,6 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { if (const auto *A = dyn_cast<RequiresCapabilityAttr>(Attr)) { getMutexIDs(A->isShared() ? SharedLocksToAdd : ExclusiveLocksToAdd, A, nullptr, D); - CapDiagKind = ClassifyDiagnostic(A); } else if (const auto *A = dyn_cast<ReleaseCapabilityAttr>(Attr)) { // UNLOCK_FUNCTION() is used to hide the underlying lock implementation. // We must ignore such methods. @@ -2346,14 +2342,12 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { getMutexIDs(A->isShared() ? SharedLocksToAdd : ExclusiveLocksToAdd, A, nullptr, D); getMutexIDs(LocksReleased, A, nullptr, D); - CapDiagKind = ClassifyDiagnostic(A); } else if (const auto *A = dyn_cast<AcquireCapabilityAttr>(Attr)) { if (A->args_size() == 0) return; getMutexIDs(A->isShared() ? SharedLocksAcquired : ExclusiveLocksAcquired, A, nullptr, D); - CapDiagKind = ClassifyDiagnostic(A); } else if (isa<ExclusiveTrylockFunctionAttr>(Attr)) { // Don't try to check trylock functions for now. 
return; @@ -2368,17 +2362,36 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { // FIXME -- Loc can be wrong here. for (const auto &Mu : ExclusiveLocksToAdd) { - auto Entry = std::make_unique<LockableFactEntry>(Mu, LK_Exclusive, Loc); - Entry->setDeclared(true); - addLock(InitialLockset, std::move(Entry), CapDiagKind, true); + auto Entry = std::make_unique<LockableFactEntry>(Mu, LK_Exclusive, Loc, + FactEntry::Declared); + addLock(InitialLockset, std::move(Entry), true); } for (const auto &Mu : SharedLocksToAdd) { - auto Entry = std::make_unique<LockableFactEntry>(Mu, LK_Shared, Loc); - Entry->setDeclared(true); - addLock(InitialLockset, std::move(Entry), CapDiagKind, true); + auto Entry = std::make_unique<LockableFactEntry>(Mu, LK_Shared, Loc, + FactEntry::Declared); + addLock(InitialLockset, std::move(Entry), true); } } + // Compute the expected exit set. + // By default, we expect all locks held on entry to be held on exit. + FactSet ExpectedFunctionExitSet = Initial.EntrySet; + + // Adjust the expected exit set by adding or removing locks, as declared + // by *-LOCK_FUNCTION and UNLOCK_FUNCTION. The intersect below will then + // issue the appropriate warning. + // FIXME: the location here is not quite right. + for (const auto &Lock : ExclusiveLocksAcquired) + ExpectedFunctionExitSet.addLock( + FactMan, std::make_unique<LockableFactEntry>(Lock, LK_Exclusive, + D->getLocation())); + for (const auto &Lock : SharedLocksAcquired) + ExpectedFunctionExitSet.addLock( + FactMan, + std::make_unique<LockableFactEntry>(Lock, LK_Shared, D->getLocation())); + for (const auto &Lock : LocksReleased) + ExpectedFunctionExitSet.removeLock(FactMan, Lock); + for (const auto *CurrBlock : *SortedGraph) { unsigned CurrBlockID = CurrBlock->getBlockID(); CFGBlockInfo *CurrBlockInfo = &BlockInfo[CurrBlockID]; @@ -2400,7 +2413,6 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { // union because the real error is probably that we forgot to unlock M on // all code paths. bool LocksetInitialized = false; - SmallVector<CFGBlock *, 8> SpecialBlocks; for (CFGBlock::const_pred_iterator PI = CurrBlock->pred_begin(), PE = CurrBlock->pred_end(); PI != PE; ++PI) { // if *PI -> CurrBlock is a back edge @@ -2417,17 +2429,6 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { // Okay, we can reach this block from the entry. CurrBlockInfo->Reachable = true; - // If the previous block ended in a 'continue' or 'break' statement, then - // a difference in locksets is probably due to a bug in that block, rather - // than in some other predecessor. In that case, keep the other - // predecessor's lockset. - if (const Stmt *Terminator = (*PI)->getTerminatorStmt()) { - if (isa<ContinueStmt>(Terminator) || isa<BreakStmt>(Terminator)) { - SpecialBlocks.push_back(*PI); - continue; - } - } - FactSet PrevLockset; getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet, *PI, CurrBlock); @@ -2435,9 +2436,14 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { CurrBlockInfo->EntrySet = PrevLockset; LocksetInitialized = true; } else { - intersectAndWarn(CurrBlockInfo->EntrySet, PrevLockset, - CurrBlockInfo->EntryLoc, - LEK_LockedSomePredecessors); + // Surprisingly 'continue' doesn't always produce back edges, because + // the CFG has empty "transition" blocks where they meet with the end + // of the regular loop body. We still want to diagnose them as loop. 
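The comment above explains the subtlety: a 'continue' edge often runs through an empty transition block rather than forming a literal back edge, so the join now classifies it by the predecessor's terminator and still reports LEK_LockedSomeLoopIterations. The situation being diagnosed, roughly:

    void pump(Mutex &Mu, bool (*step)(int)) {
      for (int I = 0; I < 10; ++I) {
        Mu.Lock();
        if (step(I))
          continue; // reaches the loop join with 'Mu' still held
        Mu.Unlock();
      } // warning: 'Mu' held in some loop iterations but not others
    }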
+ intersectAndWarn( + CurrBlockInfo->EntrySet, PrevLockset, CurrBlockInfo->EntryLoc, + isa_and_nonnull<ContinueStmt>((*PI)->getTerminatorStmt()) + ? LEK_LockedSomeLoopIterations + : LEK_LockedSomePredecessors); } } @@ -2445,38 +2451,7 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { if (!CurrBlockInfo->Reachable) continue; - // Process continue and break blocks. Assume that the lockset for the - // resulting block is unaffected by any discrepancies in them. - for (const auto *PrevBlock : SpecialBlocks) { - unsigned PrevBlockID = PrevBlock->getBlockID(); - CFGBlockInfo *PrevBlockInfo = &BlockInfo[PrevBlockID]; - - if (!LocksetInitialized) { - CurrBlockInfo->EntrySet = PrevBlockInfo->ExitSet; - LocksetInitialized = true; - } else { - // Determine whether this edge is a loop terminator for diagnostic - // purposes. FIXME: A 'break' statement might be a loop terminator, but - // it might also be part of a switch. Also, a subsequent destructor - // might add to the lockset, in which case the real issue might be a - // double lock on the other path. - const Stmt *Terminator = PrevBlock->getTerminatorStmt(); - bool IsLoop = Terminator && isa<ContinueStmt>(Terminator); - - FactSet PrevLockset; - getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet, - PrevBlock, CurrBlock); - - // Do not update EntrySet. - intersectAndWarn(CurrBlockInfo->EntrySet, PrevLockset, - PrevBlockInfo->ExitLoc, - IsLoop ? LEK_LockedSomeLoopIterations - : LEK_LockedSomePredecessors, - false); - } - } - - BuildLockset LocksetBuilder(this, *CurrBlockInfo); + BuildLockset LocksetBuilder(this, *CurrBlockInfo, ExpectedFunctionExitSet); // Visit all the statements in the basic block. for (const auto &BI : *CurrBlock) { @@ -2486,19 +2461,42 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { LocksetBuilder.Visit(CS.getStmt()); break; } - // Ignore BaseDtor, MemberDtor, and TemporaryDtor for now. + // Ignore BaseDtor and MemberDtor for now. case CFGElement::AutomaticObjectDtor: { CFGAutomaticObjDtor AD = BI.castAs<CFGAutomaticObjDtor>(); const auto *DD = AD.getDestructorDecl(AC.getASTContext()); if (!DD->hasAttrs()) break; - // Create a dummy expression, - auto *VD = const_cast<VarDecl *>(AD.getVarDecl()); - DeclRefExpr DRE(VD->getASTContext(), VD, false, - VD->getType().getNonReferenceType(), VK_LValue, - AD.getTriggerStmt()->getEndLoc()); - LocksetBuilder.handleCall(&DRE, DD); + LocksetBuilder.handleCall(nullptr, DD, + SxBuilder.createVariable(AD.getVarDecl()), + AD.getTriggerStmt()->getEndLoc()); + break; + } + + case CFGElement::CleanupFunction: { + const CFGCleanupFunction &CF = BI.castAs<CFGCleanupFunction>(); + LocksetBuilder.handleCall(/*Exp=*/nullptr, CF.getFunctionDecl(), + SxBuilder.createVariable(CF.getVarDecl()), + CF.getVarDecl()->getLocation()); + break; + } + + case CFGElement::TemporaryDtor: { + auto TD = BI.castAs<CFGTemporaryDtor>(); + + // Clean up constructed object even if there are no attributes to + // keep the number of objects in limbo as small as possible. + if (auto Object = ConstructedObjects.find( + TD.getBindTemporaryExpr()->getSubExpr()); + Object != ConstructedObjects.end()) { + const auto *DD = TD.getDestructorDecl(AC.getASTContext()); + if (DD->hasAttrs()) + // TODO: the location here isn't quite correct. 
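The three end-of-lifetime CFG elements handled above correspond to source patterns like this sketch (MutexLock is the annotated scoped-lock type from the thread-safety docs; unlock_scope is a hypothetical cleanup function):

    void unlock_scope(Mutex **mu) RELEASE(**mu);              // hypothetical

    void f(Mutex *mu) {
      MutexLock guard(mu);                                    // AutomaticObjectDtor at '}'
      Mutex *held __attribute__((cleanup(unlock_scope))) = mu; // CleanupFunction
      MutexLock{mu};                                          // TemporaryDtor at the end of
    }                                                         // the full-expression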
+ LocksetBuilder.handleCall(nullptr, DD, Object->second, + TD.getBindTemporaryExpr()->getEndLoc()); + ConstructedObjects.erase(Object); + } break; } default: @@ -2520,42 +2518,18 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { CFGBlock *FirstLoopBlock = *SI; CFGBlockInfo *PreLoop = &BlockInfo[FirstLoopBlock->getBlockID()]; CFGBlockInfo *LoopEnd = &BlockInfo[CurrBlockID]; - intersectAndWarn(LoopEnd->ExitSet, PreLoop->EntrySet, - PreLoop->EntryLoc, - LEK_LockedSomeLoopIterations, - false); + intersectAndWarn(PreLoop->EntrySet, LoopEnd->ExitSet, PreLoop->EntryLoc, + LEK_LockedSomeLoopIterations); } } - CFGBlockInfo *Initial = &BlockInfo[CFGraph->getEntry().getBlockID()]; - CFGBlockInfo *Final = &BlockInfo[CFGraph->getExit().getBlockID()]; - // Skip the final check if the exit block is unreachable. - if (!Final->Reachable) + if (!Final.Reachable) return; - // By default, we expect all locks held on entry to be held on exit. - FactSet ExpectedExitSet = Initial->EntrySet; - - // Adjust the expected exit set by adding or removing locks, as declared - // by *-LOCK_FUNCTION and UNLOCK_FUNCTION. The intersect below will then - // issue the appropriate warning. - // FIXME: the location here is not quite right. - for (const auto &Lock : ExclusiveLocksAcquired) - ExpectedExitSet.addLock(FactMan, std::make_unique<LockableFactEntry>( - Lock, LK_Exclusive, D->getLocation())); - for (const auto &Lock : SharedLocksAcquired) - ExpectedExitSet.addLock(FactMan, std::make_unique<LockableFactEntry>( - Lock, LK_Shared, D->getLocation())); - for (const auto &Lock : LocksReleased) - ExpectedExitSet.removeLock(FactMan, Lock); - // FIXME: Should we call this function for all blocks which exit the function? - intersectAndWarn(ExpectedExitSet, Final->ExitSet, - Final->ExitLoc, - LEK_LockedAtEndOfFunction, - LEK_NotLockedAtEndOfFunction, - false); + intersectAndWarn(ExpectedFunctionExitSet, Final.ExitSet, Final.ExitLoc, + LEK_LockedAtEndOfFunction, LEK_NotLockedAtEndOfFunction); Handler.leaveFunction(CurrentFunction); } diff --git a/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp index 0c5d1857cc2b..3e8c959ccee4 100644 --- a/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -26,6 +26,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/Specifiers.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include <algorithm> @@ -40,7 +41,7 @@ using namespace threadSafety; std::string threadSafety::getSourceLiteralString(const Expr *CE) { switch (CE->getStmtClass()) { case Stmt::IntegerLiteralClass: - return cast<IntegerLiteral>(CE)->getValue().toString(10, true); + return toString(cast<IntegerLiteral>(CE)->getValue(), 10, true); case Stmt::StringLiteralClass: { std::string ret("\""); ret += cast<StringLiteral>(CE)->getString(); @@ -68,12 +69,7 @@ static bool isIncompletePhi(const til::SExpr *E) { using CallingContext = SExprBuilder::CallingContext; -til::SExpr *SExprBuilder::lookupStmt(const Stmt *S) { - auto It = SMap.find(S); - if (It != SMap.end()) - return It->second; - return nullptr; -} +til::SExpr *SExprBuilder::lookupStmt(const Stmt *S) { return SMap.lookup(S); } til::SCFG *SExprBuilder::buildCFG(CFGWalker &Walker) { Walker.walk(*this); @@ -85,6 +81,28 @@ static bool isCalleeArrow(const Expr *E) { return ME ? 
ME->isArrow() : false; } +static StringRef ClassifyDiagnostic(const CapabilityAttr *A) { + return A->getName(); +} + +static StringRef ClassifyDiagnostic(QualType VDT) { + // We need to look at the declaration of the type of the value to determine + // which it is. The type should either be a record or a typedef, or a pointer + // or reference thereof. + if (const auto *RT = VDT->getAs<RecordType>()) { + if (const auto *RD = RT->getDecl()) + if (const auto *CA = RD->getAttr<CapabilityAttr>()) + return ClassifyDiagnostic(CA); + } else if (const auto *TT = VDT->getAs<TypedefType>()) { + if (const auto *TD = TT->getDecl()) + if (const auto *CA = TD->getAttr<CapabilityAttr>()) + return ClassifyDiagnostic(CA); + } else if (VDT->isPointerType() || VDT->isReferenceType()) + return ClassifyDiagnostic(VDT->getPointeeType()); + + return "mutex"; +} + /// Translate a clang expression in an attribute to a til::SExpr. /// Constructs the context from D, DeclExp, and SelfDecl. /// @@ -92,19 +110,23 @@ static bool isCalleeArrow(const Expr *E) { /// \param D The declaration to which the attribute is attached. /// \param DeclExp An expression involving the Decl to which the attribute /// is attached. E.g. the call to a function. +/// \param Self S-expression to substitute for a \ref CXXThisExpr in a call, +/// or argument to a cleanup function. CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, const NamedDecl *D, const Expr *DeclExp, - VarDecl *SelfDecl) { + til::SExpr *Self) { // If we are processing a raw attribute expression, with no substitutions. - if (!DeclExp) + if (!DeclExp && !Self) return translateAttrExpr(AttrExp, nullptr); CallingContext Ctx(nullptr, D); // Examine DeclExp to find SelfArg and FunArgs, which are used to substitute // for formal parameters when we call buildMutexID later. - if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { + if (!DeclExp) + /* We'll use Self. */; + else if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { Ctx.SelfArg = ME->getBase(); Ctx.SelfArrow = ME->isArrow(); } else if (const auto *CE = dyn_cast<CXXMemberCallExpr>(DeclExp)) { @@ -119,29 +141,30 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, Ctx.SelfArg = nullptr; // Will be set below Ctx.NumArgs = CE->getNumArgs(); Ctx.FunArgs = CE->getArgs(); - } else if (D && isa<CXXDestructorDecl>(D)) { - // There's no such thing as a "destructor call" in the AST. - Ctx.SelfArg = DeclExp; } - // Hack to handle constructors, where self cannot be recovered from - // the expression. - if (SelfDecl && !Ctx.SelfArg) { - DeclRefExpr SelfDRE(SelfDecl->getASTContext(), SelfDecl, false, - SelfDecl->getType(), VK_LValue, - SelfDecl->getLocation()); - Ctx.SelfArg = &SelfDRE; + if (Self) { + assert(!Ctx.SelfArg && "Ambiguous self argument"); + assert(isa<FunctionDecl>(D) && "Self argument requires function"); + if (isa<CXXMethodDecl>(D)) + Ctx.SelfArg = Self; + else + Ctx.FunArgs = Self; // If the attribute has no arguments, then assume the argument is "this". if (!AttrExp) - return translateAttrExpr(Ctx.SelfArg, nullptr); + return CapabilityExpr( + Self, + ClassifyDiagnostic( + cast<CXXMethodDecl>(D)->getFunctionObjectParameterType()), + false); else // For most attributes. return translateAttrExpr(AttrExp, &Ctx); } // If the attribute has no arguments, then assume the argument is "this". if (!AttrExp) - return translateAttrExpr(Ctx.SelfArg, nullptr); + return translateAttrExpr(cast<const Expr *>(Ctx.SelfArg), nullptr); else // For most attributes. 
return translateAttrExpr(AttrExp, &Ctx); } @@ -151,16 +174,17 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, CallingContext *Ctx) { if (!AttrExp) - return CapabilityExpr(nullptr, false); + return CapabilityExpr(); if (const auto* SLit = dyn_cast<StringLiteral>(AttrExp)) { - if (SLit->getString() == StringRef("*")) + if (SLit->getString() == "*") // The "*" expr is a universal lock, which essentially turns off // checks until it is removed from the lockset. - return CapabilityExpr(new (Arena) til::Wildcard(), false); + return CapabilityExpr(new (Arena) til::Wildcard(), StringRef("wildcard"), + false); else // Ignore other string literals for now. - return CapabilityExpr(nullptr, false); + return CapabilityExpr(); } bool Neg = false; @@ -173,7 +197,7 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, else if (const auto *UO = dyn_cast<UnaryOperator>(AttrExp)) { if (UO->getOpcode() == UO_LNot) { Neg = true; - AttrExp = UO->getSubExpr(); + AttrExp = UO->getSubExpr()->IgnoreImplicit(); } } @@ -182,14 +206,26 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, // Trap mutex expressions like nullptr, or 0. // Any literal value is nonsense. if (!E || isa<til::Literal>(E)) - return CapabilityExpr(nullptr, false); + return CapabilityExpr(); + + StringRef Kind = ClassifyDiagnostic(AttrExp->getType()); // Hack to deal with smart pointers -- strip off top-level pointer casts. if (const auto *CE = dyn_cast<til::Cast>(E)) { if (CE->castOpcode() == til::CAST_objToPtr) - return CapabilityExpr(CE->expr(), Neg); + return CapabilityExpr(CE->expr(), Kind, Neg); } - return CapabilityExpr(E, Neg); + return CapabilityExpr(E, Kind, Neg); +} + +til::LiteralPtr *SExprBuilder::createVariable(const VarDecl *VD) { + return new (Arena) til::LiteralPtr(VD); +} + +std::pair<til::LiteralPtr *, StringRef> +SExprBuilder::createThisPlaceholder(const Expr *Exp) { + return {new (Arena) til::LiteralPtr(nullptr), + ClassifyDiagnostic(Exp->getType())}; } // Translate a clang statement or expression to a TIL expression. @@ -283,8 +319,14 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, ? 
(cast<FunctionDecl>(D)->getCanonicalDecl() == Canonical) : (cast<ObjCMethodDecl>(D)->getCanonicalDecl() == Canonical)) { // Substitute call arguments for references to function parameters - assert(I < Ctx->NumArgs); - return translate(Ctx->FunArgs[I], Ctx->Prev); + if (const Expr *const *FunArgs = + Ctx->FunArgs.dyn_cast<const Expr *const *>()) { + assert(I < Ctx->NumArgs); + return translate(FunArgs[I], Ctx->Prev); + } + + assert(I == 0); + return Ctx->FunArgs.get<til::SExpr *>(); } } // Map the param back to the param of the original function declaration @@ -301,8 +343,12 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, til::SExpr *SExprBuilder::translateCXXThisExpr(const CXXThisExpr *TE, CallingContext *Ctx) { // Substitute for 'this' - if (Ctx && Ctx->SelfArg) - return translate(Ctx->SelfArg, Ctx->Prev); + if (Ctx && Ctx->SelfArg) { + if (const auto *SelfArg = dyn_cast<const Expr *>(Ctx->SelfArg)) + return translate(SelfArg, Ctx->Prev); + else + return cast<til::SExpr *>(Ctx->SelfArg); + } assert(SelfVar && "We have no variable for 'this'!"); return SelfVar; } @@ -611,7 +657,7 @@ SExprBuilder::translateAbstractConditionalOperator( til::SExpr * SExprBuilder::translateDeclStmt(const DeclStmt *S, CallingContext *Ctx) { DeclGroupRef DGrp = S->getDeclGroup(); - for (auto I : DGrp) { + for (auto *I : DGrp) { if (auto *VD = dyn_cast_or_null<VarDecl>(I)) { Expr *E = VD->getInit(); til::SExpr* SE = translate(E, Ctx); @@ -949,7 +995,7 @@ void SExprBuilder::exitCFG(const CFGBlock *Last) { IncompleteArgs.clear(); } -/* +#ifndef NDEBUG namespace { class TILPrinter : @@ -970,4 +1016,4 @@ void printSCFG(CFGWalker &Walker) { } // namespace threadSafety } // namespace clang -*/ +#endif // NDEBUG diff --git a/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp b/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp index 67cd39728c35..bf2f73061865 100644 --- a/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp @@ -28,25 +28,43 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/PackedVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include <algorithm> #include <cassert> +#include <optional> using namespace clang; #define DEBUG_LOGGING 0 +static bool recordIsNotEmpty(const RecordDecl *RD) { + // We consider a record decl to be empty if it contains only unnamed bit- + // fields, zero-width fields, and fields of empty record type. + for (const auto *FD : RD->fields()) { + if (FD->isUnnamedBitField()) + continue; + if (FD->isZeroSize(FD->getASTContext())) + continue; + // The only case remaining to check is for a field declaration of record + // type and whether that record itself is empty. 
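For instance, in this sketch only Tracked gets uninitialized-use tracking; the other two records contain nothing that could hold an uninitialized value:

    struct Empty {};                          // no fields at all
    struct AlsoEmpty { int : 0; Empty e; };   // only an unnamed bit-field and an empty record member
    struct Tracked { int x; };                // real storage: recordIsNotEmpty() is true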
+ if (const auto *FieldRD = FD->getType()->getAsRecordDecl(); + !FieldRD || recordIsNotEmpty(FieldRD)) + return true; + } + return false; +} + static bool isTrackedVar(const VarDecl *vd, const DeclContext *dc) { if (vd->isLocalVarDecl() && !vd->hasGlobalStorage() && - !vd->isExceptionVariable() && !vd->isInitCapture() && - !vd->isImplicit() && vd->getDeclContext() == dc) { + !vd->isExceptionVariable() && !vd->isInitCapture() && !vd->isImplicit() && + vd->getDeclContext() == dc) { QualType ty = vd->getType(); - return ty->isScalarType() || ty->isVectorType() || ty->isRecordType(); + if (const auto *RD = ty->getAsRecordDecl()) + return recordIsNotEmpty(RD); + return ty->isScalarType() || ty->isVectorType() || ty->isRVVSizelessBuiltinType(); } return false; } @@ -70,7 +88,7 @@ public: unsigned size() const { return map.size(); } /// Returns the bit vector index for a given declaration. - Optional<unsigned> getValueIndex(const VarDecl *d) const; + std::optional<unsigned> getValueIndex(const VarDecl *d) const; }; } // namespace @@ -86,10 +104,10 @@ void DeclToIndex::computeMap(const DeclContext &dc) { } } -Optional<unsigned> DeclToIndex::getValueIndex(const VarDecl *d) const { +std::optional<unsigned> DeclToIndex::getValueIndex(const VarDecl *d) const { llvm::DenseMap<const VarDecl *, unsigned>::const_iterator I = map.find(d); if (I == map.end()) - return None; + return std::nullopt; return I->second; } @@ -147,9 +165,8 @@ public: Value getValue(const CFGBlock *block, const CFGBlock *dstBlock, const VarDecl *vd) { - const Optional<unsigned> &idx = declToIndex.getValueIndex(vd); - assert(idx.hasValue()); - return getValueVector(block)[idx.getValue()]; + std::optional<unsigned> idx = declToIndex.getValueIndex(vd); + return getValueVector(block)[*idx]; } }; @@ -208,9 +225,7 @@ void CFGBlockValues::resetScratch() { } ValueVector::reference CFGBlockValues::operator[](const VarDecl *vd) { - const Optional<unsigned> &idx = declToIndex.getValueIndex(vd); - assert(idx.hasValue()); - return scratch[idx.getValue()]; + return scratch[*declToIndex.getValueIndex(vd)]; } //------------------------------------------------------------------------====// @@ -589,28 +604,6 @@ public: continue; } - if (AtPredExit == MayUninitialized) { - // If the predecessor's terminator is an "asm goto" that initializes - // the variable, then it won't be counted as "initialized" on the - // non-fallthrough paths. - CFGTerminator term = Pred->getTerminator(); - if (const auto *as = dyn_cast_or_null<GCCAsmStmt>(term.getStmt())) { - const CFGBlock *fallthrough = *Pred->succ_begin(); - if (as->isAsmGoto() && - llvm::any_of(as->outputs(), [&](const Expr *output) { - return vd == findVar(output).getDecl() && - llvm::any_of(as->labels(), - [&](const AddrLabelExpr *label) { - return label->getLabel()->getStmt() == B->Label && - B != fallthrough; - }); - })) { - Use.setUninitAfterDecl(); - continue; - } - } - } - unsigned &SV = SuccsVisited[Pred->getBlockID()]; if (!SV) { // When visiting the first successor of a block, mark all NULL @@ -810,13 +803,22 @@ void TransferFunctions::VisitGCCAsmStmt(GCCAsmStmt *as) { if (!as->isAsmGoto()) return; - for (const Expr *o : as->outputs()) - if (const VarDecl *VD = findVar(o).getDecl()) + ASTContext &C = ac.getASTContext(); + for (const Expr *O : as->outputs()) { + const Expr *Ex = stripCasts(C, O); + + // Strip away any unary operators. Invalid l-values are reported by other + // semantic analysis passes. 
+ while (const auto *UO = dyn_cast<UnaryOperator>(Ex)) + Ex = stripCasts(C, UO->getSubExpr()); + + // Mark the variable as potentially uninitialized for those cases where + // it's used on an indirect path, where it's not guaranteed to be + // defined. + if (const VarDecl *VD = findVar(Ex).getDecl()) if (vals[VD] != Initialized) - // If the variable isn't initialized by the time we get here, then we - // mark it as potentially uninitialized for those cases where it's used - // on an indirect path, where it's not guaranteed to be defined. vals[VD] = MayUninitialized; + } } void TransferFunctions::VisitObjCMessageExpr(ObjCMessageExpr *ME) { @@ -853,7 +855,7 @@ static bool runOnBlock(const CFGBlock *block, const CFG &cfg, // Apply the transfer function. TransferFunctions tf(vals, cfg, block, ac, classification, handler); for (const auto &I : *block) { - if (Optional<CFGStmt> cs = I.getAs<CFGStmt>()) + if (std::optional<CFGStmt> cs = I.getAs<CFGStmt>()) tf.Visit(const_cast<Stmt *>(cs->getStmt())); } CFGTerminator terminator = block->getTerminator(); @@ -894,7 +896,7 @@ struct PruneBlocksHandler : public UninitVariablesHandler { hadUse[currentBlock] = true; hadAnyUse = true; } - + /// Called when the uninitialized variable analysis detects the /// idiom 'int x = x'. All other uses of 'x' within the initializer /// are handled by handleUseOfUninitVariable. diff --git a/contrib/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp b/contrib/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp new file mode 100644 index 000000000000..866222380974 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -0,0 +1,3330 @@ +//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/UnsafeBufferUsage.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include <memory> +#include <optional> +#include <queue> +#include <sstream> + +using namespace llvm; +using namespace clang; +using namespace ast_matchers; + +#ifndef NDEBUG +namespace { +class StmtDebugPrinter + : public ConstStmtVisitor<StmtDebugPrinter, std::string> { +public: + std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); } + + std::string VisitBinaryOperator(const BinaryOperator *BO) { + return "BinaryOperator(" + BO->getOpcodeStr().str() + ")"; + } + + std::string VisitUnaryOperator(const UnaryOperator *UO) { + return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")"; + } + + std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")"; + } +}; + +// Returns a string of ancestor `Stmt`s of the given `DRE` in such a form: +// "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...". +static std::string getDREAncestorString(const DeclRefExpr *DRE, + ASTContext &Ctx) { + std::stringstream SS; + const Stmt *St = DRE; + StmtDebugPrinter StmtPriner; + + do { + SS << StmtPriner.Visit(St); + + DynTypedNodeList StParents = Ctx.getParents(*St); + + if (StParents.size() > 1) + return "unavailable due to multiple parents"; + if (StParents.size() == 0) + break; + St = StParents.begin()->get<Stmt>(); + if (St) + SS << " ==> "; + } while (St); + return SS.str(); +} +} // namespace +#endif /* NDEBUG */ + +namespace clang::ast_matchers { +// A `RecursiveASTVisitor` that traverses all descendants of a given node "n" +// except for those belonging to a different callable of "n". +class MatchDescendantVisitor + : public RecursiveASTVisitor<MatchDescendantVisitor> { +public: + typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase; + + // Creates an AST visitor that matches `Matcher` on all + // descendants of a given node "n" except for the ones + // belonging to a different callable of "n". + MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, + internal::ASTMatchFinder *Finder, + internal::BoundNodesTreeBuilder *Builder, + internal::ASTMatchFinder::BindKind Bind, + const bool ignoreUnevaluatedContext) + : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), + Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {} + + // Returns true if a match is found in a subtree of `DynNode`, which belongs + // to the same callable of `DynNode`. + bool findMatch(const DynTypedNode &DynNode) { + Matches = false; + if (const Stmt *StmtNode = DynNode.get<Stmt>()) { + TraverseStmt(const_cast<Stmt *>(StmtNode)); + *Builder = ResultBindings; + return Matches; + } + return false; + } + + // The following are overriding methods from the base visitor class. + // They are public only to allow CRTP to work. 
They are *not *part + // of the public API of this class. + + // For the matchers so far used in safe buffers, we only need to match + // `Stmt`s. To override more as needed. + + bool TraverseDecl(Decl *Node) { + if (!Node) + return true; + if (!match(*Node)) + return false; + // To skip callables: + if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node)) + return true; + // Traverse descendants + return VisitorBase::TraverseDecl(Node); + } + + bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) { + // These are unevaluated, except the result expression. + if (ignoreUnevaluatedContext) + return TraverseStmt(Node->getResultExpr()); + return VisitorBase::TraverseGenericSelectionExpr(Node); + } + + bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) { + // Unevaluated context. + if (ignoreUnevaluatedContext) + return true; + return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node); + } + + bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) { + // Unevaluated context. + if (ignoreUnevaluatedContext) + return true; + return VisitorBase::TraverseTypeOfExprTypeLoc(Node); + } + + bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) { + // Unevaluated context. + if (ignoreUnevaluatedContext) + return true; + return VisitorBase::TraverseDecltypeTypeLoc(Node); + } + + bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) { + // Unevaluated context. + if (ignoreUnevaluatedContext) + return true; + return VisitorBase::TraverseCXXNoexceptExpr(Node); + } + + bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) { + // Unevaluated context. + if (ignoreUnevaluatedContext) + return true; + return VisitorBase::TraverseCXXTypeidExpr(Node); + } + + bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { + if (!Node) + return true; + if (!match(*Node)) + return false; + return VisitorBase::TraverseStmt(Node); + } + + bool shouldVisitTemplateInstantiations() const { return true; } + bool shouldVisitImplicitCode() const { + // TODO: let's ignore implicit code for now + return false; + } + +private: + // Sets 'Matched' to true if 'Matcher' matches 'Node' + // + // Returns 'true' if traversal should continue after this function + // returns, i.e. if no match is found or 'Bind' is 'BK_All'. + template <typename T> bool match(const T &Node) { + internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); + + if (Matcher->matches(DynTypedNode::create(Node), Finder, + &RecursiveBuilder)) { + ResultBindings.addMatch(RecursiveBuilder); + Matches = true; + if (Bind != internal::ASTMatchFinder::BK_All) + return false; // Abort as soon as a match is found. + } + return true; + } + + const internal::DynTypedMatcher *const Matcher; + internal::ASTMatchFinder *const Finder; + internal::BoundNodesTreeBuilder *const Builder; + internal::BoundNodesTreeBuilder ResultBindings; + const internal::ASTMatchFinder::BindKind Bind; + bool Matches; + bool ignoreUnevaluatedContext; +}; + +// Because we're dealing with raw pointers, let's define what we mean by that. 
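When ignoreUnevaluatedContext is set, the skipped traversals above make the matcher ignore pointer uses that can never execute; only genuinely evaluated operations, built from the pointer/array helpers defined next, are reported. A sketch:

    int *p = nullptr;
    auto n  = sizeof(p[42]);      // UnaryExprOrTypeTraitExpr: skipped
    using T = decltype(p[42]);    // DecltypeTypeLoc: skipped
    bool ok = noexcept(p[42]);    // CXXNoexceptExpr: skipped
    int v   = p[42];              // evaluated subscript: matched and reported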
+static auto hasPointerType() { + return hasType(hasCanonicalType(pointerType())); +} + +static auto hasArrayType() { return hasType(hasCanonicalType(arrayType())); } + +AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, + innerMatcher) { + const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); + + MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, + true); + return Visitor.findMatch(DynTypedNode::create(Node)); +} + +AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, + innerMatcher) { + const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); + + MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, + false); + return Visitor.findMatch(DynTypedNode::create(Node)); +} + +// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region +AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, + Handler) { + return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); +} + +AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, + const UnsafeBufferUsageHandler *, Handler) { + return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc()); +} + +AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { + return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); +} + +// Matches a `UnaryOperator` whose operator is pre-increment: +AST_MATCHER(UnaryOperator, isPreInc) { + return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc; +} + +// Returns a matcher that matches any expression 'e' such that `innerMatcher` +// matches 'e' and 'e' is in an Unspecified Lvalue Context. +static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) { + // clang-format off + return + expr(anyOf( + implicitCastExpr( + hasCastKind(CastKind::CK_LValueToRValue), + castSubExpr(innerMatcher)), + binaryOperator( + hasAnyOperatorName("="), + hasLHS(innerMatcher) + ) + )); + // clang-format on +} + +// Returns a matcher that matches any expression `e` such that `InnerMatcher` +// matches `e` and `e` is in an Unspecified Pointer Context (UPC). +static internal::Matcher<Stmt> +isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) { + // A UPC can be + // 1. an argument of a function call (except the callee has [[unsafe_...]] + // attribute), or + // 2. the operand of a pointer-to-(integer or bool) cast operation; or + // 3. the operand of a comparator operation; or + // 4. the operand of a pointer subtraction operation + // (i.e., computing the distance between two pointers); or ... + + // clang-format off + auto CallArgMatcher = callExpr( + forEachArgumentWithParamType( + InnerMatcher, + isAnyPointer() /* array also decays to pointer type*/), + unless(callee( + functionDecl(hasAttr(attr::UnsafeBufferUsage))))); + + auto CastOperandMatcher = + castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), + hasCastKind(CastKind::CK_PointerToBoolean)), + castSubExpr(allOf(hasPointerType(), InnerMatcher))); + + auto CompOperandMatcher = + binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="), + eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)), + hasRHS(allOf(hasPointerType(), InnerMatcher)))); + + // A matcher that matches pointer subtractions: + auto PtrSubtractionMatcher = + binaryOperator(hasOperatorName("-"), + // Note that here we need both LHS and RHS to be + // pointer. 
Then the inner matcher can match any of + // them: + allOf(hasLHS(hasPointerType()), + hasRHS(hasPointerType())), + eachOf(hasLHS(InnerMatcher), + hasRHS(InnerMatcher))); + // clang-format on + + return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, + PtrSubtractionMatcher)); + // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we + // don't have to check that.) +} + +// Returns a matcher that matches any expression 'e' such that `innerMatcher` +// matches 'e' and 'e' is in an unspecified untyped context (i.e the expression +// 'e' isn't evaluated to an RValue). For example, consider the following code: +// int *p = new int[4]; +// int *q = new int[4]; +// if ((p = q)) {} +// p = q; +// The expression `p = q` in the conditional of the `if` statement +// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;` +// in the assignment statement is in an untyped context. +static internal::Matcher<Stmt> +isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) { + // An unspecified context can be + // 1. A compound statement, + // 2. The body of an if statement + // 3. Body of a loop + auto CompStmt = compoundStmt(forEach(InnerMatcher)); + auto IfStmtThen = ifStmt(hasThen(InnerMatcher)); + auto IfStmtElse = ifStmt(hasElse(InnerMatcher)); + // FIXME: Handle loop bodies. + return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse)); +} + +// Given a two-param std::span construct call, matches iff the call has the +// following forms: +// 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE +// 2. `std::span<T>{new T, 1}` +// 3. `std::span<T>{&var, 1}` +// 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size +// `n` +// 5. `std::span<T>{any, 0}` +AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) { + assert(Node.getNumArgs() == 2 && + "expecting a two-parameter std::span constructor"); + const Expr *Arg0 = Node.getArg(0)->IgnoreImplicit(); + const Expr *Arg1 = Node.getArg(1)->IgnoreImplicit(); + auto HaveEqualConstantValues = [&Finder](const Expr *E0, const Expr *E1) { + if (auto E0CV = E0->getIntegerConstantExpr(Finder->getASTContext())) + if (auto E1CV = E1->getIntegerConstantExpr(Finder->getASTContext())) { + return APSInt::compareValues(*E0CV, *E1CV) == 0; + } + return false; + }; + auto AreSameDRE = [](const Expr *E0, const Expr *E1) { + if (auto *DRE0 = dyn_cast<DeclRefExpr>(E0)) + if (auto *DRE1 = dyn_cast<DeclRefExpr>(E1)) { + return DRE0->getDecl() == DRE1->getDecl(); + } + return false; + }; + std::optional<APSInt> Arg1CV = + Arg1->getIntegerConstantExpr(Finder->getASTContext()); + + if (Arg1CV && Arg1CV->isZero()) + // Check form 5: + return true; + switch (Arg0->IgnoreImplicit()->getStmtClass()) { + case Stmt::CXXNewExprClass: + if (auto Size = cast<CXXNewExpr>(Arg0)->getArraySize()) { + // Check form 1: + return AreSameDRE((*Size)->IgnoreImplicit(), Arg1) || + HaveEqualConstantValues(*Size, Arg1); + } + // TODO: what's placeholder type? avoid it for now. 
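Concretely, the five accepted forms enumerated above, in a sketch (n, v and p are hypothetical locals):

    std::span<int>{new int[n], n};   // form 1: size expressions provably equal
    std::span<int>{new int, 1};      // form 2: single object, extent 1
    std::span<int>{&v, 1};           // form 3: address of a variable, extent 1
    int a[4];
    std::span<int>{a, 4};            // form 4: extent equals the constant array bound
    std::span<int>{p, 0};            // form 5: an empty span is always safe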
+ if (!cast<CXXNewExpr>(Arg0)->hasPlaceholderType()) { + // Check form 2: + return Arg1CV && Arg1CV->isOne(); + } + break; + case Stmt::UnaryOperatorClass: + if (cast<UnaryOperator>(Arg0)->getOpcode() == + UnaryOperator::Opcode::UO_AddrOf) + // Check form 3: + return Arg1CV && Arg1CV->isOne(); + break; + default: + break; + } + + QualType Arg0Ty = Arg0->IgnoreImplicit()->getType(); + + if (Arg0Ty->isConstantArrayType()) { + const APSInt ConstArrSize = + APSInt(cast<ConstantArrayType>(Arg0Ty)->getSize()); + + // Check form 4: + return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0; + } + return false; +} + +AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { + // FIXME: Proper solution: + // - refactor Sema::CheckArrayAccess + // - split safe/OOB/unknown decision logic from diagnostics emitting code + // - e. g. "Try harder to find a NamedDecl to point at in the note." + // already duplicated + // - call both from Sema and from here + + const auto *BaseDRE = + dyn_cast<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts()); + if (!BaseDRE) + return false; + if (!BaseDRE->getDecl()) + return false; + const auto *CATy = Finder->getASTContext().getAsConstantArrayType( + BaseDRE->getDecl()->getType()); + if (!CATy) + return false; + + if (const auto *IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) { + const APInt ArrIdx = IdxLit->getValue(); + // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a + // bug + if (ArrIdx.isNonNegative() && + ArrIdx.getLimitedValue() < CATy->getLimitedSize()) + return true; + } + + return false; +} + +} // namespace clang::ast_matchers + +namespace { +// Because the analysis revolves around variables and their types, we'll need to +// track uses of variables (aka DeclRefExprs). +using DeclUseList = SmallVector<const DeclRefExpr *, 1>; + +// Convenience typedef. +using FixItList = SmallVector<FixItHint, 4>; +} // namespace + +namespace { +/// Gadget is an individual operation in the code that may be of interest to +/// this analysis. Each (non-abstract) subclass corresponds to a specific +/// rigid AST structure that constitutes an operation on a pointer-type object. +/// Discovery of a gadget in the code corresponds to claiming that we understand +/// what this part of code is doing well enough to potentially improve it. +/// Gadgets can be warning (immediately deserving a warning) or fixable (not +/// always deserving a warning per se, but requires our attention to identify +/// it warrants a fixit). +class Gadget { +public: + enum class Kind { +#define GADGET(x) x, +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + }; + + /// Common type of ASTMatchers used for discovering gadgets. + /// Useful for implementing the static matcher() methods + /// that are expected from all non-abstract subclasses. + using Matcher = decltype(stmt()); + + Gadget(Kind K) : K(K) {} + + Kind getKind() const { return K; } + +#ifndef NDEBUG + StringRef getDebugName() const { + switch (K) { +#define GADGET(x) \ + case Kind::x: \ + return #x; +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + } + llvm_unreachable("Unhandled Gadget::Kind enum"); + } +#endif + + virtual bool isWarningGadget() const = 0; + // TODO remove this method from WarningGadget interface. It's only used for + // debug prints in FixableGadget. + virtual SourceLocation getSourceLoc() const = 0; + + /// Returns the list of pointer-type variables on which this gadget performs + /// its operation. Typically, there's only one variable. 
This isn't a list + /// of all DeclRefExprs in the gadget's AST! + virtual DeclUseList getClaimedVarUseSites() const = 0; + + virtual ~Gadget() = default; + +private: + Kind K; +}; + +/// Warning gadgets correspond to unsafe code patterns that warrants +/// an immediate warning. +class WarningGadget : public Gadget { +public: + WarningGadget(Kind K) : Gadget(K) {} + + static bool classof(const Gadget *G) { return G->isWarningGadget(); } + bool isWarningGadget() const final { return true; } + + virtual void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const = 0; +}; + +/// Fixable gadgets correspond to code patterns that aren't always unsafe but +/// need to be properly recognized in order to emit fixes. For example, if a raw +/// pointer-type variable is replaced by a safe C++ container, every use of such +/// variable must be carefully considered and possibly updated. +class FixableGadget : public Gadget { +public: + FixableGadget(Kind K) : Gadget(K) {} + + static bool classof(const Gadget *G) { return !G->isWarningGadget(); } + bool isWarningGadget() const final { return false; } + + /// Returns a fixit that would fix the current gadget according to + /// the current strategy. Returns std::nullopt if the fix cannot be produced; + /// returns an empty list if no fixes are necessary. + virtual std::optional<FixItList> getFixits(const FixitStrategy &) const { + return std::nullopt; + } + + /// Returns a list of two elements where the first element is the LHS of a + /// pointer assignment statement and the second element is the RHS. This + /// two-element list represents the fact that the LHS buffer gets its bounds + /// information from the RHS buffer. This information will be used later to + /// group all those variables whose types must be modified together to prevent + /// type mismatches. + virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> + getStrategyImplications() const { + return std::nullopt; + } +}; + +static auto toSupportedVariable() { return to(varDecl()); } + +using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; +using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; + +/// An increment of a pointer-type value is unsafe as it may run the pointer +/// out of bounds. +class IncrementGadget : public WarningGadget { + static constexpr const char *const OpTag = "op"; + const UnaryOperator *Op; + +public: + IncrementGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::Increment), + Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::Increment; + } + + static Matcher matcher() { + return stmt( + unaryOperator(hasOperatorName("++"), + hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))) + .bind(OpTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { + SmallVector<const DeclRefExpr *, 2> Uses; + if (const auto *DRE = + dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { + Uses.push_back(DRE); + } + + return std::move(Uses); + } +}; + +/// A decrement of a pointer-type value is unsafe as it may run the pointer +/// out of bounds. 
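Both the increment gadget above and the decrement gadget defined next fire on plain pointer stepping, as in this sketch:

    int a[8];
    int *p = a;
    ++p;        // IncrementGadget: may step past the end of a
    --p;        // DecrementGadget: may step past the front of a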
+class DecrementGadget : public WarningGadget { + static constexpr const char *const OpTag = "op"; + const UnaryOperator *Op; + +public: + DecrementGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::Decrement), + Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::Decrement; + } + + static Matcher matcher() { + return stmt( + unaryOperator(hasOperatorName("--"), + hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))) + .bind(OpTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } +}; + +/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as +/// it doesn't have any bounds checks for the array. +class ArraySubscriptGadget : public WarningGadget { + static constexpr const char *const ArraySubscrTag = "ArraySubscript"; + const ArraySubscriptExpr *ASE; + +public: + ArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::ArraySubscript), + ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::ArraySubscript; + } + + static Matcher matcher() { + // clang-format off + return stmt(arraySubscriptExpr( + hasBase(ignoringParenImpCasts( + anyOf(hasPointerType(), hasArrayType()))), + unless(anyOf( + isSafeArraySubscript(), + hasIndex( + anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) + ) + ))).bind(ArraySubscrTag)); + // clang-format on + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(ASE, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return ASE->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } +}; + +/// A pointer arithmetic expression of one of the forms: +/// \code +/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n +/// \endcode +class PointerArithmeticGadget : public WarningGadget { + static constexpr const char *const PointerArithmeticTag = "ptrAdd"; + static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; + const BinaryOperator *PA; // pointer arithmetic expression + const Expr *Ptr; // the pointer expression in `PA` + +public: + PointerArithmeticGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::PointerArithmetic), + PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), + Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PointerArithmetic; + } + + static Matcher matcher() { + auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType())); + auto PtrAtRight = + allOf(hasOperatorName("+"), + hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), + hasLHS(HasIntegerType)); + auto PtrAtLeft = + allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"), + hasOperatorName("+="), hasOperatorName("-=")), + 
hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), + hasRHS(HasIntegerType)); + + return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)) + .bind(PointerArithmeticTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(PA, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return PA->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } + // FIXME: pointer adding zero should be fine + // FIXME: this gadge will need a fix-it +}; + +class SpanTwoParamConstructorGadget : public WarningGadget { + static constexpr const char *const SpanTwoParamConstructorTag = + "spanTwoParamConstructor"; + const CXXConstructExpr *Ctor; // the span constructor expression + +public: + SpanTwoParamConstructorGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::SpanTwoParamConstructor), + Ctor(Result.Nodes.getNodeAs<CXXConstructExpr>( + SpanTwoParamConstructorTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::SpanTwoParamConstructor; + } + + static Matcher matcher() { + auto HasTwoParamSpanCtorDecl = hasDeclaration( + cxxConstructorDecl(hasDeclContext(isInStdNamespace()), hasName("span"), + parameterCountIs(2))); + + return stmt(cxxConstructExpr(HasTwoParamSpanCtorDecl, + unless(isSafeSpanTwoParamConstruct())) + .bind(SpanTwoParamConstructorTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperationInContainer(Ctor, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { + // If the constructor call is of the form `std::span{var, n}`, `var` is + // considered an unsafe variable. 
+ if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) { + if (isa<VarDecl>(DRE->getDecl())) + return {DRE}; + } + return {}; + } +}; + +/// A pointer initialization expression of the form: +/// \code +/// int *p = q; +/// \endcode +class PointerInitGadget : public FixableGadget { +private: + static constexpr const char *const PointerInitLHSTag = "ptrInitLHS"; + static constexpr const char *const PointerInitRHSTag = "ptrInitRHS"; + const VarDecl *PtrInitLHS; // the LHS pointer expression in `PI` + const DeclRefExpr *PtrInitRHS; // the RHS pointer expression in `PI` + +public: + PointerInitGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::PointerInit), + PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)), + PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PointerInit; + } + + static Matcher matcher() { + auto PtrInitStmt = declStmt(hasSingleDecl( + varDecl(hasInitializer(ignoringImpCasts( + declRefExpr(hasPointerType(), toSupportedVariable()) + .bind(PointerInitRHSTag)))) + .bind(PointerInitLHSTag))); + + return stmt(PtrInitStmt); + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { + return PtrInitRHS->getBeginLoc(); + } + + virtual DeclUseList getClaimedVarUseSites() const override { + return DeclUseList{PtrInitRHS}; + } + + virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> + getStrategyImplications() const override { + return std::make_pair(PtrInitLHS, cast<VarDecl>(PtrInitRHS->getDecl())); + } +}; + +/// A pointer assignment expression of the form: +/// \code +/// p = q; +/// \endcode +/// where both `p` and `q` are pointers. +class PtrToPtrAssignmentGadget : public FixableGadget { +private: + static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; + static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; + const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA` + const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA` + +public: + PtrToPtrAssignmentGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::PtrToPtrAssignment), + PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), + PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PtrToPtrAssignment; + } + + static Matcher matcher() { + auto PtrAssignExpr = binaryOperator( + allOf(hasOperatorName("="), + hasRHS(ignoringParenImpCasts( + declRefExpr(hasPointerType(), toSupportedVariable()) + .bind(PointerAssignRHSTag))), + hasLHS(declRefExpr(hasPointerType(), toSupportedVariable()) + .bind(PointerAssignLHSTag)))); + + return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); } + + virtual DeclUseList getClaimedVarUseSites() const override { + return DeclUseList{PtrLHS, PtrRHS}; + } + + virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> + getStrategyImplications() const override { + return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()), + cast<VarDecl>(PtrRHS->getDecl())); + } +}; + +/// An assignment expression of the form: +/// \code +/// ptr = array; +/// \endcode +/// where `p` is a pointer and `array` is a constant size array. 
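The strategy implications returned by the two assignment gadgets above tie together variables whose types must be changed as a group, e.g. in this sketch:

    int *q = new int[4];
    int *p = q;    // PointerInitGadget: implication (p, q)
    p = q;         // PtrToPtrAssignmentGadget: implication (p, q)
    // p takes its bounds from q, so if p is migrated to std::span<int>,
    // q has to be migrated in the same fix to avoid type mismatches.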
+class CArrayToPtrAssignmentGadget : public FixableGadget { +private: + static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; + static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; + const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA` + const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA` + +public: + CArrayToPtrAssignmentGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::CArrayToPtrAssignment), + PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), + PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::CArrayToPtrAssignment; + } + + static Matcher matcher() { + auto PtrAssignExpr = binaryOperator( + allOf(hasOperatorName("="), + hasRHS(ignoringParenImpCasts( + declRefExpr(hasType(hasCanonicalType(constantArrayType())), + toSupportedVariable()) + .bind(PointerAssignRHSTag))), + hasLHS(declRefExpr(hasPointerType(), toSupportedVariable()) + .bind(PointerAssignLHSTag)))); + + return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); } + + virtual DeclUseList getClaimedVarUseSites() const override { + return DeclUseList{PtrLHS, PtrRHS}; + } + + virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> + getStrategyImplications() const override { + return {}; + } +}; + +/// A call of a function or method that performs unchecked buffer operations +/// over one of its pointer parameters. +class UnsafeBufferUsageAttrGadget : public WarningGadget { + constexpr static const char *const OpTag = "call_expr"; + const CallExpr *Op; + +public: + UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeBufferUsageAttr), + Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UnsafeBufferUsageAttr; + } + + static Matcher matcher() { + auto HasUnsafeFnDecl = + callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))); + return stmt(callExpr(HasUnsafeFnDecl).bind(OpTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + +/// A call of a constructor that performs unchecked buffer operations +/// over one of its pointer parameters, or constructs a class object that will +/// perform buffer operations that depend on the correctness of the parameters. +class UnsafeBufferUsageCtorAttrGadget : public WarningGadget { + constexpr static const char *const OpTag = "cxx_construct_expr"; + const CXXConstructExpr *Op; + +public: + UnsafeBufferUsageCtorAttrGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeBufferUsageCtorAttr), + Op(Result.Nodes.getNodeAs<CXXConstructExpr>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UnsafeBufferUsageCtorAttr; + } + + static Matcher matcher() { + auto HasUnsafeCtorDecl = + hasDeclaration(cxxConstructorDecl(hasAttr(attr::UnsafeBufferUsage))); + // std::span(ptr, size) ctor is handled by SpanTwoParamConstructorGadget. 
+ auto HasTwoParamSpanCtorDecl = SpanTwoParamConstructorGadget::matcher(); + return stmt( + cxxConstructExpr(HasUnsafeCtorDecl, unless(HasTwoParamSpanCtorDecl)) + .bind(OpTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + +// Warning gadget for unsafe invocation of span::data method. +// Triggers when the pointer returned by the invocation is immediately +// cast to a larger type. + +class DataInvocationGadget : public WarningGadget { + constexpr static const char *const OpTag = "data_invocation_expr"; + const ExplicitCastExpr *Op; + +public: + DataInvocationGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::DataInvocation), + Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::DataInvocation; + } + + static Matcher matcher() { + Matcher callExpr = cxxMemberCallExpr( + callee(cxxMethodDecl(hasName("data"), ofClass(hasName("std::span"))))); + return stmt( + explicitCastExpr(anyOf(has(callExpr), has(parenExpr(has(callExpr))))) + .bind(OpTag)); + } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); + } + SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + +// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue +// Context (see `isInUnspecifiedLvalueContext`). +// Note here `[]` is the built-in subscript operator. +class ULCArraySubscriptGadget : public FixableGadget { +private: + static constexpr const char *const ULCArraySubscriptTag = + "ArraySubscriptUnderULC"; + const ArraySubscriptExpr *Node; + +public: + ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::ULCArraySubscript), + Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::ULCArraySubscript; + } + + static Matcher matcher() { + auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); + auto BaseIsArrayOrPtrDRE = hasBase( + ignoringParenImpCasts(declRefExpr(ArrayOrPtr, toSupportedVariable()))); + auto Target = + arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag); + + return expr(isInUnspecifiedLvalueContext(Target)); + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } + + virtual DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) { + return {DRE}; + } + return {}; + } +}; + +// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the +// unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits +// fixit of the form `UPC(DRE.data())`. 
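That is, in a sketch (g is a hypothetical callee):

    void g(int *);
    int buf[8];
    int *p = buf;
    g(p);            // UPC(DRE); once p becomes std::span<int>, the fix-it
                     // rewrites the call to g(p.data())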
+class UPCStandalonePointerGadget : public FixableGadget { +private: + static constexpr const char *const DeclRefExprTag = "StandalonePointer"; + const DeclRefExpr *Node; + +public: + UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::UPCStandalonePointer), + Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UPCStandalonePointer; + } + + static Matcher matcher() { + auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); + auto target = expr(ignoringParenImpCasts( + declRefExpr(allOf(ArrayOrPtr, toSupportedVariable())) + .bind(DeclRefExprTag))); + return stmt(isInUnspecifiedPointerContext(target)); + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } + + virtual DeclUseList getClaimedVarUseSites() const override { return {Node}; } +}; + +class PointerDereferenceGadget : public FixableGadget { + static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; + static constexpr const char *const OperatorTag = "op"; + + const DeclRefExpr *BaseDeclRefExpr = nullptr; + const UnaryOperator *Op = nullptr; + +public: + PointerDereferenceGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::PointerDereference), + BaseDeclRefExpr( + Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), + Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PointerDereference; + } + + static Matcher matcher() { + auto Target = + unaryOperator( + hasOperatorName("*"), + has(expr(ignoringParenImpCasts( + declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag))))) + .bind(OperatorTag); + + return expr(isInUnspecifiedLvalueContext(Target)); + } + + DeclUseList getClaimedVarUseSites() const override { + return {BaseDeclRefExpr}; + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } +}; + +// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer +// Context (see `isInUnspecifiedPointerContext`). +// Note here `[]` is the built-in subscript operator. 
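For example, in this sketch (g as before):

    void g(int *);
    int buf[8];
    int *p = buf;
    g(&p[5]);        // &DRE[any] in an unspecified pointer context; recognized
                     // as fixable once p adopts the span strategy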
+class UPCAddressofArraySubscriptGadget : public FixableGadget {
+private:
+  static constexpr const char *const UPCAddressofArraySubscriptTag =
+      "AddressofArraySubscriptUnderUPC";
+  const UnaryOperator *Node; // the `&DRE[any]` node
+
+public:
+  UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::UPCAddressofArraySubscript),
+        Node(Result.Nodes.getNodeAs<UnaryOperator>(
+            UPCAddressofArraySubscriptTag)) {
+    assert(Node != nullptr && "Expecting a non-null matching result");
+  }
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::UPCAddressofArraySubscript;
+  }
+
+  static Matcher matcher() {
+    return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
+        unaryOperator(
+            hasOperatorName("&"),
+            hasUnaryOperand(arraySubscriptExpr(hasBase(
+                ignoringParenImpCasts(declRefExpr(toSupportedVariable()))))))
+            .bind(UPCAddressofArraySubscriptTag)))));
+  }
+
+  virtual std::optional<FixItList>
+  getFixits(const FixitStrategy &) const override;
+  SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
+
+  virtual DeclUseList getClaimedVarUseSites() const override {
+    const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
+    const auto *DRE =
+        cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreParenImpCasts());
+    return {DRE};
+  }
+};
+} // namespace
+
+namespace {
+// An auxiliary tracking facility for the fixit analysis. It helps connect
+// declarations to their uses and makes sure we've covered all uses with our
+// analysis before we try to fix the declaration.
+class DeclUseTracker {
+  using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
+  using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
+
+  // Allocate on the heap for easier move.
+  std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
+  DefMapTy Defs{};
+
+public:
+  DeclUseTracker() = default;
+  DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
+  DeclUseTracker &operator=(const DeclUseTracker &) = delete;
+  DeclUseTracker(DeclUseTracker &&) = default;
+  DeclUseTracker &operator=(DeclUseTracker &&) = default;
+
+  // Start tracking a freshly discovered DRE.
+  void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
+
+  // Stop tracking the DRE as it's been fully figured out.
+  void claimUse(const DeclRefExpr *DRE) {
+    assert(Uses->count(DRE) &&
+           "DRE not found or claimed by multiple matchers!");
+    Uses->erase(DRE);
+  }
+
+  // A variable is unclaimed if at least one of its uses is unclaimed.
+  bool hasUnclaimedUses(const VarDecl *VD) const {
+    // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
+    return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
+      return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
+    });
+  }
+
+  UseSetTy getUnclaimedUses(const VarDecl *VD) const {
+    UseSetTy ReturnSet;
+    for (auto use : *Uses) {
+      if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
+        ReturnSet.insert(use);
+      }
+    }
+    return ReturnSet;
+  }
+
+  void discoverDecl(const DeclStmt *DS) {
+    for (const Decl *D : DS->decls()) {
+      if (const auto *VD = dyn_cast<VarDecl>(D)) {
+        // FIXME: Assertion temporarily disabled due to a bug in
+        // ASTMatcher internal behavior in presence of GNU
+        // statement-expressions. We need to properly investigate this
+        // because it can screw up our algorithm in other ways.
+ // assert(Defs.count(VD) == 0 && "Definition already discovered!"); + Defs[VD] = DS; + } + } + } + + const DeclStmt *lookupDecl(const VarDecl *VD) const { + return Defs.lookup(VD); + } +}; +} // namespace + +// Representing a pointer type expression of the form `++Ptr` in an Unspecified +// Pointer Context (UPC): +class UPCPreIncrementGadget : public FixableGadget { +private: + static constexpr const char *const UPCPreIncrementTag = + "PointerPreIncrementUnderUPC"; + const UnaryOperator *Node; // the `++Ptr` node + +public: + UPCPreIncrementGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::UPCPreIncrement), + Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UPCPreIncrement; + } + + static Matcher matcher() { + // Note here we match `++Ptr` for any expression `Ptr` of pointer type. + // Although currently we can only provide fix-its when `Ptr` is a DRE, we + // can have the matcher be general, so long as `getClaimedVarUseSites` does + // things right. + return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts( + unaryOperator(isPreInc(), + hasUnaryOperand(declRefExpr(toSupportedVariable()))) + .bind(UPCPreIncrementTag))))); + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } + + virtual DeclUseList getClaimedVarUseSites() const override { + return {dyn_cast<DeclRefExpr>(Node->getSubExpr())}; + } +}; + +// Representing a pointer type expression of the form `Ptr += n` in an +// Unspecified Untyped Context (UUC): +class UUCAddAssignGadget : public FixableGadget { +private: + static constexpr const char *const UUCAddAssignTag = + "PointerAddAssignUnderUUC"; + static constexpr const char *const OffsetTag = "Offset"; + + const BinaryOperator *Node; // the `Ptr += n` node + const Expr *Offset = nullptr; + +public: + UUCAddAssignGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::UUCAddAssign), + Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)), + Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) { + assert(Node != nullptr && "Expecting a non-null matching result"); + } + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::UUCAddAssign; + } + + static Matcher matcher() { + // clang-format off + return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts( + binaryOperator(hasOperatorName("+="), + hasLHS( + declRefExpr( + hasPointerType(), + toSupportedVariable())), + hasRHS(expr().bind(OffsetTag))) + .bind(UUCAddAssignTag))))); + // clang-format on + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &S) const override; + SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } + + virtual DeclUseList getClaimedVarUseSites() const override { + return {dyn_cast<DeclRefExpr>(Node->getLHS())}; + } +}; + +// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 + +// ptr)`: +class DerefSimplePtrArithFixableGadget : public FixableGadget { + static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; + static constexpr const char *const DerefOpTag = "DerefOp"; + static constexpr const char *const AddOpTag = "AddOp"; + static constexpr const char *const OffsetTag = "Offset"; + + const DeclRefExpr *BaseDeclRefExpr = nullptr; + const UnaryOperator *DerefOp = nullptr; + const 
BinaryOperator *AddOp = nullptr; + const IntegerLiteral *Offset = nullptr; + +public: + DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::DerefSimplePtrArithFixable), + BaseDeclRefExpr( + Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), + DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)), + AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)), + Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {} + + static Matcher matcher() { + // clang-format off + auto ThePtr = expr(hasPointerType(), + ignoringImpCasts(declRefExpr(toSupportedVariable()). + bind(BaseDeclRefExprTag))); + auto PlusOverPtrAndInteger = expr(anyOf( + binaryOperator(hasOperatorName("+"), hasLHS(ThePtr), + hasRHS(integerLiteral().bind(OffsetTag))) + .bind(AddOpTag), + binaryOperator(hasOperatorName("+"), hasRHS(ThePtr), + hasLHS(integerLiteral().bind(OffsetTag))) + .bind(AddOpTag))); + return isInUnspecifiedLvalueContext(unaryOperator( + hasOperatorName("*"), + hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger))) + .bind(DerefOpTag)); + // clang-format on + } + + virtual std::optional<FixItList> + getFixits(const FixitStrategy &s) const final; + SourceLocation getSourceLoc() const override { + return DerefOp->getBeginLoc(); + } + + virtual DeclUseList getClaimedVarUseSites() const final { + return {BaseDeclRefExpr}; + } +}; + +/// Scan the function and return a list of gadgets found with provided kits. +static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker> +findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, + bool EmitSuggestions) { + + struct GadgetFinderCallback : MatchFinder::MatchCallback { + FixableGadgetList FixableGadgets; + WarningGadgetList WarningGadgets; + DeclUseTracker Tracker; + + void run(const MatchFinder::MatchResult &Result) override { + // In debug mode, assert that we've found exactly one gadget. + // This helps us avoid conflicts in .bind() tags. +#if NDEBUG +#define NEXT return +#else + [[maybe_unused]] int numFound = 0; +#define NEXT ++numFound +#endif + + if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) { + Tracker.discoverUse(DRE); + NEXT; + } + + if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) { + Tracker.discoverDecl(DS); + NEXT; + } + + // Figure out which matcher we've found, and call the appropriate + // subclass constructor. + // FIXME: Can we do this more logarithmically? +#define FIXABLE_GADGET(name) \ + if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ + FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ + NEXT; \ + } +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" +#define WARNING_GADGET(name) \ + if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ + WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ + NEXT; \ + } +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + + assert(numFound >= 1 && "Gadgets not found in match result!"); + assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); + } + }; + + MatchFinder M; + GadgetFinderCallback CB; + + // clang-format off + M.addMatcher( + stmt( + forEachDescendantEvaluatedStmt(stmt(anyOf( + // Add Gadget::matcher() for every gadget in the registry. 
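+          // For instance (illustrative expansion), an entry
+          // WARNING_GADGET(Increment) in the .def file contributes the
+          // disjunct:
+          //   allOf(IncrementGadget::matcher().bind("Increment"),
+          //         notInSafeBufferOptOut(&Handler)),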
+#define WARNING_GADGET(x) \ + allOf(x ## Gadget::matcher().bind(#x), \ + notInSafeBufferOptOut(&Handler)), +#define WARNING_CONTAINER_GADGET(x) \ + allOf(x ## Gadget::matcher().bind(#x), \ + notInSafeBufferOptOut(&Handler), \ + unless(ignoreUnsafeBufferInContainer(&Handler))), +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + // Avoid a hanging comma. + unless(stmt()) + ))) + ), + &CB + ); + // clang-format on + + if (EmitSuggestions) { + // clang-format off + M.addMatcher( + stmt( + forEachDescendantStmt(stmt(eachOf( +#define FIXABLE_GADGET(x) \ + x ## Gadget::matcher().bind(#x), +#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" + // In parallel, match all DeclRefExprs so that to find out + // whether there are any uncovered by gadgets. + declRefExpr(anyOf(hasPointerType(), hasArrayType()), + to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"), + // Also match DeclStmts because we'll need them when fixing + // their underlying VarDecls that otherwise don't have + // any backreferences to DeclStmts. + declStmt().bind("any_ds") + ))) + ), + &CB + ); + // clang-format on + } + + M.match(*D->getBody(), D->getASTContext()); + return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), + std::move(CB.Tracker)}; +} + +// Compares AST nodes by source locations. +template <typename NodeTy> struct CompareNode { + bool operator()(const NodeTy *N1, const NodeTy *N2) const { + return N1->getBeginLoc().getRawEncoding() < + N2->getBeginLoc().getRawEncoding(); + } +}; + +struct WarningGadgetSets { + std::map<const VarDecl *, std::set<const WarningGadget *>, + // To keep keys sorted by their locations in the map so that the + // order is deterministic: + CompareNode<VarDecl>> + byVar; + // These Gadgets are not related to pointer variables (e. g. temporaries). + llvm::SmallVector<const WarningGadget *, 16> noVar; +}; + +static WarningGadgetSets +groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) { + WarningGadgetSets result; + // If some gadgets cover more than one + // variable, they'll appear more than once in the map. + for (auto &G : AllUnsafeOperations) { + DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); + + bool AssociatedWithVarDecl = false; + for (const DeclRefExpr *DRE : ClaimedVarUseSites) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + result.byVar[VD].insert(G.get()); + AssociatedWithVarDecl = true; + } + } + + if (!AssociatedWithVarDecl) { + result.noVar.push_back(G.get()); + continue; + } + } + return result; +} + +struct FixableGadgetSets { + std::map<const VarDecl *, std::set<const FixableGadget *>, + // To keep keys sorted by their locations in the map so that the + // order is deterministic: + CompareNode<VarDecl>> + byVar; +}; + +static FixableGadgetSets +groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { + FixableGadgetSets FixablesForUnsafeVars; + for (auto &F : AllFixableOperations) { + DeclUseList DREs = F->getClaimedVarUseSites(); + + for (const DeclRefExpr *DRE : DREs) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + FixablesForUnsafeVars.byVar[VD].insert(F.get()); + } + } + } + return FixablesForUnsafeVars; +} + +bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts, + const SourceManager &SM) { + // A simple interval overlap detection algorithm. Sorts all ranges by their + // begin location then finds the first overlap in one pass. 
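+  // E.g. (illustrative): for ranges [1,4], [2,5], [7,9] sorted by their begin
+  // locations, the scan keeps [1,4] as the current range, then sees that
+  // [2,5] begins at 2, which is not past the current end 4, and reports a
+  // conflict.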
+ std::vector<const FixItHint *> All; // a copy of `FixIts` + + for (const FixItHint &H : FixIts) + All.push_back(&H); + std::sort(All.begin(), All.end(), + [&SM](const FixItHint *H1, const FixItHint *H2) { + return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(), + H2->RemoveRange.getBegin()); + }); + + const FixItHint *CurrHint = nullptr; + + for (const FixItHint *Hint : All) { + if (!CurrHint || + SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(), + Hint->RemoveRange.getBegin())) { + // Either to initialize `CurrHint` or `CurrHint` does not + // overlap with `Hint`: + CurrHint = Hint; + } else + // In case `Hint` overlaps the `CurrHint`, we found at least one + // conflict: + return true; + } + return false; +} + +std::optional<FixItList> +PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const { + const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); + const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); + switch (S.lookup(LeftVD)) { + case FixitStrategy::Kind::Span: + if (S.lookup(RightVD) == FixitStrategy::Kind::Span) + return FixItList{}; + return std::nullopt; + case FixitStrategy::Kind::Wontfix: + return std::nullopt; + case FixitStrategy::Kind::Iterator: + case FixitStrategy::Kind::Array: + return std::nullopt; + case FixitStrategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + return std::nullopt; +} + +/// \returns fixit that adds .data() call after \DRE. +static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx, + const DeclRefExpr *DRE); + +std::optional<FixItList> +CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const { + const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); + const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); + // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is + // non-trivial. + // + // CArrayToPtrAssignmentGadget doesn't have strategy implications because + // constant size array propagates its bounds. Because of that LHS and RHS are + // addressed by two different fixits. + // + // At the same time FixitStrategy S doesn't reflect what group a fixit belongs + // to and can't be generally relied on in multi-variable Fixables! + // + // E. g. If an instance of this gadget is fixing variable on LHS then the + // variable on RHS is fixed by a different fixit and its strategy for LHS + // fixit is as if Wontfix. + // + // The only exception is Wontfix strategy for a given variable as that is + // valid for any fixit produced for the given input source code. 
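+  //
+  // Example (for illustration): given `int arr[10]; int *p = arr;`, fixing
+  // `p` with the Span strategy needs no edit here (std::span is constructible
+  // from the array), while fixing `arr` with the Array strategy rewrites the
+  // right-hand side to `arr.data()`.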
+ if (S.lookup(LeftVD) == FixitStrategy::Kind::Span) { + if (S.lookup(RightVD) == FixitStrategy::Kind::Wontfix) { + return FixItList{}; + } + } else if (S.lookup(LeftVD) == FixitStrategy::Kind::Wontfix) { + if (S.lookup(RightVD) == FixitStrategy::Kind::Array) { + return createDataFixit(RightVD->getASTContext(), PtrRHS); + } + } + return std::nullopt; +} + +std::optional<FixItList> +PointerInitGadget::getFixits(const FixitStrategy &S) const { + const auto *LeftVD = PtrInitLHS; + const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl()); + switch (S.lookup(LeftVD)) { + case FixitStrategy::Kind::Span: + if (S.lookup(RightVD) == FixitStrategy::Kind::Span) + return FixItList{}; + return std::nullopt; + case FixitStrategy::Kind::Wontfix: + return std::nullopt; + case FixitStrategy::Kind::Iterator: + case FixitStrategy::Kind::Array: + return std::nullopt; + case FixitStrategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + return std::nullopt; +} + +static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD, + const ASTContext &Ctx) { + if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) { + if (ConstVal->isNegative()) + return false; + } else if (!Expr->getType()->isUnsignedIntegerType()) + return false; + return true; +} + +std::optional<FixItList> +ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + switch (S.lookup(VD)) { + case FixitStrategy::Kind::Span: { + + // If the index has a negative constant value, we give up as no valid + // fix-it can be generated: + const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in! + VD->getASTContext(); + if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx)) + return std::nullopt; + // no-op is a good fix-it, otherwise + return FixItList{}; + } + case FixitStrategy::Kind::Array: + return FixItList{}; + case FixitStrategy::Kind::Wontfix: + case FixitStrategy::Kind::Iterator: + case FixitStrategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + } + return std::nullopt; +} + +static std::optional<FixItList> // forward declaration +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); + +std::optional<FixItList> +UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const { + auto DREs = getClaimedVarUseSites(); + const auto *VD = cast<VarDecl>(DREs.front()->getDecl()); + + switch (S.lookup(VD)) { + case FixitStrategy::Kind::Span: + return fixUPCAddressofArraySubscriptWithSpan(Node); + case FixitStrategy::Kind::Wontfix: + case FixitStrategy::Kind::Iterator: + case FixitStrategy::Kind::Array: + return std::nullopt; + case FixitStrategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + return std::nullopt; // something went wrong, no fix-it +} + +// FIXME: this function should be customizable through format +static StringRef getEndOfLine() { + static const char *const EOL = "\n"; + return EOL; +} + +// Returns the text indicating that the user needs to provide input there: +std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { + std::string s = std::string("<# "); + s += HintTextToUser; + s += " #>"; + return s; +} + +// Return the source location of the last character of the AST `Node`. 
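+// E.g. (illustrative): if the last token of `Node` is the identifier `buf`,
+// `Node->getEndLoc()` points at its `b` while this function returns the
+// location of its `f`.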
+template <typename NodeTy> +static std::optional<SourceLocation> +getEndCharLoc(const NodeTy *Node, const SourceManager &SM, + const LangOptions &LangOpts) { + unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); + SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); + + if (Loc.isValid()) + return Loc; + + return std::nullopt; +} + +// Return the source location just past the last character of the AST `Node`. +template <typename NodeTy> +static std::optional<SourceLocation> getPastLoc(const NodeTy *Node, + const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation Loc = + Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts); + if (Loc.isValid()) + return Loc; + return std::nullopt; +} + +// Return text representation of an `Expr`. +static std::optional<StringRef> getExprText(const Expr *E, + const SourceManager &SM, + const LangOptions &LangOpts) { + std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts); + + if (LastCharLoc) + return Lexer::getSourceText( + CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM, + LangOpts); + + return std::nullopt; +} + +// Returns the literal text in `SourceRange SR`, if `SR` is a valid range. +static std::optional<StringRef> getRangeText(SourceRange SR, + const SourceManager &SM, + const LangOptions &LangOpts) { + bool Invalid = false; + CharSourceRange CSR = CharSourceRange::getCharRange(SR); + StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid); + + if (!Invalid) + return Text; + return std::nullopt; +} + +// Returns the begin location of the identifier of the given variable +// declaration. +static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) { + // According to the implementation of `VarDecl`, `VD->getLocation()` actually + // returns the begin location of the identifier of the declaration: + return VD->getLocation(); +} + +// Returns the literal text of the identifier of the given variable declaration. +static std::optional<StringRef> +getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD); + SourceLocation ParmIdentEndLoc = + Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts); + + if (ParmIdentEndLoc.isMacroID() && + !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts)) + return std::nullopt; + return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts); +} + +// We cannot fix a variable declaration if it has some other specifiers than the +// type specifier. Because the source ranges of those specifiers could overlap +// with the source range that is being replaced using fix-its. Especially when +// we often cannot obtain accurate source ranges of cv-qualified type +// specifiers. +// FIXME: also deal with type attributes +static bool hasUnsupportedSpecifiers(const VarDecl *VD, + const SourceManager &SM) { + // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the + // source range of `VD`: + bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool { + return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(), + VD->getBeginLoc())) && + !(SM.isBeforeInTranslationUnit(VD->getEndLoc(), + At->getRange().getBegin())); + }); + return VD->isInlineSpecified() || VD->isConstexpr() || + VD->hasConstantInitialization() || !VD->hasLocalStorage() || + AttrRangeOverlapping; +} + +// Returns the `SourceRange` of `D`. 
The reason why this function exists is
+// that `D->getSourceRange()` may return a range where the end location is the
+// starting location of the last token. The end location of the source range
+// returned by this function is the last location of the last token.
+static SourceRange getSourceRangeToTokenEnd(const Decl *D,
+                                            const SourceManager &SM,
+                                            const LangOptions &LangOpts) {
+  SourceLocation Begin = D->getBeginLoc();
+  SourceLocation
+      End = // `D->getEndLoc` should always return the starting location of the
+            // last token, so we should get the end of the token
+      Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts);
+
+  return SourceRange(Begin, End);
+}
+
+// Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
+// type. The text is obtained from `TypeLoc`s. Since `TypeLoc` does not carry
+// source ranges for qualifiers (the `QualifiedTypeLoc` API looks hacky to me
+// :( ), the `Qualifiers` of the pointee type are returned separately through
+// the output parameter `QualifiersToAppend`.
+static std::optional<std::string>
+getPointeeTypeText(const VarDecl *VD, const SourceManager &SM,
+                   const LangOptions &LangOpts,
+                   std::optional<Qualifiers> *QualifiersToAppend) {
+  QualType Ty = VD->getType();
+  QualType PteTy;
+
+  assert(Ty->isPointerType() && !Ty->isFunctionPointerType() &&
+         "Expecting a VarDecl of type of pointer to object type");
+  PteTy = Ty->getPointeeType();
+
+  TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
+  TypeLoc PteTyLoc;
+
+  // We only deal with the cases where we know `TypeLoc::getNextTypeLoc`
+  // returns the `TypeLoc` of the pointee type:
+  switch (TyLoc.getTypeLocClass()) {
+  case TypeLoc::ConstantArray:
+  case TypeLoc::IncompleteArray:
+  case TypeLoc::VariableArray:
+  case TypeLoc::DependentSizedArray:
+  case TypeLoc::Decayed:
+    assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a "
+                                   "pointer type unless it decays.");
+    PteTyLoc = TyLoc.getNextTypeLoc();
+    break;
+  case TypeLoc::Pointer:
+    PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc();
+    break;
+  default:
+    return std::nullopt;
+  }
+  if (PteTyLoc.isNull())
+    // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
+    // when the pointer type is `auto`.
+    return std::nullopt;
+
+  SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD);
+
+  if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
+    // We expect these locations to be valid, but in some cases they are not.
+    // That looks like a Clang bug, and it is not ours to fix here, so we just
+    // give up when it happens.
+    return std::nullopt;
+  }
+
+  // Note that TypeLoc.getEndLoc() returns the begin location of the last
+  // token:
+  SourceLocation PteEndOfTokenLoc =
+      Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts);
+
+  if (!PteEndOfTokenLoc.isValid())
+    // Sometimes we cannot get the end location of the pointee type, e.g.,
+    // when there are macros involved.
+    return std::nullopt;
+  if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) {
+    // We only deal with the cases where the source text of the pointee type
+    // appears entirely to the left of the variable identifier, including the
+    // following forms:
+    // `T ident`,
+    // `T ident[]`, where `T` is any type.
+    // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
+ return std::nullopt; + } + if (PteTy.hasQualifiers()) { + // TypeLoc does not provide source ranges for qualifiers (it says it's + // intentional but seems fishy to me), so we cannot get the full text + // `PteTy` via source ranges. + *QualifiersToAppend = PteTy.getQualifiers(); + } + return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts) + ->str(); +} + +// Returns the text of the name (with qualifiers) of a `FunctionDecl`. +static std::optional<StringRef> getFunNameText(const FunctionDecl *FD, + const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation BeginLoc = FD->getQualifier() + ? FD->getQualifierLoc().getBeginLoc() + : FD->getNameInfo().getBeginLoc(); + // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the + // last token: + SourceLocation EndLoc = Lexer::getLocForEndOfToken( + FD->getNameInfo().getEndLoc(), 0, SM, LangOpts); + SourceRange NameRange{BeginLoc, EndLoc}; + + return getRangeText(NameRange, SM, LangOpts); +} + +// Returns the text representing a `std::span` type where the element type is +// represented by `EltTyText`. +// +// Note the optional parameter `Qualifiers`: one needs to pass qualifiers +// explicitly if the element type needs to be qualified. +static std::string +getSpanTypeText(StringRef EltTyText, + std::optional<Qualifiers> Quals = std::nullopt) { + const char *const SpanOpen = "std::span<"; + + if (Quals) + return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>'; + return SpanOpen + EltTyText.str() + '>'; +} + +std::optional<FixItList> +DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const { + const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl()); + + if (VD && s.lookup(VD) == FixitStrategy::Kind::Span) { + ASTContext &Ctx = VD->getASTContext(); + // std::span can't represent elements before its begin() + if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx)) + if (ConstVal->isNegative()) + return std::nullopt; + + // note that the expr may (oddly) has multiple layers of parens + // example: + // *((..(pointer + 123)..)) + // goal: + // pointer[123] + // Fix-It: + // remove '*(' + // replace ' + ' with '[' + // replace ')' with ']' + + // example: + // *((..(123 + pointer)..)) + // goal: + // 123[pointer] + // Fix-It: + // remove '*(' + // replace ' + ' with '[' + // replace ')' with ']' + + const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS(); + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + CharSourceRange StarWithTrailWhitespace = + clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(), + LHS->getBeginLoc()); + + std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts); + if (!LHSLocation) + return std::nullopt; + + CharSourceRange PlusWithSurroundingWhitespace = + clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc()); + + std::optional<SourceLocation> AddOpLocation = + getPastLoc(AddOp, SM, LangOpts); + std::optional<SourceLocation> DerefOpLocation = + getPastLoc(DerefOp, SM, LangOpts); + + if (!AddOpLocation || !DerefOpLocation) + return std::nullopt; + + CharSourceRange ClosingParenWithPrecWhitespace = + clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation); + + return FixItList{ + {FixItHint::CreateRemoval(StarWithTrailWhitespace), + FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["), + FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}}; + } + return std::nullopt; // something wrong or 
unsupported, give up +} + +std::optional<FixItList> +PointerDereferenceGadget::getFixits(const FixitStrategy &S) const { + const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl()); + switch (S.lookup(VD)) { + case FixitStrategy::Kind::Span: { + ASTContext &Ctx = VD->getASTContext(); + SourceManager &SM = Ctx.getSourceManager(); + // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] + // Deletes the *operand + CharSourceRange derefRange = clang::CharSourceRange::getCharRange( + Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); + // Inserts the [0] + if (auto LocPastOperand = + getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) { + return FixItList{{FixItHint::CreateRemoval(derefRange), + FixItHint::CreateInsertion(*LocPastOperand, "[0]")}}; + } + break; + } + case FixitStrategy::Kind::Iterator: + case FixitStrategy::Kind::Array: + return std::nullopt; + case FixitStrategy::Kind::Vector: + llvm_unreachable("FixitStrategy not implemented yet!"); + case FixitStrategy::Kind::Wontfix: + llvm_unreachable("Invalid strategy!"); + } + + return std::nullopt; +} + +static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx, + const DeclRefExpr *DRE) { + const SourceManager &SM = Ctx.getSourceManager(); + // Inserts the .data() after the DRE + std::optional<SourceLocation> EndOfOperand = + getPastLoc(DRE, SM, Ctx.getLangOpts()); + + if (EndOfOperand) + return FixItList{{FixItHint::CreateInsertion(*EndOfOperand, ".data()")}}; + + return std::nullopt; +} + +// Generates fix-its replacing an expression of the form UPC(DRE) with +// `DRE.data()` +std::optional<FixItList> +UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const { + const auto VD = cast<VarDecl>(Node->getDecl()); + switch (S.lookup(VD)) { + case FixitStrategy::Kind::Array: + case FixitStrategy::Kind::Span: { + return createDataFixit(VD->getASTContext(), Node); + // FIXME: Points inside a macro expansion. 
+ break; + } + case FixitStrategy::Kind::Wontfix: + case FixitStrategy::Kind::Iterator: + return std::nullopt; + case FixitStrategy::Kind::Vector: + llvm_unreachable("unsupported strategies for FixableGadgets"); + } + + return std::nullopt; +} + +// Generates fix-its replacing an expression of the form `&DRE[e]` with +// `&DRE.data()[e]`: +static std::optional<FixItList> +fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { + const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr()); + const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts()); + // FIXME: this `getASTContext` call is costly, we should pass the + // ASTContext in: + const ASTContext &Ctx = DRE->getDecl()->getASTContext(); + const Expr *Idx = ArraySub->getIdx(); + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + std::stringstream SS; + bool IdxIsLitZero = false; + + if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) + if ((*ICE).isZero()) + IdxIsLitZero = true; + std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts); + if (!DreString) + return std::nullopt; + + if (IdxIsLitZero) { + // If the index is literal zero, we produce the most concise fix-it: + SS << (*DreString).str() << ".data()"; + } else { + std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts); + if (!IndexString) + return std::nullopt; + + SS << "&" << (*DreString).str() << ".data()" + << "[" << (*IndexString).str() << "]"; + } + return FixItList{ + FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; +} + +std::optional<FixItList> +UUCAddAssignGadget::getFixits(const FixitStrategy &S) const { + DeclUseList DREs = getClaimedVarUseSites(); + + if (DREs.size() != 1) + return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we + // give up + if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { + if (S.lookup(VD) == FixitStrategy::Kind::Span) { + FixItList Fixes; + + const Stmt *AddAssignNode = Node; + StringRef varName = VD->getName(); + const ASTContext &Ctx = VD->getASTContext(); + + if (!isNonNegativeIntegerExpr(Offset, VD, Ctx)) + return std::nullopt; + + // To transform UUC(p += n) to UUC(p = p.subspan(..)): + bool NotParenExpr = + (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc()); + std::string SS = varName.str() + " = " + varName.str() + ".subspan"; + if (NotParenExpr) + SS += "("; + + std::optional<SourceLocation> AddAssignLocation = getEndCharLoc( + AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts()); + if (!AddAssignLocation) + return std::nullopt; + + Fixes.push_back(FixItHint::CreateReplacement( + SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()), + SS)); + if (NotParenExpr) + Fixes.push_back(FixItHint::CreateInsertion( + Offset->getEndLoc().getLocWithOffset(1), ")")); + return Fixes; + } + } + return std::nullopt; // Not in the cases that we can handle for now, give up. 
+}
+
+std::optional<FixItList>
+UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const {
+  DeclUseList DREs = getClaimedVarUseSites();
+
+  if (DREs.size() != 1)
+    return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
+                         // give up
+  if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
+    if (S.lookup(VD) == FixitStrategy::Kind::Span) {
+      FixItList Fixes;
+      std::stringstream SS;
+      StringRef varName = VD->getName();
+      const ASTContext &Ctx = VD->getASTContext();
+
+      // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
+      SS << "(" << varName.data() << " = " << varName.data()
+         << ".subspan(1)).data()";
+      std::optional<SourceLocation> PreIncLocation =
+          getEndCharLoc(Node, Ctx.getSourceManager(), Ctx.getLangOpts());
+      if (!PreIncLocation)
+        return std::nullopt;
+
+      Fixes.push_back(FixItHint::CreateReplacement(
+          SourceRange(Node->getBeginLoc(), *PreIncLocation), SS.str()));
+      return Fixes;
+    }
+  }
+  return std::nullopt; // Not in the cases that we can handle for now, give up.
+}
+
+// For a non-null initializer `Init` of `T *` type, this function returns
+// `FixItHint`s that rewrite `Init` into a list initializer `{Init, S}` as
+// part of a fix-it.
+// In many cases, this function cannot figure out the actual extent `S`; it
+// then uses a placeholder and asks the user to fill `S` in. The initializer
+// shall be used to initialize a variable of type `std::span<T>`.
+// In some cases (e.g., a constant-size array) the initializer should remain
+// unchanged and the function returns an empty list. If the function cannot
+// provide the right fix-it, it returns std::nullopt.
+//
+// FIXME: Support multi-level pointers
+//
+// Parameters:
+//   `Init` a pointer to the initializer expression
+//   `Ctx` a reference to the ASTContext
+static std::optional<FixItList>
+FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
+                          const StringRef UserFillPlaceHolder) {
+  const SourceManager &SM = Ctx.getSourceManager();
+  const LangOptions &LangOpts = Ctx.getLangOpts();
+
+  // If `Init` has a constant value that is (or is equivalent to) a
+  // NULL pointer, we use the default constructor to initialize the span
+  // object, i.e., a `std::span` variable declaration with no initializer.
+  // So the fix-it is just to remove the initializer.
+  if (Init->isNullPointerConstant(
+          Ctx,
+          // FIXME: Why does this function not ask for `const ASTContext
+          // &`? It should. Maybe worth an NFC patch later.
+          Expr::NullPointerConstantValueDependence::
+              NPC_ValueDependentIsNotNull)) {
+    std::optional<SourceLocation> InitLocation =
+        getEndCharLoc(Init, SM, LangOpts);
+    if (!InitLocation)
+      return std::nullopt;
+
+    SourceRange SR(Init->getBeginLoc(), *InitLocation);
+
+    return FixItList{FixItHint::CreateRemoval(SR)};
+  }
+
+  FixItList FixIts{};
+  std::string ExtentText = UserFillPlaceHolder.data();
+  StringRef One = "1";
+
+  // Insert `{` before `Init`:
+  FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
+  // Try to get the data extent. Break into different cases:
+  if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
+    // In cases `Init` is `new T[n]` and there is no explicit cast over
+    // `Init`, we know that `Init` must evaluate to a pointer to `n` objects
+    // of `T`. So the extent is `n` unless `n` has side effects. Similar but
+    // simpler for the case where `Init` is `new T`.
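+    // Example (for illustration): `int *p = new int[n];` ends up as
+    // `std::span<int> p = {new int[n], n};`, provided `n` has no side
+    // effects.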
+    if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
+      if (!Ext->HasSideEffects(Ctx)) {
+        std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts);
+        if (!ExtentString)
+          return std::nullopt;
+        ExtentText = *ExtentString;
+      }
+    } else if (!CxxNew->isArray())
+      // Although the initializer is not allocating a buffer, the pointer
+      // variable could still be used in buffer access operations.
+      ExtentText = One;
+  } else if (Ctx.getAsConstantArrayType(Init->IgnoreImpCasts()->getType())) {
+    // std::span has a single-parameter constructor for initialization from a
+    // constant-size array. The size is auto-deduced as the constructor is a
+    // function template. The correct fixit is empty, as no changes should
+    // happen.
+    return FixItList{};
+  } else {
+    // In cases `Init` is of the form `&Var` after stripping implicit casts,
+    // where `&` is the built-in operator, the extent is 1.
+    if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
+      if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
+          isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
+        ExtentText = One;
+    // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
+    // and explicit casting, etc.
+  }
+
+  SmallString<32> StrBuffer{};
+  std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts);
+
+  if (!LocPassInit)
+    return std::nullopt;
+
+  StrBuffer.append(", ");
+  StrBuffer.append(ExtentText);
+  StrBuffer.append("}");
+  FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
+  return FixIts;
+}
+
+#ifndef NDEBUG
+#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                          \
+  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                         \
+                             "failed to produce fixit for declaration '" +    \
+                                 (D)->getNameAsString() + "'" + (Msg))
+#else
+#define DEBUG_NOTE_DECL_FAIL(D, Msg)
+#endif
+
+// For the given variable declaration with a pointer-to-T type, returns the
+// text `std::span<T>`. If it is unable to generate the text, returns
+// `std::nullopt`.
+static std::optional<std::string>
+createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) {
+  assert(VD->getType()->isPointerType());
+
+  std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
+  std::optional<std::string> PteTyText = getPointeeTypeText(
+      VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
+
+  if (!PteTyText)
+    return std::nullopt;
+
+  std::string SpanTyText = "std::span<";
+
+  SpanTyText.append(*PteTyText);
+  // Append qualifiers to span element type if any:
+  if (PteTyQualifiers) {
+    SpanTyText.append(" ");
+    SpanTyText.append(PteTyQualifiers->getAsString());
+  }
+  SpanTyText.append(">");
+  return SpanTyText;
+}
+
+// For a `VarDecl` of the form `T * var (= Init)?`, this
+// function generates fix-its that
+//   1) replace `T * var` with `std::span<T> var`; and
+//   2) change `Init` accordingly to a span constructor, if it exists.
+//
+// FIXME: support multi-level pointers
+//
+// Parameters:
+//   `D` a pointer to the variable declaration node
+//   `Ctx` a reference to the ASTContext
+//   `UserFillPlaceHolder` the user-input placeholder text
+// Returns:
+//    the non-empty fix-it list, if fix-its are successfully generated; empty
+//    list otherwise.
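+//
+// Example (for illustration): `int *p = &x;` becomes
+//   `std::span<int> p = {&x, 1};`
+// combining the type replacement below with the initializer fix-its from
+// `FixVarInitializerWithSpan`.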
+static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
+                                         const StringRef UserFillPlaceHolder,
+                                         UnsafeBufferUsageHandler &Handler) {
+  if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager()))
+    return {};
+
+  FixItList FixIts{};
+  std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx);
+
+  if (!SpanTyText) {
+    DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
+    return {};
+  }
+
+  // Will hold the text for `std::span<T> Ident`:
+  std::stringstream SS;
+
+  SS << *SpanTyText;
+  // Fix the initializer if it exists:
+  if (const Expr *Init = D->getInit()) {
+    std::optional<FixItList> InitFixIts =
+        FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
+    if (!InitFixIts)
+      return {};
+    FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts->begin()),
+                  std::make_move_iterator(InitFixIts->end()));
+  }
+  // For a declaration of the form `T * ident = init;`, we want to replace
+  // `T * ` with `std::span<T>`.
+  // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
+  // just `T *` with `std::span<T>`.
+  const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc();
+  if (!EndLocForReplacement.isValid()) {
+    DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
+    return {};
+  }
+  // The only exception is that for `T *ident` we'll add a single space between
+  // "std::span<T>" and "ident".
+  // FIXME: The condition is false for identifiers expanded from macros.
+  if (EndLocForReplacement.getLocWithOffset(1) == getVarDeclIdentifierLoc(D))
+    SS << " ";
+
+  FixIts.push_back(FixItHint::CreateReplacement(
+      SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str()));
+  return FixIts;
+}
+
+static bool hasConflictingOverload(const FunctionDecl *FD) {
+  return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult();
+}
+
+// For a `FunctionDecl` whose `ParmVarDecl`s are being changed to have new
+// types, this function produces fix-its to make the change self-contained. Let
+// 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
+// entity defined by the `FunctionDecl` after the change to the parameters.
+// Fix-its produced by this function are
+//   1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
+//   of 'F';
+//   2. Create a declaration of "NewF" next to each declaration of `F`;
+//   3. Create a definition of "F" (as its original definition now belongs
+//   to "NewF") next to its original definition. The body of the created
+//   definition calls "NewF".
+// +// Example: +// +// void f(int *p); // original declaration +// void f(int *p) { // original definition +// p[5]; +// } +// +// To change the parameter `p` to be of `std::span<int>` type, we +// also add overloads: +// +// [[clang::unsafe_buffer_usage]] void f(int *p); // original decl +// void f(std::span<int> p); // added overload decl +// void f(std::span<int> p) { // original def where param is changed +// p[5]; +// } +// [[clang::unsafe_buffer_usage]] void f(int *p) { // added def +// return f(std::span(p, <# size #>)); +// } +// +static std::optional<FixItList> +createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD, + const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + // FIXME: need to make this conflict checking better: + if (hasConflictingOverload(FD)) + return std::nullopt; + + const SourceManager &SM = Ctx.getSourceManager(); + const LangOptions &LangOpts = Ctx.getLangOpts(); + const unsigned NumParms = FD->getNumParams(); + std::vector<std::string> NewTysTexts(NumParms); + std::vector<bool> ParmsMask(NumParms, false); + bool AtLeastOneParmToFix = false; + + for (unsigned i = 0; i < NumParms; i++) { + const ParmVarDecl *PVD = FD->getParamDecl(i); + + if (S.lookup(PVD) == FixitStrategy::Kind::Wontfix) + continue; + if (S.lookup(PVD) != FixitStrategy::Kind::Span) + // Not supported, not suppose to happen: + return std::nullopt; + + std::optional<Qualifiers> PteTyQuals = std::nullopt; + std::optional<std::string> PteTyText = + getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals); + + if (!PteTyText) + // something wrong in obtaining the text of the pointee type, give up + return std::nullopt; + // FIXME: whether we should create std::span type depends on the + // FixitStrategy. + NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals); + ParmsMask[i] = true; + AtLeastOneParmToFix = true; + } + if (!AtLeastOneParmToFix) + // No need to create function overloads: + return {}; + // FIXME Respect indentation of the original code. 
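+
+  // Example (for illustration): for `void f(int *p, int n)` with `p` chosen
+  // for the Span strategy, the new-overload lambda below produces
+  //   ";\nvoid f(std::span<int> p, int n)"
+  // which is inserted right after each existing declaration of `f`.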
+ + // A lambda that creates the text representation of a function declaration + // with the new type signatures: + const auto NewOverloadSignatureCreator = + [&SM, &LangOpts, &NewTysTexts, + &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { + std::stringstream SS; + + SS << ";"; + SS << getEndOfLine().str(); + // Append: ret-type func-name "(" + if (auto Prefix = getRangeText( + SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()), + SM, LangOpts)) + SS << Prefix->str(); + else + return std::nullopt; // give up + // Append: parameter-type-list + const unsigned NumParms = FD->getNumParams(); + + for (unsigned i = 0; i < NumParms; i++) { + const ParmVarDecl *Parm = FD->getParamDecl(i); + + if (Parm->isImplicit()) + continue; + if (ParmsMask[i]) { + // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its + // new type: + SS << NewTysTexts[i]; + // print parameter name if provided: + if (IdentifierInfo *II = Parm->getIdentifier()) + SS << ' ' << II->getName().str(); + } else if (auto ParmTypeText = + getRangeText(getSourceRangeToTokenEnd(Parm, SM, LangOpts), + SM, LangOpts)) { + // print the whole `Parm` without modification: + SS << ParmTypeText->str(); + } else + return std::nullopt; // something wrong, give up + if (i != NumParms - 1) + SS << ", "; + } + SS << ")"; + return SS.str(); + }; + + // A lambda that creates the text representation of a function definition with + // the original signature: + const auto OldOverloadDefCreator = + [&Handler, &SM, &LangOpts, &NewTysTexts, + &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { + std::stringstream SS; + + SS << getEndOfLine().str(); + // Append: attr-name ret-type func-name "(" param-list ")" "{" + if (auto FDPrefix = getRangeText( + SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM, + LangOpts)) + SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ") + << FDPrefix->str() << "{"; + else + return std::nullopt; + // Append: "return" func-name "(" + if (auto FunQualName = getFunNameText(FD, SM, LangOpts)) + SS << "return " << FunQualName->str() << "("; + else + return std::nullopt; + + // Append: arg-list + const unsigned NumParms = FD->getNumParams(); + for (unsigned i = 0; i < NumParms; i++) { + const ParmVarDecl *Parm = FD->getParamDecl(i); + + if (Parm->isImplicit()) + continue; + // FIXME: If a parameter has no name, it is unused in the + // definition. So we could just leave it as it is. + if (!Parm->getIdentifier()) + // If a parameter of a function definition has no name: + return std::nullopt; + if (ParmsMask[i]) + // This is our spanified paramter! + SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str() + << ", " << getUserFillPlaceHolder("size") << ")"; + else + SS << Parm->getIdentifier()->getName().str(); + if (i != NumParms - 1) + SS << ", "; + } + // finish call and the body + SS << ");}" << getEndOfLine().str(); + // FIXME: 80-char line formatting? 
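+    // Example (for illustration): for a definition `void f(int *p) { ... }`
+    // this yields roughly
+    //   "\n[[clang::unsafe_buffer_usage]] void f(int *p)
+    //    {return f(std::span<int>(p, <# size #>));}\n"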
+ return SS.str(); + }; + + FixItList FixIts{}; + for (FunctionDecl *FReDecl : FD->redecls()) { + std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts); + + if (!Loc) + return {}; + if (FReDecl->isThisDeclarationADefinition()) { + assert(FReDecl == FD && "inconsistent function definition"); + // Inserts a definition with the old signature to the end of + // `FReDecl`: + if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl)) + FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef)); + else + return {}; // give up + } else { + // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`: + if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) { + FixIts.emplace_back(FixItHint::CreateInsertion( + FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt( + FReDecl->getBeginLoc(), " "))); + } + // Inserts a declaration with the new signature to the end of `FReDecl`: + if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl)) + FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl)); + else + return {}; + } + } + return FixIts; +} + +// To fix a `ParmVarDecl` to be of `std::span` type. +static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) { + DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)"); + return {}; + } + if (PVD->hasDefaultArg()) { + // FIXME: generate fix-its for default values: + DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg"); + return {}; + } + + std::optional<Qualifiers> PteTyQualifiers = std::nullopt; + std::optional<std::string> PteTyText = getPointeeTypeText( + PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); + + if (!PteTyText) { + DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type"); + return {}; + } + + std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName(); + + if (!PVDNameText) { + DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name"); + return {}; + } + + std::stringstream SS; + std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx); + + if (PteTyQualifiers) + // Append qualifiers if they exist: + SS << getSpanTypeText(*PteTyText, PteTyQualifiers); + else + SS << getSpanTypeText(*PteTyText); + // Append qualifiers to the type of the parameter: + if (PVD->getType().hasQualifiers()) + SS << ' ' << PVD->getType().getQualifiers().getAsString(); + // Append parameter's name: + SS << ' ' << PVDNameText->str(); + // Add replacement fix-it: + return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())}; +} + +static FixItList fixVariableWithSpan(const VarDecl *VD, + const DeclUseTracker &Tracker, + ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + const DeclStmt *DS = Tracker.lookupDecl(VD); + if (!DS) { + DEBUG_NOTE_DECL_FAIL(VD, + " : variables declared this way not implemented yet"); + return {}; + } + if (!DS->isSingleDecl()) { + // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` + DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls"); + return {}; + } + // Currently DS is an unused variable but we'll need it when + // non-single decls are implemented, where the pointee type name + // and the '*' are spread around the place. 
+ (void)DS; + + // FIXME: handle cases where DS has multiple declarations + return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); +} + +static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + FixItList FixIts{}; + + // Note: the code below expects the declaration to not use any type sugar like + // typedef. + if (auto CAT = dyn_cast<clang::ConstantArrayType>(D->getType())) { + const QualType &ArrayEltT = CAT->getElementType(); + assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); + // FIXME: support multi-dimensional arrays + if (isa<clang::ArrayType>(ArrayEltT.getCanonicalType())) + return {}; + + const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D); + + // Get the spelling of the element type as written in the source file + // (including macros, etc.). + auto MaybeElemTypeTxt = + getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(), + Ctx.getLangOpts()); + if (!MaybeElemTypeTxt) + return {}; + const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim(); + + // Find the '[' token. + std::optional<Token> NextTok = Lexer::findNextToken( + IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts()); + while (NextTok && !NextTok->is(tok::l_square) && + NextTok->getLocation() <= D->getSourceRange().getEnd()) + NextTok = Lexer::findNextToken(NextTok->getLocation(), + Ctx.getSourceManager(), Ctx.getLangOpts()); + if (!NextTok) + return {}; + const SourceLocation LSqBracketLoc = NextTok->getLocation(); + + // Get the spelling of the array size as written in the source file + // (including macros, etc.). + auto MaybeArraySizeTxt = getRangeText( + {LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()}, + Ctx.getSourceManager(), Ctx.getLangOpts()); + if (!MaybeArraySizeTxt) + return {}; + const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim(); + if (ArraySizeTxt.empty()) { + // FIXME: Support array size getting determined from the initializer. + // Examples: + // int arr1[] = {0, 1, 2}; + // int arr2{3, 4, 5}; + // We might be able to preserve the non-specified size with `auto` and + // `std::to_array`: + // auto arr1 = std::to_array<int>({0, 1, 2}); + return {}; + } + + std::optional<StringRef> IdentText = + getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); + + if (!IdentText) { + DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); + return {}; + } + + SmallString<32> Replacement; + raw_svector_ostream OS(Replacement); + OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> " + << IdentText->str(); + + FixIts.push_back(FixItHint::CreateReplacement( + SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, OS.str())); + } + + return FixIts; +} + +static FixItList fixVariableWithArray(const VarDecl *VD, + const DeclUseTracker &Tracker, + const ASTContext &Ctx, + UnsafeBufferUsageHandler &Handler) { + const DeclStmt *DS = Tracker.lookupDecl(VD); + assert(DS && "Fixing non-local variables not implemented yet!"); + if (!DS->isSingleDecl()) { + // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` + return {}; + } + // Currently DS is an unused variable but we'll need it when + // non-single decls are implemented, where the pointee type name + // and the '*' are spread around the place. 
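+  // Example (for illustration): with the Array strategy, `int buf[10];` is
+  // rewritten by the fixVarDeclWithArray call below to
+  // `std::array<int, 10> buf;`, keeping the element type and size exactly as
+  // spelled in the source.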
+  (void)DS;
+
+  // FIXME: handle cases where DS has multiple declarations
+  return fixVarDeclWithArray(VD, Ctx, Handler);
+}
+
+// TODO: we should consistently use `std::nullopt` to represent no-fix due
+// to any unexpected problem.
+static FixItList
+fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
+            /* The function decl under analysis */ const Decl *D,
+            const DeclUseTracker &Tracker, ASTContext &Ctx,
+            UnsafeBufferUsageHandler &Handler) {
+  if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) {
+    auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext());
+    if (!FD || FD != D) {
+      // `FD != D` means that `PVD` belongs to a function that is not being
+      // analyzed currently. Thus `FD` may not be complete.
+      DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
+      return {};
+    }
+
+    // TODO: If the function has a try block we can't change params unless we
+    // also check its catch block for their use.
+    // FIXME: We might support static class methods, some select methods,
+    // operators and possibly lambdas.
+    if (FD->isMain() || FD->isConstexpr() ||
+        FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
+        FD->isVariadic() ||
+        // also covers the call-operator of lambdas
+        isa<CXXMethodDecl>(FD) ||
+        // skip when the function body is a try-block
+        (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) ||
+        FD->isOverloadedOperator()) {
+      DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
+      return {}; // TODO test all these cases
+    }
+  }
+
+  switch (K) {
+  case FixitStrategy::Kind::Span: {
+    if (VD->getType()->isPointerType()) {
+      if (const auto *PVD = dyn_cast<ParmVarDecl>(VD))
+        return fixParamWithSpan(PVD, Ctx, Handler);
+
+      if (VD->isLocalVarDecl())
+        return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
+    }
+    DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
+    return {};
+  }
+  case FixitStrategy::Kind::Array: {
+    if (VD->isLocalVarDecl() &&
+        isa<clang::ConstantArrayType>(VD->getType().getCanonicalType()))
+      return fixVariableWithArray(VD, Tracker, Ctx, Handler);
+
+    DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array");
+    return {};
+  }
+  case FixitStrategy::Kind::Iterator:
+  case FixitStrategy::Kind::Vector:
+    llvm_unreachable("FixitStrategy not implemented yet!");
+  case FixitStrategy::Kind::Wontfix:
+    llvm_unreachable("Invalid strategy!");
+  }
+  llvm_unreachable("Unknown strategy!");
+}
+
+// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
+// `RemoveRange` of 'h' overlaps with a macro use.
+static bool overlapWithMacro(const FixItList &FixIts) {
+  // FIXME: For now we only check if the range (or the first token) is (part
+  // of) a macro expansion. Ideally, we want to check for all tokens in the
+  // range.
+  return llvm::any_of(FixIts, [](const FixItHint &Hint) {
+    auto Range = Hint.RemoveRange;
+    if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
+      // If the range (or the first token) is (part of) a macro expansion:
+      return true;
+    return false;
+  });
+}
+
+// Returns true iff `VD` is a parameter of the declaration `D`:
+static bool isParameterOf(const VarDecl *VD, const Decl *D) {
+  return isa<ParmVarDecl>(VD) &&
+         VD->getDeclContext() == dyn_cast<DeclContext>(D);
+}
+
+// Erases variables in `FixItsForVariable`, if such a variable has an unfixable
+// group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
+// contain `v`.
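+// Example (for illustration): if `p` and `q` ended up in one group (say, via
+// a `p = q` assignment) and only `p` received fix-its, `p`'s fix-its are
+// dropped here too, since fixing `p` alone would leave `p = q` ill-typed.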
+// Erases variables from `FixItsForVariable` if they have an unfixable group
+// mate. A variable `v` is unfixable iff `FixItsForVariable` does not
+// contain `v`.
+static void eraseVarsForUnfixableGroupMates(
+ std::map<const VarDecl *, FixItList> &FixItsForVariable,
+ const VariableGroupsManager &VarGrpMgr) {
+ // Variables will be removed from `FixItsForVariable`:
+ SmallVector<const VarDecl *, 8> ToErase;
+
+ for (const auto &[VD, Ignore] : FixItsForVariable) {
+ VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD);
+ if (llvm::any_of(Grp,
+ [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
+ return !FixItsForVariable.count(GrpMember);
+ })) {
+ // At least one group member cannot be fixed, so we have to erase the
+ // whole group:
+ for (const VarDecl *Member : Grp)
+ ToErase.push_back(Member);
+ }
+ }
+ for (auto *VarToErase : ToErase)
+ FixItsForVariable.erase(VarToErase);
+}
+
+// Returns the fix-its that create bounds-safe function overloads for the
+// function `D`, if `D`'s parameters will be changed to safe types through
+// fix-its in `FixItsForVariable`.
+//
+// NOTE: In case `D`'s parameters will be changed but bounds-safe function
+// overloads cannot be created, the whole group that contains the parameters
+// will be erased from `FixItsForVariable`.
+static FixItList createFunctionOverloadsForParms(
+ std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
+ const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
+ const FixitStrategy &S, ASTContext &Ctx,
+ UnsafeBufferUsageHandler &Handler) {
+ FixItList FixItsSharedByParms{};
+
+ std::optional<FixItList> OverloadFixes =
+ createOverloadsForFixedParams(S, FD, Ctx, Handler);
+
+ if (OverloadFixes) {
+ FixItsSharedByParms.append(*OverloadFixes);
+ } else {
+ // Something went wrong while generating `OverloadFixes`; we need to
+ // remove the whole group containing the parameters from
+ // `FixItsForVariable`. (Note that all parameters should be in the same
+ // group.)
+ for (auto *Member : VarGrpMgr.getGroupOfParms())
+ FixItsForVariable.erase(Member);
+ }
+ return FixItsSharedByParms;
+}
+
+// Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
+static std::map<const VarDecl *, FixItList>
+getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S,
+ ASTContext &Ctx,
+ /* The function decl under analysis */ const Decl *D,
+ const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
+ const VariableGroupsManager &VarGrpMgr) {
+ // `FixItsForVariable` will map each variable to a set of fix-its directly
+ // associated with the variable itself. Fix-its of distinct variables in
+ // `FixItsForVariable` are disjoint.
+ std::map<const VarDecl *, FixItList> FixItsForVariable;
+
+ // Populate `FixItsForVariable` with fix-its directly associated with each
+ // variable. Fix-its directly associated with a variable 'v' are the ones
+ // produced by the `FixableGadget`s whose claimed variable is 'v'.
+ for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
+ FixItsForVariable[VD] =
+ fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler);
+ // If we fail to produce a Fix-It for the declaration, we have to skip
+ // the variable entirely.
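+ // (Hypothetical instance: for a parameter of a `constexpr` function,
+ // `fixVariable` above returns an empty list, so the variable is dropped
+ // here before any gadget fix-its are even considered.)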
+ if (FixItsForVariable[VD].empty()) {
+ FixItsForVariable.erase(VD);
+ continue;
+ }
+ for (const auto &F : Fixables) {
+ std::optional<FixItList> Fixits = F->getFixits(S);
+
+ if (Fixits) {
+ FixItsForVariable[VD].insert(FixItsForVariable[VD].end(),
+ Fixits->begin(), Fixits->end());
+ continue;
+ }
+#ifndef NDEBUG
+ Handler.addDebugNoteForVar(
+ VD, F->getSourceLoc(),
+ ("gadget '" + F->getDebugName() + "' refused to produce a fix")
+ .str());
+#endif
+ FixItsForVariable.erase(VD);
+ break;
+ }
+ }
+
+ // `FixItsForVariable` now contains only variables that can be fixed. A
+ // variable can be fixed if its declaration and all Fixables associated
+ // with it can be fixed.
+
+ // Further remove from `FixItsForVariable` those variables whose group
+ // mates cannot be fixed:
+ eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
+ // Now `FixItsForVariable` is further reduced: a variable is in
+ // `FixItsForVariable` iff it can be fixed and all its group mates can be
+ // fixed.
+
+ // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
+ // That is, when fixing multiple parameters in one step, these fix-its will
+ // be applied only once (instead of being applied per parameter).
+ FixItList FixItsSharedByParms{};
+
+ if (auto *FD = dyn_cast<FunctionDecl>(D))
+ FixItsSharedByParms = createFunctionOverloadsForParms(
+ FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
+
+ // The map that maps each variable `v` to the fix-its for the whole group
+ // that `v` is in:
+ std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
+ FixItsForVariable};
+
+ for (auto &[Var, Ignore] : FixItsForVariable) {
+ bool AnyParm = false;
+ const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm);
+
+ for (const VarDecl *GrpMate : VarGroupForVD) {
+ if (Var == GrpMate)
+ continue;
+ if (FixItsForVariable.count(GrpMate))
+ FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]);
+ }
+ if (AnyParm) {
+ // This assertion should never fail. Otherwise we have a bug.
+ assert(!FixItsSharedByParms.empty() &&
+ "Should not try to fix a parameter that does not belong to a "
+ "FunctionDecl");
+ FinalFixItsForVariable[Var].append(FixItsSharedByParms);
+ }
+ }
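+ // (A sketch of the result, with hypothetical variables: if local pointers
+ // `p` and `q` form one group, `FinalFixItsForVariable[p]` now carries both
+ // declaration rewrites; if the group contains a parameter, one shared copy
+ // of the overload fix-its is appended as well.)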
+ // Fix-its that will be applied in one step shall NOT:
+ // 1. overlap with macros and/or templates; or
+ // 2. conflict with each other.
+ // Otherwise, the fix-its will be dropped.
+ for (auto Iter = FinalFixItsForVariable.begin();
+ Iter != FinalFixItsForVariable.end();)
+ if (overlapWithMacro(Iter->second) ||
+ clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) {
+ Iter = FinalFixItsForVariable.erase(Iter);
+ } else
+ ++Iter;
+ return FinalFixItsForVariable;
+}
+
+template <typename VarDeclIterTy>
+static FixitStrategy
+getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
+ FixitStrategy S;
+ for (const VarDecl *VD : UnsafeVars) {
+ if (isa<ConstantArrayType>(VD->getType().getCanonicalType()))
+ S.set(VD, FixitStrategy::Kind::Array);
+ else
+ S.set(VD, FixitStrategy::Kind::Span);
+ }
+ return S;
+}
+
+// Manages variable groups:
+class VariableGroupsManagerImpl : public VariableGroupsManager {
+ const std::vector<VarGrpTy> Groups;
+ const std::map<const VarDecl *, unsigned> &VarGrpMap;
+ const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
+
+public:
+ VariableGroupsManagerImpl(
+ const std::vector<VarGrpTy> &Groups,
+ const std::map<const VarDecl *, unsigned> &VarGrpMap,
+ const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
+ : Groups(Groups), VarGrpMap(VarGrpMap),
+ GrpsUnionForParms(GrpsUnionForParms) {}
+
+ VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
+ if (GrpsUnionForParms.contains(Var)) {
+ if (HasParm)
+ *HasParm = true;
+ return GrpsUnionForParms.getArrayRef();
+ }
+ if (HasParm)
+ *HasParm = false;
+
+ auto It = VarGrpMap.find(Var);
+
+ if (It == VarGrpMap.end())
+ return std::nullopt;
+ return Groups[It->second];
+ }
+
+ VarGrpRef getGroupOfParms() const override {
+ return GrpsUnionForParms.getArrayRef();
+ }
+};
+
+void clang::checkUnsafeBufferUsage(const Decl *D,
+ UnsafeBufferUsageHandler &Handler,
+ bool EmitSuggestions) {
+#ifndef NDEBUG
+ Handler.clearDebugNotes();
+#endif
+
+ assert(D && D->getBody());
+ // We do not want to visit a lambda expression defined inside a method
+ // independently. Instead, it should be visited along with the outer method.
+ // FIXME: do we want to do the same thing for `BlockDecl`s?
+ if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) {
+ if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass())
+ return;
+ }
+
+ // Do not emit fixit suggestions for functions declared in an
+ // extern "C" block.
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+ for (FunctionDecl *FReDecl : FD->redecls()) {
+ if (FReDecl->isExternC()) {
+ EmitSuggestions = false;
+ break;
+ }
+ }
+ }
+
+ WarningGadgetSets UnsafeOps;
+ FixableGadgetSets FixablesForAllVars;
+
+ auto [FixableGadgets, WarningGadgets, Tracker] =
+ findGadgets(D, Handler, EmitSuggestions);
+
+ if (!EmitSuggestions) {
+ // Our job is very easy without suggestions. Just warn about
+ // every problematic operation and consider it done. No need to deal
+ // with fixable gadgets, no need to group operations by variable.
+ for (const auto &G : WarningGadgets) {
+ G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
+ D->getASTContext());
+ }
+
+ // This return guarantees that most of the machine doesn't run when
+ // suggestions aren't requested.
+ assert(FixableGadgets.size() == 0 &&
+ "Fixable gadgets found but suggestions not requested!");
+ return;
+ }
+
+ // If no `WarningGadget`s ever matched, there are no unsafe operations in
+ // the function under analysis. No need to fix any Fixables.
+ if (!WarningGadgets.empty()) {
+ // Gadgets "claim" variables they're responsible for. Once this loop
+ // finishes, the tracker will only track DREs that weren't claimed by any
+ // gadgets, i.e. not understood by the analysis.
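+ // (Hypothetical instance: for `int *q = p;`, the fixable gadget matching
+ // the initialization claims the `DeclRefExpr` of `p`; any use left
+ // unclaimed after this loop marks its variable as not understood.)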
+ for (const auto &G : FixableGadgets) {
+ for (const auto *DRE : G->getClaimedVarUseSites()) {
+ Tracker.claimUse(DRE);
+ }
+ }
+ }
+
+ // If no `WarningGadget`s ever matched, there are no unsafe operations in
+ // the function under analysis. Thus, we return early here, as there is
+ // nothing to fix.
+ //
+ // Note this claim is based on the assumption that there is no unsafe
+ // variable whose declaration is invisible to the function being analyzed.
+ // Otherwise, we would need to consider whether the uses of those unsafe
+ // variables need fixing.
+ // So far, we are not fixing any global variables or class members, and
+ // lambdas are analyzed along with the enclosing function. So this early
+ // return is correct for now.
+ if (WarningGadgets.empty())
+ return;
+
+ UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets));
+ FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets));
+
+ std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
+
+ // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
+ for (auto it = FixablesForAllVars.byVar.cbegin();
+ it != FixablesForAllVars.byVar.cend();) {
+ // FIXME: need to deal with global variables later
+ if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) {
+#ifndef NDEBUG
+ Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
+ ("failed to produce fixit for '" +
+ it->first->getNameAsString() +
+ "' : neither local nor a parameter"));
+#endif
+ it = FixablesForAllVars.byVar.erase(it);
+ } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
+#ifndef NDEBUG
+ Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
+ ("failed to produce fixit for '" +
+ it->first->getNameAsString() +
+ "' : has a reference type"));
+#endif
+ it = FixablesForAllVars.byVar.erase(it);
+ } else if (Tracker.hasUnclaimedUses(it->first)) {
+ it = FixablesForAllVars.byVar.erase(it);
+ } else if (it->first->isInitCapture()) {
+#ifndef NDEBUG
+ Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
+ ("failed to produce fixit for '" +
+ it->first->getNameAsString() +
+ "' : init capture"));
+#endif
+ it = FixablesForAllVars.byVar.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
+#ifndef NDEBUG
+ for (const auto &it : UnsafeOps.byVar) {
+ const VarDecl *const UnsafeVD = it.first;
+ auto UnclaimedDREs = Tracker.getUnclaimedUses(UnsafeVD);
+ if (UnclaimedDREs.empty())
+ continue;
+ const auto UnfixedVDName = UnsafeVD->getNameAsString();
+ for (const clang::DeclRefExpr *UnclaimedDRE : UnclaimedDREs) {
+ std::string UnclaimedUseTrace =
+ getDREAncestorString(UnclaimedDRE, D->getASTContext());
+
+ Handler.addDebugNoteForVar(
+ UnsafeVD, UnclaimedDRE->getBeginLoc(),
+ ("failed to produce fixit for '" + UnfixedVDName +
+ "' : has an unclaimed use\nThe unclaimed DRE trace: " +
+ UnclaimedUseTrace));
+ }
+ }
+#endif
+
+ // Fixpoint iteration for pointer assignments
+ using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
+ DepMapTy DependenciesMap{};
+ DepMapTy PtrAssignmentGraph{};
+
+ for (auto it : FixablesForAllVars.byVar) {
+ for (const FixableGadget *fixable : it.second) {
+ std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
+ fixable->getStrategyImplications();
+ if (ImplPair) {
+ std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
+ PtrAssignmentGraph[Impl.first].insert(Impl.second);
+ }
+ }
+ }
+
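+ // (A sketch with hypothetical variables: a fixable assignment such as
+ // `p = q;` between two local pointers yields one implication pair,
+ // recorded above as a directed edge; the traversal below then places `p`
+ // and `q` in the same dependency group.)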
+ /*
+ The following code does a BFS traversal of the `PtrAssignmentGraph`,
+ considering all unsafe vars as starting nodes, and constructs an undirected
+ graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
+ eliminates all variables that are unreachable from any unsafe var. In other
+ words, this removes all dependencies that don't include any unsafe variable
+ and consequently don't need any fixit generation.
+ Note: A careful reader would observe that the code traverses
+ `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
+ `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
+ achieve the same result, but the one used here dramatically cuts the
+ number of hoops the second part of the algorithm needs to jump through,
+ given that a lot of these connections become "direct". The reader is
+ advised not to imagine how the graph is transformed because of using `Var`
+ instead of `CurrentVar`. The reader can continue reading as if `CurrentVar`
+ was used, and think about why it's equivalent later.
+ */
+ std::set<const VarDecl *> VisitedVarsDirected{};
+ for (const auto &[Var, ignore] : UnsafeOps.byVar) {
+ if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) {
+
+ std::queue<const VarDecl *> QueueDirected{};
+ QueueDirected.push(Var);
+ while (!QueueDirected.empty()) {
+ const VarDecl *CurrentVar = QueueDirected.front();
+ QueueDirected.pop();
+ VisitedVarsDirected.insert(CurrentVar);
+ auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
+ for (const VarDecl *Adj : AdjacentNodes) {
+ if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) {
+ QueueDirected.push(Adj);
+ }
+ DependenciesMap[Var].insert(Adj);
+ DependenciesMap[Adj].insert(Var);
+ }
+ }
+ }
+ }
+
+ // `Groups` stores the set of Connected Components in the graph.
+ std::vector<VarGrpTy> Groups;
+ // `VarGrpMap` maps variables that need fixing to the groups (indexes) that
+ // the variables belong to. Group indexes refer to the elements in `Groups`.
+ // `VarGrpMap` is complete in that every variable that needs fixing is in it.
+ std::map<const VarDecl *, unsigned> VarGrpMap;
+ // The union group over the ones in "Groups" that contain parameters of `D`:
+ llvm::SetVector<const VarDecl *>
+ GrpsUnionForParms; // these variables need to be fixed in one step
+
+ // Group Connected Components for Unsafe Vars
+ // (Dependencies based on pointer assignments)
+ std::set<const VarDecl *> VisitedVars{};
+ for (const auto &[Var, ignore] : UnsafeOps.byVar) {
+ if (VisitedVars.find(Var) == VisitedVars.end()) {
+ VarGrpTy &VarGroup = Groups.emplace_back();
+ std::queue<const VarDecl *> Queue{};
+
+ Queue.push(Var);
+ while (!Queue.empty()) {
+ const VarDecl *CurrentVar = Queue.front();
+ Queue.pop();
+ VisitedVars.insert(CurrentVar);
+ VarGroup.push_back(CurrentVar);
+ auto AdjacentNodes = DependenciesMap[CurrentVar];
+ for (const VarDecl *Adj : AdjacentNodes) {
+ if (VisitedVars.find(Adj) == VisitedVars.end()) {
+ Queue.push(Adj);
+ }
+ }
+ }
+
+ bool HasParm = false;
+ unsigned GrpIdx = Groups.size() - 1;
+
+ for (const VarDecl *V : VarGroup) {
+ VarGrpMap[V] = GrpIdx;
+ if (!HasParm && isParameterOf(V, D))
+ HasParm = true;
+ }
+ if (HasParm)
+ GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end());
+ }
+ }
+
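+ // (A sketch on a hypothetical input: in
+ // `void g(int *p) { int *q = p; q[5] = 0; }`
+ // the unsafe variable `q` and its dependency `p` land in one connected
+ // component, so they share a group; since `p` is a parameter of `g`, the
+ // whole group also joins `GrpsUnionForParms` and is fixed in one step.)
+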
+ // Remove a `FixableGadget` if the associated variable is not in the graph
+ // computed above. We do not want to generate fix-its for such variables,
+ // since they are neither warned nor reachable from a warned one.
+ //
+ // Note a variable is not warned if it is not directly used in any unsafe
+ // operation. A variable `v` is NOT reachable from an unsafe variable if
+ // there does not exist another variable `u` such that `u` is warned and
+ // fixing `u` (transitively) implicates fixing `v`.
+ //
+ // For example,
+ // ```
+ // void f(int * p) {
+ // int * a = p; *p = 0;
+ // }
+ // ```
+ // `*p = 0` is a fixable gadget associated with a variable `p` that is
+ // neither warned nor reachable from a warned one. If we add `a[5] = 0` to
+ // the end of the function above, `p` becomes reachable from a warned
+ // variable.
+ for (auto I = FixablesForAllVars.byVar.begin();
+ I != FixablesForAllVars.byVar.end();) {
+ // Note `VisitedVars` contains all the variables in the graph:
+ if (!VisitedVars.count((*I).first)) {
+ // no such var in graph:
+ I = FixablesForAllVars.byVar.erase(I);
+ } else
+ ++I;
+ }
+
+ // We assign strategies to variables that 1) are in the graph and 2) can be
+ // fixed. Other variables have the default "Won't fix" strategy.
+ FixitStrategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range(
+ VisitedVars, [&FixablesForAllVars](const VarDecl *V) {
+ // If a warned variable has no "Fixable", it is considered unfixable:
+ return FixablesForAllVars.byVar.count(V);
+ }));
+ VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
+
+ if (isa<NamedDecl>(D))
+ // The only case where `D` is not a `NamedDecl` is when `D` is a
+ // `BlockDecl`. Let's not fix variables in blocks for now.
+ FixItsForVariableGroup =
+ getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D,
+ Tracker, Handler, VarGrpMgr);
+
+ for (const auto &G : UnsafeOps.noVar) {
+ G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
+ D->getASTContext());
+ }
+
+ for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
+ auto FixItsIt = FixItsForVariableGroup.find(VD);
+ Handler.handleUnsafeVariableGroup(VD, VarGrpMgr,
+ FixItsIt != FixItsForVariableGroup.end()
+ ? std::move(FixItsIt->second)
+ : FixItList{},
+ D, NaiveStrategy);
+ for (const auto &G : WarningGadgets) {
+ G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/true,
+ D->getASTContext());
+ }
+ }
+}
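+
+// An end-to-end sketch of the pipeline above, on a hypothetical input:
+// ```
+// void f(int *p) { p[5] = 0; int *q = p; *q = 1; }
+// ```
+// The analysis warns on `p[5]`; the fixable gadgets for `int *q = p` and
+// `*q` put `q` in the same group as `p`; with the Span strategy, the handler
+// then receives one fix-it set that rewrites both declarations (plus a
+// bounds-safe overload for `f`) in a single step.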