diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer')
65 files changed, 3751 insertions, 2289 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index 59163c1f31fa..605b11874ef5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -16,7 +16,7 @@ #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" using namespace clang; diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 7c264bba4b6a..2a5fe9d8ed92 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -19,7 +19,7 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" @@ -179,7 +179,7 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so, // we are doing a load/store after the last valid offset. 
const MemRegion *MR = rawOffset.getRegion(); - DefinedOrUnknownSVal Size = getDynamicSize(state, MR, svalBuilder); + DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder); if (!Size.getAs<NonLoc>()) break; diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp index 233ce57c3ac9..13781b336426 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp @@ -16,7 +16,7 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" using namespace clang; using namespace ento; @@ -92,12 +92,8 @@ bool BuiltinFunctionChecker::evalCall(const CallEvent &Call, if (Size.isUndef()) return true; // Return true to model purity. 
- SValBuilder& svalBuilder = C.getSValBuilder(); - DefinedOrUnknownSVal DynSize = getDynamicSize(state, R, svalBuilder); - DefinedOrUnknownSVal DynSizeMatchesSizeArg = - svalBuilder.evalEQ(state, DynSize, Size.castAs<DefinedOrUnknownSVal>()); - state = state->assume(DynSizeMatchesSizeArg, true); - assert(state && "The region should not have any previous constraints"); + state = setDynamicExtent(state, R, Size.castAs<DefinedOrUnknownSVal>(), + C.getSValBuilder()); C.addTransition(state->BindExpr(CE, LCtx, loc::MemRegionVal(R))); return true; diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 30fd62f887c4..69b90be9aa7e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -19,7 +19,7 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -346,7 +346,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, // Get the size of the array. const auto *superReg = cast<SubRegion>(ER->getSuperRegion()); DefinedOrUnknownSVal Size = - getDynamicSize(state, superReg, C.getSValBuilder()); + getDynamicExtent(state, superReg, C.getSValBuilder()); // Get the index of the accessed element. DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); @@ -923,7 +923,7 @@ bool CStringChecker::IsFirstBufInBound(CheckerContext &C, // Get the size of the array. 
const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); - DefinedOrUnknownSVal SizeDV = getDynamicSize(state, superReg, svalBuilder); + DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder); // Get the index of the accessed element. DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); @@ -1060,7 +1060,7 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, if (Offset.isValid() && !Offset.hasSymbolicOffset() && Offset.getOffset() == 0) { // Get the base region's size. - DefinedOrUnknownSVal SizeDV = getDynamicSize(State, BR, svalBuilder); + DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder); ProgramStateRef StateWholeReg, StateNotWholeReg; std::tie(StateWholeReg, StateNotWholeReg) = @@ -2039,7 +2039,7 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, RightStrRef = RightStrRef.substr(0, s2Term); // Use StringRef's comparison methods to compute the actual result. - int compareRes = IgnoreCase ? LeftStrRef.compare_lower(RightStrRef) + int compareRes = IgnoreCase ? 
LeftStrRef.compare_insensitive(RightStrRef) : LeftStrRef.compare(RightStrRef); // The strcmp function returns an integer greater than, equal to, or less diff --git a/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp index a498f252e693..2d2e14de3f2b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp @@ -17,7 +17,7 @@ #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" using namespace clang; using namespace ento; @@ -112,7 +112,7 @@ void CastSizeChecker::checkPreStmt(const CastExpr *CE,CheckerContext &C) const { SValBuilder &svalBuilder = C.getSValBuilder(); - DefinedOrUnknownSVal Size = getDynamicSize(state, SR, svalBuilder); + DefinedOrUnknownSVal Size = getDynamicExtent(state, SR, svalBuilder); const llvm::APSInt *SizeInt = svalBuilder.getKnownValue(state, Size); if (!SizeInt) return; diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp index dc9cd717be9e..99e11a15c08d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp @@ -13,7 +13,7 @@ #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "llvm/Support/FormatVariadic.h" using namespace clang; @@ -64,7 +64,7 @@ private: SVal PlacementNewChecker::getExtentSizeOfPlace(const CXXNewExpr *NE, CheckerContext 
&C) const { const Expr *Place = NE->getPlacementArg(0); - return getDynamicSizeWithOffset(C.getState(), C.getSVal(Place)); + return getDynamicExtentWithOffset(C.getState(), C.getSVal(Place)); } SVal PlacementNewChecker::getExtentSizeOfNewTarget(const CXXNewExpr *NE, diff --git a/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp index 73c6517fd0eb..1a7f0d5ab74c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp @@ -763,14 +763,14 @@ bool isBeginCall(const FunctionDecl *Func) { const auto *IdInfo = Func->getIdentifier(); if (!IdInfo) return false; - return IdInfo->getName().endswith_lower("begin"); + return IdInfo->getName().endswith_insensitive("begin"); } bool isEndCall(const FunctionDecl *Func) { const auto *IdInfo = Func->getIdentifier(); if (!IdInfo) return false; - return IdInfo->getName().endswith_lower("end"); + return IdInfo->getName().endswith_insensitive("end"); } const CXXRecordDecl *getCXXRecordDecl(ProgramStateRef State, diff --git a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp index 6bc186aa2755..8070d869f678 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp @@ -11,17 +11,18 @@ // //===----------------------------------------------------------------------===// -#include "clang/Lex/Lexer.h" -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/ParentMap.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Lex/Lexer.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include 
"clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/SaveAndRestore.h" @@ -260,8 +261,8 @@ public: break; } - BR.EmitBasicReport(AC->getDecl(), Checker, BugType, "Dead store", os.str(), - L, R, Fixits); + BR.EmitBasicReport(AC->getDecl(), Checker, BugType, categories::UnusedCode, + os.str(), L, R, Fixits); } void CheckVarDecl(const VarDecl *VD, const Expr *Ex, const Expr *Val, @@ -408,15 +409,17 @@ public: // Special case: check for initializations with constants. // // e.g. : int x = 0; + // struct A = {0, 1}; + // struct B = {{0}, {1, 2}}; // // If x is EVER assigned a new value later, don't issue // a warning. This is because such initialization can be // due to defensive programming. - if (E->isEvaluatable(Ctx)) + if (isConstant(E)) return; if (const DeclRefExpr *DRE = - dyn_cast<DeclRefExpr>(E->IgnoreParenCasts())) + dyn_cast<DeclRefExpr>(E->IgnoreParenCasts())) if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) { // Special case: check for initialization from constant // variables. @@ -444,6 +447,29 @@ public: } } } + +private: + /// Return true if the given init list can be interpreted as constant + bool isConstant(const InitListExpr *Candidate) const { + // We consider init list to be constant if each member of the list can be + // interpreted as constant. + return llvm::all_of(Candidate->inits(), + [this](const Expr *Init) { return isConstant(Init); }); + } + + /// Return true if the given expression can be interpreted as constant + bool isConstant(const Expr *E) const { + // It looks like E itself is a constant + if (E->isEvaluatable(Ctx)) + return true; + + // We should also allow defensive initialization of structs, i.e. 
{ 0 } + if (const auto *ILE = dyn_cast<InitListExpr>(E->IgnoreParenCasts())) { + return isConstant(ILE); + } + + return false; + } }; } // end anonymous namespace diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp index adfc2f8cb8fe..4a9c7ce3c66d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -193,7 +193,7 @@ void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, } auto report = std::make_unique<PathSensitiveBugReport>( - *BT, buf.empty() ? BT->getDescription() : StringRef(buf), N); + *BT, buf.empty() ? BT->getDescription() : buf.str(), N); bugreporter::trackExpressionValue(N, bugreporter::getDerefExpr(S), *report); diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp index c0167b53ae26..2ce1bef6d228 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -14,7 +14,7 @@ #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ScopedPrinter.h" @@ -22,8 +22,8 @@ using namespace clang; using namespace ento; namespace { -class ExprInspectionChecker : public Checker<eval::Call, check::DeadSymbols, - check::EndAnalysis> { +class ExprInspectionChecker + : public Checker<eval::Call, check::DeadSymbols, check::EndAnalysis> { mutable std::unique_ptr<BugType> BT; // These stats are per-analysis, not per-branch, hence they shouldn't @@ -44,6 +44,8 @@ class ExprInspectionChecker : public Checker<eval::Call, check::DeadSymbols, 
void analyzerExplain(const CallExpr *CE, CheckerContext &C) const; void analyzerPrintState(const CallExpr *CE, CheckerContext &C) const; void analyzerGetExtent(const CallExpr *CE, CheckerContext &C) const; + void analyzerDumpExtent(const CallExpr *CE, CheckerContext &C) const; + void analyzerDumpElementCount(const CallExpr *CE, CheckerContext &C) const; void analyzerHashDump(const CallExpr *CE, CheckerContext &C) const; void analyzerDenote(const CallExpr *CE, CheckerContext &C) const; void analyzerExpress(const CallExpr *CE, CheckerContext &C) const; @@ -55,17 +57,19 @@ class ExprInspectionChecker : public Checker<eval::Call, check::DeadSymbols, // Optional parameter `ExprVal` for expression value to be marked interesting. ExplodedNode *reportBug(llvm::StringRef Msg, CheckerContext &C, Optional<SVal> ExprVal = None) const; - ExplodedNode *reportBug(llvm::StringRef Msg, BugReporter &BR, - ExplodedNode *N, + ExplodedNode *reportBug(llvm::StringRef Msg, BugReporter &BR, ExplodedNode *N, Optional<SVal> ExprVal = None) const; + const Expr *getArgExpr(const CallExpr *CE, CheckerContext &C) const; + const MemRegion *getArgRegion(const CallExpr *CE, CheckerContext &C) const; + public: bool evalCall(const CallEvent &Call, CheckerContext &C) const; void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; void checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, ExprEngine &Eng) const; }; -} +} // namespace REGISTER_SET_WITH_PROGRAMSTATE(MarkedSymbols, SymbolRef) REGISTER_MAP_WITH_PROGRAMSTATE(DenotedSymbols, SymbolRef, const StringLiteral *) @@ -90,6 +94,10 @@ bool ExprInspectionChecker::evalCall(const CallEvent &Call, &ExprInspectionChecker::analyzerWarnOnDeadSymbol) .StartsWith("clang_analyzer_explain", &ExprInspectionChecker::analyzerExplain) + .Case("clang_analyzer_dumpExtent", + &ExprInspectionChecker::analyzerDumpExtent) + .Case("clang_analyzer_dumpElementCount", + &ExprInspectionChecker::analyzerDumpElementCount) .StartsWith("clang_analyzer_dump", 
&ExprInspectionChecker::analyzerDump) .Case("clang_analyzer_getExtent", @@ -131,7 +139,7 @@ static const char *getArgumentValueString(const CallExpr *CE, ProgramStateRef StTrue, StFalse; std::tie(StTrue, StFalse) = - State->assume(AssertionVal.castAs<DefinedOrUnknownSVal>()); + State->assume(AssertionVal.castAs<DefinedOrUnknownSVal>()); if (StTrue) { if (StFalse) @@ -155,8 +163,7 @@ ExplodedNode *ExprInspectionChecker::reportBug(llvm::StringRef Msg, } ExplodedNode *ExprInspectionChecker::reportBug(llvm::StringRef Msg, - BugReporter &BR, - ExplodedNode *N, + BugReporter &BR, ExplodedNode *N, Optional<SVal> ExprVal) const { if (!N) return nullptr; @@ -172,6 +179,30 @@ ExplodedNode *ExprInspectionChecker::reportBug(llvm::StringRef Msg, return N; } +const Expr *ExprInspectionChecker::getArgExpr(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() == 0) { + reportBug("Missing argument", C); + return nullptr; + } + return CE->getArg(0); +} + +const MemRegion *ExprInspectionChecker::getArgRegion(const CallExpr *CE, + CheckerContext &C) const { + const Expr *Arg = getArgExpr(CE, C); + if (!Arg) + return nullptr; + + const MemRegion *MR = C.getSVal(Arg).getAsRegion(); + if (!MR) { + reportBug("Cannot obtain the region", C); + return nullptr; + } + + return MR; +} + void ExprInspectionChecker::analyzerEval(const CallExpr *CE, CheckerContext &C) const { const LocationContext *LC = C.getPredecessor()->getLocationContext(); @@ -215,24 +246,22 @@ void ExprInspectionChecker::analyzerCheckInlined(const CallExpr *CE, void ExprInspectionChecker::analyzerExplain(const CallExpr *CE, CheckerContext &C) const { - if (CE->getNumArgs() == 0) { - reportBug("Missing argument for explaining", C); + const Expr *Arg = getArgExpr(CE, C); + if (!Arg) return; - } - SVal V = C.getSVal(CE->getArg(0)); + SVal V = C.getSVal(Arg); SValExplainer Ex(C.getASTContext()); reportBug(Ex.Visit(V), C); } void ExprInspectionChecker::analyzerDump(const CallExpr *CE, CheckerContext &C) const { 
- if (CE->getNumArgs() == 0) { - reportBug("Missing argument for dumping", C); + const Expr *Arg = getArgExpr(CE, C); + if (!Arg) return; - } - SVal V = C.getSVal(CE->getArg(0)); + SVal V = C.getSVal(Arg); llvm::SmallString<32> Str; llvm::raw_svector_ostream OS(Str); @@ -242,24 +271,57 @@ void ExprInspectionChecker::analyzerDump(const CallExpr *CE, void ExprInspectionChecker::analyzerGetExtent(const CallExpr *CE, CheckerContext &C) const { - if (CE->getNumArgs() == 0) { - reportBug("Missing region for obtaining extent", C); + const MemRegion *MR = getArgRegion(CE, C); + if (!MR) return; - } - - auto MR = dyn_cast_or_null<SubRegion>(C.getSVal(CE->getArg(0)).getAsRegion()); - if (!MR) { - reportBug("Obtaining extent of a non-region", C); - return; - } ProgramStateRef State = C.getState(); - DefinedOrUnknownSVal Size = getDynamicSize(State, MR, C.getSValBuilder()); + DefinedOrUnknownSVal Size = getDynamicExtent(State, MR, C.getSValBuilder()); State = State->BindExpr(CE, C.getLocationContext(), Size); C.addTransition(State); } +void ExprInspectionChecker::analyzerDumpExtent(const CallExpr *CE, + CheckerContext &C) const { + const MemRegion *MR = getArgRegion(CE, C); + if (!MR) + return; + + DefinedOrUnknownSVal Size = + getDynamicExtent(C.getState(), MR, C.getSValBuilder()); + + SmallString<64> Msg; + llvm::raw_svector_ostream Out(Msg); + Out << Size; + reportBug(Out.str(), C); +} + +void ExprInspectionChecker::analyzerDumpElementCount(const CallExpr *CE, + CheckerContext &C) const { + const MemRegion *MR = getArgRegion(CE, C); + if (!MR) + return; + + QualType ElementTy; + if (const auto *TVR = MR->getAs<TypedValueRegion>()) { + ElementTy = TVR->getValueType(); + } else { + ElementTy = + MR->castAs<SymbolicRegion>()->getSymbol()->getType()->getPointeeType(); + } + + assert(!ElementTy->isPointerType()); + + DefinedOrUnknownSVal ElementCount = + getDynamicElementCount(C.getState(), MR, C.getSValBuilder(), ElementTy); + + SmallString<128> Msg; + llvm::raw_svector_ostream 
Out(Msg); + Out << ElementCount; + reportBug(Out.str(), C); +} + void ExprInspectionChecker::analyzerPrintState(const CallExpr *CE, CheckerContext &C) const { C.getState()->dump(); @@ -267,9 +329,11 @@ void ExprInspectionChecker::analyzerPrintState(const CallExpr *CE, void ExprInspectionChecker::analyzerWarnOnDeadSymbol(const CallExpr *CE, CheckerContext &C) const { - if (CE->getNumArgs() == 0) + const Expr *Arg = getArgExpr(CE, C); + if (!Arg) return; - SVal Val = C.getSVal(CE->getArg(0)); + + SVal Val = C.getSVal(Arg); SymbolRef Sym = Val.getAsSymbol(); if (!Sym) return; @@ -306,7 +370,7 @@ void ExprInspectionChecker::checkDeadSymbols(SymbolReaper &SymReaper, void ExprInspectionChecker::checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, ExprEngine &Eng) const { - for (auto Item: ReachedStats) { + for (auto Item : ReachedStats) { unsigned NumTimesReached = Item.second.NumTimesReached; ExplodedNode *N = Item.second.ExampleNode; @@ -373,9 +437,7 @@ public: return None; } - Optional<std::string> VisitSymExpr(const SymExpr *S) { - return lookup(S); - } + Optional<std::string> VisitSymExpr(const SymExpr *S) { return lookup(S); } Optional<std::string> VisitSymIntExpr(const SymIntExpr *S) { if (Optional<std::string> Str = lookup(S)) @@ -394,7 +456,8 @@ public: if (Optional<std::string> Str1 = Visit(S->getLHS())) if (Optional<std::string> Str2 = Visit(S->getRHS())) return (*Str1 + " " + BinaryOperator::getOpcodeStr(S->getOpcode()) + - " " + *Str2).str(); + " " + *Str2) + .str(); return None; } @@ -410,10 +473,9 @@ public: void ExprInspectionChecker::analyzerExpress(const CallExpr *CE, CheckerContext &C) const { - if (CE->getNumArgs() == 0) { - reportBug("clang_analyzer_express() requires a symbol", C); + const Expr *Arg = getArgExpr(CE, C); + if (!Arg) return; - } SVal ArgVal = C.getSVal(CE->getArg(0)); SymbolRef Sym = ArgVal.getAsSymbol(); diff --git a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp index 63fbe75fd498..8e02ef74c668 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp @@ -80,7 +80,7 @@ static bool isTest(const Decl *D) { if (const auto *CD = dyn_cast<ObjCContainerDecl>(OD->getParent())) { std::string ContainerName = CD->getNameAsString(); StringRef CN(ContainerName); - if (CN.contains_lower("test") || CN.contains_lower("mock")) + if (CN.contains_insensitive("test") || CN.contains_insensitive("mock")) return true; } } diff --git a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp index 65e52e139ee4..bcae73378028 100644 --- a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp @@ -34,9 +34,9 @@ namespace { class InnerPointerChecker : public Checker<check::DeadSymbols, check::PostCall> { - CallDescription AppendFn, AssignFn, ClearFn, CStrFn, DataFn, EraseFn, - InsertFn, PopBackFn, PushBackFn, ReplaceFn, ReserveFn, ResizeFn, - ShrinkToFitFn, SwapFn; + CallDescription AppendFn, AssignFn, AddressofFn, ClearFn, CStrFn, DataFn, + DataMemberFn, EraseFn, InsertFn, PopBackFn, PushBackFn, ReplaceFn, + ReserveFn, ResizeFn, ShrinkToFitFn, SwapFn; public: class InnerPointerBRVisitor : public BugReporterVisitor { @@ -73,9 +73,10 @@ public: InnerPointerChecker() : AppendFn({"std", "basic_string", "append"}), AssignFn({"std", "basic_string", "assign"}), + AddressofFn({"std", "addressof"}), ClearFn({"std", "basic_string", "clear"}), - CStrFn({"std", "basic_string", "c_str"}), - DataFn({"std", "basic_string", "data"}), + CStrFn({"std", "basic_string", "c_str"}), DataFn({"std", "data"}, 1), + DataMemberFn({"std", "basic_string", "data"}), EraseFn({"std", "basic_string", "erase"}), InsertFn({"std", "basic_string", "insert"}), PopBackFn({"std", "basic_string", "pop_back"}), @@ -90,6 
+91,9 @@ public: /// pointers referring to the container object's inner buffer. bool isInvalidatingMemberFunction(const CallEvent &Call) const; + /// Check whether the called function returns a raw inner pointer. + bool isInnerPointerAccessFunction(const CallEvent &Call) const; + /// Mark pointer symbols associated with the given memory region released /// in the program state. void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, @@ -130,6 +134,12 @@ bool InnerPointerChecker::isInvalidatingMemberFunction( Call.isCalled(SwapFn)); } +bool InnerPointerChecker::isInnerPointerAccessFunction( + const CallEvent &Call) const { + return (Call.isCalled(CStrFn) || Call.isCalled(DataFn) || + Call.isCalled(DataMemberFn)); +} + void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, const MemRegion *MR, @@ -172,6 +182,11 @@ void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, if (!ArgRegion) continue; + // std::addressof function accepts a non-const reference as an argument, + // but doesn't modify it. + if (Call.isCalled(AddressofFn)) + continue; + markPtrSymbolsReleased(Call, State, ArgRegion, C); } } @@ -195,36 +210,49 @@ void InnerPointerChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); + // TODO: Do we need these to be typed? + const TypedValueRegion *ObjRegion = nullptr; + if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) { - // TODO: Do we need these to be typed? 
- const auto *ObjRegion = dyn_cast_or_null<TypedValueRegion>( + ObjRegion = dyn_cast_or_null<TypedValueRegion>( ICall->getCXXThisVal().getAsRegion()); - if (!ObjRegion) - return; - if (Call.isCalled(CStrFn) || Call.isCalled(DataFn)) { - SVal RawPtr = Call.getReturnValue(); - if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { - // Start tracking this raw pointer by adding it to the set of symbols - // associated with this container object in the program state map. + // Check [string.require] / second point. + if (isInvalidatingMemberFunction(Call)) { + markPtrSymbolsReleased(Call, State, ObjRegion, C); + return; + } + } - PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); - const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion); - PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); - assert(C.wasInlined || !Set.contains(Sym)); - Set = F.add(Set, Sym); + if (isInnerPointerAccessFunction(Call)) { - State = State->set<RawPtrMap>(ObjRegion, Set); - C.addTransition(State); - } - return; + if (isa<SimpleFunctionCall>(Call)) { + // NOTE: As of now, we only have one free access function: std::data. + // If we add more functions like this in the list, hardcoded + // argument index should be changed. + ObjRegion = + dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion()); } - // Check [string.require] / second point. - if (isInvalidatingMemberFunction(Call)) { - markPtrSymbolsReleased(Call, State, ObjRegion, C); + if (!ObjRegion) return; + + SVal RawPtr = Call.getReturnValue(); + if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { + // Start tracking this raw pointer by adding it to the set of symbols + // associated with this container object in the program state map. + + PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); + const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion); + PtrSet Set = SetPtr ? 
*SetPtr : F.getEmptySet(); + assert(C.wasInlined || !Set.contains(Sym)); + Set = F.add(Set, Sym); + + State = State->set<RawPtrMap>(ObjRegion, Set); + C.addTransition(State); } + + return; } // Check [string.require] / first point. diff --git a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp index ac0f24603dd9..496190149991 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp @@ -29,8 +29,8 @@ bool isIterator(const CXXRecordDecl *CRD) { return false; const auto Name = CRD->getName(); - if (!(Name.endswith_lower("iterator") || Name.endswith_lower("iter") || - Name.endswith_lower("it"))) + if (!(Name.endswith_insensitive("iterator") || + Name.endswith_insensitive("iter") || Name.endswith_insensitive("it"))) return false; bool HasCopyCtor = false, HasCopyAssign = true, HasDtor = false, diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp index dd014648eb6f..a47484497771 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp @@ -228,7 +228,7 @@ void IteratorRangeChecker::verifyRandomIncrOrDecr(CheckerContext &C, Value = State->getRawSVal(*ValAsLoc); } - if (Value.isUnknown()) + if (Value.isUnknownOrUndef()) return; // Incremention or decremention by 0 is never a bug. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp index 837213875a60..b72d72580c28 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp @@ -284,8 +284,9 @@ void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const { N); R->addRange(RS->getSourceRange()); - bugreporter::trackExpressionValue(N, RS->getRetValue(), *R, - bugreporter::TrackingKind::Thorough, false); + bugreporter::trackExpressionValue( + N, RS->getRetValue(), *R, + {bugreporter::TrackingKind::Thorough, /*EnableNullFPSuppression=*/false}); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp index 7ac7a38dacf3..5d6bd381d3cc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp @@ -16,7 +16,7 @@ #include "MPIChecker.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" namespace clang { namespace ento { diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index f117d5505ecb..a6470da09c45 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -63,7 +63,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include 
"clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" @@ -509,10 +509,6 @@ private: ProgramStateRef State, AllocationFamily Family); - LLVM_NODISCARD - static ProgramStateRef addExtentSize(CheckerContext &C, const CXXNewExpr *NE, - ProgramStateRef State, SVal Target); - // Check if this malloc() for special flags. At present that means M_ZERO or // __GFP_ZERO (in which case, treat it like calloc). LLVM_NODISCARD @@ -1424,7 +1420,6 @@ MallocChecker::processNewAllocation(const CXXAllocatorCall &Call, // existing binding. SVal Target = Call.getObjectUnderConstruction(); State = MallocUpdateRefState(C, NE, State, Family, Target); - State = addExtentSize(C, NE, State, Target); State = ProcessZeroAllocCheck(Call, 0, State, Target); return State; } @@ -1439,52 +1434,6 @@ void MallocChecker::checkNewAllocator(const CXXAllocatorCall &Call, } } -// Sets the extent value of the MemRegion allocated by -// new expression NE to its size in Bytes. -// -ProgramStateRef MallocChecker::addExtentSize(CheckerContext &C, - const CXXNewExpr *NE, - ProgramStateRef State, - SVal Target) { - if (!State) - return nullptr; - SValBuilder &svalBuilder = C.getSValBuilder(); - SVal ElementCount; - const SubRegion *Region; - if (NE->isArray()) { - const Expr *SizeExpr = *NE->getArraySize(); - ElementCount = C.getSVal(SizeExpr); - // Store the extent size for the (symbolic)region - // containing the elements. - Region = Target.getAsRegion() - ->castAs<SubRegion>() - ->StripCasts() - ->castAs<SubRegion>(); - } else { - ElementCount = svalBuilder.makeIntVal(1, true); - Region = Target.getAsRegion()->castAs<SubRegion>(); - } - - // Set the region's extent equal to the Size in Bytes. 
- QualType ElementType = NE->getAllocatedType(); - ASTContext &AstContext = C.getASTContext(); - CharUnits TypeSize = AstContext.getTypeSizeInChars(ElementType); - - if (ElementCount.getAs<NonLoc>()) { - DefinedOrUnknownSVal DynSize = getDynamicSize(State, Region, svalBuilder); - - // size in Bytes = ElementCount*TypeSize - SVal SizeInBytes = svalBuilder.evalBinOpNN( - State, BO_Mul, ElementCount.castAs<NonLoc>(), - svalBuilder.makeArrayIndex(TypeSize.getQuantity()), - svalBuilder.getArrayIndexType()); - DefinedOrUnknownSVal DynSizeMatchesSize = svalBuilder.evalEQ( - State, DynSize, SizeInBytes.castAs<DefinedOrUnknownSVal>()); - State = State->assume(DynSizeMatchesSize, true); - } - return State; -} - static bool isKnownDeallocObjCMethodName(const ObjCMethodCall &Call) { // If the first selector piece is one of the names below, assume that the // object takes ownership of the memory, promising to eventually deallocate it @@ -1588,21 +1537,9 @@ ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C, // Fill the region with the initialization value. State = State->bindDefaultInitial(RetVal, Init, LCtx); - // Set the region's extent equal to the Size parameter. - const SymbolicRegion *R = - dyn_cast_or_null<SymbolicRegion>(RetVal.getAsRegion()); - if (!R) - return nullptr; - if (Optional<DefinedOrUnknownSVal> DefinedSize = - Size.getAs<DefinedOrUnknownSVal>()) { - DefinedOrUnknownSVal DynSize = getDynamicSize(State, R, svalBuilder); - - DefinedOrUnknownSVal DynSizeMatchesSize = - svalBuilder.evalEQ(State, DynSize, *DefinedSize); - - State = State->assume(DynSizeMatchesSize, true); - assert(State); - } + // Set the region's extent. 
+ State = setDynamicExtent(State, RetVal.getAsRegion(), + Size.castAs<DefinedOrUnknownSVal>(), svalBuilder); return MallocUpdateRefState(C, CE, State, Family); } @@ -2186,7 +2123,7 @@ void MallocChecker::HandleMismatchedDealloc(CheckerContext &C, os.str(), N); R->markInteresting(Sym); R->addRange(Range); - R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + R->addVisitor<MallocBugVisitor>(Sym); C.emitReport(std::move(R)); } } @@ -2279,7 +2216,7 @@ void MallocChecker::HandleUseAfterFree(CheckerContext &C, SourceRange Range, R->markInteresting(Sym); R->addRange(Range); - R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + R->addVisitor<MallocBugVisitor>(Sym); if (AF == AF_InnerBuffer) R->addVisitor(allocation_state::getInnerPointerBRVisitor(Sym)); @@ -2315,7 +2252,7 @@ void MallocChecker::HandleDoubleFree(CheckerContext &C, SourceRange Range, R->markInteresting(Sym); if (PrevSym) R->markInteresting(PrevSym); - R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + R->addVisitor<MallocBugVisitor>(Sym); C.emitReport(std::move(R)); } } @@ -2341,7 +2278,7 @@ void MallocChecker::HandleDoubleDelete(CheckerContext &C, SymbolRef Sym) const { *BT_DoubleDelete, "Attempt to delete released memory", N); R->markInteresting(Sym); - R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + R->addVisitor<MallocBugVisitor>(Sym); C.emitReport(std::move(R)); } } @@ -2371,7 +2308,7 @@ void MallocChecker::HandleUseZeroAlloc(CheckerContext &C, SourceRange Range, R->addRange(Range); if (Sym) { R->markInteresting(Sym); - R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + R->addVisitor<MallocBugVisitor>(Sym); } C.emitReport(std::move(R)); } @@ -2641,7 +2578,7 @@ void MallocChecker::HandleLeak(SymbolRef Sym, ExplodedNode *N, *BT_Leak[*CheckKind], os.str(), N, LocUsedForUniqueing, AllocNode->getLocationContext()->getDecl()); R->markInteresting(Sym); - R->addVisitor(std::make_unique<MallocBugVisitor>(Sym, true)); + R->addVisitor<MallocBugVisitor>(Sym, true); 
C.emitReport(std::move(R)); } @@ -3208,9 +3145,10 @@ static SymbolRef findFailedReallocSymbol(ProgramStateRef currState, static bool isReferenceCountingPointerDestructor(const CXXDestructorDecl *DD) { if (const IdentifierInfo *II = DD->getParent()->getIdentifier()) { StringRef N = II->getName(); - if (N.contains_lower("ptr") || N.contains_lower("pointer")) { - if (N.contains_lower("ref") || N.contains_lower("cnt") || - N.contains_lower("intrusive") || N.contains_lower("shared")) { + if (N.contains_insensitive("ptr") || N.contains_insensitive("pointer")) { + if (N.contains_insensitive("ref") || N.contains_insensitive("cnt") || + N.contains_insensitive("intrusive") || + N.contains_insensitive("shared")) { return true; } } diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp index 71f593cb2b56..4b5206a102b8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp @@ -139,6 +139,10 @@ static bool typesCompatible(ASTContext &C, QualType A, QualType B) { if (B->isVoidPointerType() && A->getAs<PointerType>()) return true; + // sizeof(pointer type) is compatible with void* + if (A->isVoidPointerType() && B->getAs<PointerType>()) + return true; + while (true) { A = A.getCanonicalType(); B = B.getCanonicalType(); diff --git a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp index a38298a7abed..cbe938982000 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp @@ -202,7 +202,7 @@ public: }; private: - mutable std::unique_ptr<BugType> BT; + BugType BT{this, "Use-after-move", categories::CXXMoveSemantics}; // Check if the given form of potential misuse of a given object // should be reported. If so, get it reported. 
The callback from which @@ -393,11 +393,6 @@ ExplodedNode *MoveChecker::reportBug(const MemRegion *Region, MisuseKind MK) const { if (ExplodedNode *N = misuseCausesCrash(MK) ? C.generateErrorNode() : C.generateNonFatalErrorNode()) { - - if (!BT) - BT.reset(new BugType(this, "Use-after-move", - "C++ move semantics")); - // Uniqueing report to the same object. PathDiagnosticLocation LocUsedForUniqueing; const ExplodedNode *MoveNode = getMoveLocation(N, Region, C); @@ -431,7 +426,7 @@ ExplodedNode *MoveChecker::reportBug(const MemRegion *Region, } auto R = std::make_unique<PathSensitiveBugReport>( - *BT, OS.str(), N, LocUsedForUniqueing, + BT, OS.str(), N, LocUsedForUniqueing, MoveNode->getLocationContext()->getDecl()); R->addVisitor(std::make_unique<MovedBugVisitor>(*this, Region, RD, MK)); C.emitReport(std::move(R)); @@ -477,7 +472,7 @@ void MoveChecker::checkPostCall(const CallEvent &Call, const MemRegion *BaseRegion = ArgRegion->getBaseRegion(); // Skip temp objects because of their short lifetime. if (BaseRegion->getAs<CXXTempObjectRegion>() || - AFC->getArgExpr(0)->isRValue()) + AFC->getArgExpr(0)->isPRValue()) return; // If it has already been reported do not need to modify the state. diff --git a/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp index 80b705fb7392..c5437b16c688 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp @@ -89,7 +89,7 @@ void NonnullGlobalConstantsChecker::checkLocation(SVal location, bool isLoad, } /// \param V loaded lvalue. -/// \return whether {@code val} is a string-like const global. +/// \return whether @c val is a string-like const global. 
bool NonnullGlobalConstantsChecker::isGlobalConstString(SVal V) const { Optional<loc::MemRegionVal> RegionVal = V.getAs<loc::MemRegionVal>(); if (!RegionVal) @@ -127,7 +127,7 @@ bool NonnullGlobalConstantsChecker::isGlobalConstString(SVal V) const { return false; } -/// \return whether {@code type} is extremely unlikely to be null +/// \return whether @c type is extremely unlikely to be null bool NonnullGlobalConstantsChecker::isNonnullType(QualType Ty) const { if (Ty->isPointerType() && Ty->getPointeeType()->isCharType()) diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index bc7a8a3b12a1..fe8f7e7bf69e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -170,7 +170,7 @@ private: auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); if (Region) { R->markInteresting(Region); - R->addVisitor(std::make_unique<NullabilityBugVisitor>(Region)); + R->addVisitor<NullabilityBugVisitor>(Region); } if (ValueExpr) { R->addRange(ValueExpr->getSourceRange()); diff --git a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp index 270b66dab020..0a8379d9ab99 100644 --- a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp @@ -32,7 +32,21 @@ public: void checkASTCodeBody(const Decl *D, AnalysisManager &AM, BugReporter &BR) const; }; +} // namespace + +namespace clang { +namespace ast_matchers { +AST_MATCHER_P(StringLiteral, mentionsBoundType, std::string, BindingID) { + return Builder->removeBindings([this, &Node](const BoundNodesMap &Nodes) { + const auto &BN = Nodes.getNode(this->BindingID); + if (const auto *ND = BN.get<NamedDecl>()) { + return ND->getName() != Node.getString(); + } + return true; + }); } +} // end namespace ast_matchers +} // end namespace clang static void 
emitDiagnostics(const BoundNodes &Nodes, BugReporter &BR, @@ -63,22 +77,41 @@ static decltype(auto) hasTypePointingTo(DeclarationMatcher DeclM) { return hasType(pointerType(pointee(hasDeclaration(DeclM)))); } -void OSObjectCStyleCastChecker::checkASTCodeBody(const Decl *D, AnalysisManager &AM, +void OSObjectCStyleCastChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, BugReporter &BR) const { AnalysisDeclContext *ADC = AM.getAnalysisDeclContext(D); auto DynamicCastM = callExpr(callee(functionDecl(hasName("safeMetaCast")))); - - auto OSObjTypeM = hasTypePointingTo(cxxRecordDecl(isDerivedFrom("OSMetaClassBase"))); + // 'allocClassWithName' allocates an object with the given type. + // The type is actually provided as a string argument (type's name). + // This makes the following pattern possible: + // + // Foo *object = (Foo *)allocClassWithName("Foo"); + // + // While OSRequiredCast can be used here, it is still not a useful warning. + auto AllocClassWithNameM = callExpr( + callee(functionDecl(hasName("allocClassWithName"))), + // Here we want to make sure that the string argument matches the + // type in the cast expression. 
+ hasArgument(0, stringLiteral(mentionsBoundType(WarnRecordDecl)))); + + auto OSObjTypeM = + hasTypePointingTo(cxxRecordDecl(isDerivedFrom("OSMetaClassBase"))); auto OSObjSubclassM = hasTypePointingTo( - cxxRecordDecl(isDerivedFrom("OSObject")).bind(WarnRecordDecl)); - - auto CastM = cStyleCastExpr( - allOf(hasSourceExpression(allOf(OSObjTypeM, unless(DynamicCastM))), - OSObjSubclassM)).bind(WarnAtNode); - - auto Matches = match(stmt(forEachDescendant(CastM)), *D->getBody(), AM.getASTContext()); + cxxRecordDecl(isDerivedFrom("OSObject")).bind(WarnRecordDecl)); + + auto CastM = + cStyleCastExpr( + allOf(OSObjSubclassM, + hasSourceExpression( + allOf(OSObjTypeM, + unless(anyOf(DynamicCastM, AllocClassWithNameM)))))) + .bind(WarnAtNode); + + auto Matches = + match(stmt(forEachDescendant(CastM)), *D->getBody(), AM.getASTContext()); for (BoundNodes Match : Matches) emitDiagnostics(Match, BR, ADC, this); } diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp index 7fd6e2abef4c..c8eab3288094 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp @@ -8,7 +8,7 @@ // // This file defines ObjCAutoreleaseWriteChecker which warns against writes // into autoreleased out parameters which cause crashes. 
-// An example of a problematic write is a write to {@code error} in the example +// An example of a problematic write is a write to @c error in the example // below: // // - (BOOL) mymethod:(NSError *__autoreleasing *)error list:(NSArray*) list { diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp index 8c2008a7ceb4..13985af76b00 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp @@ -147,8 +147,9 @@ void ObjCContainersChecker::checkPreStmt(const CallExpr *CE, auto R = std::make_unique<PathSensitiveBugReport>( *BT, "Index is out of bounds", N); R->addRange(IdxExpr->getSourceRange()); - bugreporter::trackExpressionValue( - N, IdxExpr, *R, bugreporter::TrackingKind::Thorough, false); + bugreporter::trackExpressionValue(N, IdxExpr, *R, + {bugreporter::TrackingKind::Thorough, + /*EnableNullFPSuppression=*/false}); C.emitReport(std::move(R)); return; } diff --git a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp index 96f0d9bb3c3d..40472ccfe7e6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp @@ -193,6 +193,11 @@ public: CharUnits PaddingSum; CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0)); for (const FieldDecl *FD : RD->fields()) { + // Skip field that is a subobject of zero size, marked with + // [[no_unique_address]] or an empty bitfield, because its address can be + // set the same as the other fields addresses. + if (FD->isZeroSize(ASTContext)) + continue; // This checker only cares about the padded size of the // field, and not the data size. 
If the field is a record // with tail padding, then we won't put that number in our @@ -249,7 +254,7 @@ public: RetVal.Field = FD; auto &Ctx = FD->getASTContext(); auto Info = Ctx.getTypeInfoInChars(FD->getType()); - RetVal.Size = Info.Width; + RetVal.Size = FD->isZeroSize(Ctx) ? CharUnits::Zero() : Info.Width; RetVal.Align = Info.Align; assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity())); if (auto Max = FD->getMaxAlignment()) diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp index 88e80c481a5a..ee71b55a39e6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp @@ -339,7 +339,16 @@ void PthreadLockChecker::printState(raw_ostream &Out, ProgramStateRef State, } } - // TODO: Dump destroyed mutex symbols? + DestroyRetValTy DRV = State->get<DestroyRetVal>(); + if (!DRV.isEmpty()) { + Out << Sep << "Mutexes in unresolved possibly destroyed state:" << NL; + for (auto I : DRV) { + I.first->dumpToStream(Out); + Out << ": "; + I.second->dumpToStream(Out); + Out << NL; + } + } } void PthreadLockChecker::AcquirePthreadLock(const CallEvent &Call, @@ -638,8 +647,10 @@ void PthreadLockChecker::checkDeadSymbols(SymbolReaper &SymReaper, for (auto I : State->get<LockMap>()) { // Stop tracking dead mutex regions as well. - if (!SymReaper.isLiveRegion(I.first)) + if (!SymReaper.isLiveRegion(I.first)) { State = State->remove<LockMap>(I.first); + State = State->remove<DestroyRetVal>(I.first); + } } // TODO: We probably need to clean up the lock stack as well. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index 1d903530201f..64ac6bc4c06b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -13,6 +13,8 @@ #include "RetainCountDiagnostics.h" #include "RetainCountChecker.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" using namespace clang; using namespace ento; @@ -89,7 +91,7 @@ static std::string getPrettyTypeName(QualType QT) { return QT.getAsString(); } -/// Write information about the type state change to {@code os}, +/// Write information about the type state change to @c os, /// return whether the note should be generated. static bool shouldGenerateNote(llvm::raw_string_ostream &os, const RefVal *PrevT, @@ -164,8 +166,8 @@ static bool shouldGenerateNote(llvm::raw_string_ostream &os, return true; } -/// Finds argument index of the out paramter in the call {@code S} -/// corresponding to the symbol {@code Sym}. +/// Finds argument index of the out paramter in the call @c S +/// corresponding to the symbol @c Sym. /// If none found, returns None. 
static Optional<unsigned> findArgIdxOfSymbol(ProgramStateRef CurrSt, const LocationContext *LCtx, @@ -337,11 +339,15 @@ public: class RefLeakReportVisitor : public RefCountReportVisitor { public: - RefLeakReportVisitor(SymbolRef sym) : RefCountReportVisitor(sym) {} + RefLeakReportVisitor(SymbolRef Sym, const MemRegion *LastBinding) + : RefCountReportVisitor(Sym), LastBinding(LastBinding) {} PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, const ExplodedNode *N, PathSensitiveBugReport &BR) override; + +private: + const MemRegion *LastBinding; }; } // end namespace retaincountchecker @@ -610,6 +616,41 @@ static Optional<std::string> describeRegion(const MemRegion *MR) { return None; } +using Bindings = llvm::SmallVector<std::pair<const MemRegion *, SVal>, 4>; + +class VarBindingsCollector : public StoreManager::BindingsHandler { + SymbolRef Sym; + Bindings &Result; + +public: + VarBindingsCollector(SymbolRef Sym, Bindings &ToFill) + : Sym(Sym), Result(ToFill) {} + + bool HandleBinding(StoreManager &SMgr, Store Store, const MemRegion *R, + SVal Val) override { + SymbolRef SymV = Val.getAsLocSymbol(); + if (!SymV || SymV != Sym) + return true; + + if (isa<NonParamVarRegion>(R)) + Result.emplace_back(R, Val); + + return true; + } +}; + +Bindings getAllVarBindingsForSymbol(ProgramStateManager &Manager, + const ExplodedNode *Node, SymbolRef Sym) { + Bindings Result; + VarBindingsCollector Collector{Sym, Result}; + while (Result.empty() && Node) { + Manager.iterBindings(Node->getState(), Collector); + Node = Node->getFirstPred(); + } + + return Result; +} + namespace { // Find the first node in the current function context that referred to the // tracked symbol and the memory location that value was stored to. Note, the @@ -729,14 +770,6 @@ RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, // assigned to different variables, etc. BR.markInteresting(Sym); - // We are reporting a leak. 
Walk up the graph to get to the first node where - // the symbol appeared, and also get the first VarDecl that tracked object - // is stored to. - AllocationInfo AllocI = GetAllocationSite(BRC.getStateManager(), EndN, Sym); - - const MemRegion* FirstBinding = AllocI.R; - BR.markInteresting(AllocI.InterestingMethodContext); - PathDiagnosticLocation L = cast<RefLeakReport>(BR).getEndOfPath(); std::string sbuf; @@ -744,7 +777,7 @@ RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, os << "Object leaked: "; - Optional<std::string> RegionDescription = describeRegion(FirstBinding); + Optional<std::string> RegionDescription = describeRegion(LastBinding); if (RegionDescription) { os << "object allocated and stored into '" << *RegionDescription << '\''; } else { @@ -753,7 +786,7 @@ RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, } // Get the retain count. - const RefVal* RV = getRefBinding(EndN->getState(), Sym); + const RefVal *RV = getRefBinding(EndN->getState(), Sym); assert(RV); if (RV->getKind() == RefVal::ErrorLeakReturned) { @@ -794,14 +827,15 @@ RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, " Foundation"; } else if (RV->getObjKind() == ObjKind::OS) { std::string FuncName = FD->getNameAsString(); - os << "whose name ('" << FuncName - << "') starts with '" << StringRef(FuncName).substr(0, 3) << "'"; + os << "whose name ('" << FuncName << "') starts with '" + << StringRef(FuncName).substr(0, 3) << "'"; } } } } else { os << " is not referenced later in this execution path and has a retain " - "count of +" << RV->getCount(); + "count of +" + << RV->getCount(); } return std::make_shared<PathDiagnosticEventPiece>(L, os.str()); @@ -812,7 +846,7 @@ RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, : PathSensitiveBugReport(D, D.getDescription(), n), Sym(sym), isLeak(isLeak) { if (!isLeak) - addVisitor(std::make_unique<RefCountReportVisitor>(sym)); + addVisitor<RefCountReportVisitor>(sym); } 
RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, @@ -820,19 +854,19 @@ RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, StringRef endText) : PathSensitiveBugReport(D, D.getDescription(), endText, n) { - addVisitor(std::make_unique<RefCountReportVisitor>(sym)); + addVisitor<RefCountReportVisitor>(sym); } -void RefLeakReport::deriveParamLocation(CheckerContext &Ctx, SymbolRef sym) { - const SourceManager& SMgr = Ctx.getSourceManager(); +void RefLeakReport::deriveParamLocation(CheckerContext &Ctx) { + const SourceManager &SMgr = Ctx.getSourceManager(); - if (!sym->getOriginRegion()) + if (!Sym->getOriginRegion()) return; - auto *Region = dyn_cast<DeclRegion>(sym->getOriginRegion()); + auto *Region = dyn_cast<DeclRegion>(Sym->getOriginRegion()); if (Region) { const Decl *PDecl = Region->getDecl(); - if (PDecl && isa<ParmVarDecl>(PDecl)) { + if (isa_and_nonnull<ParmVarDecl>(PDecl)) { PathDiagnosticLocation ParamLocation = PathDiagnosticLocation::create(PDecl, SMgr); Location = ParamLocation; @@ -842,8 +876,7 @@ void RefLeakReport::deriveParamLocation(CheckerContext &Ctx, SymbolRef sym) { } } -void RefLeakReport::deriveAllocLocation(CheckerContext &Ctx, - SymbolRef sym) { +void RefLeakReport::deriveAllocLocation(CheckerContext &Ctx) { // Most bug reports are cached at the location where they occurred. // With leaks, we want to unique them by the location where they were // allocated, and only report a single path. To do this, we need to find @@ -854,13 +887,13 @@ void RefLeakReport::deriveAllocLocation(CheckerContext &Ctx, // same SourceLocation. 
const ExplodedNode *AllocNode = nullptr; - const SourceManager& SMgr = Ctx.getSourceManager(); + const SourceManager &SMgr = Ctx.getSourceManager(); AllocationInfo AllocI = - GetAllocationSite(Ctx.getStateManager(), getErrorNode(), sym); + GetAllocationSite(Ctx.getStateManager(), getErrorNode(), Sym); AllocNode = AllocI.N; - AllocBinding = AllocI.R; + AllocFirstBinding = AllocI.R; markInteresting(AllocI.InterestingMethodContext); // Get the SourceLocation for the allocation site. @@ -870,13 +903,12 @@ void RefLeakReport::deriveAllocLocation(CheckerContext &Ctx, AllocStmt = AllocNode->getStmtForDiagnostics(); if (!AllocStmt) { - AllocBinding = nullptr; + AllocFirstBinding = nullptr; return; } - PathDiagnosticLocation AllocLocation = - PathDiagnosticLocation::createBegin(AllocStmt, SMgr, - AllocNode->getLocationContext()); + PathDiagnosticLocation AllocLocation = PathDiagnosticLocation::createBegin( + AllocStmt, SMgr, AllocNode->getLocationContext()); Location = AllocLocation; // Set uniqieing info, which will be used for unique the bug reports. The @@ -891,7 +923,8 @@ void RefLeakReport::createDescription(CheckerContext &Ctx) { llvm::raw_string_ostream os(Description); os << "Potential leak of an object"; - Optional<std::string> RegionDescription = describeRegion(AllocBinding); + Optional<std::string> RegionDescription = + describeRegion(AllocBindingToReport); if (RegionDescription) { os << " stored into '" << *RegionDescription << '\''; } else { @@ -901,16 +934,75 @@ void RefLeakReport::createDescription(CheckerContext &Ctx) { } } +void RefLeakReport::findBindingToReport(CheckerContext &Ctx, + ExplodedNode *Node) { + if (!AllocFirstBinding) + // If we don't have any bindings, we won't be able to find any + // better binding to report. + return; + + // If the original region still contains the leaking symbol... + if (Node->getState()->getSVal(AllocFirstBinding).getAsSymbol() == Sym) { + // ...it is the best binding to report. 
+ AllocBindingToReport = AllocFirstBinding; + return; + } + + // At this point, we know that the original region doesn't contain the leaking + // when the actual leak happens. It means that it can be confusing for the + // user to see such description in the message. + // + // Let's consider the following example: + // Object *Original = allocate(...); + // Object *New = Original; + // Original = allocate(...); + // Original->release(); + // + // Complaining about a leaking object "stored into Original" might cause a + // rightful confusion because 'Original' is actually released. + // We should complain about 'New' instead. + Bindings AllVarBindings = + getAllVarBindingsForSymbol(Ctx.getStateManager(), Node, Sym); + + // While looking for the last var bindings, we can still find + // `AllocFirstBinding` to be one of them. In situations like this, + // it would still be the easiest case to explain to our users. + if (!AllVarBindings.empty() && + llvm::count_if(AllVarBindings, + [this](const std::pair<const MemRegion *, SVal> Binding) { + return Binding.first == AllocFirstBinding; + }) == 0) { + // Let's pick one of them at random (if there is something to pick from). + AllocBindingToReport = AllVarBindings[0].first; + + // Because 'AllocBindingToReport' is not the the same as + // 'AllocFirstBinding', we need to explain how the leaking object + // got from one to another. + // + // NOTE: We use the actual SVal stored in AllocBindingToReport here because + // trackStoredValue compares SVal's and it can get trickier for + // something like derived regions if we want to construct SVal from + // Sym. Instead, we take the value that is definitely stored in that + // region, thus guaranteeing that trackStoredValue will work. 
+ bugreporter::trackStoredValue(AllVarBindings[0].second.castAs<KnownSVal>(), + AllocBindingToReport, *this); + } else { + AllocBindingToReport = AllocFirstBinding; + } +} + RefLeakReport::RefLeakReport(const RefCountBug &D, const LangOptions &LOpts, - ExplodedNode *n, SymbolRef sym, + ExplodedNode *N, SymbolRef Sym, CheckerContext &Ctx) - : RefCountReport(D, LOpts, n, sym, /*isLeak=*/true) { + : RefCountReport(D, LOpts, N, Sym, /*isLeak=*/true) { + + deriveAllocLocation(Ctx); + findBindingToReport(Ctx, N); - deriveAllocLocation(Ctx, sym); - if (!AllocBinding) - deriveParamLocation(Ctx, sym); + if (!AllocFirstBinding) + deriveParamLocation(Ctx); createDescription(Ctx); - addVisitor(std::make_unique<RefLeakReportVisitor>(sym)); + addVisitor<RefLeakReportVisitor>(Sym, AllocBindingToReport); } diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h index 286a8ae2ef7d..d05900895c6a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h @@ -68,17 +68,20 @@ public: }; class RefLeakReport : public RefCountReport { - const MemRegion* AllocBinding; - const Stmt *AllocStmt; + const MemRegion *AllocFirstBinding = nullptr; + const MemRegion *AllocBindingToReport = nullptr; + const Stmt *AllocStmt = nullptr; PathDiagnosticLocation Location; // Finds the function declaration where a leak warning for the parameter // 'sym' should be raised. - void deriveParamLocation(CheckerContext &Ctx, SymbolRef sym); - // Finds the location where a leak warning for 'sym' should be raised. - void deriveAllocLocation(CheckerContext &Ctx, SymbolRef sym); + void deriveParamLocation(CheckerContext &Ctx); + // Finds the location where the leaking object is allocated. 
+ void deriveAllocLocation(CheckerContext &Ctx); // Produces description of a leak warning which is printed on the console. void createDescription(CheckerContext &Ctx); + // Finds the binding that we should use in a leak warning. + void findBindingToReport(CheckerContext &Ctx, ExplodedNode *Node); public: RefLeakReport(const RefCountBug &D, const LangOptions &LOpts, ExplodedNode *n, diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp index 1a94ccdc2825..885750218b9e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp @@ -16,7 +16,7 @@ #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" using namespace clang; diff --git a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp index d9dc72ddaa21..2cf6c6ff47f1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp @@ -57,8 +57,8 @@ public: } // end anonymous namespace -/// \return Whether {@code A} occurs before {@code B} in traversal of -/// {@code Parent}. +/// \return Whether @c A occurs before @c B in traversal of +/// @c Parent. /// Conceptually a very incomplete/unsound approximation of happens-before /// relationship (A is likely to be evaluated before B), /// but useful enough in this case. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h index 92c386bbb2b0..6a40f8eda5fa 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h @@ -22,6 +22,10 @@ namespace smartptr { /// Returns true if the event call is on smart pointer. bool isStdSmartPtrCall(const CallEvent &Call); +bool isStdSmartPtr(const CXXRecordDecl *RD); +bool isStdSmartPtr(const Expr *E); + +bool isStdSmartPtr(const CXXRecordDecl *RD); /// Returns whether the smart pointer is null or not. bool isNullSmartPtr(const ProgramStateRef State, const MemRegion *ThisRegion); diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp index 6ee7bd9252b3..09e885e8133f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp @@ -25,16 +25,20 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/ErrorHandling.h" #include <string> using namespace clang; using namespace ento; namespace { + class SmartPtrModeling : public Checker<eval::Call, check::DeadSymbols, check::RegionChanges, check::LiveSymbols> { @@ -60,7 +64,7 @@ public: private: void handleReset(const CallEvent &Call, CheckerContext &C) const; void handleRelease(const CallEvent &Call, CheckerContext &C) const; - void handleSwap(const CallEvent &Call, CheckerContext &C) const; + void handleSwapMethod(const 
CallEvent &Call, CheckerContext &C) const; void handleGet(const CallEvent &Call, CheckerContext &C) const; bool handleAssignOp(const CallEvent &Call, CheckerContext &C) const; bool handleMoveCtr(const CallEvent &Call, CheckerContext &C, @@ -68,19 +72,56 @@ private: bool updateMovedSmartPointers(CheckerContext &C, const MemRegion *ThisRegion, const MemRegion *OtherSmartPtrRegion) const; void handleBoolConversion(const CallEvent &Call, CheckerContext &C) const; + bool handleComparisionOp(const CallEvent &Call, CheckerContext &C) const; + bool handleOstreamOperator(const CallEvent &Call, CheckerContext &C) const; + bool handleSwap(ProgramStateRef State, SVal First, SVal Second, + CheckerContext &C) const; + std::pair<SVal, ProgramStateRef> + retrieveOrConjureInnerPtrVal(ProgramStateRef State, + const MemRegion *ThisRegion, const Expr *E, + QualType Type, CheckerContext &C) const; using SmartPtrMethodHandlerFn = void (SmartPtrModeling::*)(const CallEvent &Call, CheckerContext &) const; CallDescriptionMap<SmartPtrMethodHandlerFn> SmartPtrMethodHandlers{ {{"reset"}, &SmartPtrModeling::handleReset}, {{"release"}, &SmartPtrModeling::handleRelease}, - {{"swap", 1}, &SmartPtrModeling::handleSwap}, + {{"swap", 1}, &SmartPtrModeling::handleSwapMethod}, {{"get"}, &SmartPtrModeling::handleGet}}; + const CallDescription StdSwapCall{{"std", "swap"}, 2}; + const CallDescription StdMakeUniqueCall{{"std", "make_unique"}}; + const CallDescription StdMakeUniqueForOverwriteCall{ + {"std", "make_unique_for_overwrite"}}; }; } // end of anonymous namespace REGISTER_MAP_WITH_PROGRAMSTATE(TrackedRegionMap, const MemRegion *, SVal) +// Checks if RD has name in Names and is in std namespace +static bool hasStdClassWithName(const CXXRecordDecl *RD, + ArrayRef<llvm::StringLiteral> Names) { + if (!RD || !RD->getDeclContext()->isStdNamespace()) + return false; + if (RD->getDeclName().isIdentifier()) { + StringRef Name = RD->getName(); + return llvm::any_of(Names, [&Name](StringRef GivenName) -> 
bool { + return Name == GivenName; + }); + } + return false; +} + +constexpr llvm::StringLiteral STD_PTR_NAMES[] = {"shared_ptr", "unique_ptr", + "weak_ptr"}; + +static bool isStdSmartPtr(const CXXRecordDecl *RD) { + return hasStdClassWithName(RD, STD_PTR_NAMES); +} + +static bool isStdSmartPtr(const Expr *E) { + return isStdSmartPtr(E->getType()->getAsCXXRecordDecl()); +} + // Define the inter-checker API. namespace clang { namespace ento { @@ -89,18 +130,24 @@ bool isStdSmartPtrCall(const CallEvent &Call) { const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(Call.getDecl()); if (!MethodDecl || !MethodDecl->getParent()) return false; + return isStdSmartPtr(MethodDecl->getParent()); +} - const auto *RecordDecl = MethodDecl->getParent(); - if (!RecordDecl || !RecordDecl->getDeclContext()->isStdNamespace()) +bool isStdSmartPtr(const CXXRecordDecl *RD) { + if (!RD || !RD->getDeclContext()->isStdNamespace()) return false; - if (RecordDecl->getDeclName().isIdentifier()) { - StringRef Name = RecordDecl->getName(); + if (RD->getDeclName().isIdentifier()) { + StringRef Name = RD->getName(); return Name == "shared_ptr" || Name == "unique_ptr" || Name == "weak_ptr"; } return false; } +bool isStdSmartPtr(const Expr *E) { + return isStdSmartPtr(E->getType()->getAsCXXRecordDecl()); +} + bool isNullSmartPtr(const ProgramStateRef State, const MemRegion *ThisRegion) { const auto *InnerPointVal = State->get<TrackedRegionMap>(ThisRegion); return InnerPointVal && @@ -135,28 +182,47 @@ static ProgramStateRef updateSwappedRegion(ProgramStateRef State, return State; } -// Helper method to get the inner pointer type of specialized smart pointer -// Returns empty type if not found valid inner pointer type. 
-static QualType getInnerPointerType(const CallEvent &Call, CheckerContext &C) { - const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(Call.getDecl()); - if (!MethodDecl || !MethodDecl->getParent()) - return {}; - - const auto *RecordDecl = MethodDecl->getParent(); - if (!RecordDecl || !RecordDecl->isInStdNamespace()) +static QualType getInnerPointerType(CheckerContext C, const CXXRecordDecl *RD) { + if (!RD || !RD->isInStdNamespace()) return {}; - const auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RecordDecl); + const auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD); if (!TSD) return {}; auto TemplateArgs = TSD->getTemplateArgs().asArray(); - if (TemplateArgs.size() == 0) + if (TemplateArgs.empty()) return {}; auto InnerValueType = TemplateArgs[0].getAsType(); return C.getASTContext().getPointerType(InnerValueType.getCanonicalType()); } +// This is for use with standalone-functions like std::make_unique, +// std::make_unique_for_overwrite, etc. It reads the template parameter and +// returns the pointer type corresponding to it, +static QualType getPointerTypeFromTemplateArg(const CallEvent &Call, + CheckerContext &C) { + const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!FD || !FD->isFunctionTemplateSpecialization()) + return {}; + const auto &TemplateArgs = FD->getTemplateSpecializationArgs()->asArray(); + if (TemplateArgs.size() == 0) + return {}; + auto ValueType = TemplateArgs[0].getAsType(); + return C.getASTContext().getPointerType(ValueType.getCanonicalType()); +} + +// Helper method to get the inner pointer type of specialized smart pointer +// Returns empty type if not found valid inner pointer type. 
+static QualType getInnerPointerType(const CallEvent &Call, CheckerContext &C) { + const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(Call.getDecl()); + if (!MethodDecl || !MethodDecl->getParent()) + return {}; + + const auto *RecordDecl = MethodDecl->getParent(); + return getInnerPointerType(C, RecordDecl); +} + // Helper method to pretty print region and avoid extra spacing. static void checkAndPrettyPrintRegion(llvm::raw_ostream &OS, const MemRegion *Region) { @@ -175,9 +241,107 @@ bool SmartPtrModeling::isBoolConversionMethod(const CallEvent &Call) const { return CD && CD->getConversionType()->isBooleanType(); } +constexpr llvm::StringLiteral BASIC_OSTREAM_NAMES[] = {"basic_ostream"}; + +bool isStdBasicOstream(const Expr *E) { + const auto *RD = E->getType()->getAsCXXRecordDecl(); + return hasStdClassWithName(RD, BASIC_OSTREAM_NAMES); +} + +static bool isStdFunctionCall(const CallEvent &Call) { + return Call.getDecl() && Call.getDecl()->getDeclContext()->isStdNamespace(); +} + +bool isStdOstreamOperatorCall(const CallEvent &Call) { + if (Call.getNumArgs() != 2 || !isStdFunctionCall(Call)) + return false; + const auto *FC = dyn_cast<SimpleFunctionCall>(&Call); + if (!FC) + return false; + const FunctionDecl *FD = FC->getDecl(); + if (!FD->isOverloadedOperator()) + return false; + const OverloadedOperatorKind OOK = FD->getOverloadedOperator(); + if (OOK != clang::OO_LessLess) + return false; + return isStdSmartPtr(Call.getArgExpr(1)) && + isStdBasicOstream(Call.getArgExpr(0)); +} + +static bool isPotentiallyComparisionOpCall(const CallEvent &Call) { + if (Call.getNumArgs() != 2 || !isStdFunctionCall(Call)) + return false; + return smartptr::isStdSmartPtr(Call.getArgExpr(0)) || + smartptr::isStdSmartPtr(Call.getArgExpr(1)); +} + bool SmartPtrModeling::evalCall(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // If any one of the arg is a unique_ptr, then + // we can try this function + if (ModelSmartPtrDereference 
&& isPotentiallyComparisionOpCall(Call)) + if (handleComparisionOp(Call, C)) + return true; + + if (ModelSmartPtrDereference && isStdOstreamOperatorCall(Call)) + return handleOstreamOperator(Call, C); + + if (Call.isCalled(StdSwapCall)) { + // Check the first arg, if it is of std::unique_ptr type. + assert(Call.getNumArgs() == 2 && "std::swap should have two arguments"); + const Expr *FirstArg = Call.getArgExpr(0); + if (!smartptr::isStdSmartPtr(FirstArg->getType()->getAsCXXRecordDecl())) + return false; + return handleSwap(State, Call.getArgSVal(0), Call.getArgSVal(1), C); + } + + if (Call.isCalled(StdMakeUniqueCall) || + Call.isCalled(StdMakeUniqueForOverwriteCall)) { + if (!ModelSmartPtrDereference) + return false; + + const Optional<SVal> ThisRegionOpt = Call.getReturnValueUnderConstruction(); + if (!ThisRegionOpt) + return false; + + const auto PtrVal = C.getSValBuilder().getConjuredHeapSymbolVal( + Call.getOriginExpr(), C.getLocationContext(), + getPointerTypeFromTemplateArg(Call, C), C.blockCount()); + + const MemRegion *ThisRegion = ThisRegionOpt->getAsRegion(); + State = State->set<TrackedRegionMap>(ThisRegion, PtrVal); + State = State->assume(PtrVal, true); + + // TODO: ExprEngine should do this for us. + // For a bit more context: + // 1) Why do we need this? Since we are modelling a "function" + // that returns a constructed object we need to store this information in + // the program state. + // + // 2) Why does this work? + // `updateObjectsUnderConstruction` does exactly as it sounds. + // + // 3) How should it look like when moved to the Engine? + // It would be nice if we can just + // pretend we don't need to know about this - ie, completely automatic work. + // However, realistically speaking, I think we would need to "signal" the + // ExprEngine evalCall handler that we are constructing an object with this + // function call (constructors obviously construct, hence can be + // automatically deduced). 
+ auto &Engine = State->getStateManager().getOwningEngine(); + State = Engine.updateObjectsUnderConstruction( + *ThisRegionOpt, nullptr, State, C.getLocationContext(), + Call.getConstructionContext(), {}); + + // We don't leave a note here since it is guaranteed the + // unique_ptr from this call is non-null (hence is safe to de-reference). + C.addTransition(State); + return true; + } + if (!smartptr::isStdSmartPtrCall(Call)) return false; @@ -272,6 +436,108 @@ bool SmartPtrModeling::evalCall(const CallEvent &Call, return C.isDifferent(); } +std::pair<SVal, ProgramStateRef> SmartPtrModeling::retrieveOrConjureInnerPtrVal( + ProgramStateRef State, const MemRegion *ThisRegion, const Expr *E, + QualType Type, CheckerContext &C) const { + const auto *Ptr = State->get<TrackedRegionMap>(ThisRegion); + if (Ptr) + return {*Ptr, State}; + auto Val = C.getSValBuilder().conjureSymbolVal(E, C.getLocationContext(), + Type, C.blockCount()); + State = State->set<TrackedRegionMap>(ThisRegion, Val); + return {Val, State}; +} + +bool SmartPtrModeling::handleComparisionOp(const CallEvent &Call, + CheckerContext &C) const { + const auto *FC = dyn_cast<SimpleFunctionCall>(&Call); + if (!FC) + return false; + const FunctionDecl *FD = FC->getDecl(); + if (!FD->isOverloadedOperator()) + return false; + const OverloadedOperatorKind OOK = FD->getOverloadedOperator(); + if (!(OOK == OO_EqualEqual || OOK == OO_ExclaimEqual || OOK == OO_Less || + OOK == OO_LessEqual || OOK == OO_Greater || OOK == OO_GreaterEqual || + OOK == OO_Spaceship)) + return false; + + // There are some special cases about which we can infer about + // the resulting answer. + // For reference, there is a discussion at https://reviews.llvm.org/D104616. + // Also, the cppreference page is good to look at + // https://en.cppreference.com/w/cpp/memory/unique_ptr/operator_cmp. 
+ + auto makeSValFor = [&C, this](ProgramStateRef State, const Expr *E, + SVal S) -> std::pair<SVal, ProgramStateRef> { + if (S.isZeroConstant()) { + return {S, State}; + } + const MemRegion *Reg = S.getAsRegion(); + assert(Reg && + "this pointer of std::unique_ptr should be obtainable as MemRegion"); + QualType Type = getInnerPointerType(C, E->getType()->getAsCXXRecordDecl()); + return retrieveOrConjureInnerPtrVal(State, Reg, E, Type, C); + }; + + SVal First = Call.getArgSVal(0); + SVal Second = Call.getArgSVal(1); + const auto *FirstExpr = Call.getArgExpr(0); + const auto *SecondExpr = Call.getArgExpr(1); + + const auto *ResultExpr = Call.getOriginExpr(); + const auto *LCtx = C.getLocationContext(); + auto &Bldr = C.getSValBuilder(); + ProgramStateRef State = C.getState(); + + SVal FirstPtrVal, SecondPtrVal; + std::tie(FirstPtrVal, State) = makeSValFor(State, FirstExpr, First); + std::tie(SecondPtrVal, State) = makeSValFor(State, SecondExpr, Second); + BinaryOperatorKind BOK = + operationKindFromOverloadedOperator(OOK, true).GetBinaryOpUnsafe(); + auto RetVal = Bldr.evalBinOp(State, BOK, FirstPtrVal, SecondPtrVal, + Call.getResultType()); + + if (OOK != OO_Spaceship) { + ProgramStateRef TrueState, FalseState; + std::tie(TrueState, FalseState) = + State->assume(*RetVal.getAs<DefinedOrUnknownSVal>()); + if (TrueState) + C.addTransition( + TrueState->BindExpr(ResultExpr, LCtx, Bldr.makeTruthVal(true))); + if (FalseState) + C.addTransition( + FalseState->BindExpr(ResultExpr, LCtx, Bldr.makeTruthVal(false))); + } else { + C.addTransition(State->BindExpr(ResultExpr, LCtx, RetVal)); + } + return true; +} + +bool SmartPtrModeling::handleOstreamOperator(const CallEvent &Call, + CheckerContext &C) const { + // operator<< does not modify the smart pointer. + // And we don't really have much of modelling of basic_ostream. + // So, we are better off: + // 1) Invalidating the mem-region of the ostream object at hand. 
+ // 2) Setting the SVal of the basic_ostream as the return value. + // Not very satisfying, but it gets the job done, and is better + // than the default handling. :) + + ProgramStateRef State = C.getState(); + const auto StreamVal = Call.getArgSVal(0); + const MemRegion *StreamThisRegion = StreamVal.getAsRegion(); + if (!StreamThisRegion) + return false; + State = + State->invalidateRegions({StreamThisRegion}, Call.getOriginExpr(), + C.blockCount(), C.getLocationContext(), false); + State = + State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), StreamVal); + C.addTransition(State); + return true; +} + void SmartPtrModeling::checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const { ProgramStateRef State = C.getState(); @@ -395,43 +661,52 @@ void SmartPtrModeling::handleRelease(const CallEvent &Call, // pointer. } -void SmartPtrModeling::handleSwap(const CallEvent &Call, - CheckerContext &C) const { +void SmartPtrModeling::handleSwapMethod(const CallEvent &Call, + CheckerContext &C) const { // To model unique_ptr::swap() method. 
const auto *IC = dyn_cast<CXXInstanceCall>(&Call); if (!IC) return; - const MemRegion *ThisRegion = IC->getCXXThisVal().getAsRegion(); - if (!ThisRegion) - return; + auto State = C.getState(); + handleSwap(State, IC->getCXXThisVal(), Call.getArgSVal(0), C); +} - const auto *ArgRegion = Call.getArgSVal(0).getAsRegion(); - if (!ArgRegion) - return; +bool SmartPtrModeling::handleSwap(ProgramStateRef State, SVal First, + SVal Second, CheckerContext &C) const { + const MemRegion *FirstThisRegion = First.getAsRegion(); + if (!FirstThisRegion) + return false; + const MemRegion *SecondThisRegion = Second.getAsRegion(); + if (!SecondThisRegion) + return false; - auto State = C.getState(); - const auto *ThisRegionInnerPointerVal = - State->get<TrackedRegionMap>(ThisRegion); - const auto *ArgRegionInnerPointerVal = - State->get<TrackedRegionMap>(ArgRegion); + const auto *FirstInnerPtrVal = State->get<TrackedRegionMap>(FirstThisRegion); + const auto *SecondInnerPtrVal = + State->get<TrackedRegionMap>(SecondThisRegion); - // Swap the tracked region values. 
- State = updateSwappedRegion(State, ThisRegion, ArgRegionInnerPointerVal); - State = updateSwappedRegion(State, ArgRegion, ThisRegionInnerPointerVal); + State = updateSwappedRegion(State, FirstThisRegion, SecondInnerPtrVal); + State = updateSwappedRegion(State, SecondThisRegion, FirstInnerPtrVal); - C.addTransition( - State, C.getNoteTag([ThisRegion, ArgRegion](PathSensitiveBugReport &BR, - llvm::raw_ostream &OS) { - if (&BR.getBugType() != smartptr::getNullDereferenceBugType() || - !BR.isInteresting(ThisRegion)) - return; - BR.markInteresting(ArgRegion); - OS << "Swapped null smart pointer"; - checkAndPrettyPrintRegion(OS, ArgRegion); - OS << " with smart pointer"; - checkAndPrettyPrintRegion(OS, ThisRegion); - })); + C.addTransition(State, C.getNoteTag([FirstThisRegion, SecondThisRegion]( + PathSensitiveBugReport &BR, + llvm::raw_ostream &OS) { + if (&BR.getBugType() != smartptr::getNullDereferenceBugType()) + return; + if (BR.isInteresting(FirstThisRegion) && + !BR.isInteresting(SecondThisRegion)) { + BR.markInteresting(SecondThisRegion); + BR.markNotInteresting(FirstThisRegion); + } + if (BR.isInteresting(SecondThisRegion) && + !BR.isInteresting(FirstThisRegion)) { + BR.markInteresting(FirstThisRegion); + BR.markNotInteresting(SecondThisRegion); + } + // TODO: We need to emit some note here probably!! 
+ })); + + return true; } void SmartPtrModeling::handleGet(const CallEvent &Call, @@ -446,15 +721,8 @@ void SmartPtrModeling::handleGet(const CallEvent &Call, return; SVal InnerPointerVal; - if (const auto *InnerValPtr = State->get<TrackedRegionMap>(ThisRegion)) { - InnerPointerVal = *InnerValPtr; - } else { - const auto *CallExpr = Call.getOriginExpr(); - InnerPointerVal = C.getSValBuilder().conjureSymbolVal( - CallExpr, C.getLocationContext(), Call.getResultType(), C.blockCount()); - State = State->set<TrackedRegionMap>(ThisRegion, InnerPointerVal); - } - + std::tie(InnerPointerVal, State) = retrieveOrConjureInnerPtrVal( + State, ThisRegion, Call.getOriginExpr(), Call.getResultType(), C); State = State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), InnerPointerVal); // TODO: Add NoteTag, for how the raw pointer got using 'get' method. diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index d1c366a94fac..e758b465af1b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -56,7 +56,11 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" + +#include <string> using namespace clang; using namespace clang::ento; @@ -87,6 +91,10 @@ class StdLibraryFunctionsChecker typedef uint32_t ArgNo; static const ArgNo Ret; + /// Returns the string representation of an argument index. + /// E.g.: (1) -> '1st arg', (2) - > '2nd arg' + static SmallString<8> getArgDesc(ArgNo); + class ValueConstraint; // Pointer to the ValueConstraint. 
We need a copyable, polymorphic and @@ -126,8 +134,24 @@ class StdLibraryFunctionsChecker } ArgNo getArgNo() const { return ArgN; } + // Return those arguments that should be tracked when we report a bug. By + // default it is the argument that is constrained, however, in some special + // cases we need to track other arguments as well. E.g. a buffer size might + // be encoded in another argument. + virtual std::vector<ArgNo> getArgsToTrack() const { return {ArgN}; } + virtual StringRef getName() const = 0; + // Give a description that explains the constraint to the user. Used when + // the bug is reported. + virtual std::string describe(ProgramStateRef State, + const Summary &Summary) const { + // There are some descendant classes that are not used as argument + // constraints, e.g. ComparisonConstraint. In that case we can safely + // ignore the implementation of this function. + llvm_unreachable("Not implemented"); + } + protected: ArgNo ArgN; // Argument to which we apply the constraint. 
@@ -158,6 +182,9 @@ class StdLibraryFunctionsChecker RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Ranges) : ValueConstraint(ArgN), Kind(Kind), Ranges(Ranges) {} + std::string describe(ProgramStateRef State, + const Summary &Summary) const override; + const IntRangeVector &getRanges() const { return Ranges; } private: @@ -225,6 +252,8 @@ class StdLibraryFunctionsChecker bool CannotBeNull = true; public: + std::string describe(ProgramStateRef State, + const Summary &Summary) const override; StringRef getName() const override { return "NonNull"; } ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, const Summary &Summary, @@ -286,6 +315,18 @@ class StdLibraryFunctionsChecker : ValueConstraint(Buffer), SizeArgN(BufSize), SizeMultiplierArgN(BufSizeMultiplier) {} + std::vector<ArgNo> getArgsToTrack() const override { + std::vector<ArgNo> Result{ArgN}; + if (SizeArgN) + Result.push_back(*SizeArgN); + if (SizeMultiplierArgN) + Result.push_back(*SizeMultiplierArgN); + return Result; + } + + std::string describe(ProgramStateRef State, + const Summary &Summary) const override; + ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, const Summary &Summary, CheckerContext &C) const override { @@ -297,24 +338,22 @@ class StdLibraryFunctionsChecker const SVal SizeV = [this, &State, &Call, &Summary, &SvalBuilder]() { if (ConcreteSize) { return SVal(SvalBuilder.makeIntVal(*ConcreteSize)); - } else if (SizeArgN) { - // The size argument. - SVal SizeV = getArgSVal(Call, *SizeArgN); - // Multiply with another argument if given. 
- if (SizeMultiplierArgN) { - SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); - SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, - Summary.getArgType(*SizeArgN)); - } - return SizeV; - } else { - llvm_unreachable("The constraint must be either a concrete value or " - "encoded in an arguement."); } + assert(SizeArgN && "The constraint must be either a concrete value or " + "encoded in an argument."); + // The size argument. + SVal SizeV = getArgSVal(Call, *SizeArgN); + // Multiply with another argument if given. + if (SizeMultiplierArgN) { + SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); + SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, + Summary.getArgType(*SizeArgN)); + } + return SizeV; }(); // The dynamic size of the buffer argument, got from the analyzer engine. - SVal BufDynSize = getDynamicSizeWithOffset(State, BufV); + SVal BufDynSize = getDynamicExtentWithOffset(State, BufV); SVal Feasible = SvalBuilder.evalBinOp(State, Op, SizeV, BufDynSize, SvalBuilder.getContext().BoolTy); @@ -508,6 +547,7 @@ class StdLibraryFunctionsChecker mutable FunctionSummaryMapType FunctionSummaryMap; mutable std::unique_ptr<BugType> BT_InvalidArg; + mutable bool SummariesInitialized = false; static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); @@ -538,24 +578,30 @@ private: void initFunctionSummaries(CheckerContext &C) const; void reportBug(const CallEvent &Call, ExplodedNode *N, - const ValueConstraint *VC, CheckerContext &C) const { + const ValueConstraint *VC, const Summary &Summary, + CheckerContext &C) const { if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) return; - // TODO Add more detailed diagnostic. 
std::string Msg = (Twine("Function argument constraint is not satisfied, constraint: ") + - VC->getName().data() + ", ArgN: " + Twine(VC->getArgNo())) + VC->getName().data()) .str(); if (!BT_InvalidArg) BT_InvalidArg = std::make_unique<BugType>( CheckNames[CK_StdCLibraryFunctionArgsChecker], "Unsatisfied argument constraints", categories::LogicError); auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); - bugreporter::trackExpressionValue(N, Call.getArgExpr(VC->getArgNo()), *R); + + for (ArgNo ArgN : VC->getArgsToTrack()) + bugreporter::trackExpressionValue(N, Call.getArgExpr(ArgN), *R); // Highlight the range of the argument that was violated. R->addRange(Call.getArgSourceRange(VC->getArgNo())); + // Describe the argument constraint in a note. + R->addNote(VC->describe(C.getState(), Summary), R->getLocation(), + Call.getArgSourceRange(VC->getArgNo())); + C.emitReport(std::move(R)); } }; @@ -565,6 +611,85 @@ const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = } // end of anonymous namespace +static BasicValueFactory &getBVF(ProgramStateRef State) { + ProgramStateManager &Mgr = State->getStateManager(); + SValBuilder &SVB = Mgr.getSValBuilder(); + return SVB.getBasicValueFactory(); +} + +std::string StdLibraryFunctionsChecker::NotNullConstraint::describe( + ProgramStateRef State, const Summary &Summary) const { + SmallString<48> Result; + Result += "The "; + Result += getArgDesc(ArgN); + Result += " should not be NULL"; + return Result.c_str(); +} + +std::string StdLibraryFunctionsChecker::RangeConstraint::describe( + ProgramStateRef State, const Summary &Summary) const { + + BasicValueFactory &BVF = getBVF(State); + + QualType T = Summary.getArgType(getArgNo()); + SmallString<48> Result; + Result += "The "; + Result += getArgDesc(ArgN); + Result += " should be "; + + // Range kind as a string. + Kind == OutOfRange ? Result += "out of" : Result += "within"; + + // Get the range values as a string. 
+ Result += " the range "; + if (Ranges.size() > 1) + Result += "["; + unsigned I = Ranges.size(); + for (const std::pair<RangeInt, RangeInt> &R : Ranges) { + Result += "["; + const llvm::APSInt &Min = BVF.getValue(R.first, T); + const llvm::APSInt &Max = BVF.getValue(R.second, T); + Min.toString(Result); + Result += ", "; + Max.toString(Result); + Result += "]"; + if (--I > 0) + Result += ", "; + } + if (Ranges.size() > 1) + Result += "]"; + + return Result.c_str(); +} + +SmallString<8> +StdLibraryFunctionsChecker::getArgDesc(StdLibraryFunctionsChecker::ArgNo ArgN) { + SmallString<8> Result; + Result += std::to_string(ArgN + 1); + Result += llvm::getOrdinalSuffix(ArgN + 1); + Result += " arg"; + return Result; +} + +std::string StdLibraryFunctionsChecker::BufferSizeConstraint::describe( + ProgramStateRef State, const Summary &Summary) const { + SmallString<96> Result; + Result += "The size of the "; + Result += getArgDesc(ArgN); + Result += " should be equal to or less than the value of "; + if (ConcreteSize) { + ConcreteSize->toString(Result); + } else if (SizeArgN) { + Result += "the "; + Result += getArgDesc(*SizeArgN); + if (SizeMultiplierArgN) { + Result += " times the "; + Result += getArgDesc(*SizeMultiplierArgN); + } + } + return Result.c_str(); +} + ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const { @@ -692,7 +817,7 @@ void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, // The argument constraint is not satisfied. 
if (FailureSt && !SuccessSt) { if (ExplodedNode *N = C.generateErrorNode(NewState)) - reportBug(Call, N, Constraint.get(), C); + reportBug(Call, N, Constraint.get(), Summary, C); break; } else { // We will apply the constraint even if we cannot reason about the @@ -823,7 +948,7 @@ StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, void StdLibraryFunctionsChecker::initFunctionSummaries( CheckerContext &C) const { - if (!FunctionSummaryMap.empty()) + if (SummariesInitialized) return; SValBuilder &SVB = C.getSValBuilder(); @@ -841,7 +966,7 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( llvm::Optional<QualType> operator()(StringRef Name) { IdentifierInfo &II = ACtx.Idents.get(Name); auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); - if (LookupRes.size() == 0) + if (LookupRes.empty()) return None; // Prioritze typedef declarations. @@ -993,7 +1118,7 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( return false; IdentifierInfo &II = ACtx.Idents.get(Name); auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); - if (LookupRes.size() == 0) + if (LookupRes.empty()) return false; for (Decl *D : LookupRes) { if (auto *FD = dyn_cast<FunctionDecl>(D)) { @@ -2441,6 +2566,35 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( // Functions for testing. if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) { addToFunctionSummaryMap( + "__not_null", Signature(ArgTypes{IntPtrTy}, RetType{IntTy}), + Summary(EvalCallAsPure).ArgConstraint(NotNull(ArgNo(0)))); + + // Test range values. 
+ addToFunctionSummaryMap( + "__single_val_1", Signature(ArgTypes{IntTy}, RetType{IntTy}), + Summary(EvalCallAsPure) + .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1)))); + addToFunctionSummaryMap( + "__range_1_2", Signature(ArgTypes{IntTy}, RetType{IntTy}), + Summary(EvalCallAsPure) + .ArgConstraint(ArgumentCondition(0U, WithinRange, Range(1, 2)))); + addToFunctionSummaryMap("__range_1_2__4_5", + Signature(ArgTypes{IntTy}, RetType{IntTy}), + Summary(EvalCallAsPure) + .ArgConstraint(ArgumentCondition( + 0U, WithinRange, Range({1, 2}, {4, 5})))); + + // Test range kind. + addToFunctionSummaryMap( + "__within", Signature(ArgTypes{IntTy}, RetType{IntTy}), + Summary(EvalCallAsPure) + .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1)))); + addToFunctionSummaryMap( + "__out_of", Signature(ArgTypes{IntTy}, RetType{IntTy}), + Summary(EvalCallAsPure) + .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1)))); + + addToFunctionSummaryMap( "__two_constrained_args", Signature(ArgTypes{IntTy, IntTy}, RetType{IntTy}), Summary(EvalCallAsPure) @@ -2485,6 +2639,8 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( Signature(ArgTypes{VoidPtrRestrictTy}, RetType{VoidTy}), Summary(EvalCallAsPure)); } + + SummariesInitialized = true; } void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 6b176b3c4e2b..dd65f8c035aa 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -25,6 +25,10 @@ using namespace clang; using namespace ento; using namespace std::placeholders; +//===----------------------------------------------------------------------===// +// Definition of state data structures. 
+//===----------------------------------------------------------------------===// + namespace { struct FnDescription; @@ -146,6 +150,14 @@ struct StreamState { } }; +} // namespace + +//===----------------------------------------------------------------------===// +// StreamChecker class and utility functions. +//===----------------------------------------------------------------------===// + +namespace { + class StreamChecker; using FnCheck = std::function<void(const StreamChecker *, const FnDescription *, const CallEvent &, CheckerContext &)>; @@ -219,6 +231,8 @@ public: /// If true, evaluate special testing stream functions. bool TestMode = false; + const BugType *getBT_StreamEof() const { return &BT_StreamEof; } + private: CallDescriptionMap<FnDescription> FnDescriptions = { {{"fopen"}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, @@ -306,7 +320,8 @@ private: /// If it can only be NULL a fatal error is emitted and nullptr returned. /// Otherwise the return value is a new state where the stream is constrained /// to be non-null. - ProgramStateRef ensureStreamNonNull(SVal StreamVal, CheckerContext &C, + ProgramStateRef ensureStreamNonNull(SVal StreamVal, const Expr *StreamE, + CheckerContext &C, ProgramStateRef State) const; /// Check that the stream is the opened state. @@ -336,7 +351,8 @@ private: /// There will be always a state transition into the passed State, /// by the new non-fatal error node or (if failed) a normal transition, /// to ensure uniform handling. - void reportFEofWarning(CheckerContext &C, ProgramStateRef State) const; + void reportFEofWarning(SymbolRef StreamSym, CheckerContext &C, + ProgramStateRef State) const; /// Emit resource leak warnings for the given symbols. /// Createn a non-fatal error node for these, and returns it (if any warnings @@ -362,14 +378,14 @@ private: /// Generate a message for BugReporterVisitor if the stored symbol is /// marked as interesting by the actual bug report. + // FIXME: Use lambda instead. 
struct NoteFn { - const CheckerNameRef CheckerName; + const BugType *BT_ResourceLeak; SymbolRef StreamSym; std::string Message; std::string operator()(PathSensitiveBugReport &BR) const { - if (BR.isInteresting(StreamSym) && - CheckerName == BR.getBugType().getCheckerName()) + if (BR.isInteresting(StreamSym) && &BR.getBugType() == BT_ResourceLeak) return Message; return ""; @@ -378,7 +394,20 @@ private: const NoteTag *constructNoteTag(CheckerContext &C, SymbolRef StreamSym, const std::string &Message) const { - return C.getNoteTag(NoteFn{getCheckerName(), StreamSym, Message}); + return C.getNoteTag(NoteFn{&BT_ResourceLeak, StreamSym, Message}); + } + + const NoteTag *constructSetEofNoteTag(CheckerContext &C, + SymbolRef StreamSym) const { + return C.getNoteTag([this, StreamSym](PathSensitiveBugReport &BR) { + if (!BR.isInteresting(StreamSym) || + &BR.getBugType() != this->getBT_StreamEof()) + return ""; + + BR.markNotInteresting(StreamSym); + + return "Assuming stream reaches end-of-file here"; + }); } /// Searches for the ExplodedNode where the file descriptor was acquired for @@ -390,6 +419,9 @@ private: } // end anonymous namespace +// This map holds the state of a stream. +// The stream is identified with a SymbolRef that is created when a stream +// opening function is modeled by the checker. REGISTER_MAP_WITH_PROGRAMSTATE(StreamMap, SymbolRef, StreamState) inline void assertStreamStateOpened(const StreamState *SS) { @@ -418,6 +450,10 @@ const ExplodedNode *StreamChecker::getAcquisitionSite(const ExplodedNode *N, return nullptr; } +//===----------------------------------------------------------------------===// +// Methods of StreamChecker. 
+//===----------------------------------------------------------------------===// + void StreamChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { const FnDescription *Desc = lookupFn(Call); @@ -472,7 +508,8 @@ void StreamChecker::preFreopen(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { // Do not allow NULL as passed stream pointer but allow a closed stream. ProgramStateRef State = C.getState(); - State = ensureStreamNonNull(getStreamArg(Desc, Call), C, State); + State = ensureStreamNonNull(getStreamArg(Desc, Call), + Call.getArgExpr(Desc->StreamArgNo), C, State); if (!State) return; @@ -549,7 +586,8 @@ void StreamChecker::preFread(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -563,7 +601,7 @@ void StreamChecker::preFread(const FnDescription *Desc, const CallEvent &Call, if (Sym && State->get<StreamMap>(Sym)) { const StreamState *SS = State->get<StreamMap>(Sym); if (SS->ErrorState & ErrorFEof) - reportFEofWarning(C, State); + reportFEofWarning(Sym, C, State); } else { C.addTransition(State); } @@ -573,7 +611,8 @@ void StreamChecker::preFwrite(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -605,11 +644,11 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc, if (!NMembVal) return; - const StreamState *SS = State->get<StreamMap>(StreamSym); - if (!SS) + const 
StreamState *OldSS = State->get<StreamMap>(StreamSym); + if (!OldSS) return; - assertStreamStateOpened(SS); + assertStreamStateOpened(OldSS); // C'99 standard, §7.19.8.1.3, the return value of fread: // The fread function returns the number of elements successfully read, which @@ -628,7 +667,7 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc, // Generate a transition for the success state. // If we know the state to be FEOF at fread, do not add a success state. - if (!IsFread || (SS->ErrorState != ErrorFEof)) { + if (!IsFread || (OldSS->ErrorState != ErrorFEof)) { ProgramStateRef StateNotFailed = State->BindExpr(CE, C.getLocationContext(), *NMembVal); if (StateNotFailed) { @@ -657,21 +696,26 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc, StreamErrorState NewES; if (IsFread) - NewES = (SS->ErrorState == ErrorFEof) ? ErrorFEof : ErrorFEof | ErrorFError; + NewES = + (OldSS->ErrorState == ErrorFEof) ? ErrorFEof : ErrorFEof | ErrorFError; else NewES = ErrorFError; // If a (non-EOF) error occurs, the resulting value of the file position // indicator for the stream is indeterminate. 
- StreamState NewState = StreamState::getOpened(Desc, NewES, !NewES.isFEof()); - StateFailed = StateFailed->set<StreamMap>(StreamSym, NewState); - C.addTransition(StateFailed); + StreamState NewSS = StreamState::getOpened(Desc, NewES, !NewES.isFEof()); + StateFailed = StateFailed->set<StreamMap>(StreamSym, NewSS); + if (IsFread && OldSS->ErrorState != ErrorFEof) + C.addTransition(StateFailed, constructSetEofNoteTag(C, StreamSym)); + else + C.addTransition(StateFailed); } void StreamChecker::preFseek(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -722,7 +766,7 @@ void StreamChecker::evalFseek(const FnDescription *Desc, const CallEvent &Call, StreamState::getOpened(Desc, ErrorNone | ErrorFEof | ErrorFError, true)); C.addTransition(StateNotFailed); - C.addTransition(StateFailed); + C.addTransition(StateFailed, constructSetEofNoteTag(C, StreamSym)); } void StreamChecker::evalClearerr(const FnDescription *Desc, @@ -790,7 +834,8 @@ void StreamChecker::preDefault(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -814,7 +859,8 @@ void StreamChecker::evalSetFeofFerror(const FnDescription *Desc, } ProgramStateRef -StreamChecker::ensureStreamNonNull(SVal StreamVal, CheckerContext &C, +StreamChecker::ensureStreamNonNull(SVal StreamVal, const Expr *StreamE, + CheckerContext &C, ProgramStateRef State) const { auto Stream = 
StreamVal.getAs<DefinedSVal>(); if (!Stream) @@ -827,8 +873,11 @@ StreamChecker::ensureStreamNonNull(SVal StreamVal, CheckerContext &C, if (!StateNotNull && StateNull) { if (ExplodedNode *N = C.generateErrorNode(StateNull)) { - C.emitReport(std::make_unique<PathSensitiveBugReport>( - BT_FileNull, "Stream pointer might be NULL.", N)); + auto R = std::make_unique<PathSensitiveBugReport>( + BT_FileNull, "Stream pointer might be NULL.", N); + if (StreamE) + bugreporter::trackExpressionValue(N, StreamE, *R); + C.emitReport(std::move(R)); } return nullptr; } @@ -950,14 +999,16 @@ StreamChecker::ensureFseekWhenceCorrect(SVal WhenceVal, CheckerContext &C, return State; } -void StreamChecker::reportFEofWarning(CheckerContext &C, +void StreamChecker::reportFEofWarning(SymbolRef StreamSym, CheckerContext &C, ProgramStateRef State) const { if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { - C.emitReport(std::make_unique<PathSensitiveBugReport>( + auto R = std::make_unique<PathSensitiveBugReport>( BT_StreamEof, "Read function called when stream is in EOF state. " "Function has no effect.", - N)); + N); + R->markInteresting(StreamSym); + C.emitReport(std::move(R)); return; } C.addTransition(State); @@ -1048,6 +1099,10 @@ ProgramStateRef StreamChecker::checkPointerEscape( return State; } +//===----------------------------------------------------------------------===// +// Checker registration. +//===----------------------------------------------------------------------===// + void ento::registerStreamChecker(CheckerManager &Mgr) { Mgr.registerChecker<StreamChecker>(); } @@ -1063,4 +1118,4 @@ void ento::registerStreamTesterChecker(CheckerManager &Mgr) { bool ento::shouldRegisterStreamTesterChecker(const CheckerManager &Mgr) { return true; -} +}
\ No newline at end of file diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp index e457513d8de4..816a547cadc3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp @@ -86,9 +86,9 @@ UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE, auto R = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD)) R->addRange(Ex->getSourceRange()); - R->addVisitor(std::make_unique<FindLastStoreBRVisitor>( - *V, VR, /*EnableNullFPSuppression*/ false, - bugreporter::TrackingKind::Thorough)); + bugreporter::trackStoredValue(*V, VR, *R, + {bugreporter::TrackingKind::Thorough, + /*EnableNullFPSuppression*/ false}); R->disablePathPruning(); // need location of block C.emitReport(std::move(R)); diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp index 392da4818098..477d910bc653 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp @@ -16,7 +16,7 @@ #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" diff --git a/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp index 74eec81ffb3e..d231be64c2e1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp +++ 
b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp @@ -169,7 +169,7 @@ void UnreachableCodeChecker::checkEndAnalysis(ExplodedGraph &G, if (SM.isInSystemHeader(SL) || SM.isInExternCSystemHeader(SL)) continue; - B.EmitBasicReport(D, this, "Unreachable code", "Dead code", + B.EmitBasicReport(D, this, "Unreachable code", categories::UnusedCode, "This statement is never executed", DL, SR); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp index d76b2a06aba5..96501215c689 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -20,7 +20,7 @@ #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" @@ -285,21 +285,11 @@ void VLASizeChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { return; } - // VLASizeChecker is responsible for defining the extent of the array being - // declared. We do this by multiplying the array length by the element size, - // then matching that with the array region's extent symbol. - + // VLASizeChecker is responsible for defining the extent of the array. if (VD) { - // Assume that the array's size matches the region size. - const LocationContext *LC = C.getLocationContext(); - DefinedOrUnknownSVal DynSize = - getDynamicSize(State, State->getRegion(VD, LC), SVB); - - DefinedOrUnknownSVal SizeIsKnown = SVB.evalEQ(State, DynSize, *ArraySizeNL); - State = State->assume(SizeIsKnown, true); - - // Assume should not fail at this point. 
- assert(State); + State = + setDynamicExtent(State, State->getRegion(VD, C.getLocationContext()), + ArraySize.castAs<DefinedOrUnknownSVal>(), SVB); } // Remember our assumptions! diff --git a/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp b/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp index d1f5ac02278f..40cdaef1bfa7 100644 --- a/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp +++ b/clang/lib/StaticAnalyzer/Core/BasicValueFactory.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableList.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include <cassert> #include <cstdint> #include <utility> @@ -176,28 +177,73 @@ const PointerToMemberData *BasicValueFactory::getPointerToMemberData( return D; } +LLVM_ATTRIBUTE_UNUSED bool hasNoRepeatedElements( + llvm::ImmutableList<const CXXBaseSpecifier *> BaseSpecList) { + llvm::SmallPtrSet<QualType, 16> BaseSpecSeen; + for (const CXXBaseSpecifier *BaseSpec : BaseSpecList) { + QualType BaseType = BaseSpec->getType(); + // Check whether inserted + if (!BaseSpecSeen.insert(BaseType).second) + return false; + } + return true; +} + const PointerToMemberData *BasicValueFactory::accumCXXBase( llvm::iterator_range<CastExpr::path_const_iterator> PathRange, - const nonloc::PointerToMember &PTM) { + const nonloc::PointerToMember &PTM, const CastKind &kind) { + assert((kind == CK_DerivedToBaseMemberPointer || + kind == CK_BaseToDerivedMemberPointer || + kind == CK_ReinterpretMemberPointer) && + "accumCXXBase called with wrong CastKind"); nonloc::PointerToMember::PTMDataType PTMDT = PTM.getPTMData(); const NamedDecl *ND = nullptr; - llvm::ImmutableList<const CXXBaseSpecifier *> PathList; + llvm::ImmutableList<const CXXBaseSpecifier *> BaseSpecList; if (PTMDT.isNull() || PTMDT.is<const NamedDecl *>()) { if (PTMDT.is<const NamedDecl *>()) ND = PTMDT.get<const NamedDecl *>(); - PathList = CXXBaseListFactory.getEmptyList(); - } else { // const PointerToMemberData * + BaseSpecList 
= CXXBaseListFactory.getEmptyList(); + } else { const PointerToMemberData *PTMD = PTMDT.get<const PointerToMemberData *>(); ND = PTMD->getDeclaratorDecl(); - PathList = PTMD->getCXXBaseList(); + BaseSpecList = PTMD->getCXXBaseList(); } - for (const auto &I : llvm::reverse(PathRange)) - PathList = prependCXXBase(I, PathList); - return getPointerToMemberData(ND, PathList); + assert(hasNoRepeatedElements(BaseSpecList) && + "CXXBaseSpecifier list of PointerToMemberData must not have repeated " + "elements"); + + if (kind == CK_DerivedToBaseMemberPointer) { + // Here we pop off matching CXXBaseSpecifiers from BaseSpecList. + // Because, CK_DerivedToBaseMemberPointer comes from a static_cast and + // serves to remove a matching implicit cast. Note that static_cast's that + // are no-ops do not count since they produce an empty PathRange, a nice + // thing about Clang AST. + + // Now we know that there are no repetitions in BaseSpecList. + // So, popping the first element from it corresponding to each element in + // PathRange is equivalent to only including elements that are in + // BaseSpecList but not it PathRange + auto ReducedBaseSpecList = CXXBaseListFactory.getEmptyList(); + for (const CXXBaseSpecifier *BaseSpec : BaseSpecList) { + auto IsSameAsBaseSpec = [&BaseSpec](const CXXBaseSpecifier *I) -> bool { + return BaseSpec->getType() == I->getType(); + }; + if (llvm::none_of(PathRange, IsSameAsBaseSpec)) + ReducedBaseSpecList = + CXXBaseListFactory.add(BaseSpec, ReducedBaseSpecList); + } + + return getPointerToMemberData(ND, ReducedBaseSpecList); + } + // FIXME: Reinterpret casts on member-pointers are not handled properly by + // this code + for (const CXXBaseSpecifier *I : llvm::reverse(PathRange)) + BaseSpecList = prependCXXBase(I, BaseSpecList); + return getPointerToMemberData(ND, BaseSpecList); } const llvm::APSInt* diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index bf38891b370a..d6f69ae03afe 100644 
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -1988,14 +1988,6 @@ PathDiagnosticBuilder::generate(const PathDiagnosticConsumer *PDC) const { const SourceManager &SM = getSourceManager(); const AnalyzerOptions &Opts = getAnalyzerOptions(); - StringRef ErrorTag = ErrorNode->getLocation().getTag()->getTagDescription(); - - // See whether we need to silence the checker/package. - // FIXME: This will not work if the report was emitted with an incorrect tag. - for (const std::string &CheckerOrPackage : Opts.SilencedCheckersAndPackages) { - if (ErrorTag.startswith(CheckerOrPackage)) - return nullptr; - } if (!PDC->shouldGenerateDiagnostics()) return generateEmptyDiagnosticForReport(R, getSourceManager()); @@ -2257,10 +2249,24 @@ void PathSensitiveBugReport::markInteresting(SymbolRef sym, insertToInterestingnessMap(InterestingSymbols, sym, TKind); + // FIXME: No tests exist for this code and it is questionable: + // How to handle multiple metadata for the same region? if (const auto *meta = dyn_cast<SymbolMetadata>(sym)) markInteresting(meta->getRegion(), TKind); } +void PathSensitiveBugReport::markNotInteresting(SymbolRef sym) { + if (!sym) + return; + InterestingSymbols.erase(sym); + + // The metadata part of markInteresting is not reversed here. + // Just making the same region not interesting is incorrect + // in specific cases. 
+ if (const auto *meta = dyn_cast<SymbolMetadata>(sym)) + markNotInteresting(meta->getRegion()); +} + void PathSensitiveBugReport::markInteresting(const MemRegion *R, bugreporter::TrackingKind TKind) { if (!R) @@ -2273,6 +2279,17 @@ void PathSensitiveBugReport::markInteresting(const MemRegion *R, markInteresting(SR->getSymbol(), TKind); } +void PathSensitiveBugReport::markNotInteresting(const MemRegion *R) { + if (!R) + return; + + R = R->getBaseRegion(); + InterestingRegions.erase(R); + + if (const auto *SR = dyn_cast<SymbolicRegion>(R)) + markNotInteresting(SR->getSymbol()); +} + void PathSensitiveBugReport::markInteresting(SVal V, bugreporter::TrackingKind TKind) { markInteresting(V.getAsRegion(), TKind); @@ -2738,8 +2755,8 @@ static void CompactMacroExpandedPieces(PathPieces &path, } /// Generate notes from all visitors. -/// Notes associated with {@code ErrorNode} are generated using -/// {@code getEndPath}, and the rest are generated with {@code VisitNode}. +/// Notes associated with @c ErrorNode are generated using +/// @c getEndPath, and the rest are generated with @c VisitNode. static std::unique_ptr<VisitorsDiagnosticsTy> generateVisitorsDiagnostics(PathSensitiveBugReport *R, const ExplodedNode *ErrorNode, @@ -2749,7 +2766,7 @@ generateVisitorsDiagnostics(PathSensitiveBugReport *R, PathSensitiveBugReport::VisitorList visitors; // Run visitors on all nodes starting from the node *before* the last one. - // The last node is reserved for notes generated with {@code getEndPath}. + // The last node is reserved for notes generated with @c getEndPath. 
const ExplodedNode *NextNode = ErrorNode->getFirstPred(); while (NextNode) { @@ -2811,12 +2828,12 @@ Optional<PathDiagnosticBuilder> PathDiagnosticBuilder::findValidReport( // Register refutation visitors first, if they mark the bug invalid no // further analysis is required - R->addVisitor(std::make_unique<LikelyFalsePositiveSuppressionBRVisitor>()); + R->addVisitor<LikelyFalsePositiveSuppressionBRVisitor>(); // Register additional node visitors. - R->addVisitor(std::make_unique<NilReceiverBRVisitor>()); - R->addVisitor(std::make_unique<ConditionBRVisitor>()); - R->addVisitor(std::make_unique<TagVisitor>()); + R->addVisitor<NilReceiverBRVisitor>(); + R->addVisitor<ConditionBRVisitor>(); + R->addVisitor<TagVisitor>(); BugReporterContext BRC(Reporter); @@ -2829,7 +2846,7 @@ Optional<PathDiagnosticBuilder> PathDiagnosticBuilder::findValidReport( // If crosscheck is enabled, remove all visitors, add the refutation // visitor and check again R->clearVisitors(); - R->addVisitor(std::make_unique<FalsePositiveRefutationBRVisitor>()); + R->addVisitor<FalsePositiveRefutationBRVisitor>(); // We don't overwrite the notes inserted by other visitors because the // refutation manager does not add any new note to the path @@ -3041,6 +3058,14 @@ void BugReporter::FlushReport(BugReportEquivClass& EQ) { if (!report) return; + // See whether we need to silence the checker/package. 
+ for (const std::string &CheckerOrPackage : + getAnalyzerOptions().SilencedCheckersAndPackages) { + if (report->getBugType().getCheckerName().startswith( + CheckerOrPackage)) + return; + } + ArrayRef<PathDiagnosticConsumer*> Consumers = getPathDiagnosticConsumers(); std::unique_ptr<DiagnosticForConsumerMapTy> Diagnostics = generateDiagnosticForConsumerMap(report, Consumers, bugReports); diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index bc72f4f8c1e3..d06a2d493303 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -65,6 +65,7 @@ using namespace clang; using namespace ento; +using namespace bugreporter; //===----------------------------------------------------------------------===// // Utility functions. @@ -153,6 +154,28 @@ const Expr *bugreporter::getDerefExpr(const Stmt *S) { return E; } +static const MemRegion * +getLocationRegionIfReference(const Expr *E, const ExplodedNode *N, + bool LookingForReference = true) { + if (const auto *DR = dyn_cast<DeclRefExpr>(E)) { + if (const auto *VD = dyn_cast<VarDecl>(DR->getDecl())) { + if (LookingForReference && !VD->getType()->isReferenceType()) + return nullptr; + return N->getState() + ->getLValue(VD, N->getLocationContext()) + .getAsRegion(); + } + } + + // FIXME: This does not handle other kinds of null references, + // for example, references from FieldRegions: + // struct Wrapper { int &ref; }; + // Wrapper w = { *(int *)0 }; + // w.ref = 1; + + return nullptr; +} + /// Comparing internal representations of symbolic values (via /// SVal::operator==()) is a valid way to check if the value was updated, /// unless it's a LazyCompoundVal that may have a different internal @@ -830,10 +853,10 @@ public: bool EnableNullFPSuppression, PathSensitiveBugReport &BR, const SVal V) { AnalyzerOptions &Options = N->getState()->getAnalysisManager().options; - if 
(EnableNullFPSuppression && - Options.ShouldSuppressNullReturnPaths && V.getAs<Loc>()) - BR.addVisitor(std::make_unique<MacroNullReturnSuppressionVisitor>( - R->getAs<SubRegion>(), V)); + if (EnableNullFPSuppression && Options.ShouldSuppressNullReturnPaths && + V.getAs<Loc>()) + BR.addVisitor<MacroNullReturnSuppressionVisitor>(R->getAs<SubRegion>(), + V); } void* getTag() const { @@ -883,7 +906,7 @@ namespace { /// /// This visitor is intended to be used when another visitor discovers that an /// interesting value comes from an inlined function call. -class ReturnVisitor : public BugReporterVisitor { +class ReturnVisitor : public TrackingBugReporterVisitor { const StackFrameContext *CalleeSFC; enum { Initial, @@ -897,10 +920,11 @@ class ReturnVisitor : public BugReporterVisitor { bugreporter::TrackingKind TKind; public: - ReturnVisitor(const StackFrameContext *Frame, bool Suppressed, - AnalyzerOptions &Options, bugreporter::TrackingKind TKind) - : CalleeSFC(Frame), EnableNullFPSuppression(Suppressed), - Options(Options), TKind(TKind) {} + ReturnVisitor(TrackerRef ParentTracker, const StackFrameContext *Frame, + bool Suppressed, AnalyzerOptions &Options, + bugreporter::TrackingKind TKind) + : TrackingBugReporterVisitor(ParentTracker), CalleeSFC(Frame), + EnableNullFPSuppression(Suppressed), Options(Options), TKind(TKind) {} static void *getTag() { static int Tag = 0; @@ -913,92 +937,6 @@ public: ID.AddBoolean(EnableNullFPSuppression); } - /// Adds a ReturnVisitor if the given statement represents a call that was - /// inlined. - /// - /// This will search back through the ExplodedGraph, starting from the given - /// node, looking for when the given statement was processed. If it turns out - /// the statement is a call that was inlined, we add the visitor to the - /// bug report, so it can print a note later. 
- static void addVisitorIfNecessary(const ExplodedNode *Node, const Stmt *S, - PathSensitiveBugReport &BR, - bool InEnableNullFPSuppression, - bugreporter::TrackingKind TKind) { - if (!CallEvent::isCallStmt(S)) - return; - - // First, find when we processed the statement. - // If we work with a 'CXXNewExpr' that is going to be purged away before - // its call take place. We would catch that purge in the last condition - // as a 'StmtPoint' so we have to bypass it. - const bool BypassCXXNewExprEval = isa<CXXNewExpr>(S); - - // This is moving forward when we enter into another context. - const StackFrameContext *CurrentSFC = Node->getStackFrame(); - - do { - // If that is satisfied we found our statement as an inlined call. - if (Optional<CallExitEnd> CEE = Node->getLocationAs<CallExitEnd>()) - if (CEE->getCalleeContext()->getCallSite() == S) - break; - - // Try to move forward to the end of the call-chain. - Node = Node->getFirstPred(); - if (!Node) - break; - - const StackFrameContext *PredSFC = Node->getStackFrame(); - - // If that is satisfied we found our statement. - // FIXME: This code currently bypasses the call site for the - // conservatively evaluated allocator. - if (!BypassCXXNewExprEval) - if (Optional<StmtPoint> SP = Node->getLocationAs<StmtPoint>()) - // See if we do not enter into another context. - if (SP->getStmt() == S && CurrentSFC == PredSFC) - break; - - CurrentSFC = PredSFC; - } while (Node->getStackFrame() == CurrentSFC); - - // Next, step over any post-statement checks. - while (Node && Node->getLocation().getAs<PostStmt>()) - Node = Node->getFirstPred(); - if (!Node) - return; - - // Finally, see if we inlined the call. - Optional<CallExitEnd> CEE = Node->getLocationAs<CallExitEnd>(); - if (!CEE) - return; - - const StackFrameContext *CalleeContext = CEE->getCalleeContext(); - if (CalleeContext->getCallSite() != S) - return; - - // Check the return value. 
- ProgramStateRef State = Node->getState(); - SVal RetVal = Node->getSVal(S); - - // Handle cases where a reference is returned and then immediately used. - if (cast<Expr>(S)->isGLValue()) - if (Optional<Loc> LValue = RetVal.getAs<Loc>()) - RetVal = State->getSVal(*LValue); - - // See if the return value is NULL. If so, suppress the report. - AnalyzerOptions &Options = State->getAnalysisManager().options; - - bool EnableNullFPSuppression = false; - if (InEnableNullFPSuppression && - Options.ShouldSuppressNullReturnPaths) - if (Optional<Loc> RetLoc = RetVal.getAs<Loc>()) - EnableNullFPSuppression = State->isNull(*RetLoc).isConstrainedTrue(); - - BR.addVisitor(std::make_unique<ReturnVisitor>(CalleeContext, - EnableNullFPSuppression, - Options, TKind)); - } - PathDiagnosticPieceRef visitNodeInitial(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) { @@ -1045,8 +983,7 @@ public: RetE = RetE->IgnoreParenCasts(); // Let's track the return value. - bugreporter::trackExpressionValue( - N, RetE, BR, TKind, EnableNullFPSuppression); + getParentTracker().track(RetE, N, {TKind, EnableNullFPSuppression}); // Build an appropriate message based on the return value. SmallString<64> Msg; @@ -1162,7 +1099,9 @@ public: if (!State->isNull(*ArgV).isConstrainedTrue()) continue; - if (trackExpressionValue(N, ArgE, BR, TKind, EnableNullFPSuppression)) + if (getParentTracker() + .track(ArgE, N, {TKind, EnableNullFPSuppression}) + .FoundSomethingToTrack) ShouldInvalidate = false; // If we /can't/ track the null pointer, we should err on the side of @@ -1198,16 +1137,52 @@ public: } // end of anonymous namespace //===----------------------------------------------------------------------===// -// Implementation of FindLastStoreBRVisitor. 
+// StoreSiteFinder //===----------------------------------------------------------------------===// -void FindLastStoreBRVisitor::Profile(llvm::FoldingSetNodeID &ID) const { +/// Finds last store into the given region, +/// which is different from a given symbolic value. +class StoreSiteFinder final : public TrackingBugReporterVisitor { + const MemRegion *R; + SVal V; + bool Satisfied = false; + + TrackingOptions Options; + const StackFrameContext *OriginSFC; + +public: + /// \param V We're searching for the store where \c R received this value. + /// \param R The region we're tracking. + /// \param TKind May limit the amount of notes added to the bug report. + /// \param OriginSFC Only adds notes when the last store happened in a + /// different stackframe to this one. Disregarded if the tracking kind + /// is thorough. + /// This is useful, because for non-tracked regions, notes about + /// changes to its value in a nested stackframe could be pruned, and + /// this visitor can prevent that without polluting the bugpath too + /// much. 
+ StoreSiteFinder(bugreporter::TrackerRef ParentTracker, KnownSVal V, + const MemRegion *R, TrackingOptions Options, + const StackFrameContext *OriginSFC = nullptr) + : TrackingBugReporterVisitor(ParentTracker), R(R), V(V), Options(Options), + OriginSFC(OriginSFC) { + assert(R); + } + + void Profile(llvm::FoldingSetNodeID &ID) const override; + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; +}; + +void StoreSiteFinder::Profile(llvm::FoldingSetNodeID &ID) const { static int tag = 0; ID.AddPointer(&tag); ID.AddPointer(R); ID.Add(V); - ID.AddInteger(static_cast<int>(TKind)); - ID.AddBoolean(EnableNullFPSuppression); + ID.AddInteger(static_cast<int>(Options.Kind)); + ID.AddBoolean(Options.EnableNullFPSuppression); } /// Returns true if \p N represents the DeclStmt declaring and initializing @@ -1239,127 +1214,152 @@ static bool isInitializationOfVar(const ExplodedNode *N, const VarRegion *VR) { return FrameSpace->getStackFrame() == LCtx->getStackFrame(); } +static bool isObjCPointer(const MemRegion *R) { + if (R->isBoundable()) + if (const auto *TR = dyn_cast<TypedValueRegion>(R)) + return TR->getValueType()->isObjCObjectPointerType(); + + return false; +} + +static bool isObjCPointer(const ValueDecl *D) { + return D->getType()->isObjCObjectPointerType(); +} + /// Show diagnostics for initializing or declaring a region \p R with a bad value. 
-static void showBRDiagnostics(const char *action, llvm::raw_svector_ostream &os, - const MemRegion *R, SVal V, const DeclStmt *DS) { - if (R->canPrintPretty()) { - R->printPretty(os); - os << " "; - } - - if (V.getAs<loc::ConcreteInt>()) { - bool b = false; - if (R->isBoundable()) { - if (const auto *TR = dyn_cast<TypedValueRegion>(R)) { - if (TR->getValueType()->isObjCObjectPointerType()) { - os << action << "nil"; - b = true; - } - } - } - if (!b) - os << action << "a null pointer value"; - - } else if (auto CVal = V.getAs<nonloc::ConcreteInt>()) { - os << action << CVal->getValue(); - } else if (DS) { - if (V.isUndef()) { - if (isa<VarRegion>(R)) { +static void showBRDiagnostics(llvm::raw_svector_ostream &OS, StoreInfo SI) { + const bool HasPrefix = SI.Dest->canPrintPretty(); + + if (HasPrefix) { + SI.Dest->printPretty(OS); + OS << " "; + } + + const char *Action = nullptr; + + switch (SI.StoreKind) { + case StoreInfo::Initialization: + Action = HasPrefix ? "initialized to " : "Initializing to "; + break; + case StoreInfo::BlockCapture: + Action = HasPrefix ? "captured by block as " : "Captured by block as "; + break; + default: + llvm_unreachable("Unexpected store kind"); + } + + if (SI.Value.getAs<loc::ConcreteInt>()) { + OS << Action << (isObjCPointer(SI.Dest) ? "nil" : "a null pointer value"); + + } else if (auto CVal = SI.Value.getAs<nonloc::ConcreteInt>()) { + OS << Action << CVal->getValue(); + + } else if (SI.Origin && SI.Origin->canPrintPretty()) { + OS << Action << "the value of "; + SI.Origin->printPretty(OS); + + } else if (SI.StoreKind == StoreInfo::Initialization) { + // We don't need to check here, all these conditions were + // checked by StoreSiteFinder, when it figured out that it is + // initialization. 
+ const auto *DS = + cast<DeclStmt>(SI.StoreSite->getLocationAs<PostStmt>()->getStmt()); + + if (SI.Value.isUndef()) { + if (isa<VarRegion>(SI.Dest)) { const auto *VD = cast<VarDecl>(DS->getSingleDecl()); + if (VD->getInit()) { - os << (R->canPrintPretty() ? "initialized" : "Initializing") - << " to a garbage value"; + OS << (HasPrefix ? "initialized" : "Initializing") + << " to a garbage value"; } else { - os << (R->canPrintPretty() ? "declared" : "Declaring") - << " without an initial value"; + OS << (HasPrefix ? "declared" : "Declaring") + << " without an initial value"; } } } else { - os << (R->canPrintPretty() ? "initialized" : "Initialized") - << " here"; + OS << (HasPrefix ? "initialized" : "Initialized") << " here"; } } } /// Display diagnostics for passing bad region as a parameter. -static void showBRParamDiagnostics(llvm::raw_svector_ostream& os, - const VarRegion *VR, - SVal V) { +static void showBRParamDiagnostics(llvm::raw_svector_ostream &OS, + StoreInfo SI) { + const auto *VR = cast<VarRegion>(SI.Dest); const auto *Param = cast<ParmVarDecl>(VR->getDecl()); - os << "Passing "; + OS << "Passing "; + + if (SI.Value.getAs<loc::ConcreteInt>()) { + OS << (isObjCPointer(Param) ? "nil object reference" + : "null pointer value"); + + } else if (SI.Value.isUndef()) { + OS << "uninitialized value"; + + } else if (auto CI = SI.Value.getAs<nonloc::ConcreteInt>()) { + OS << "the value " << CI->getValue(); + + } else if (SI.Origin && SI.Origin->canPrintPretty()) { + SI.Origin->printPretty(OS); - if (V.getAs<loc::ConcreteInt>()) { - if (Param->getType()->isObjCObjectPointerType()) - os << "nil object reference"; - else - os << "null pointer value"; - } else if (V.isUndef()) { - os << "uninitialized value"; - } else if (auto CI = V.getAs<nonloc::ConcreteInt>()) { - os << "the value " << CI->getValue(); } else { - os << "value"; + OS << "value"; } // Printed parameter indexes are 1-based, not 0-based. 
unsigned Idx = Param->getFunctionScopeIndex() + 1; - os << " via " << Idx << llvm::getOrdinalSuffix(Idx) << " parameter"; + OS << " via " << Idx << llvm::getOrdinalSuffix(Idx) << " parameter"; if (VR->canPrintPretty()) { - os << " "; - VR->printPretty(os); + OS << " "; + VR->printPretty(OS); } } /// Show default diagnostics for storing bad region. -static void showBRDefaultDiagnostics(llvm::raw_svector_ostream &os, - const MemRegion *R, SVal V) { - if (V.getAs<loc::ConcreteInt>()) { - bool b = false; - if (R->isBoundable()) { - if (const auto *TR = dyn_cast<TypedValueRegion>(R)) { - if (TR->getValueType()->isObjCObjectPointerType()) { - os << "nil object reference stored"; - b = true; - } - } - } - if (!b) { - if (R->canPrintPretty()) - os << "Null pointer value stored"; - else - os << "Storing null pointer value"; - } - - } else if (V.isUndef()) { - if (R->canPrintPretty()) - os << "Uninitialized value stored"; +static void showBRDefaultDiagnostics(llvm::raw_svector_ostream &OS, + StoreInfo SI) { + const bool HasSuffix = SI.Dest->canPrintPretty(); + + if (SI.Value.getAs<loc::ConcreteInt>()) { + OS << (isObjCPointer(SI.Dest) ? "nil object reference stored" + : (HasSuffix ? "Null pointer value stored" + : "Storing null pointer value")); + + } else if (SI.Value.isUndef()) { + OS << (HasSuffix ? 
"Uninitialized value stored" + : "Storing uninitialized value"); + + } else if (auto CV = SI.Value.getAs<nonloc::ConcreteInt>()) { + if (HasSuffix) + OS << "The value " << CV->getValue() << " is assigned"; else - os << "Storing uninitialized value"; + OS << "Assigning " << CV->getValue(); - } else if (auto CV = V.getAs<nonloc::ConcreteInt>()) { - if (R->canPrintPretty()) - os << "The value " << CV->getValue() << " is assigned"; - else - os << "Assigning " << CV->getValue(); + } else if (SI.Origin && SI.Origin->canPrintPretty()) { + if (HasSuffix) { + OS << "The value of "; + SI.Origin->printPretty(OS); + OS << " is assigned"; + } else { + OS << "Assigning the value of "; + SI.Origin->printPretty(OS); + } } else { - if (R->canPrintPretty()) - os << "Value assigned"; - else - os << "Assigning value"; + OS << (HasSuffix ? "Value assigned" : "Assigning value"); } - if (R->canPrintPretty()) { - os << " to "; - R->printPretty(os); + if (HasSuffix) { + OS << " to "; + SI.Dest->printPretty(OS); } } -PathDiagnosticPieceRef -FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, - BugReporterContext &BRC, - PathSensitiveBugReport &BR) { +PathDiagnosticPieceRef StoreSiteFinder::VisitNode(const ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) { if (Satisfied) return nullptr; @@ -1451,11 +1451,78 @@ FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, if (!IsParam) InitE = InitE->IgnoreParenCasts(); - bugreporter::trackExpressionValue( - StoreSite, InitE, BR, TKind, EnableNullFPSuppression); + getParentTracker().track(InitE, StoreSite, Options); } - if (TKind == TrackingKind::Condition && + // Let's try to find the region where the value came from. + const MemRegion *OldRegion = nullptr; + + // If we have init expression, it might be simply a reference + // to a variable, so we can use it. + if (InitE) { + // That region might still be not exactly what we are looking for. 
+ // In situations like `int &ref = val;`, we can't say that + // `ref` is initialized with `val`, rather refers to `val`. + // + // In order, to mitigate situations like this, we check if the last + // stored value in that region is the value that we track. + // + // TODO: support other situations better. + if (const MemRegion *Candidate = + getLocationRegionIfReference(InitE, Succ, false)) { + const StoreManager &SM = BRC.getStateManager().getStoreManager(); + + // Here we traverse the graph up to find the last node where the + // candidate region is still in the store. + for (const ExplodedNode *N = StoreSite; N; N = N->getFirstPred()) { + if (SM.includedInBindings(N->getState()->getStore(), Candidate)) { + // And if it was bound to the target value, we can use it. + if (N->getState()->getSVal(Candidate) == V) { + OldRegion = Candidate; + } + break; + } + } + } + } + + // Otherwise, if the current region does indeed contain the value + // we are looking for, we can look for a region where this value + // was before. + // + // It can be useful for situations like: + // new = identity(old) + // where the analyzer knows that 'identity' returns the value of its + // first argument. + // + // NOTE: If the region R is not a simple var region, it can contain + // V in one of its subregions. + if (!OldRegion && StoreSite->getState()->getSVal(R) == V) { + // Let's go up the graph to find the node where the region is + // bound to V. + const ExplodedNode *NodeWithoutBinding = StoreSite->getFirstPred(); + for (; + NodeWithoutBinding && NodeWithoutBinding->getState()->getSVal(R) == V; + NodeWithoutBinding = NodeWithoutBinding->getFirstPred()) { + } + + if (NodeWithoutBinding) { + // Let's try to find a unique binding for the value in that node. + // We want to use this to find unique bindings because of the following + // situations: + // b = a; + // c = identity(b); + // + // Telling the user that the value of 'a' is assigned to 'c', while + // correct, can be confusing. 
+ StoreManager::FindUniqueBinding FB(V.getAsLocSymbol()); + BRC.getStateManager().iterBindings(NodeWithoutBinding->getState(), FB); + if (FB) + OldRegion = FB.getRegion(); + } + } + + if (Options.Kind == TrackingKind::Condition && OriginSFC && !OriginSFC->isParentOf(StoreSite->getStackFrame())) return nullptr; @@ -1463,60 +1530,41 @@ FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, SmallString<256> sbuf; llvm::raw_svector_ostream os(sbuf); + StoreInfo SI = {StoreInfo::Assignment, // default kind + StoreSite, + InitE, + V, + R, + OldRegion}; + if (Optional<PostStmt> PS = StoreSite->getLocationAs<PostStmt>()) { const Stmt *S = PS->getStmt(); - const char *action = nullptr; const auto *DS = dyn_cast<DeclStmt>(S); const auto *VR = dyn_cast<VarRegion>(R); if (DS) { - action = R->canPrintPretty() ? "initialized to " : - "Initializing to "; + SI.StoreKind = StoreInfo::Initialization; } else if (isa<BlockExpr>(S)) { - action = R->canPrintPretty() ? "captured by block as " : - "Captured by block as "; + SI.StoreKind = StoreInfo::BlockCapture; if (VR) { // See if we can get the BlockVarRegion. 
ProgramStateRef State = StoreSite->getState(); SVal V = StoreSite->getSVal(S); if (const auto *BDR = - dyn_cast_or_null<BlockDataRegion>(V.getAsRegion())) { + dyn_cast_or_null<BlockDataRegion>(V.getAsRegion())) { if (const VarRegion *OriginalR = BDR->getOriginalRegion(VR)) { - if (auto KV = State->getSVal(OriginalR).getAs<KnownSVal>()) - BR.addVisitor(std::make_unique<FindLastStoreBRVisitor>( - *KV, OriginalR, EnableNullFPSuppression, TKind, OriginSFC)); + getParentTracker().track(State->getSVal(OriginalR), OriginalR, + Options, OriginSFC); } } } } - if (action) - showBRDiagnostics(action, os, R, V, DS); - - } else if (StoreSite->getLocation().getAs<CallEnter>()) { - if (const auto *VR = dyn_cast<VarRegion>(R)) - showBRParamDiagnostics(os, VR, V); + } else if (SI.StoreSite->getLocation().getAs<CallEnter>() && + isa<VarRegion>(SI.Dest)) { + SI.StoreKind = StoreInfo::CallArgument; } - if (os.str().empty()) - showBRDefaultDiagnostics(os, R, V); - - if (TKind == bugreporter::TrackingKind::Condition) - os << WillBeUsedForACondition; - - // Construct a new PathDiagnosticPiece. - ProgramPoint P = StoreSite->getLocation(); - PathDiagnosticLocation L; - if (P.getAs<CallEnter>() && InitE) - L = PathDiagnosticLocation(InitE, BRC.getSourceManager(), - P.getLocationContext()); - - if (!L.isValid() || !L.asLocation().isValid()) - L = PathDiagnosticLocation::create(P, BRC.getSourceManager()); - - if (!L.isValid() || !L.asLocation().isValid()) - return nullptr; - - return std::make_shared<PathDiagnosticEventPiece>(L, os.str()); + return getParentTracker().handle(SI, BRC, Options); } //===----------------------------------------------------------------------===// @@ -1705,14 +1753,17 @@ namespace { /// An error is emitted at line 3. This visitor realizes that the branch /// on line 2 is a control dependency of line 3, and tracks it's condition via /// trackExpressionValue(). 
-class TrackControlDependencyCondBRVisitor final : public BugReporterVisitor { +class TrackControlDependencyCondBRVisitor final + : public TrackingBugReporterVisitor { const ExplodedNode *Origin; ControlDependencyCalculator ControlDeps; llvm::SmallSet<const CFGBlock *, 32> VisitedBlocks; public: - TrackControlDependencyCondBRVisitor(const ExplodedNode *O) - : Origin(O), ControlDeps(&O->getCFG()) {} + TrackControlDependencyCondBRVisitor(TrackerRef ParentTracker, + const ExplodedNode *O) + : TrackingBugReporterVisitor(ParentTracker), Origin(O), + ControlDeps(&O->getCFG()) {} void Profile(llvm::FoldingSetNodeID &ID) const override { static int x = 0; @@ -1810,9 +1861,9 @@ TrackControlDependencyCondBRVisitor::VisitNode(const ExplodedNode *N, // isn't sufficient, because a new visitor is created for each tracked // expression, hence the BugReport level set. if (BR.addTrackedCondition(N)) { - bugreporter::trackExpressionValue( - N, Condition, BR, bugreporter::TrackingKind::Condition, - /*EnableNullFPSuppression=*/false); + getParentTracker().track(Condition, N, + {bugreporter::TrackingKind::Condition, + /*EnableNullFPSuppression=*/false}); return constructDebugPieceForTrackedCondition(Condition, N, BRC); } } @@ -1825,28 +1876,7 @@ TrackControlDependencyCondBRVisitor::VisitNode(const ExplodedNode *N, // Implementation of trackExpressionValue. 
//===----------------------------------------------------------------------===// -static const MemRegion *getLocationRegionIfReference(const Expr *E, - const ExplodedNode *N) { - if (const auto *DR = dyn_cast<DeclRefExpr>(E)) { - if (const auto *VD = dyn_cast<VarDecl>(DR->getDecl())) { - if (!VD->getType()->isReferenceType()) - return nullptr; - ProgramStateManager &StateMgr = N->getState()->getStateManager(); - MemRegionManager &MRMgr = StateMgr.getRegionManager(); - return MRMgr.getVarRegion(VD, N->getLocationContext()); - } - } - - // FIXME: This does not handle other kinds of null references, - // for example, references from FieldRegions: - // struct Wrapper { int &ref; }; - // Wrapper w = { *(int *)0 }; - // w.ref = 1; - - return nullptr; -} - -/// \return A subexpression of {@code Ex} which represents the +/// \return A subexpression of @c Ex which represents the /// expression-of-interest. static const Expr *peelOffOuterExpr(const Expr *Ex, const ExplodedNode *N) { @@ -1924,152 +1954,472 @@ static const ExplodedNode* findNodeForExpression(const ExplodedNode *N, return N; } -bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, - const Expr *E, - PathSensitiveBugReport &report, - bugreporter::TrackingKind TKind, - bool EnableNullFPSuppression) { +//===----------------------------------------------------------------------===// +// Tracker implementation +//===----------------------------------------------------------------------===// - if (!E || !InputNode) - return false; +PathDiagnosticPieceRef StoreHandler::constructNote(StoreInfo SI, + BugReporterContext &BRC, + StringRef NodeText) { + // Construct a new PathDiagnosticPiece. 
+ ProgramPoint P = SI.StoreSite->getLocation(); + PathDiagnosticLocation L; + if (P.getAs<CallEnter>() && SI.SourceOfTheValue) + L = PathDiagnosticLocation(SI.SourceOfTheValue, BRC.getSourceManager(), + P.getLocationContext()); - const Expr *Inner = peelOffOuterExpr(E, InputNode); - const ExplodedNode *LVNode = findNodeForExpression(InputNode, Inner); - if (!LVNode) - return false; + if (!L.isValid() || !L.asLocation().isValid()) + L = PathDiagnosticLocation::create(P, BRC.getSourceManager()); - ProgramStateRef LVState = LVNode->getState(); - const StackFrameContext *SFC = LVNode->getStackFrame(); - - // We only track expressions if we believe that they are important. Chances - // are good that control dependencies to the tracking point are also improtant - // because of this, let's explain why we believe control reached this point. - // TODO: Shouldn't we track control dependencies of every bug location, rather - // than only tracked expressions? - if (LVState->getAnalysisManager().getAnalyzerOptions().ShouldTrackConditions) - report.addVisitor(std::make_unique<TrackControlDependencyCondBRVisitor>( - InputNode)); - - // The message send could be nil due to the receiver being nil. - // At this point in the path, the receiver should be live since we are at the - // message send expr. If it is nil, start tracking it. - if (const Expr *Receiver = NilReceiverBRVisitor::getNilReceiver(Inner, LVNode)) - trackExpressionValue( - LVNode, Receiver, report, TKind, EnableNullFPSuppression); - - // Track the index if this is an array subscript. - if (const auto *Arr = dyn_cast<ArraySubscriptExpr>(Inner)) - trackExpressionValue( - LVNode, Arr->getIdx(), report, TKind, /*EnableNullFPSuppression*/false); - - // See if the expression we're interested refers to a variable. - // If so, we can track both its contents and constraints on its value. 
- if (ExplodedGraph::isInterestingLValueExpr(Inner)) { - SVal LVal = LVNode->getSVal(Inner); - - const MemRegion *RR = getLocationRegionIfReference(Inner, LVNode); - bool LVIsNull = LVState->isNull(LVal).isConstrainedTrue(); - - // If this is a C++ reference to a null pointer, we are tracking the - // pointer. In addition, we should find the store at which the reference - // got initialized. - if (RR && !LVIsNull) - if (auto KV = LVal.getAs<KnownSVal>()) - report.addVisitor(std::make_unique<FindLastStoreBRVisitor>( - *KV, RR, EnableNullFPSuppression, TKind, SFC)); - - // In case of C++ references, we want to differentiate between a null - // reference and reference to null pointer. - // If the LVal is null, check if we are dealing with null reference. - // For those, we want to track the location of the reference. - const MemRegion *R = (RR && LVIsNull) ? RR : - LVNode->getSVal(Inner).getAsRegion(); - - if (R) { - - // Mark both the variable region and its contents as interesting. - SVal V = LVState->getRawSVal(loc::MemRegionVal(R)); - report.addVisitor( - std::make_unique<NoStoreFuncVisitor>(cast<SubRegion>(R), TKind)); - - MacroNullReturnSuppressionVisitor::addMacroVisitorIfNecessary( - LVNode, R, EnableNullFPSuppression, report, V); - - report.markInteresting(V, TKind); - report.addVisitor(std::make_unique<UndefOrNullArgVisitor>(R)); - - // If the contents are symbolic and null, find out when they became null. - if (V.getAsLocSymbol(/*IncludeBaseRegions=*/true)) - if (LVState->isNull(V).isConstrainedTrue()) - report.addVisitor(std::make_unique<TrackConstraintBRVisitor>( - V.castAs<DefinedSVal>(), false)); - - // Add visitor, which will suppress inline defensive checks. - if (auto DV = V.getAs<DefinedSVal>()) - if (!DV->isZeroConstant() && EnableNullFPSuppression) { - // Note that LVNode may be too late (i.e., too far from the InputNode) - // because the lvalue may have been computed before the inlined call - // was evaluated. 
InputNode may as well be too early here, because - // the symbol is already dead; this, however, is fine because we can - // still find the node in which it collapsed to null previously. - report.addVisitor( - std::make_unique<SuppressInlineDefensiveChecksVisitor>( - *DV, InputNode)); - } + if (!L.isValid() || !L.asLocation().isValid()) + return nullptr; - if (auto KV = V.getAs<KnownSVal>()) - report.addVisitor(std::make_unique<FindLastStoreBRVisitor>( - *KV, R, EnableNullFPSuppression, TKind, SFC)); - return true; + return std::make_shared<PathDiagnosticEventPiece>(L, NodeText); +} + +class DefaultStoreHandler final : public StoreHandler { +public: + using StoreHandler::StoreHandler; + + PathDiagnosticPieceRef handle(StoreInfo SI, BugReporterContext &BRC, + TrackingOptions Opts) override { + // Okay, we've found the binding. Emit an appropriate message. + SmallString<256> Buffer; + llvm::raw_svector_ostream OS(Buffer); + + switch (SI.StoreKind) { + case StoreInfo::Initialization: + case StoreInfo::BlockCapture: + showBRDiagnostics(OS, SI); + break; + case StoreInfo::CallArgument: + showBRParamDiagnostics(OS, SI); + break; + case StoreInfo::Assignment: + showBRDefaultDiagnostics(OS, SI); + break; + } + + if (Opts.Kind == bugreporter::TrackingKind::Condition) + OS << WillBeUsedForACondition; + + return constructNote(SI, BRC, OS.str()); + } +}; + +class ControlDependencyHandler final : public ExpressionHandler { +public: + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *Inner, const ExplodedNode *InputNode, + const ExplodedNode *LVNode, + TrackingOptions Opts) override { + PathSensitiveBugReport &Report = getParentTracker().getReport(); + + // We only track expressions if we believe that they are important. Chances + // are good that control dependencies to the tracking point are also + // important because of this, let's explain why we believe control reached + // this point. 
+ // TODO: Shouldn't we track control dependencies of every bug location, + // rather than only tracked expressions? + if (LVNode->getState() + ->getAnalysisManager() + .getAnalyzerOptions() + .ShouldTrackConditions) { + Report.addVisitor<TrackControlDependencyCondBRVisitor>( + &getParentTracker(), InputNode); + return {/*FoundSomethingToTrack=*/true}; } + + return {}; } +}; + +class NilReceiverHandler final : public ExpressionHandler { +public: + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *Inner, const ExplodedNode *InputNode, + const ExplodedNode *LVNode, + TrackingOptions Opts) override { + // The message send could be nil due to the receiver being nil. + // At this point in the path, the receiver should be live since we are at + // the message send expr. If it is nil, start tracking it. + if (const Expr *Receiver = + NilReceiverBRVisitor::getNilReceiver(Inner, LVNode)) + return getParentTracker().track(Receiver, LVNode, Opts); + + return {}; + } +}; + +class ArrayIndexHandler final : public ExpressionHandler { +public: + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *Inner, const ExplodedNode *InputNode, + const ExplodedNode *LVNode, + TrackingOptions Opts) override { + // Track the index if this is an array subscript. 
+ if (const auto *Arr = dyn_cast<ArraySubscriptExpr>(Inner)) + return getParentTracker().track( + Arr->getIdx(), LVNode, + {Opts.Kind, /*EnableNullFPSuppression*/ false}); + + return {}; + } +}; + +// TODO: extract it into more handlers +class InterestingLValueHandler final : public ExpressionHandler { +public: + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *Inner, const ExplodedNode *InputNode, + const ExplodedNode *LVNode, + TrackingOptions Opts) override { + ProgramStateRef LVState = LVNode->getState(); + const StackFrameContext *SFC = LVNode->getStackFrame(); + PathSensitiveBugReport &Report = getParentTracker().getReport(); + Tracker::Result Result; + + // See if the expression we're interested refers to a variable. + // If so, we can track both its contents and constraints on its value. + if (ExplodedGraph::isInterestingLValueExpr(Inner)) { + SVal LVal = LVNode->getSVal(Inner); + + const MemRegion *RR = getLocationRegionIfReference(Inner, LVNode); + bool LVIsNull = LVState->isNull(LVal).isConstrainedTrue(); + + // If this is a C++ reference to a null pointer, we are tracking the + // pointer. In addition, we should find the store at which the reference + // got initialized. + if (RR && !LVIsNull) + Result.combineWith(getParentTracker().track(LVal, RR, Opts, SFC)); + + // In case of C++ references, we want to differentiate between a null + // reference and reference to null pointer. + // If the LVal is null, check if we are dealing with null reference. + // For those, we want to track the location of the reference. + const MemRegion *R = + (RR && LVIsNull) ? RR : LVNode->getSVal(Inner).getAsRegion(); + + if (R) { + + // Mark both the variable region and its contents as interesting. + SVal V = LVState->getRawSVal(loc::MemRegionVal(R)); + Report.addVisitor<NoStoreFuncVisitor>(cast<SubRegion>(R), Opts.Kind); + + // When we got here, we do have something to track, and we will + // interrupt. 
+ Result.FoundSomethingToTrack = true; + Result.WasInterrupted = true; + + MacroNullReturnSuppressionVisitor::addMacroVisitorIfNecessary( + LVNode, R, Opts.EnableNullFPSuppression, Report, V); + + Report.markInteresting(V, Opts.Kind); + Report.addVisitor<UndefOrNullArgVisitor>(R); + + // If the contents are symbolic and null, find out when they became + // null. + if (V.getAsLocSymbol(/*IncludeBaseRegions=*/true)) + if (LVState->isNull(V).isConstrainedTrue()) + Report.addVisitor<TrackConstraintBRVisitor>(V.castAs<DefinedSVal>(), + false); + + // Add visitor, which will suppress inline defensive checks. + if (auto DV = V.getAs<DefinedSVal>()) + if (!DV->isZeroConstant() && Opts.EnableNullFPSuppression) + // Note that LVNode may be too late (i.e., too far from the + // InputNode) because the lvalue may have been computed before the + // inlined call was evaluated. InputNode may as well be too early + // here, because the symbol is already dead; this, however, is fine + // because we can still find the node in which it collapsed to null + // previously. + Report.addVisitor<SuppressInlineDefensiveChecksVisitor>(*DV, + InputNode); + getParentTracker().track(V, R, Opts, SFC); + } + } + + return Result; + } +}; + +/// Adds a ReturnVisitor if the given statement represents a call that was +/// inlined. +/// +/// This will search back through the ExplodedGraph, starting from the given +/// node, looking for when the given statement was processed. If it turns out +/// the statement is a call that was inlined, we add the visitor to the +/// bug report, so it can print a note later. +class InlinedFunctionCallHandler final : public ExpressionHandler { + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *E, const ExplodedNode *InputNode, + const ExplodedNode *ExprNode, + TrackingOptions Opts) override { + if (!CallEvent::isCallStmt(E)) + return {}; + + // First, find when we processed the statement. 
+ // If we work with a 'CXXNewExpr' that is going to be purged away before + // its call take place. We would catch that purge in the last condition + // as a 'StmtPoint' so we have to bypass it. + const bool BypassCXXNewExprEval = isa<CXXNewExpr>(E); - // If the expression is not an "lvalue expression", we can still - // track the constraints on its contents. - SVal V = LVState->getSValAsScalarOrLoc(Inner, LVNode->getLocationContext()); + // This is moving forward when we enter into another context. + const StackFrameContext *CurrentSFC = ExprNode->getStackFrame(); + + do { + // If that is satisfied we found our statement as an inlined call. + if (Optional<CallExitEnd> CEE = ExprNode->getLocationAs<CallExitEnd>()) + if (CEE->getCalleeContext()->getCallSite() == E) + break; + + // Try to move forward to the end of the call-chain. + ExprNode = ExprNode->getFirstPred(); + if (!ExprNode) + break; + + const StackFrameContext *PredSFC = ExprNode->getStackFrame(); + + // If that is satisfied we found our statement. + // FIXME: This code currently bypasses the call site for the + // conservatively evaluated allocator. + if (!BypassCXXNewExprEval) + if (Optional<StmtPoint> SP = ExprNode->getLocationAs<StmtPoint>()) + // See if we do not enter into another context. + if (SP->getStmt() == E && CurrentSFC == PredSFC) + break; + + CurrentSFC = PredSFC; + } while (ExprNode->getStackFrame() == CurrentSFC); + + // Next, step over any post-statement checks. + while (ExprNode && ExprNode->getLocation().getAs<PostStmt>()) + ExprNode = ExprNode->getFirstPred(); + if (!ExprNode) + return {}; + + // Finally, see if we inlined the call. + Optional<CallExitEnd> CEE = ExprNode->getLocationAs<CallExitEnd>(); + if (!CEE) + return {}; + + const StackFrameContext *CalleeContext = CEE->getCalleeContext(); + if (CalleeContext->getCallSite() != E) + return {}; + + // Check the return value. 
+ ProgramStateRef State = ExprNode->getState(); + SVal RetVal = ExprNode->getSVal(E); + + // Handle cases where a reference is returned and then immediately used. + if (cast<Expr>(E)->isGLValue()) + if (Optional<Loc> LValue = RetVal.getAs<Loc>()) + RetVal = State->getSVal(*LValue); + + // See if the return value is NULL. If so, suppress the report. + AnalyzerOptions &Options = State->getAnalysisManager().options; + + bool EnableNullFPSuppression = false; + if (Opts.EnableNullFPSuppression && Options.ShouldSuppressNullReturnPaths) + if (Optional<Loc> RetLoc = RetVal.getAs<Loc>()) + EnableNullFPSuppression = State->isNull(*RetLoc).isConstrainedTrue(); - ReturnVisitor::addVisitorIfNecessary( - LVNode, Inner, report, EnableNullFPSuppression, TKind); + PathSensitiveBugReport &Report = getParentTracker().getReport(); + Report.addVisitor<ReturnVisitor>(&getParentTracker(), CalleeContext, + EnableNullFPSuppression, Options, + Opts.Kind); + return {true}; + } +}; - // Is it a symbolic value? - if (auto L = V.getAs<loc::MemRegionVal>()) { - // FIXME: this is a hack for fixing a later crash when attempting to - // dereference a void* pointer. - // We should not try to dereference pointers at all when we don't care - // what is written inside the pointer. - bool CanDereference = true; - if (const auto *SR = L->getRegionAs<SymbolicRegion>()) { - if (SR->getSymbol()->getType()->getPointeeType()->isVoidType()) +class DefaultExpressionHandler final : public ExpressionHandler { +public: + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *Inner, const ExplodedNode *InputNode, + const ExplodedNode *LVNode, + TrackingOptions Opts) override { + ProgramStateRef LVState = LVNode->getState(); + const StackFrameContext *SFC = LVNode->getStackFrame(); + PathSensitiveBugReport &Report = getParentTracker().getReport(); + Tracker::Result Result; + + // If the expression is not an "lvalue expression", we can still + // track the constraints on its contents. 
+ SVal V = LVState->getSValAsScalarOrLoc(Inner, LVNode->getLocationContext()); + + // Is it a symbolic value? + if (auto L = V.getAs<loc::MemRegionVal>()) { + // FIXME: this is a hack for fixing a later crash when attempting to + // dereference a void* pointer. + // We should not try to dereference pointers at all when we don't care + // what is written inside the pointer. + bool CanDereference = true; + if (const auto *SR = L->getRegionAs<SymbolicRegion>()) { + if (SR->getSymbol()->getType()->getPointeeType()->isVoidType()) + CanDereference = false; + } else if (L->getRegionAs<AllocaRegion>()) CanDereference = false; - } else if (L->getRegionAs<AllocaRegion>()) - CanDereference = false; - - // At this point we are dealing with the region's LValue. - // However, if the rvalue is a symbolic region, we should track it as well. - // Try to use the correct type when looking up the value. - SVal RVal; - if (ExplodedGraph::isInterestingLValueExpr(Inner)) - RVal = LVState->getRawSVal(L.getValue(), Inner->getType()); - else if (CanDereference) - RVal = LVState->getSVal(L->getRegion()); - - if (CanDereference) { - report.addVisitor( - std::make_unique<UndefOrNullArgVisitor>(L->getRegion())); - - if (auto KV = RVal.getAs<KnownSVal>()) - report.addVisitor(std::make_unique<FindLastStoreBRVisitor>( - *KV, L->getRegion(), EnableNullFPSuppression, TKind, SFC)); + + // At this point we are dealing with the region's LValue. + // However, if the rvalue is a symbolic region, we should track it as + // well. Try to use the correct type when looking up the value. 
+ SVal RVal; + if (ExplodedGraph::isInterestingLValueExpr(Inner)) + RVal = LVState->getRawSVal(L.getValue(), Inner->getType()); + else if (CanDereference) + RVal = LVState->getSVal(L->getRegion()); + + if (CanDereference) { + Report.addVisitor<UndefOrNullArgVisitor>(L->getRegion()); + Result.FoundSomethingToTrack = true; + + if (auto KV = RVal.getAs<KnownSVal>()) + Result.combineWith( + getParentTracker().track(*KV, L->getRegion(), Opts, SFC)); + } + + const MemRegion *RegionRVal = RVal.getAsRegion(); + if (isa_and_nonnull<SymbolicRegion>(RegionRVal)) { + Report.markInteresting(RegionRVal, Opts.Kind); + Report.addVisitor<TrackConstraintBRVisitor>( + loc::MemRegionVal(RegionRVal), + /*assumption=*/false); + Result.FoundSomethingToTrack = true; + } } - const MemRegion *RegionRVal = RVal.getAsRegion(); - if (RegionRVal && isa<SymbolicRegion>(RegionRVal)) { - report.markInteresting(RegionRVal, TKind); - report.addVisitor(std::make_unique<TrackConstraintBRVisitor>( - loc::MemRegionVal(RegionRVal), /*assumption=*/false)); + return Result; + } +}; + +/// Attempts to add visitors to track an RValue expression back to its point of +/// origin. +class PRValueHandler final : public ExpressionHandler { +public: + using ExpressionHandler::ExpressionHandler; + + Tracker::Result handle(const Expr *E, const ExplodedNode *InputNode, + const ExplodedNode *ExprNode, + TrackingOptions Opts) override { + if (!E->isPRValue()) + return {}; + + const ExplodedNode *RVNode = findNodeForExpression(ExprNode, E); + if (!RVNode) + return {}; + + ProgramStateRef RVState = RVNode->getState(); + SVal V = RVState->getSValAsScalarOrLoc(E, RVNode->getLocationContext()); + const auto *BO = dyn_cast<BinaryOperator>(E); + + if (!BO || !BO->isMultiplicativeOp() || !V.isZeroConstant()) + return {}; + + SVal RHSV = RVState->getSVal(BO->getRHS(), RVNode->getLocationContext()); + SVal LHSV = RVState->getSVal(BO->getLHS(), RVNode->getLocationContext()); + + // Track both LHS and RHS of a multiplication. 
+ Tracker::Result CombinedResult; + Tracker &Parent = getParentTracker(); + + const auto track = [&CombinedResult, &Parent, ExprNode, Opts](Expr *Inner) { + CombinedResult.combineWith(Parent.track(Inner, ExprNode, Opts)); + }; + + if (BO->getOpcode() == BO_Mul) { + if (LHSV.isZeroConstant()) + track(BO->getLHS()); + if (RHSV.isZeroConstant()) + track(BO->getRHS()); + } else { // Track only the LHS of a division or a modulo. + if (LHSV.isZeroConstant()) + track(BO->getLHS()); } + + return CombinedResult; } - return true; +}; + +Tracker::Tracker(PathSensitiveBugReport &Report) : Report(Report) { + // Default expression handlers. + addLowPriorityHandler<ControlDependencyHandler>(); + addLowPriorityHandler<NilReceiverHandler>(); + addLowPriorityHandler<ArrayIndexHandler>(); + addLowPriorityHandler<InterestingLValueHandler>(); + addLowPriorityHandler<InlinedFunctionCallHandler>(); + addLowPriorityHandler<DefaultExpressionHandler>(); + addLowPriorityHandler<PRValueHandler>(); + // Default store handlers. + addHighPriorityHandler<DefaultStoreHandler>(); +} + +Tracker::Result Tracker::track(const Expr *E, const ExplodedNode *N, + TrackingOptions Opts) { + if (!E || !N) + return {}; + + const Expr *Inner = peelOffOuterExpr(E, N); + const ExplodedNode *LVNode = findNodeForExpression(N, Inner); + if (!LVNode) + return {}; + + Result CombinedResult; + // Iterate through the handlers in the order according to their priorities. + for (ExpressionHandlerPtr &Handler : ExpressionHandlers) { + CombinedResult.combineWith(Handler->handle(Inner, N, LVNode, Opts)); + if (CombinedResult.WasInterrupted) { + // There is no need to confuse our users here. + // We got interrupted, but our users don't need to know about it. 
+ CombinedResult.WasInterrupted = false; + break; + } + } + + return CombinedResult; +} + +Tracker::Result Tracker::track(SVal V, const MemRegion *R, TrackingOptions Opts, + const StackFrameContext *Origin) { + if (auto KV = V.getAs<KnownSVal>()) { + Report.addVisitor<StoreSiteFinder>(this, *KV, R, Opts, Origin); + return {true}; + } + return {}; +} + +PathDiagnosticPieceRef Tracker::handle(StoreInfo SI, BugReporterContext &BRC, + TrackingOptions Opts) { + // Iterate through the handlers in the order according to their priorities. + for (StoreHandlerPtr &Handler : StoreHandlers) { + if (PathDiagnosticPieceRef Result = Handler->handle(SI, BRC, Opts)) + // If the handler produced a non-null piece, return it. + // There is no need in asking other handlers. + return Result; + } + return {}; +} + +bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, + const Expr *E, + + PathSensitiveBugReport &Report, + TrackingOptions Opts) { + return Tracker::create(Report) + ->track(E, InputNode, Opts) + .FoundSomethingToTrack; +} + +void bugreporter::trackStoredValue(KnownSVal V, const MemRegion *R, + PathSensitiveBugReport &Report, + TrackingOptions Opts, + const StackFrameContext *Origin) { + Tracker::create(Report)->track(V, R, Opts, Origin); } //===----------------------------------------------------------------------===// @@ -2118,9 +2468,9 @@ NilReceiverBRVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, // The receiver was nil, and hence the method was skipped. // Register a BugReporterVisitor to issue a message telling us how // the receiver was null. - bugreporter::trackExpressionValue( - N, Receiver, BR, bugreporter::TrackingKind::Thorough, - /*EnableNullFPSuppression*/ false); + bugreporter::trackExpressionValue(N, Receiver, BR, + {bugreporter::TrackingKind::Thorough, + /*EnableNullFPSuppression*/ false}); // Issue a message saying that the method was skipped. 
PathDiagnosticLocation L(Receiver, BRC.getSourceManager(), N->getLocationContext()); diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index a55d9302ca58..3785f498414f 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -47,6 +47,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ImmutableList.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" @@ -86,7 +87,7 @@ QualType CallEvent::getResultType() const { case VK_XValue: ResultTy = Ctx.getRValueReferenceType(ResultTy); break; - case VK_RValue: + case VK_PRValue: // No adjustment is necessary. break; } @@ -466,6 +467,42 @@ bool CallEvent::isVariadic(const Decl *D) { llvm_unreachable("unknown callable kind"); } +static bool isTransparentUnion(QualType T) { + const RecordType *UT = T->getAsUnionType(); + return UT && UT->getDecl()->hasAttr<TransparentUnionAttr>(); +} + +// In some cases, symbolic cases should be transformed before we associate +// them with parameters. This function incapsulates such cases. +static SVal processArgument(SVal Value, const Expr *ArgumentExpr, + const ParmVarDecl *Parameter, SValBuilder &SVB) { + QualType ParamType = Parameter->getType(); + QualType ArgumentType = ArgumentExpr->getType(); + + // Transparent unions allow users to easily convert values of union field + // types into union-typed objects. + // + // Also, more importantly, they allow users to define functions with different + // different parameter types, substituting types matching transparent union + // field types with the union type itself. + // + // Here, we check specifically for latter cases and prevent binding + // field-typed values to union-typed regions. + if (isTransparentUnion(ParamType) && + // Let's check that we indeed trying to bind different types. 
+ !isTransparentUnion(ArgumentType)) { + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + + llvm::ImmutableList<SVal> CompoundSVals = BVF.getEmptySValList(); + CompoundSVals = BVF.prependSVal(Value, CompoundSVals); + + // Wrap it with compound value. + return SVB.makeCompoundVal(ParamType, CompoundSVals); + } + + return Value; +} + static void addParameterValuesToBindings(const StackFrameContext *CalleeCtx, CallEvent::BindingsTy &Bindings, SValBuilder &SVB, @@ -490,10 +527,12 @@ static void addParameterValuesToBindings(const StackFrameContext *CalleeCtx, // determined in compile-time but not represented as arg-expressions, // which makes getArgSVal() fail and return UnknownVal. SVal ArgVal = Call.getArgSVal(Idx); + const Expr *ArgExpr = Call.getArgExpr(Idx); if (!ArgVal.isUnknown()) { Loc ParamLoc = SVB.makeLoc( MRMgr.getParamVarRegion(Call.getOriginExpr(), Idx, CalleeCtx)); - Bindings.push_back(std::make_pair(ParamLoc, ArgVal)); + Bindings.push_back( + std::make_pair(ParamLoc, processArgument(ArgVal, ArgExpr, *I, SVB))); } } diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp index 3d44d2cbc069..3d64ce453479 100644 --- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp +++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp @@ -19,6 +19,10 @@ using namespace clang; using namespace ento; const FunctionDecl *CheckerContext::getCalleeDecl(const CallExpr *CE) const { + const FunctionDecl *D = CE->getDirectCallee(); + if (D) + return D; + const Expr *Callee = CE->getCallee(); SVal L = Pred->getSVal(Callee); return L.getAsFunctionDecl(); diff --git a/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp b/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp index cae728815b41..626ae1ae8066 100644 --- a/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp +++ b/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp @@ -148,5 +148,39 @@ llvm::Optional<int> tryExpandAsInteger(StringRef Macro, return IntValue.getSExtValue(); } 
+OperatorKind operationKindFromOverloadedOperator(OverloadedOperatorKind OOK, + bool IsBinary) { + llvm::StringMap<BinaryOperatorKind> BinOps{ +#define BINARY_OPERATION(Name, Spelling) {Spelling, BO_##Name}, +#include "clang/AST/OperationKinds.def" + }; + llvm::StringMap<UnaryOperatorKind> UnOps{ +#define UNARY_OPERATION(Name, Spelling) {Spelling, UO_##Name}, +#include "clang/AST/OperationKinds.def" + }; + + switch (OOK) { +#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ + case OO_##Name: \ + if (IsBinary) { \ + auto BinOpIt = BinOps.find(Spelling); \ + if (BinOpIt != BinOps.end()) \ + return OperatorKind(BinOpIt->second); \ + else \ + llvm_unreachable("operator was expected to be binary but is not"); \ + } else { \ + auto UnOpIt = UnOps.find(Spelling); \ + if (UnOpIt != UnOps.end()) \ + return OperatorKind(UnOpIt->second); \ + else \ + llvm_unreachable("operator was expected to be unary but is not"); \ + } \ + break; +#include "clang/Basic/OperatorKinds.def" + default: + llvm_unreachable("unexpected operator kind"); + } +} + } // namespace ento } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp index 86cecf6524f0..e09399a83589 100644 --- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp @@ -33,21 +33,20 @@ using namespace clang; using namespace ento; bool CheckerManager::hasPathSensitiveCheckers() const { - return !StmtCheckers.empty() || - !PreObjCMessageCheckers.empty() || - !PostObjCMessageCheckers.empty() || - !PreCallCheckers.empty() || - !PostCallCheckers.empty() || - !LocationCheckers.empty() || - !BindCheckers.empty() || - !EndAnalysisCheckers.empty() || - !EndFunctionCheckers.empty() || - !BranchConditionCheckers.empty() || - !LiveSymbolsCheckers.empty() || - !DeadSymbolsCheckers.empty() || - !RegionChangesCheckers.empty() || - !EvalAssumeCheckers.empty() || - !EvalCallCheckers.empty(); 
+ const auto IfAnyAreNonEmpty = [](const auto &... Callbacks) -> bool { + bool Result = false; + // FIXME: Use fold expressions in C++17. + LLVM_ATTRIBUTE_UNUSED int Unused[]{0, (Result |= !Callbacks.empty())...}; + return Result; + }; + return IfAnyAreNonEmpty( + StmtCheckers, PreObjCMessageCheckers, ObjCMessageNilCheckers, + PostObjCMessageCheckers, PreCallCheckers, PostCallCheckers, + LocationCheckers, BindCheckers, EndAnalysisCheckers, + BeginFunctionCheckers, EndFunctionCheckers, BranchConditionCheckers, + NewAllocatorCheckers, LiveSymbolsCheckers, DeadSymbolsCheckers, + RegionChangesCheckers, PointerEscapeCheckers, EvalAssumeCheckers, + EvalCallCheckers, EndOfTranslationUnitCheckers); } void CheckerManager::finishedCheckerRegistration() { diff --git a/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp b/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp index a601370775b4..d12c35ef156a 100644 --- a/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp +++ b/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp @@ -20,7 +20,9 @@ const char *const MemoryRefCount = const char *const MemoryError = "Memory error"; const char *const UnixAPI = "Unix API"; const char *const CXXObjectLifecycle = "C++ object lifecycle"; +const char *const CXXMoveSemantics = "C++ move semantics"; const char *const SecurityError = "Security error"; +const char *const UnusedCode = "Unused code"; } // namespace categories } // namespace ento } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp index 70deb13a8e1a..bc939d252800 100644 --- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp @@ -219,13 +219,14 @@ void CoreEngine::HandleBlockEdge(const BlockEdge &L, ExplodedNode *Pred) { // and we're taking the path that skips virtual base constructors. 
if (L.getSrc()->getTerminator().isVirtualBaseBranch() && L.getDst() == *L.getSrc()->succ_begin()) { - ProgramPoint P = L.withTag(getNoteTags().makeNoteTag( + ProgramPoint P = L.withTag(getDataTags().make<NoteTag>( [](BugReporterContext &, PathSensitiveBugReport &) -> std::string { // TODO: Just call out the name of the most derived class // when we know it. return "Virtual base initialization skipped because " "it has already been handled by the most derived class"; - }, /*IsPrunable=*/true)); + }, + /*IsPrunable=*/true)); // Perform the transition. ExplodedNodeSet Dst; NodeBuilder Bldr(Pred, Dst, BuilderCtx); @@ -349,6 +350,7 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) { HandleBranch(cast<ForStmt>(Term)->getCond(), Term, B, Pred); return; + case Stmt::SEHLeaveStmtClass: case Stmt::ContinueStmtClass: case Stmt::BreakStmtClass: case Stmt::GotoStmtClass: diff --git a/clang/lib/StaticAnalyzer/Core/DynamicSize.cpp b/clang/lib/StaticAnalyzer/Core/DynamicExtent.cpp index 8b2172db445c..db9698b4086e 100644 --- a/clang/lib/StaticAnalyzer/Core/DynamicSize.cpp +++ b/clang/lib/StaticAnalyzer/Core/DynamicExtent.cpp @@ -1,4 +1,4 @@ -//===- DynamicSize.cpp - Dynamic size related APIs --------------*- C++ -*-===// +//===- DynamicExtent.cpp - Dynamic extent related APIs ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// // -// This file defines APIs that track and query dynamic size information. +// This file defines APIs that track and query dynamic extent information. 
// //===----------------------------------------------------------------------===// -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/AST/Expr.h" #include "clang/Basic/LLVM.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" @@ -19,32 +19,43 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +REGISTER_MAP_WITH_PROGRAMSTATE(DynamicExtentMap, const clang::ento::MemRegion *, + clang::ento::DefinedOrUnknownSVal) + namespace clang { namespace ento { -DefinedOrUnknownSVal getDynamicSize(ProgramStateRef State, const MemRegion *MR, - SValBuilder &SVB) { +DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, + const MemRegion *MR, SValBuilder &SVB) { + MR = MR->StripCasts(); + + if (const DefinedOrUnknownSVal *Size = State->get<DynamicExtentMap>(MR)) + return *Size; + return MR->getMemRegionManager().getStaticSize(MR, SVB); } +DefinedOrUnknownSVal getElementExtent(QualType Ty, SValBuilder &SVB) { + return SVB.makeIntVal(SVB.getContext().getTypeSizeInChars(Ty).getQuantity(), + SVB.getArrayIndexType()); +} + DefinedOrUnknownSVal getDynamicElementCount(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB, QualType ElementTy) { - MemRegionManager &MemMgr = MR->getMemRegionManager(); - ASTContext &Ctx = MemMgr.getContext(); + MR = MR->StripCasts(); - DefinedOrUnknownSVal Size = getDynamicSize(State, MR, SVB); - SVal ElementSizeV = SVB.makeIntVal( - Ctx.getTypeSizeInChars(ElementTy).getQuantity(), SVB.getArrayIndexType()); + DefinedOrUnknownSVal Size = getDynamicExtent(State, MR, SVB); + SVal ElementSize = getElementExtent(ElementTy, SVB); - SVal DivisionV = - SVB.evalBinOp(State, BO_Div, Size, ElementSizeV, SVB.getArrayIndexType()); + SVal ElementCount = + SVB.evalBinOp(State, BO_Div, Size, ElementSize, SVB.getArrayIndexType()); - return 
DivisionV.castAs<DefinedOrUnknownSVal>(); + return ElementCount.castAs<DefinedOrUnknownSVal>(); } -SVal getDynamicSizeWithOffset(ProgramStateRef State, const SVal &BufV) { +SVal getDynamicExtentWithOffset(ProgramStateRef State, SVal BufV) { SValBuilder &SvalBuilder = State->getStateManager().getSValBuilder(); const MemRegion *MRegion = BufV.getAsRegion(); if (!MRegion) @@ -60,12 +71,22 @@ SVal getDynamicSizeWithOffset(ProgramStateRef State, const SVal &BufV) { Offset.getOffset() / MRegion->getMemRegionManager().getContext().getCharWidth()); DefinedOrUnknownSVal ExtentInBytes = - getDynamicSize(State, BaseRegion, SvalBuilder); + getDynamicExtent(State, BaseRegion, SvalBuilder); return SvalBuilder.evalBinOp(State, BinaryOperator::Opcode::BO_Sub, ExtentInBytes, OffsetInBytes, SvalBuilder.getArrayIndexType()); } +ProgramStateRef setDynamicExtent(ProgramStateRef State, const MemRegion *MR, + DefinedOrUnknownSVal Size, SValBuilder &SVB) { + MR = MR->StripCasts(); + + if (Size.isUnknown()) + return State; + + return State->set<DynamicExtentMap>(MR->StripCasts(), Size); +} + } // namespace ento } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index f285b652c175..66332d3b848c 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1238,6 +1238,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::SEHExceptStmtClass: case Stmt::SEHLeaveStmtClass: case Stmt::SEHFinallyStmtClass: + case Stmt::OMPCanonicalLoopClass: case Stmt::OMPParallelDirectiveClass: case Stmt::OMPSimdDirectiveClass: case Stmt::OMPForDirectiveClass: @@ -1292,7 +1293,12 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: - case Stmt::CapturedStmtClass: { + case 
Stmt::OMPTileDirectiveClass: + case Stmt::OMPInteropDirectiveClass: + case Stmt::OMPDispatchDirectiveClass: + case Stmt::OMPMaskedDirectiveClass: + case Stmt::CapturedStmtClass: + case Stmt::OMPUnrollDirectiveClass: { const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState()); Engine.addAbortedBlock(node, currBldrCtx->getBlock()); break; @@ -1414,6 +1420,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPArraySectionExprClass: case Stmt::OMPArrayShapingExprClass: case Stmt::OMPIteratorExprClass: + case Stmt::SYCLUniqueStableNameExprClass: case Stmt::TypeTraitExprClass: { Bldr.takeNodes(Pred); ExplodedNodeSet preVisit; @@ -3137,8 +3144,8 @@ struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits { /// \p PreCallback: callback before break. /// \p PostCallback: callback after break. - /// \p Stop: stop iteration if returns {@code true} - /// \return Whether {@code Stop} ever returned {@code true}. + /// \p Stop: stop iteration if returns @c true + /// \return Whether @c Stop ever returned @c true. static bool traverseHiddenNodes( const ExplodedNode *N, llvm::function_ref<void(const ExplodedNode *)> PreCallback, diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 18d1b2169eed..7ad3dca831ac 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -282,29 +282,14 @@ ProgramStateRef ExprEngine::handleLValueBitCast( return state; } -ProgramStateRef ExprEngine::handleLVectorSplat( - ProgramStateRef state, const LocationContext* LCtx, const CastExpr* CastE, - StmtNodeBuilder &Bldr, ExplodedNode* Pred) { - // Recover some path sensitivity by conjuring a new value. 
- QualType resultType = CastE->getType(); - if (CastE->isGLValue()) - resultType = getContext().getPointerType(resultType); - SVal result = svalBuilder.conjureSymbolVal(nullptr, CastE, LCtx, - resultType, - currBldrCtx->blockCount()); - state = state->BindExpr(CastE, LCtx, result); - Bldr.generateNode(CastE, Pred, state); - - return state; -} - void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, ExplodedNode *Pred, ExplodedNodeSet &Dst) { ExplodedNodeSet dstPreStmt; getCheckerManager().runCheckersForPreStmt(dstPreStmt, Pred, CastE, *this); - if (CastE->getCastKind() == CK_LValueToRValue) { + if (CastE->getCastKind() == CK_LValueToRValue || + CastE->getCastKind() == CK_LValueToRValueBitCast) { for (ExplodedNodeSet::iterator I = dstPreStmt.begin(), E = dstPreStmt.end(); I!=E; ++I) { ExplodedNode *subExprNode = *I; @@ -332,6 +317,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, switch (CastE->getCastKind()) { case CK_LValueToRValue: + case CK_LValueToRValueBitCast: llvm_unreachable("LValueToRValue casts handled earlier."); case CK_ToVoid: continue; @@ -380,7 +366,6 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_Dependent: case CK_ArrayToPointerDecay: case CK_BitCast: - case CK_LValueToRValueBitCast: case CK_AddressSpaceConversion: case CK_BooleanToSignedIntegral: case CK_IntegralToPointer: @@ -526,22 +511,28 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ReinterpretMemberPointer: { SVal V = state->getSVal(Ex, LCtx); if (auto PTMSV = V.getAs<nonloc::PointerToMember>()) { - SVal CastedPTMSV = svalBuilder.makePointerToMember( - getBasicVals().accumCXXBase( - llvm::make_range<CastExpr::path_const_iterator>( - CastE->path_begin(), CastE->path_end()), *PTMSV)); + SVal CastedPTMSV = + svalBuilder.makePointerToMember(getBasicVals().accumCXXBase( + CastE->path(), *PTMSV, CastE->getCastKind())); state = state->BindExpr(CastE, LCtx, CastedPTMSV); Bldr.generateNode(CastE, Pred, state); 
continue; } // Explicitly proceed with default handler for this case cascade. - state = handleLVectorSplat(state, LCtx, CastE, Bldr, Pred); - continue; } + LLVM_FALLTHROUGH; // Various C++ casts that are not handled yet. case CK_ToUnion: + case CK_MatrixCast: case CK_VectorSplat: { - state = handleLVectorSplat(state, LCtx, CastE, Bldr, Pred); + QualType resultType = CastE->getType(); + if (CastE->isGLValue()) + resultType = getContext().getPointerType(resultType); + SVal result = svalBuilder.conjureSymbolVal( + /*symbolTag=*/nullptr, CastE, LCtx, resultType, + currBldrCtx->blockCount()); + state = state->BindExpr(CastE, LCtx, result); + Bldr.generateNode(CastE, Pred, state); continue; } } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index 996d3644e018..e6918e071a4f 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -18,6 +18,7 @@ #include "clang/Analysis/ConstructionContext.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -689,16 +690,30 @@ ProgramStateRef ExprEngine::bindReturnValue(const CallEvent &Call, // See if we need to conjure a heap pointer instead of // a regular unknown pointer. - bool IsHeapPointer = false; - if (const auto *CNE = dyn_cast<CXXNewExpr>(E)) - if (CNE->getOperatorNew()->isReplaceableGlobalAllocationFunction()) { - // FIXME: Delegate this to evalCall in MallocChecker? 
- IsHeapPointer = true; + const auto *CNE = dyn_cast<CXXNewExpr>(E); + if (CNE && CNE->getOperatorNew()->isReplaceableGlobalAllocationFunction()) { + R = svalBuilder.getConjuredHeapSymbolVal(E, LCtx, Count); + const MemRegion *MR = R.getAsRegion()->StripCasts(); + + // Store the extent of the allocated object(s). + SVal ElementCount; + if (const Expr *SizeExpr = CNE->getArraySize().getValueOr(nullptr)) { + ElementCount = State->getSVal(SizeExpr, LCtx); + } else { + ElementCount = svalBuilder.makeIntVal(1, /*IsUnsigned=*/true); } - R = IsHeapPointer ? svalBuilder.getConjuredHeapSymbolVal(E, LCtx, Count) - : svalBuilder.conjureSymbolVal(nullptr, E, LCtx, ResultTy, - Count); + SVal ElementSize = getElementExtent(CNE->getAllocatedType(), svalBuilder); + + SVal Size = + svalBuilder.evalBinOp(State, BO_Mul, ElementCount, ElementSize, + svalBuilder.getArrayIndexType()); + + State = setDynamicExtent(State, MR, Size.castAs<DefinedOrUnknownSVal>(), + svalBuilder); + } else { + R = svalBuilder.conjureSymbolVal(nullptr, E, LCtx, ResultTy, Count); + } } return State->BindExpr(E, LCtx, R); } diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 149459cf986a..64fc32ea7554 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/IssueHash.h" -#include "clang/Analysis/PathDiagnostic.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/Stmt.h" +#include "clang/Analysis/IssueHash.h" +#include "clang/Analysis/MacroExpansionContext.h" +#include "clang/Analysis/PathDiagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" @@ -135,14 +136,16 @@ private: void ento::createHTMLDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, 
PathDiagnosticConsumers &C, const std::string &OutputDir, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { // FIXME: HTML is currently our default output type, but if the output // directory isn't specified, it acts like if it was in the minimal text // output mode. This doesn't make much sense, we should have the minimal text // as our default. In the case of backward compatibility concerns, this could // be preserved with -analyzer-config-compatibility-mode=true. - createTextMinimalPathDiagnosticConsumer(DiagOpts, C, OutputDir, PP, CTU); + createTextMinimalPathDiagnosticConsumer(DiagOpts, C, OutputDir, PP, CTU, + MacroExpansions); // TODO: Emit an error here. if (OutputDir.empty()) @@ -154,8 +157,10 @@ void ento::createHTMLDiagnosticConsumer( void ento::createHTMLSingleFileDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, const std::string &OutputDir, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { - createTextMinimalPathDiagnosticConsumer(DiagOpts, C, OutputDir, PP, CTU); + const cross_tu::CrossTranslationUnitContext &CTU, + const clang::MacroExpansionContext &MacroExpansions) { + createTextMinimalPathDiagnosticConsumer(DiagOpts, C, OutputDir, PP, CTU, + MacroExpansions); // TODO: Emit an error here. 
if (OutputDir.empty()) @@ -167,13 +172,29 @@ void ento::createHTMLSingleFileDiagnosticConsumer( void ento::createPlistHTMLDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, const std::string &prefix, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { createHTMLDiagnosticConsumer( - DiagOpts, C, std::string(llvm::sys::path::parent_path(prefix)), PP, - CTU); - createPlistMultiFileDiagnosticConsumer(DiagOpts, C, prefix, PP, CTU); + DiagOpts, C, std::string(llvm::sys::path::parent_path(prefix)), PP, CTU, + MacroExpansions); + createPlistMultiFileDiagnosticConsumer(DiagOpts, C, prefix, PP, CTU, + MacroExpansions); createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, prefix, PP, - CTU); + CTU, MacroExpansions); +} + +void ento::createSarifHTMLDiagnosticConsumer( + PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, + const std::string &sarif_file, const Preprocessor &PP, + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { + createHTMLDiagnosticConsumer( + DiagOpts, C, std::string(llvm::sys::path::parent_path(sarif_file)), PP, + CTU, MacroExpansions); + createSarifDiagnosticConsumer(DiagOpts, C, sarif_file, PP, CTU, + MacroExpansions); + createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, sarif_file, + PP, CTU, MacroExpansions); } //===----------------------------------------------------------------------===// @@ -254,11 +275,11 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, << "' absolute: " << EC.message() << '\n'; return; } - if (std::error_code EC = - llvm::sys::fs::createUniqueFile(Model, FD, ResultPath)) { - llvm::errs() << "warning: could not create file in '" << Directory - << "': " << EC.message() << '\n'; - return; + if (std::error_code EC = llvm::sys::fs::createUniqueFile( + Model, FD, ResultPath, 
llvm::sys::fs::OF_Text)) { + llvm::errs() << "warning: could not create file in '" << Directory + << "': " << EC.message() << '\n'; + return; } } else { int i = 1; diff --git a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp index dc268e562237..e5f4e9ea30c9 100644 --- a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp +++ b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp @@ -79,14 +79,17 @@ ProgramStateRef processLoopEnd(const Stmt *LoopStmt, ProgramStateRef State) { return State; } -static internal::Matcher<Stmt> simpleCondition(StringRef BindName) { - return binaryOperator(anyOf(hasOperatorName("<"), hasOperatorName(">"), - hasOperatorName("<="), hasOperatorName(">="), - hasOperatorName("!=")), - hasEitherOperand(ignoringParenImpCasts(declRefExpr( - to(varDecl(hasType(isInteger())).bind(BindName))))), - hasEitherOperand(ignoringParenImpCasts( - integerLiteral().bind("boundNum")))) +static internal::Matcher<Stmt> simpleCondition(StringRef BindName, + StringRef RefName) { + return binaryOperator( + anyOf(hasOperatorName("<"), hasOperatorName(">"), + hasOperatorName("<="), hasOperatorName(">="), + hasOperatorName("!=")), + hasEitherOperand(ignoringParenImpCasts( + declRefExpr(to(varDecl(hasType(isInteger())).bind(BindName))) + .bind(RefName))), + hasEitherOperand( + ignoringParenImpCasts(integerLiteral().bind("boundNum")))) .bind("conditionOperator"); } @@ -138,7 +141,7 @@ static internal::Matcher<Stmt> hasSuspiciousStmt(StringRef NodeName) { static internal::Matcher<Stmt> forLoopMatcher() { return forStmt( - hasCondition(simpleCondition("initVarName")), + hasCondition(simpleCondition("initVarName", "initVarRef")), // Initialization should match the form: 'int i = 6' or 'i = 42'. 
hasLoopInit( anyOf(declStmt(hasSingleDecl( @@ -156,17 +159,52 @@ static internal::Matcher<Stmt> forLoopMatcher() { hasUnaryOperand(declRefExpr( to(varDecl(allOf(equalsBoundNode("initVarName"), hasType(isInteger())))))))), - unless(hasBody(hasSuspiciousStmt("initVarName")))).bind("forLoop"); + unless(hasBody(hasSuspiciousStmt("initVarName")))) + .bind("forLoop"); } -static bool isPossiblyEscaped(const VarDecl *VD, ExplodedNode *N) { - // Global variables assumed as escaped variables. +static bool isCapturedByReference(ExplodedNode *N, const DeclRefExpr *DR) { + + // Get the lambda CXXRecordDecl + assert(DR->refersToEnclosingVariableOrCapture()); + const LocationContext *LocCtxt = N->getLocationContext(); + const Decl *D = LocCtxt->getDecl(); + const auto *MD = cast<CXXMethodDecl>(D); + assert(MD && MD->getParent()->isLambda() && + "Captured variable should only be seen while evaluating a lambda"); + const CXXRecordDecl *LambdaCXXRec = MD->getParent(); + + // Lookup the fields of the lambda + llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + FieldDecl *LambdaThisCaptureField; + LambdaCXXRec->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField); + + // Check if the counter is captured by reference + const VarDecl *VD = cast<VarDecl>(DR->getDecl()->getCanonicalDecl()); + assert(VD); + const FieldDecl *FD = LambdaCaptureFields[VD]; + assert(FD && "Captured variable without a corresponding field"); + return FD->getType()->isReferenceType(); +} + +// A loop counter is considered escaped if: +// case 1: It is a global variable. +// case 2: It is a reference parameter or a reference capture. +// case 3: It is assigned to a non-const reference variable or parameter. +// case 4: Has its address taken. 
+static bool isPossiblyEscaped(ExplodedNode *N, const DeclRefExpr *DR) { + const VarDecl *VD = cast<VarDecl>(DR->getDecl()->getCanonicalDecl()); + assert(VD); + // Case 1: if (VD->hasGlobalStorage()) return true; - const bool isParm = isa<ParmVarDecl>(VD); - // Reference parameters are assumed as escaped variables. - if (isParm && VD->getType()->isReferenceType()) + const bool IsRefParamOrCapture = + isa<ParmVarDecl>(VD) || DR->refersToEnclosingVariableOrCapture(); + // Case 2: + if ((DR->refersToEnclosingVariableOrCapture() && + isCapturedByReference(N, DR)) || + (IsRefParamOrCapture && VD->getType()->isReferenceType())) return true; while (!N->pred_empty()) { @@ -189,6 +227,7 @@ static bool isPossiblyEscaped(const VarDecl *VD, ExplodedNode *N) { // on VD and reference initialized by VD. ASTContext &ASTCtx = N->getLocationContext()->getAnalysisDeclContext()->getASTContext(); + // Case 3 and 4: auto Match = match(stmt(anyOf(callByRef(equalsNode(VD)), getAddrTo(equalsNode(VD)), assignedToRef(equalsNode(VD)))), @@ -199,8 +238,8 @@ static bool isPossiblyEscaped(const VarDecl *VD, ExplodedNode *N) { N = N->getFirstPred(); } - // Parameter declaration will not be found. - if (isParm) + // Reference parameter and reference capture will not be found. + if (IsRefParamOrCapture) return false; llvm_unreachable("Reached root without finding the declaration of VD"); @@ -218,7 +257,7 @@ bool shouldCompletelyUnroll(const Stmt *LoopStmt, ASTContext &ASTCtx, if (Matches.empty()) return false; - auto CounterVar = Matches[0].getNodeAs<VarDecl>("initVarName"); + const auto *CounterVarRef = Matches[0].getNodeAs<DeclRefExpr>("initVarRef"); llvm::APInt BoundNum = Matches[0].getNodeAs<IntegerLiteral>("boundNum")->getValue(); llvm::APInt InitNum = @@ -235,7 +274,7 @@ bool shouldCompletelyUnroll(const Stmt *LoopStmt, ASTContext &ASTCtx, maxStep = (BoundNum - InitNum).abs().getZExtValue(); // Check if the counter of the loop is not escaped before. 
- return !isPossiblyEscaped(CounterVar->getCanonicalDecl(), Pred); + return !isPossiblyEscaped(Pred, CounterVarRef); } bool madeNewBranch(ExplodedNode *N, const Stmt *LoopStmt) { diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 455adf53ac99..bd725ee9eaa3 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -28,6 +28,7 @@ #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" @@ -729,13 +730,6 @@ SourceRange MemRegion::sourceRange() const { // MemRegionManager methods. //===----------------------------------------------------------------------===// -static DefinedOrUnknownSVal getTypeSize(QualType Ty, ASTContext &Ctx, - SValBuilder &SVB) { - CharUnits Size = Ctx.getTypeSizeInChars(Ty); - QualType SizeTy = SVB.getArrayIndexType(); - return SVB.makeIntVal(Size.getQuantity(), SizeTy); -} - DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR, SValBuilder &SVB) const { const auto *SR = cast<SubRegion>(MR); @@ -766,7 +760,7 @@ DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR, if (Ty->isIncompleteType()) return UnknownVal(); - return getTypeSize(Ty, Ctx, SVB); + return getElementExtent(Ty, SVB); } case MemRegion::FieldRegionKind: { // Force callers to deal with bitfields explicitly. 
@@ -774,7 +768,7 @@ DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR, return UnknownVal(); QualType Ty = cast<TypedValueRegion>(SR)->getDesugaredValueType(Ctx); - DefinedOrUnknownSVal Size = getTypeSize(Ty, Ctx, SVB); + DefinedOrUnknownSVal Size = getElementExtent(Ty, SVB); // A zero-length array at the end of a struct often stands for dynamically // allocated extra memory. diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp index 35e320c7755f..92104d628711 100644 --- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "clang/Analysis/IssueHash.h" +#include "clang/Analysis/MacroExpansionContext.h" #include "clang/Analysis/PathDiagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/PlistSupport.h" @@ -43,6 +44,7 @@ namespace { const std::string OutputFile; const Preprocessor &PP; const cross_tu::CrossTranslationUnitContext &CTU; + const MacroExpansionContext &MacroExpansions; const bool SupportsCrossFileDiagnostics; void printBugPath(llvm::raw_ostream &o, const FIDMap &FM, @@ -52,6 +54,7 @@ namespace { PlistDiagnostics(PathDiagnosticConsumerOptions DiagOpts, const std::string &OutputFile, const Preprocessor &PP, const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions, bool supportsMultipleFiles); ~PlistDiagnostics() override {} @@ -80,14 +83,14 @@ class PlistPrinter { const FIDMap& FM; const Preprocessor &PP; const cross_tu::CrossTranslationUnitContext &CTU; + const MacroExpansionContext &MacroExpansions; llvm::SmallVector<const PathDiagnosticMacroPiece *, 0> MacroPieces; public: - PlistPrinter(const FIDMap& FM, - const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) - : FM(FM), PP(PP), CTU(CTU) { - } + PlistPrinter(const FIDMap &FM, 
const Preprocessor &PP, + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) + : FM(FM), PP(PP), CTU(CTU), MacroExpansions(MacroExpansions) {} void ReportDiag(raw_ostream &o, const PathDiagnosticPiece& P) { ReportPiece(o, P, /*indent*/ 4, /*depth*/ 0, /*includeControlFlow*/ true); @@ -154,28 +157,17 @@ private: } // end of anonymous namespace -namespace { - -struct ExpansionInfo { - std::string MacroName; - std::string Expansion; - ExpansionInfo(std::string N, std::string E) - : MacroName(std::move(N)), Expansion(std::move(E)) {} -}; - -} // end of anonymous namespace - -/// Print coverage information to output stream {@code o}. -/// May modify the used list of files {@code Fids} by inserting new ones. +/// Print coverage information to output stream @c o. +/// May modify the used list of files @c Fids by inserting new ones. static void printCoverage(const PathDiagnostic *D, unsigned InputIndentLevel, SmallVectorImpl<FileID> &Fids, FIDMap &FM, llvm::raw_fd_ostream &o); -static ExpansionInfo -getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU); +static Optional<StringRef> getExpandedMacro( + SourceLocation MacroLoc, const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions, const SourceManager &SM); //===----------------------------------------------------------------------===// // Methods of PlistPrinter. 
@@ -388,7 +380,17 @@ void PlistPrinter::ReportMacroExpansions(raw_ostream &o, unsigned indent) { for (const PathDiagnosticMacroPiece *P : MacroPieces) { const SourceManager &SM = PP.getSourceManager(); - ExpansionInfo EI = getExpandedMacro(P->getLocation().asLocation(), PP, CTU); + + SourceLocation MacroExpansionLoc = + P->getLocation().asLocation().getExpansionLoc(); + + const Optional<StringRef> MacroName = + MacroExpansions.getOriginalText(MacroExpansionLoc); + const Optional<StringRef> ExpansionText = + getExpandedMacro(MacroExpansionLoc, CTU, MacroExpansions, SM); + + if (!MacroName.hasValue() || !ExpansionText.hasValue()) + continue; Indent(o, indent) << "<dict>\n"; ++indent; @@ -405,11 +407,11 @@ void PlistPrinter::ReportMacroExpansions(raw_ostream &o, unsigned indent) { // Output the macro name. Indent(o, indent) << "<key>name</key>"; - EmitString(o, EI.MacroName) << '\n'; + EmitString(o, MacroName.getValue()) << '\n'; // Output what it expands into. Indent(o, indent) << "<key>expansion</key>"; - EmitString(o, EI.Expansion) << '\n'; + EmitString(o, ExpansionText.getValue()) << '\n'; // Finish up. --indent; @@ -482,8 +484,8 @@ void PlistPrinter::ReportPopUp(raw_ostream &o, // Static function definitions. //===----------------------------------------------------------------------===// -/// Print coverage information to output stream {@code o}. -/// May modify the used list of files {@code Fids} by inserting new ones. +/// Print coverage information to output stream @c o. +/// May modify the used list of files @c Fids by inserting new ones. 
static void printCoverage(const PathDiagnostic *D, unsigned InputIndentLevel, SmallVectorImpl<FileID> &Fids, @@ -522,8 +524,9 @@ static void printCoverage(const PathDiagnostic *D, PlistDiagnostics::PlistDiagnostics( PathDiagnosticConsumerOptions DiagOpts, const std::string &output, const Preprocessor &PP, const cross_tu::CrossTranslationUnitContext &CTU, - bool supportsMultipleFiles) + const MacroExpansionContext &MacroExpansions, bool supportsMultipleFiles) : DiagOpts(std::move(DiagOpts)), OutputFile(output), PP(PP), CTU(CTU), + MacroExpansions(MacroExpansions), SupportsCrossFileDiagnostics(supportsMultipleFiles) { // FIXME: Will be used by a later planned change. (void)this->CTU; @@ -532,36 +535,40 @@ PlistDiagnostics::PlistDiagnostics( void ento::createPlistDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, const std::string &OutputFile, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { // TODO: Emit an error here. if (OutputFile.empty()) return; C.push_back(new PlistDiagnostics(DiagOpts, OutputFile, PP, CTU, + MacroExpansions, /*supportsMultipleFiles=*/false)); createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, OutputFile, - PP, CTU); + PP, CTU, MacroExpansions); } void ento::createPlistMultiFileDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, const std::string &OutputFile, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { // TODO: Emit an error here. 
if (OutputFile.empty()) return; C.push_back(new PlistDiagnostics(DiagOpts, OutputFile, PP, CTU, + MacroExpansions, /*supportsMultipleFiles=*/true)); createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, OutputFile, - PP, CTU); + PP, CTU, MacroExpansions); } void PlistDiagnostics::printBugPath(llvm::raw_ostream &o, const FIDMap &FM, const PathPieces &Path) { - PlistPrinter Printer(FM, PP, CTU); + PlistPrinter Printer(FM, PP, CTU, MacroExpansions); assert(std::is_partitioned(Path.begin(), Path.end(), [](const PathDiagnosticPieceRef &E) { return E->getKind() == PathDiagnosticPiece::Note; @@ -653,7 +660,7 @@ void PlistDiagnostics::FlushDiagnosticsImpl( // Open the file. std::error_code EC; - llvm::raw_fd_ostream o(OutputFile, EC, llvm::sys::fs::OF_Text); + llvm::raw_fd_ostream o(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF); if (EC) { llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; return; @@ -815,570 +822,17 @@ void PlistDiagnostics::FlushDiagnosticsImpl( } //===----------------------------------------------------------------------===// -// Declarations of helper functions and data structures for expanding macros. -//===----------------------------------------------------------------------===// - -namespace { - -using ArgTokensTy = llvm::SmallVector<Token, 2>; - -} // end of anonymous namespace - -LLVM_DUMP_METHOD static void dumpArgTokensToStream(llvm::raw_ostream &Out, - const Preprocessor &PP, - const ArgTokensTy &Toks); - -namespace { -/// Maps unexpanded macro parameters to expanded arguments. A macro argument may -/// need to expanded further when it is nested inside another macro. 
-class MacroParamMap : public std::map<const IdentifierInfo *, ArgTokensTy> { -public: - void expandFromPrevMacro(const MacroParamMap &Super); - - LLVM_DUMP_METHOD void dump(const Preprocessor &PP) const { - dumpToStream(llvm::errs(), PP); - } - - LLVM_DUMP_METHOD void dumpToStream(llvm::raw_ostream &Out, - const Preprocessor &PP) const; -}; - -struct MacroExpansionInfo { - std::string Name; - const MacroInfo *MI = nullptr; - MacroParamMap ParamMap; - - MacroExpansionInfo(std::string N, const MacroInfo *MI, MacroParamMap M) - : Name(std::move(N)), MI(MI), ParamMap(std::move(M)) {} -}; - -class TokenPrinter { - llvm::raw_ostream &OS; - const Preprocessor &PP; - - Token PrevTok, PrevPrevTok; - TokenConcatenation ConcatInfo; - -public: - TokenPrinter(llvm::raw_ostream &OS, const Preprocessor &PP) - : OS(OS), PP(PP), ConcatInfo(PP) { - PrevTok.setKind(tok::unknown); - PrevPrevTok.setKind(tok::unknown); - } - - void printToken(const Token &Tok); -}; - -/// Wrapper around a Lexer object that can lex tokens one-by-one. Its possible -/// to "inject" a range of tokens into the stream, in which case the next token -/// is retrieved from the next element of the range, until the end of the range -/// is reached. 
-class TokenStream { -public: - TokenStream(SourceLocation ExpanLoc, const SourceManager &SM, - const LangOptions &LangOpts) - : ExpanLoc(ExpanLoc) { - FileID File; - unsigned Offset; - std::tie(File, Offset) = SM.getDecomposedLoc(ExpanLoc); - llvm::MemoryBufferRef MB = SM.getBufferOrFake(File); - const char *MacroNameTokenPos = MB.getBufferStart() + Offset; - - RawLexer = std::make_unique<Lexer>(SM.getLocForStartOfFile(File), LangOpts, - MB.getBufferStart(), MacroNameTokenPos, - MB.getBufferEnd()); - } - - void next(Token &Result) { - if (CurrTokenIt == TokenRange.end()) { - RawLexer->LexFromRawLexer(Result); - return; - } - Result = *CurrTokenIt; - CurrTokenIt++; - } - - void injectRange(const ArgTokensTy &Range) { - TokenRange = Range; - CurrTokenIt = TokenRange.begin(); - } - - std::unique_ptr<Lexer> RawLexer; - ArgTokensTy TokenRange; - ArgTokensTy::iterator CurrTokenIt = TokenRange.begin(); - SourceLocation ExpanLoc; -}; - -} // end of anonymous namespace - -/// The implementation method of getMacroExpansion: It prints the expansion of -/// a macro to \p Printer, and returns with the name of the macro. -/// -/// Since macros can be nested in one another, this function may call itself -/// recursively. -/// -/// Unfortunately, macro arguments have to expanded manually. To understand why, -/// observe the following example: -/// -/// #define PRINT(x) print(x) -/// #define DO_SOMETHING(str) PRINT(str) -/// -/// DO_SOMETHING("Cute panda cubs."); -/// -/// As we expand the last line, we'll immediately replace PRINT(str) with -/// print(x). The information that both 'str' and 'x' refers to the same string -/// is an information we have to forward, hence the argument \p PrevParamMap. -/// -/// To avoid infinite recursion we maintain the already processed tokens in -/// a set. This is carried as a parameter through the recursive calls. The set -/// is extended with the currently processed token and after processing it, the -/// token is removed. 
If the token is already in the set, then recursion stops: -/// -/// #define f(y) x -/// #define x f(x) -static std::string getMacroNameAndPrintExpansion( - TokenPrinter &Printer, SourceLocation MacroLoc, const Preprocessor &PP, - const MacroParamMap &PrevParamMap, - llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens); - -/// Retrieves the name of the macro and what it's parameters expand into -/// at \p ExpanLoc. -/// -/// For example, for the following macro expansion: -/// -/// #define SET_TO_NULL(x) x = 0 -/// #define NOT_SUSPICIOUS(a) \ -/// { \ -/// int b = 0; \ -/// } \ -/// SET_TO_NULL(a) -/// -/// int *ptr = new int(4); -/// NOT_SUSPICIOUS(&ptr); -/// *ptr = 5; -/// -/// When \p ExpanLoc references the last line, the macro name "NOT_SUSPICIOUS" -/// and the MacroArgMap map { (a, &ptr) } will be returned. -/// -/// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of -/// "NOT_SUSPICOUS", the macro name "SET_TO_NULL" and the MacroArgMap map -/// { (x, a) } will be returned. -static MacroExpansionInfo -getMacroExpansionInfo(const MacroParamMap &PrevParamMap, - SourceLocation ExpanLoc, const Preprocessor &PP); - -/// Retrieves the ')' token that matches '(' \p It points to. -static MacroInfo::tokens_iterator getMatchingRParen( - MacroInfo::tokens_iterator It, - MacroInfo::tokens_iterator End); - -/// Retrieves the macro info for \p II refers to at \p Loc. This is important -/// because macros can be redefined or undefined. -static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP, - const SourceManager &SM, - const IdentifierInfo *II, - SourceLocation Loc); - -//===----------------------------------------------------------------------===// // Definitions of helper functions and methods for expanding macros. 
//===----------------------------------------------------------------------===// -static ExpansionInfo -getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { - - const Preprocessor *PPToUse = &PP; - if (auto LocAndUnit = CTU.getImportedFromSourceLocation(MacroLoc)) { - MacroLoc = LocAndUnit->first; - PPToUse = &LocAndUnit->second->getPreprocessor(); - } - - llvm::SmallString<200> ExpansionBuf; - llvm::raw_svector_ostream OS(ExpansionBuf); - TokenPrinter Printer(OS, *PPToUse); - llvm::SmallPtrSet<IdentifierInfo*, 8> AlreadyProcessedTokens; - - std::string MacroName = getMacroNameAndPrintExpansion( - Printer, MacroLoc, *PPToUse, MacroParamMap{}, AlreadyProcessedTokens); - return {MacroName, std::string(OS.str())}; -} - -static std::string getMacroNameAndPrintExpansion( - TokenPrinter &Printer, SourceLocation MacroLoc, const Preprocessor &PP, - const MacroParamMap &PrevParamMap, - llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens) { - - const SourceManager &SM = PP.getSourceManager(); - - MacroExpansionInfo MExpInfo = - getMacroExpansionInfo(PrevParamMap, SM.getExpansionLoc(MacroLoc), PP); - IdentifierInfo *MacroNameII = PP.getIdentifierInfo(MExpInfo.Name); - - // TODO: If the macro definition contains another symbol then this function is - // called recursively. In case this symbol is the one being defined, it will - // be an infinite recursion which is stopped by this "if" statement. However, - // in this case we don't get the full expansion text in the Plist file. See - // the test file where "value" is expanded to "garbage_" instead of - // "garbage_value". - if (!AlreadyProcessedTokens.insert(MacroNameII).second) - return MExpInfo.Name; - - if (!MExpInfo.MI) - return MExpInfo.Name; - - // Manually expand its arguments from the previous macro. - MExpInfo.ParamMap.expandFromPrevMacro(PrevParamMap); - - // Iterate over the macro's tokens and stringify them. 
- for (auto It = MExpInfo.MI->tokens_begin(), E = MExpInfo.MI->tokens_end(); - It != E; ++It) { - Token T = *It; - - // If this token is not an identifier, we only need to print it. - if (T.isNot(tok::identifier)) { - Printer.printToken(T); - continue; - } - - const auto *II = T.getIdentifierInfo(); - assert(II && - "This token is an identifier but has no IdentifierInfo!"); - - // If this token is a macro that should be expanded inside the current - // macro. - if (getMacroInfoForLocation(PP, SM, II, T.getLocation())) { - getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP, - MExpInfo.ParamMap, AlreadyProcessedTokens); - - // If this is a function-like macro, skip its arguments, as - // getExpandedMacro() already printed them. If this is the case, let's - // first jump to the '(' token. - auto N = std::next(It); - if (N != E && N->is(tok::l_paren)) - It = getMatchingRParen(++It, E); - continue; - } - - // If this token is the current macro's argument, we should expand it. - auto ParamToArgIt = MExpInfo.ParamMap.find(II); - if (ParamToArgIt != MExpInfo.ParamMap.end()) { - for (MacroInfo::tokens_iterator ArgIt = ParamToArgIt->second.begin(), - ArgEnd = ParamToArgIt->second.end(); - ArgIt != ArgEnd; ++ArgIt) { - - // These tokens may still be macros, if that is the case, handle it the - // same way we did above. - const auto *ArgII = ArgIt->getIdentifierInfo(); - if (!ArgII) { - Printer.printToken(*ArgIt); - continue; - } - - const auto *MI = PP.getMacroInfo(ArgII); - if (!MI) { - Printer.printToken(*ArgIt); - continue; - } - - getMacroNameAndPrintExpansion(Printer, ArgIt->getLocation(), PP, - MExpInfo.ParamMap, - AlreadyProcessedTokens); - // Peek the next token if it is a tok::l_paren. This way we can decide - // if this is the application or just a reference to a function maxro - // symbol: - // - // #define apply(f) ... - // #define func(x) ... 
- // apply(func) - // apply(func(42)) - auto N = std::next(ArgIt); - if (N != ArgEnd && N->is(tok::l_paren)) - ArgIt = getMatchingRParen(++ArgIt, ArgEnd); - } - continue; - } - - // If control reached here, then this token isn't a macro identifier, nor an - // unexpanded macro argument that we need to handle, print it. - Printer.printToken(T); +static Optional<StringRef> +getExpandedMacro(SourceLocation MacroExpansionLoc, + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions, + const SourceManager &SM) { + if (auto CTUMacroExpCtx = + CTU.getMacroExpansionContextForSourceLocation(MacroExpansionLoc)) { + return CTUMacroExpCtx->getExpandedText(MacroExpansionLoc); } - - AlreadyProcessedTokens.erase(MacroNameII); - - return MExpInfo.Name; -} - -static MacroExpansionInfo -getMacroExpansionInfo(const MacroParamMap &PrevParamMap, - SourceLocation ExpanLoc, const Preprocessor &PP) { - - const SourceManager &SM = PP.getSourceManager(); - const LangOptions &LangOpts = PP.getLangOpts(); - - // First, we create a Lexer to lex *at the expansion location* the tokens - // referring to the macro's name and its arguments. - TokenStream TStream(ExpanLoc, SM, LangOpts); - - // Acquire the macro's name. - Token TheTok; - TStream.next(TheTok); - - std::string MacroName = PP.getSpelling(TheTok); - - const auto *II = PP.getIdentifierInfo(MacroName); - assert(II && "Failed to acquire the IdentifierInfo for the macro!"); - - const MacroInfo *MI = getMacroInfoForLocation(PP, SM, II, ExpanLoc); - // assert(MI && "The macro must've been defined at it's expansion location!"); - // - // We should always be able to obtain the MacroInfo in a given TU, but if - // we're running the analyzer with CTU, the Preprocessor won't contain the - // directive history (or anything for that matter) from another TU. - // TODO: assert when we're not running with CTU. 
- if (!MI) - return { MacroName, MI, {} }; - - // Acquire the macro's arguments at the expansion point. - // - // The rough idea here is to lex from the first left parentheses to the last - // right parentheses, and map the macro's parameter to what they will be - // expanded to. A macro argument may contain several token (like '3 + 4'), so - // we'll lex until we find a tok::comma or tok::r_paren, at which point we - // start lexing the next argument or finish. - ArrayRef<const IdentifierInfo *> MacroParams = MI->params(); - if (MacroParams.empty()) - return { MacroName, MI, {} }; - - TStream.next(TheTok); - // When this is a token which expands to another macro function then its - // parentheses are not at its expansion locaiton. For example: - // - // #define foo(x) int bar() { return x; } - // #define apply_zero(f) f(0) - // apply_zero(foo) - // ^ - // This is not a tok::l_paren, but foo is a function. - if (TheTok.isNot(tok::l_paren)) - return { MacroName, MI, {} }; - - MacroParamMap ParamMap; - - // When the argument is a function call, like - // CALL_FN(someFunctionName(param1, param2)) - // we will find tok::l_paren, tok::r_paren, and tok::comma that do not divide - // actual macro arguments, or do not represent the macro argument's closing - // parentheses, so we'll count how many parentheses aren't closed yet. - // If ParanthesesDepth - // * = 0, then there are no more arguments to lex. - // * = 1, then if we find a tok::comma, we can start lexing the next arg. - // * > 1, then tok::comma is a part of the current arg. - int ParenthesesDepth = 1; - - // If we encounter the variadic arg, we will lex until the closing - // tok::r_paren, even if we lex a tok::comma and ParanthesesDepth == 1. - const IdentifierInfo *VariadicParamII = PP.getIdentifierInfo("__VA_ARGS__"); - if (MI->isGNUVarargs()) { - // If macro uses GNU-style variadic args, the param name is user-supplied, - // an not "__VA_ARGS__". E.g.: - // #define FOO(a, b, myvargs...) 
- // In this case, just use the last parameter: - VariadicParamII = *(MacroParams.rbegin()); - } - - for (const IdentifierInfo *CurrParamII : MacroParams) { - MacroParamMap::mapped_type ArgTokens; - - // One could also simply not supply a single argument to __VA_ARGS__ -- this - // results in a preprocessor warning, but is not an error: - // #define VARIADIC(ptr, ...) \ - // someVariadicTemplateFunction(__VA_ARGS__) - // - // int *ptr; - // VARIADIC(ptr); // Note that there are no commas, this isn't just an - // // empty parameter -- there are no parameters for '...'. - // In any other case, ParenthesesDepth mustn't be 0 here. - if (ParenthesesDepth != 0) { - - // Lex the first token of the next macro parameter. - TStream.next(TheTok); - - while (CurrParamII == VariadicParamII || ParenthesesDepth != 1 || - !TheTok.is(tok::comma)) { - assert(TheTok.isNot(tok::eof) && - "EOF encountered while looking for expanded macro args!"); - - if (TheTok.is(tok::l_paren)) - ++ParenthesesDepth; - - if (TheTok.is(tok::r_paren)) - --ParenthesesDepth; - - if (ParenthesesDepth == 0) - break; - - if (TheTok.is(tok::raw_identifier)) { - PP.LookUpIdentifierInfo(TheTok); - // This token is a variadic parameter: - // - // #define PARAMS_RESOLVE_TO_VA_ARGS(i, fmt) foo(i, fmt); \ - // i = 0; - // #define DISPATCH(...) \ - // PARAMS_RESOLVE_TO_VA_ARGS(__VA_ARGS__); - // // ^~~~~~~~~~~ Variadic parameter here - // - // void multipleParamsResolveToVA_ARGS(void) { - // int x = 1; - // DISPATCH(x, "LF1M healer"); // Multiple arguments are mapped to - // // a single __VA_ARGS__ parameter. - // (void)(10 / x); - // } - // - // We will stumble across this while trying to expand - // PARAMS_RESOLVE_TO_VA_ARGS. By this point, we already noted during - // the processing of DISPATCH what __VA_ARGS__ maps to, so we'll - // retrieve the next series of tokens from that. 
- if (TheTok.getIdentifierInfo() == VariadicParamII) { - TStream.injectRange(PrevParamMap.at(VariadicParamII)); - TStream.next(TheTok); - continue; - } - } - - ArgTokens.push_back(TheTok); - TStream.next(TheTok); - } - } else { - assert(CurrParamII == VariadicParamII && - "No more macro arguments are found, but the current parameter " - "isn't the variadic arg!"); - } - - ParamMap.emplace(CurrParamII, std::move(ArgTokens)); - } - - assert(TheTok.is(tok::r_paren) && - "Expanded macro argument acquisition failed! After the end of the loop" - " this token should be ')'!"); - - return {MacroName, MI, ParamMap}; -} - -static MacroInfo::tokens_iterator getMatchingRParen( - MacroInfo::tokens_iterator It, - MacroInfo::tokens_iterator End) { - - assert(It->is(tok::l_paren) && "This token should be '('!"); - - // Skip until we find the closing ')'. - int ParenthesesDepth = 1; - while (ParenthesesDepth != 0) { - ++It; - - assert(It->isNot(tok::eof) && - "Encountered EOF while attempting to skip macro arguments!"); - assert(It != End && - "End of the macro definition reached before finding ')'!"); - - if (It->is(tok::l_paren)) - ++ParenthesesDepth; - - if (It->is(tok::r_paren)) - --ParenthesesDepth; - } - return It; -} - -static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP, - const SourceManager &SM, - const IdentifierInfo *II, - SourceLocation Loc) { - - const MacroDirective *MD = PP.getLocalMacroDirectiveHistory(II); - if (!MD) - return nullptr; - - return MD->findDirectiveAtLoc(Loc, SM).getMacroInfo(); -} - -void MacroParamMap::expandFromPrevMacro(const MacroParamMap &Super) { - - for (value_type &Pair : *this) { - ArgTokensTy &CurrArgTokens = Pair.second; - - // For each token in the expanded macro argument. - auto It = CurrArgTokens.begin(); - while (It != CurrArgTokens.end()) { - if (It->isNot(tok::identifier)) { - ++It; - continue; - } - - const auto *II = It->getIdentifierInfo(); - assert(II); - - // Is this an argument that "Super" expands further? 
- if (!Super.count(II)) { - ++It; - continue; - } - - const ArgTokensTy &SuperArgTokens = Super.at(II); - - It = CurrArgTokens.insert(It, SuperArgTokens.begin(), - SuperArgTokens.end()); - std::advance(It, SuperArgTokens.size()); - It = CurrArgTokens.erase(It); - } - } -} - -void MacroParamMap::dumpToStream(llvm::raw_ostream &Out, - const Preprocessor &PP) const { - for (const std::pair<const IdentifierInfo *, ArgTokensTy> Pair : *this) { - Out << Pair.first->getName() << " -> "; - dumpArgTokensToStream(Out, PP, Pair.second); - Out << '\n'; - } -} - -static void dumpArgTokensToStream(llvm::raw_ostream &Out, - const Preprocessor &PP, - const ArgTokensTy &Toks) { - TokenPrinter Printer(Out, PP); - for (Token Tok : Toks) - Printer.printToken(Tok); -} - -void TokenPrinter::printToken(const Token &Tok) { - // TODO: Handle GNU extensions where hash and hashhash occurs right before - // __VA_ARGS__. - // cppreference.com: "some compilers offer an extension that allows ## to - // appear after a comma and before __VA_ARGS__, in which case the ## does - // nothing when the variable arguments are present, but removes the comma when - // the variable arguments are not present: this makes it possible to define - // macros such as fprintf (stderr, format, ##__VA_ARGS__)" - // FIXME: Handle named variadic macro parameters (also a GNU extension). - - // If this is the first token to be printed, don't print space. - if (PrevTok.isNot(tok::unknown)) { - // If the tokens were already space separated, or if they must be to avoid - // them being implicitly pasted, add a space between them. - if(Tok.hasLeadingSpace() || ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, - Tok)) { - // AvoidConcat doesn't check for ##, don't print a space around it. 
- if (PrevTok.isNot(tok::hashhash) && Tok.isNot(tok::hashhash)) { - OS << ' '; - } - } - } - - if (!Tok.isOneOf(tok::hash, tok::hashhash)) { - if (PrevTok.is(tok::hash)) - OS << '\"' << PP.getSpelling(Tok) << '\"'; - else - OS << PP.getSpelling(Tok); - } - - PrevPrevTok = PrevTok; - PrevTok = Tok; + return MacroExpansions.getExpandedText(MacroExpansionLoc); } diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index a481bde1651b..69554576bdb2 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -19,7 +19,13 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iterator> using namespace clang; using namespace ento; @@ -97,47 +103,63 @@ public: return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount]; } }; + //===----------------------------------------------------------------------===// // RangeSet implementation //===----------------------------------------------------------------------===// -void RangeSet::IntersectInRange(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Lower, - const llvm::APSInt &Upper, - PrimRangeSet &newRanges, - PrimRangeSet::iterator &i, - PrimRangeSet::iterator &e) const { - // There are six cases for each range R in the set: - // 1. R is entirely before the intersection range. - // 2. R is entirely after the intersection range. - // 3. R contains the entire intersection range. - // 4. R starts before the intersection range and ends in the middle. - // 5. R starts in the middle of the intersection range and ends after it. - // 6. R is entirely contained in the intersection range. 
- // These correspond to each of the conditions below. - for (/* i = begin(), e = end() */; i != e; ++i) { - if (i->To() < Lower) { - continue; - } - if (i->From() > Upper) { - break; - } +RangeSet::ContainerType RangeSet::Factory::EmptySet{}; - if (i->Includes(Lower)) { - if (i->Includes(Upper)) { - newRanges = - F.add(newRanges, Range(BV.getValue(Lower), BV.getValue(Upper))); - break; - } else - newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To())); - } else { - if (i->Includes(Upper)) { - newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper))); - break; - } else - newRanges = F.add(newRanges, *i); - } +RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) { + ContainerType Result; + Result.reserve(Original.size() + 1); + + const_iterator Lower = llvm::lower_bound(Original, Element); + Result.insert(Result.end(), Original.begin(), Lower); + Result.push_back(Element); + Result.insert(Result.end(), Lower, Original.end()); + + return makePersistent(std::move(Result)); +} + +RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) { + return add(Original, Range(Point)); +} + +RangeSet RangeSet::Factory::getRangeSet(Range From) { + ContainerType Result; + Result.push_back(From); + return makePersistent(std::move(Result)); +} + +RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) { + llvm::FoldingSetNodeID ID; + void *InsertPos; + + From.Profile(ID); + ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos); + + if (!Result) { + // It is cheaper to fully construct the resulting range on stack + // and move it to the freshly allocated buffer if we don't have + // a set like this already. 
+ Result = construct(std::move(From)); + Cache.InsertNode(Result, InsertPos); } + + return Result; +} + +RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) { + void *Buffer = Arena.Allocate(); + return new (Buffer) ContainerType(std::move(From)); +} + +RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) { + ContainerType Result; + std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(), + std::back_inserter(Result)); + return makePersistent(std::move(Result)); } const llvm::APSInt &RangeSet::getMinValue() const { @@ -147,22 +169,31 @@ const llvm::APSInt &RangeSet::getMinValue() const { const llvm::APSInt &RangeSet::getMaxValue() const { assert(!isEmpty()); - // NOTE: It's a shame that we can't implement 'getMaxValue' without scanning - // the whole tree to get to the last element. - // llvm::ImmutableSet should support decrement for 'end' iterators - // or reverse order iteration. - auto It = begin(); - for (auto End = end(); std::next(It) != End; ++It) { - } - return It->To(); + return std::prev(end())->To(); } -bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { - if (isEmpty()) { - // This range is already infeasible. +bool RangeSet::containsImpl(llvm::APSInt &Point) const { + if (isEmpty() || !pin(Point)) + return false; + + Range Dummy(Point); + const_iterator It = llvm::upper_bound(*this, Dummy); + if (It == begin()) return false; - } + return std::prev(It)->Includes(Point); +} + +bool RangeSet::pin(llvm::APSInt &Point) const { + APSIntType Type(getMinValue()); + if (Type.testInRange(Point, true) != APSIntType::RTR_Within) + return false; + + Type.apply(Point); + return true; +} + +bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { // This function has nine cases, the cartesian product of range-testing // both the upper and lower bounds against the symbol's type. // Each case requires a different pinning operation. 
@@ -243,129 +274,216 @@ bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { return true; } -// Returns a set containing the values in the receiving set, intersected with -// the closed range [Lower, Upper]. Unlike the Range type, this range uses -// modular arithmetic, corresponding to the common treatment of C integer -// overflow. Thus, if the Lower bound is greater than the Upper bound, the -// range is taken to wrap around. This is equivalent to taking the -// intersection with the two ranges [Min, Upper] and [Lower, Max], -// or, alternatively, /removing/ all integers between Upper and Lower. -RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, - llvm::APSInt Lower, llvm::APSInt Upper) const { - PrimRangeSet newRanges = F.getEmptySet(); - - if (isEmpty() || !pin(Lower, Upper)) - return newRanges; - - PrimRangeSet::iterator i = begin(), e = end(); - if (Lower <= Upper) - IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); - else { - // The order of the next two statements is important! - // IntersectInRange() does not reset the iteration state for i and e. - // Therefore, the lower range most be handled first. - IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e); - IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e); - } - - return newRanges; -} - -// Returns a set containing the values in the receiving set, intersected with -// the range set passed as parameter. 
-RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, - const RangeSet &Other) const { - PrimRangeSet newRanges = F.getEmptySet(); - - for (iterator i = Other.begin(), e = Other.end(); i != e; ++i) { - RangeSet newPiece = Intersect(BV, F, i->From(), i->To()); - for (iterator j = newPiece.begin(), ee = newPiece.end(); j != ee; ++j) { - newRanges = F.add(newRanges, *j); - } +RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower, + llvm::APSInt Upper) { + if (What.isEmpty() || !What.pin(Lower, Upper)) + return getEmptySet(); + + ContainerType DummyContainer; + + if (Lower <= Upper) { + // [Lower, Upper] is a regular range. + // + // Shortcut: check that there is even a possibility of the intersection + // by checking the two following situations: + // + // <---[ What ]---[------]------> + // Lower Upper + // -or- + // <----[------]----[ What ]----> + // Lower Upper + if (What.getMaxValue() < Lower || Upper < What.getMinValue()) + return getEmptySet(); + + DummyContainer.push_back( + Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper))); + } else { + // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX] + // + // Shortcut: check that there is even a possibility of the intersection + // by checking the following situation: + // + // <------]---[ What ]---[------> + // Upper Lower + if (What.getMaxValue() < Lower && Upper < What.getMinValue()) + return getEmptySet(); + + DummyContainer.push_back( + Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper))); + DummyContainer.push_back( + Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower))); } - return newRanges; + return intersect(*What.Impl, DummyContainer); } -// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus -// operation under the values of the type. -// -// We also handle MIN because applying unary minus to MIN does not change it. 
-// Example 1: -// char x = -128; // -128 is a MIN value in a range of 'char' -// char y = -x; // y: -128 -// Example 2: -// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' -// unsigned char y = -x; // y: 0 -// -// And it makes us to separate the range -// like [MIN, N] to [MIN, MIN] U [-N,MAX]. -// For instance, whole range is {-128..127} and subrange is [-128,-126], -// thus [-128,-127,-126,.....] negates to [-128,.....,126,127]. -// -// Negate restores disrupted ranges on bounds, -// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. -RangeSet RangeSet::Negate(BasicValueFactory &BV, Factory &F) const { - PrimRangeSet newRanges = F.getEmptySet(); +RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS, + const RangeSet::ContainerType &RHS) { + ContainerType Result; + Result.reserve(std::max(LHS.size(), RHS.size())); + + const_iterator First = LHS.begin(), Second = RHS.begin(), + FirstEnd = LHS.end(), SecondEnd = RHS.end(); + + const auto SwapIterators = [&First, &FirstEnd, &Second, &SecondEnd]() { + std::swap(First, Second); + std::swap(FirstEnd, SecondEnd); + }; + + // If we ran out of ranges in one set, but not in the other, + // it means that those elements are definitely not in the + // intersection. + while (First != FirstEnd && Second != SecondEnd) { + // We want to keep the following invariant at all times: + // + // ----[ First ----------------------> + // --------[ Second -----------------> + if (Second->From() < First->From()) + SwapIterators(); + + // Loop where the invariant holds: + do { + // Check for the following situation: + // + // ----[ First ]---------------------> + // ---------------[ Second ]---------> + // + // which means that... + if (Second->From() > First->To()) { + // ...First is not in the intersection. + // + // We should move on to the next range after First and break out of the + // loop because the invariant might not be true. 
+ ++First; + break; + } - if (isEmpty()) - return newRanges; + // We have a guaranteed intersection at this point! + // And this is the current situation: + // + // ----[ First ]-----------------> + // -------[ Second ------------------> + // + // Additionally, it definitely starts with Second->From(). + const llvm::APSInt &IntersectionStart = Second->From(); + + // It is important to know which of the two ranges' ends + // is greater. That "longer" range might have some other + // intersections, while the "shorter" range might not. + if (Second->To() > First->To()) { + // Here we make a decision to keep First as the "longer" + // range. + SwapIterators(); + } - const llvm::APSInt sampleValue = getMinValue(); - const llvm::APSInt &MIN = BV.getMinValue(sampleValue); - const llvm::APSInt &MAX = BV.getMaxValue(sampleValue); + // At this point, we have the following situation: + // + // ---- First ]--------------------> + // ---- Second ]--[ Second+1 ----------> + // + // We don't know the relationship between First->From and + // Second->From and we don't know whether Second+1 intersects + // with First. + // + // However, we know that [IntersectionStart, Second->To] is + // a part of the intersection... + Result.push_back(Range(IntersectionStart, Second->To())); + ++Second; + // ...and that the invariant will hold for a valid Second+1 + // because First->From <= Second->To < (Second+1)->From. + } while (Second != SecondEnd); + } + + if (Result.empty()) + return getEmptySet(); + + return makePersistent(std::move(Result)); +} + +RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) { + // Shortcut: let's see if the intersection is even possible. 
+ if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() || + RHS.getMaxValue() < LHS.getMinValue()) + return getEmptySet(); + + return intersect(*LHS.Impl, *RHS.Impl); +} + +RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) { + if (LHS.containsImpl(Point)) + return getRangeSet(ValueFactory.getValue(Point)); + + return getEmptySet(); +} + +RangeSet RangeSet::Factory::negate(RangeSet What) { + if (What.isEmpty()) + return getEmptySet(); + + const llvm::APSInt SampleValue = What.getMinValue(); + const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue); + const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue); + + ContainerType Result; + Result.reserve(What.size() + (SampleValue == MIN)); // Handle a special case for MIN value. - iterator i = begin(); - const llvm::APSInt &from = i->From(); - const llvm::APSInt &to = i->To(); - if (from == MIN) { - // If [from, to] are [MIN, MAX], then just return the same [MIN, MAX]. - if (to == MAX) { - newRanges = ranges; + const_iterator It = What.begin(); + const_iterator End = What.end(); + + const llvm::APSInt &From = It->From(); + const llvm::APSInt &To = It->To(); + + if (From == MIN) { + // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX]. + if (To == MAX) { + return What; + } + + const_iterator Last = std::prev(End); + + // Try to find and unite the following ranges: + // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. + if (Last->To() == MAX) { + // It means that in the original range we have ranges + // [MIN, A], ... , [B, MAX] + // And the result should be [MIN, -B], ..., [-A, MAX] + Result.emplace_back(MIN, ValueFactory.getValue(-Last->From())); + // We already negated Last, so we can skip it. + End = Last; } else { - // Add separate range for the lowest value. - newRanges = F.add(newRanges, Range(MIN, MIN)); - // Skip adding the second range in case when [from, to] are [MIN, MIN]. 
- if (to != MIN) { - newRanges = F.add(newRanges, Range(BV.getValue(-to), MAX)); - } + // Add a separate range for the lowest value. + Result.emplace_back(MIN, MIN); } + + // Skip adding the second range in case when [From, To] are [MIN, MIN]. + if (To != MIN) { + Result.emplace_back(ValueFactory.getValue(-To), MAX); + } + // Skip the first range in the loop. - ++i; + ++It; } // Negate all other ranges. - for (iterator e = end(); i != e; ++i) { + for (; It != End; ++It) { // Negate int values. - const llvm::APSInt &newFrom = BV.getValue(-i->To()); - const llvm::APSInt &newTo = BV.getValue(-i->From()); - // Add a negated range. - newRanges = F.add(newRanges, Range(newFrom, newTo)); - } + const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To()); + const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From()); - if (newRanges.isSingleton()) - return newRanges; - - // Try to find and unite next ranges: - // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. - iterator iter1 = newRanges.begin(); - iterator iter2 = std::next(iter1); - - if (iter1->To() == MIN && (iter2->From() - 1) == MIN) { - const llvm::APSInt &to = iter2->To(); - // remove adjacent ranges - newRanges = F.remove(newRanges, *iter1); - newRanges = F.remove(newRanges, *newRanges.begin()); - // add united range - newRanges = F.add(newRanges, Range(MIN, to)); + // Add a negated range. + Result.emplace_back(NewFrom, NewTo); } - return newRanges; + llvm::sort(Result); + return makePersistent(std::move(Result)); } -RangeSet RangeSet::Delete(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Point) const { +RangeSet RangeSet::Factory::deletePoint(RangeSet From, + const llvm::APSInt &Point) { + if (!From.contains(Point)) + return From; + llvm::APSInt Upper = Point; llvm::APSInt Lower = Point; @@ -373,22 +491,17 @@ RangeSet RangeSet::Delete(BasicValueFactory &BV, Factory &F, --Lower; // Notice that the lower bound is greater than the upper bound. 
- return Intersect(BV, F, Upper, Lower); + return intersect(From, Upper, Lower); } -void RangeSet::print(raw_ostream &os) const { - bool isFirst = true; - os << "{ "; - for (iterator i = begin(), e = end(); i != e; ++i) { - if (isFirst) - isFirst = false; - else - os << ", "; +void Range::dump(raw_ostream &OS) const { + OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']'; +} - os << '[' << i->From().toString(10) << ", " << i->To().toString(10) - << ']'; - } - os << " }"; +void RangeSet::dump(raw_ostream &OS) const { + OS << "{ "; + llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); }); + OS << " }"; } REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef) @@ -436,33 +549,43 @@ public: SymbolRef Sym); /// Merge classes for the given symbols and return a new state. - LLVM_NODISCARD static inline ProgramStateRef - merge(BasicValueFactory &BV, RangeSet::Factory &F, ProgramStateRef State, - SymbolRef First, SymbolRef Second); + LLVM_NODISCARD static inline ProgramStateRef merge(RangeSet::Factory &F, + ProgramStateRef State, + SymbolRef First, + SymbolRef Second); // Merge this class with the given class and return a new state. - LLVM_NODISCARD inline ProgramStateRef merge(BasicValueFactory &BV, - RangeSet::Factory &F, - ProgramStateRef State, - EquivalenceClass Other); + LLVM_NODISCARD inline ProgramStateRef + merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other); /// Return a set of class members for the given state. - LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State); + LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State) const; + /// Return true if the current class is trivial in the given state. - LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State); + /// A class is trivial if and only if there is not any member relations stored + /// to it in State/ClassMembers. 
+ /// An equivalence class with one member might seem as it does not hold any + /// meaningful information, i.e. that is a tautology. However, during the + /// removal of dead symbols we do not remove classes with one member for + /// resource and performance reasons. Consequently, a class with one member is + /// not necessarily trivial. It could happen that we have a class with two + /// members and then during the removal of dead symbols we remove one of its + /// members. In this case, the class is still non-trivial (it still has the + /// mappings in ClassMembers), even though it has only one member. + LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State) const; + /// Return true if the current class is trivial and its only member is dead. LLVM_NODISCARD inline bool isTriviallyDead(ProgramStateRef State, - SymbolReaper &Reaper); + SymbolReaper &Reaper) const; LLVM_NODISCARD static inline ProgramStateRef - markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, SymbolRef First, SymbolRef Second); + markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First, + SymbolRef Second); LLVM_NODISCARD static inline ProgramStateRef - markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, EquivalenceClass First, - EquivalenceClass Second); + markDisequal(RangeSet::Factory &F, ProgramStateRef State, + EquivalenceClass First, EquivalenceClass Second); LLVM_NODISCARD inline ProgramStateRef - markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, EquivalenceClass Other) const; + markDisequal(RangeSet::Factory &F, ProgramStateRef State, + EquivalenceClass Other) const; LLVM_NODISCARD static inline ClassSet getDisequalClasses(ProgramStateRef State, SymbolRef Sym); LLVM_NODISCARD inline ClassSet @@ -470,9 +593,23 @@ public: LLVM_NODISCARD inline ClassSet getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const; + LLVM_NODISCARD static inline Optional<bool> 
areEqual(ProgramStateRef State, + EquivalenceClass First, + EquivalenceClass Second); LLVM_NODISCARD static inline Optional<bool> areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second); + /// Iterate over all symbols and try to simplify them. + LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB, + RangeSet::Factory &F, + ProgramStateRef State, + EquivalenceClass Class); + + void dumpToStream(ProgramStateRef State, raw_ostream &os) const; + LLVM_DUMP_METHOD void dump(ProgramStateRef State) const { + dumpToStream(State, llvm::errs()); + } + /// Check equivalence data for consistency. LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool isClassDataConsistent(ProgramStateRef State); @@ -515,15 +652,13 @@ private: } static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State); - inline ProgramStateRef mergeImpl(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, SymbolSet Members, - EquivalenceClass Other, + inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State, + SymbolSet Members, EquivalenceClass Other, SymbolSet OtherMembers); - static inline void + static inline bool addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints, - BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, EquivalenceClass First, - EquivalenceClass Second); + RangeSet::Factory &F, ProgramStateRef State, + EquivalenceClass First, EquivalenceClass Second); /// This is a unique identifier of the class. 
uintptr_t ID; @@ -533,6 +668,15 @@ private: // Constraint functions //===----------------------------------------------------------------------===// +LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED bool +areFeasible(ConstraintRangeTy Constraints) { + return llvm::none_of( + Constraints, + [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) { + return ClassConstraint.second.isEmpty(); + }); +} + LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State, EquivalenceClass Class) { return State->get<ConstraintRange>(Class); @@ -543,70 +687,52 @@ LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State, return getConstraint(State, EquivalenceClass::find(State, Sym)); } +LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State, + EquivalenceClass Class, + RangeSet Constraint) { + return State->set<ConstraintRange>(Class, Constraint); +} + +LLVM_NODISCARD ProgramStateRef setConstraints(ProgramStateRef State, + ConstraintRangeTy Constraints) { + return State->set<ConstraintRange>(Constraints); +} + //===----------------------------------------------------------------------===// // Equality/diseqiality abstraction //===----------------------------------------------------------------------===// -/// A small helper structure representing symbolic equality. +/// A small helper function for detecting symbolic (dis)equality. /// /// Equality check can have different forms (like a == b or a - b) and this /// class encapsulates those away if the only thing the user wants to check - -/// whether it's equality/diseqiality or not and have an easy access to the -/// compared symbols. -struct EqualityInfo { -public: - SymbolRef Left, Right; - // true for equality and false for disequality. - bool IsEquality = true; - - void invert() { IsEquality = !IsEquality; } - /// Extract equality information from the given symbol and the constants. - /// - /// This function assumes the following expression Sym + Adjustment != Int. 
- /// It is a default because the most widespread case of the equality check - /// is (A == B) + 0 != 0. - static Optional<EqualityInfo> extract(SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment) { - // As of now, the only equality form supported is Sym + 0 != 0. - if (!Int.isNullValue() || !Adjustment.isNullValue()) - return llvm::None; - - return extract(Sym); - } - /// Extract equality information from the given symbol. - static Optional<EqualityInfo> extract(SymbolRef Sym) { - return EqualityExtractor().Visit(Sym); +/// whether it's equality/diseqiality or not. +/// +/// \returns true if assuming this Sym to be true means equality of operands +/// false if it means disequality of operands +/// None otherwise +Optional<bool> meansEquality(const SymSymExpr *Sym) { + switch (Sym->getOpcode()) { + case BO_Sub: + // This case is: A - B != 0 -> disequality check. + return false; + case BO_EQ: + // This case is: A == B != 0 -> equality check. + return true; + case BO_NE: + // This case is: A != B != 0 -> diseqiality check. + return false; + default: + return llvm::None; } - -private: - class EqualityExtractor - : public SymExprVisitor<EqualityExtractor, Optional<EqualityInfo>> { - public: - Optional<EqualityInfo> VisitSymSymExpr(const SymSymExpr *Sym) const { - switch (Sym->getOpcode()) { - case BO_Sub: - // This case is: A - B != 0 -> disequality check. - return EqualityInfo{Sym->getLHS(), Sym->getRHS(), false}; - case BO_EQ: - // This case is: A == B != 0 -> equality check. - return EqualityInfo{Sym->getLHS(), Sym->getRHS(), true}; - case BO_NE: - // This case is: A != B != 0 -> diseqiality check. - return EqualityInfo{Sym->getLHS(), Sym->getRHS(), false}; - default: - return llvm::None; - } - } - }; -}; +} //===----------------------------------------------------------------------===// // Intersection functions //===----------------------------------------------------------------------===// template <class SecondTy, class... 
RestTy> -LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, - RangeSet::Factory &F, RangeSet Head, +LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, SecondTy Second, RestTy... Tail); template <class... RangeTy> struct IntersectionTraits; @@ -629,15 +755,14 @@ struct IntersectionTraits<OptionalOrPointer, TailTy...> { }; template <class EndTy> -LLVM_NODISCARD inline EndTy intersect(BasicValueFactory &BV, - RangeSet::Factory &F, EndTy End) { +LLVM_NODISCARD inline EndTy intersect(RangeSet::Factory &F, EndTy End) { // If the list contains only RangeSet or Optional<RangeSet>, simply return // that range set. return End; } LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED inline Optional<RangeSet> -intersect(BasicValueFactory &BV, RangeSet::Factory &F, const RangeSet *End) { +intersect(RangeSet::Factory &F, const RangeSet *End) { // This is an extraneous conversion from a raw pointer into Optional<RangeSet> if (End) { return *End; @@ -646,25 +771,23 @@ intersect(BasicValueFactory &BV, RangeSet::Factory &F, const RangeSet *End) { } template <class... RestTy> -LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, - RangeSet::Factory &F, RangeSet Head, +LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, RangeSet Second, RestTy... Tail) { // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version // of the function and can be sure that the result is RangeSet. - return intersect(BV, F, Head.Intersect(BV, F, Second), Tail...); + return intersect(F, F.intersect(Head, Second), Tail...); } template <class SecondTy, class... RestTy> -LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, - RangeSet::Factory &F, RangeSet Head, +LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, SecondTy Second, RestTy... Tail) { if (Second) { // Here we call the <RangeSet,RangeSet,...> version of the function... 
- return intersect(BV, F, Head, *Second, Tail...); + return intersect(F, Head, *Second, Tail...); } // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which // means that the result is definitely RangeSet. - return intersect(BV, F, Head, Tail...); + return intersect(F, Head, Tail...); } /// Main generic intersect function. @@ -689,12 +812,12 @@ LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, template <class HeadTy, class SecondTy, class... RestTy> LLVM_NODISCARD inline typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type - intersect(BasicValueFactory &BV, RangeSet::Factory &F, HeadTy Head, - SecondTy Second, RestTy... Tail) { + intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second, + RestTy... Tail) { if (Head) { - return intersect(BV, F, *Head, Second, Tail...); + return intersect(F, *Head, Second, Tail...); } - return intersect(BV, F, Second, Tail...); + return intersect(F, Second, Tail...); } //===----------------------------------------------------------------------===// @@ -710,9 +833,9 @@ class SymbolicRangeInferrer : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> { public: template <class SourceType> - static RangeSet inferRange(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, SourceType Origin) { - SymbolicRangeInferrer Inferrer(BV, F, State); + static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State, + SourceType Origin) { + SymbolicRangeInferrer Inferrer(F, State); return Inferrer.infer(Origin); } @@ -733,13 +856,18 @@ public: } RangeSet VisitSymSymExpr(const SymSymExpr *Sym) { - return VisitBinaryOperator(Sym); + return intersect( + RangeFactory, + // If Sym is (dis)equality, we might have some information + // on that in our equality classes data structure. + getRangeForEqualities(Sym), + // And we should always check what we can get from the operands. 
+ VisitBinaryOperator(Sym)); } private: - SymbolicRangeInferrer(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef S) - : ValueFactory(BV), RangeFactory(F), State(S) {} + SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S) + : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {} /// Infer range information from the given integer constant. /// @@ -763,26 +891,25 @@ private: } RangeSet infer(SymbolRef Sym) { - if (Optional<RangeSet> ConstraintBasedRange = intersect( - ValueFactory, RangeFactory, getConstraint(State, Sym), - // If Sym is a difference of symbols A - B, then maybe we have range - // set stored for B - A. - // - // If we have range set stored for both A - B and B - A then - // calculate the effective range set by intersecting the range set - // for A - B and the negated range set of B - A. - getRangeForNegatedSub(Sym), getRangeForEqualities(Sym))) { - return *ConstraintBasedRange; - } - - // If Sym is a comparison expression (except <=>), - // find any other comparisons with the same operands. - // See function description. - if (Optional<RangeSet> CmpRangeSet = getRangeForComparisonSymbol(Sym)) { - return *CmpRangeSet; - } - - return Visit(Sym); + return intersect( + RangeFactory, + // Of course, we should take the constraint directly associated with + // this symbol into consideration. + getConstraint(State, Sym), + // If Sym is a difference of symbols A - B, then maybe we have range + // set stored for B - A. + // + // If we have range set stored for both A - B and B - A then + // calculate the effective range set by intersecting the range set + // for A - B and the negated range set of B - A. + getRangeForNegatedSub(Sym), + // If Sym is a comparison expression (except <=>), + // find any other comparisons with the same operands. + // See function description. + getRangeForComparisonSymbol(Sym), + // Apart from the Sym itself, we can infer quite a lot if we look + // into subexpressions of Sym. 
+ Visit(Sym)); } RangeSet infer(EquivalenceClass Class) { @@ -940,7 +1067,7 @@ private: /// Return a range set subtracting zero from \p Domain. RangeSet assumeNonZero(RangeSet Domain, QualType T) { APSIntType IntType = ValueFactory.getAPSIntType(T); - return Domain.Delete(ValueFactory, RangeFactory, IntType.getZeroValue()); + return RangeFactory.deletePoint(Domain, IntType.getZeroValue()); } // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to @@ -963,7 +1090,7 @@ private: SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T); if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym)) { - return NegatedRange->Negate(ValueFactory, RangeFactory); + return RangeFactory.negate(*NegatedRange); } } } @@ -1054,17 +1181,21 @@ private: return llvm::None; } - Optional<RangeSet> getRangeForEqualities(SymbolRef Sym) { - Optional<EqualityInfo> Equality = EqualityInfo::extract(Sym); + Optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) { + Optional<bool> Equality = meansEquality(Sym); if (!Equality) return llvm::None; - if (Optional<bool> AreEqual = EquivalenceClass::areEqual( - State, Equality->Left, Equality->Right)) { - if (*AreEqual == Equality->IsEquality) { + if (Optional<bool> AreEqual = + EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) { + // Here we cover two cases at once: + // * if Sym is equality and its operands are known to be equal -> true + // * if Sym is disequality and its operands are disequal -> true + if (*AreEqual == *Equality) { return getTrueRange(Sym->getType()); } + // Opposite combinations result in false. 
return getFalseRange(Sym->getType()); } @@ -1251,13 +1382,215 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS, } //===----------------------------------------------------------------------===// +// Constraint assignment logic +//===----------------------------------------------------------------------===// + +/// ConstraintAssignorBase is a small utility class that unifies visitor +/// for ranges with a visitor for constraints (rangeset/range/constant). +/// +/// It is designed to have one derived class, but generally it can have more. +/// Derived class can control which types we handle by defining methods of the +/// following form: +/// +/// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym, +/// CONSTRAINT Constraint); +/// +/// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.) +/// CONSTRAINT is the type of constraint (RangeSet/Range/Const) +/// return value signifies whether we should try other handle methods +/// (i.e. false would mean to stop right after calling this method) +template <class Derived> class ConstraintAssignorBase { +public: + using Const = const llvm::APSInt &; + +#define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint) + +#define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \ + if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \ + return false + + void assign(SymbolRef Sym, RangeSet Constraint) { + assignImpl(Sym, Constraint); + } + + bool assignImpl(SymbolRef Sym, RangeSet Constraint) { + switch (Sym->getKind()) { +#define SYMBOL(Id, Parent) \ + case SymExpr::Id##Kind: \ + DISPATCH(Id); +#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def" + } + llvm_unreachable("Unknown SymExpr kind!"); + } + +#define DEFAULT_ASSIGN(Id) \ + bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \ + return true; \ + } \ + bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \ + bool assign##Id##To##Const(const Id *Sym, 
Const Constraint) { return true; } + + // When we dispatch for constraint types, we first try to check + // if the new constraint is the constant and try the corresponding + // assignor methods. If it didn't interrupt, we can proceed to the + // range, and finally to the range set. +#define CONSTRAINT_DISPATCH(Id) \ + if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \ + ASSIGN(Id, Const, Sym, *Const); \ + } \ + if (Constraint.size() == 1) { \ + ASSIGN(Id, Range, Sym, *Constraint.begin()); \ + } \ + ASSIGN(Id, RangeSet, Sym, Constraint) + + // Our internal assign method first tries to call assignor methods for all + // constraint types that apply. And if not interrupted, continues with its + // parent class. +#define SYMBOL(Id, Parent) \ + bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \ + CONSTRAINT_DISPATCH(Id); \ + DISPATCH(Parent); \ + } \ + DEFAULT_ASSIGN(Id) +#define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent) +#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def" + + // Default implementations for the top class that doesn't have parents. + bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) { + CONSTRAINT_DISPATCH(SymExpr); + return true; + } + DEFAULT_ASSIGN(SymExpr); + +#undef DISPATCH +#undef CONSTRAINT_DISPATCH +#undef DEFAULT_ASSIGN +#undef ASSIGN +}; + +/// A little component aggregating all of the reasoning we have about +/// assigning new constraints to symbols. +/// +/// The main purpose of this class is to associate constraints to symbols, +/// and impose additional constraints on other symbols, when we can imply +/// them. +/// +/// It has a nice symmetry with SymbolicRangeInferrer. When the latter +/// can provide more precise ranges by looking into the operands of the +/// expression in question, ConstraintAssignor looks into the operands +/// to see if we can imply more from the new constraint. 
+class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> { +public: + template <class ClassOrSymbol> + LLVM_NODISCARD static ProgramStateRef + assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F, + ClassOrSymbol CoS, RangeSet NewConstraint) { + if (!State || NewConstraint.isEmpty()) + return nullptr; + + ConstraintAssignor Assignor{State, Builder, F}; + return Assignor.assign(CoS, NewConstraint); + } + + inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint); + inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym, + RangeSet Constraint); + +private: + ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder, + RangeSet::Factory &F) + : State(State), Builder(Builder), RangeFactory(F) {} + using Base = ConstraintAssignorBase<ConstraintAssignor>; + + /// Base method for handling new constraints for symbols. + LLVM_NODISCARD ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) { + // All constraints are actually associated with equivalence classes, and + // that's what we are going to do first. + State = assign(EquivalenceClass::find(State, Sym), NewConstraint); + if (!State) + return nullptr; + + // And after that we can check what other things we can get from this + // constraint. + Base::assign(Sym, NewConstraint); + return State; + } + + /// Base method for handling new constraints for classes. + LLVM_NODISCARD ProgramStateRef assign(EquivalenceClass Class, + RangeSet NewConstraint) { + // There is a chance that we might need to update constraints for the + // classes that are known to be disequal to Class. + // + // In order for this to be even possible, the new constraint should + // be simply a constant because we can't reason about range disequalities. 
+ if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) { + + ConstraintRangeTy Constraints = State->get<ConstraintRange>(); + ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>(); + + // Add new constraint. + Constraints = CF.add(Constraints, Class, NewConstraint); + + for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) { + RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange( + RangeFactory, State, DisequalClass); + + UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point); + + // If we end up with at least one of the disequal classes to be + // constrained with an empty range-set, the state is infeasible. + if (UpdatedConstraint.isEmpty()) + return nullptr; + + Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint); + } + assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " + "a state with infeasible constraints"); + + return setConstraints(State, Constraints); + } + + return setConstraint(State, Class, NewConstraint); + } + + ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS, + SymbolRef RHS) { + return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS); + } + + ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS, + SymbolRef RHS) { + return EquivalenceClass::merge(RangeFactory, State, LHS, RHS); + } + + LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) { + assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here"); + + if (Constraint.getConcreteValue()) + return !Constraint.getConcreteValue()->isNullValue(); + + APSIntType T{Constraint.getMinValue()}; + Const Zero = T.getZeroValue(); + if (!Constraint.contains(Zero)) + return true; + + return llvm::None; + } + + ProgramStateRef State; + SValBuilder &Builder; + RangeSet::Factory &RangeFactory; +}; + +//===----------------------------------------------------------------------===// // Constraint manager implementation details 
//===----------------------------------------------------------------------===// class RangeConstraintManager : public RangedConstraintManager { public: RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) - : RangedConstraintManager(EE, SVB) {} + : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} //===------------------------------------------------------------------===// // Implementation for interface from ConstraintManager. @@ -1284,6 +1617,15 @@ public: void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", unsigned int Space = 0, bool IsDot = false) const override; + void printConstraints(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + void printDisequalities(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; //===------------------------------------------------------------------===// // Implementation for interface from RangedConstraintManager. 
@@ -1326,6 +1668,10 @@ private: RangeSet getRange(ProgramStateRef State, SymbolRef Sym); RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); + ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, + RangeSet Range); + ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, + RangeSet Range); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -1342,88 +1688,63 @@ private: RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, const llvm::APSInt &Adjustment); +}; - //===------------------------------------------------------------------===// - // Equality tracking implementation - //===------------------------------------------------------------------===// - - ProgramStateRef trackEQ(RangeSet NewConstraint, ProgramStateRef State, - SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment) { - return track<true>(NewConstraint, State, Sym, Int, Adjustment); +bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, + const llvm::APSInt &Constraint) { + llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses; + // Iterate over all equivalence classes and try to simplify them. + ClassMembersTy Members = State->get<ClassMembers>(); + for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) { + EquivalenceClass Class = ClassToSymbolSet.first; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + SimplifiedClasses.insert(Class); } - ProgramStateRef trackNE(RangeSet NewConstraint, ProgramStateRef State, - SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment) { - return track<false>(NewConstraint, State, Sym, Int, Adjustment); + // Trivial equivalence classes (those that have only one symbol member) are + // not stored in the State. Thus, we must skim through the constraints as + // well. And we try to simplify symbols in the constraints. 
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>(); + for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) { + EquivalenceClass Class = ClassConstraint.first; + if (SimplifiedClasses.count(Class)) // Already simplified. + continue; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; } - template <bool EQ> - ProgramStateRef track(RangeSet NewConstraint, ProgramStateRef State, - SymbolRef Sym, const llvm::APSInt &Int, - const llvm::APSInt &Adjustment) { - if (NewConstraint.isEmpty()) - // This is an infeasible assumption. - return nullptr; + return true; +} - ProgramStateRef NewState = setConstraint(State, Sym, NewConstraint); - if (auto Equality = EqualityInfo::extract(Sym, Int, Adjustment)) { - // If the original assumption is not Sym + Adjustment !=/</> Int, - // we should invert IsEquality flag. - Equality->IsEquality = Equality->IsEquality != EQ; - return track(NewState, *Equality); - } +bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym, + RangeSet Constraint) { + Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint); - return NewState; - } + if (!ConstraintAsBool) + return true; - ProgramStateRef track(ProgramStateRef State, EqualityInfo ToTrack) { - if (ToTrack.IsEquality) { - return trackEquality(State, ToTrack.Left, ToTrack.Right); + if (Optional<bool> Equality = meansEquality(Sym)) { + // Here we cover two cases: + // * if Sym is equality and the new constraint is true -> Sym's operands + // should be marked as equal + // * if Sym is disequality and the new constraint is false -> Sym's + // operands should be also marked as equal + if (*Equality == *ConstraintAsBool) { + State = trackEquality(State, Sym->getLHS(), Sym->getRHS()); + } else { + // Other combinations leave as with disequal operands. 
+ State = trackDisequality(State, Sym->getLHS(), Sym->getRHS()); } - return trackDisequality(State, ToTrack.Left, ToTrack.Right); - } - ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS, - SymbolRef RHS) { - return EquivalenceClass::markDisequal(getBasicVals(), F, State, LHS, RHS); - } - - ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS, - SymbolRef RHS) { - return EquivalenceClass::merge(getBasicVals(), F, State, LHS, RHS); - } - - LLVM_NODISCARD inline ProgramStateRef setConstraint(ProgramStateRef State, - EquivalenceClass Class, - RangeSet Constraint) { - ConstraintRangeTy Constraints = State->get<ConstraintRange>(); - ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>(); - - // Add new constraint. - Constraints = CF.add(Constraints, Class, Constraint); - - // There is a chance that we might need to update constraints for the - // classes that are known to be disequal to Class. - // - // In order for this to be even possible, the new constraint should - // be simply a constant because we can't reason about range disequalities. 
- if (const llvm::APSInt *Point = Constraint.getConcreteValue()) - for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) { - RangeSet UpdatedConstraint = - getRange(State, DisequalClass).Delete(getBasicVals(), F, *Point); - Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint); - } - - return State->set<ConstraintRange>(Constraints); + if (!State) + return false; } - LLVM_NODISCARD inline ProgramStateRef - setConstraint(ProgramStateRef State, SymbolRef Sym, RangeSet Constraint) { - return setConstraint(State, EquivalenceClass::find(State, Sym), Constraint); - } -}; + return true; +} } // end anonymous namespace @@ -1455,8 +1776,19 @@ ConstraintMap ento::getConstraintMap(ProgramStateRef State) { // EqualityClass implementation details //===----------------------------------------------------------------------===// +LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State, + raw_ostream &os) const { + SymbolSet ClassMembers = getClassMembers(State); + for (const SymbolRef &MemberSym : ClassMembers) { + MemberSym->dump(); + os << "\n"; + } +} + inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State, SymbolRef Sym) { + assert(State && "State should not be null"); + assert(Sym && "Symbol should not be null"); // We store far from all Symbol -> Class mappings if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym)) return *NontrivialClass; @@ -1465,19 +1797,17 @@ inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State, return Sym; } -inline ProgramStateRef EquivalenceClass::merge(BasicValueFactory &BV, - RangeSet::Factory &F, +inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First, SymbolRef Second) { EquivalenceClass FirstClass = find(State, First); EquivalenceClass SecondClass = find(State, Second); - return FirstClass.merge(BV, F, State, SecondClass); + return FirstClass.merge(F, State, SecondClass); } -inline ProgramStateRef 
EquivalenceClass::merge(BasicValueFactory &BV, - RangeSet::Factory &F, +inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other) { // It is already the same class. @@ -1505,15 +1835,14 @@ inline ProgramStateRef EquivalenceClass::merge(BasicValueFactory &BV, // its members. Merging is not a trivial operation, so it's easier to // merge the smaller class into the bigger one. if (Members.getHeight() >= OtherMembers.getHeight()) { - return mergeImpl(BV, F, State, Members, Other, OtherMembers); + return mergeImpl(F, State, Members, Other, OtherMembers); } else { - return Other.mergeImpl(BV, F, State, OtherMembers, *this, Members); + return Other.mergeImpl(F, State, OtherMembers, *this, Members); } } inline ProgramStateRef -EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, - RangeSet::Factory &RangeFactory, +EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory, ProgramStateRef State, SymbolSet MyMembers, EquivalenceClass Other, SymbolSet OtherMembers) { // Essentially what we try to recreate here is some kind of union-find @@ -1536,7 +1865,7 @@ EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, // Intersection here makes perfect sense because both of these constraints // must hold for the whole new class. if (Optional<RangeSet> NewClassConstraint = - intersect(ValueFactory, RangeFactory, getConstraint(State, *this), + intersect(RangeFactory, getConstraint(State, *this), getConstraint(State, Other))) { // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because // range inferrer shouldn't generate ranges incompatible with @@ -1552,6 +1881,9 @@ EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, // Assign new constraints for this class. 
Constraints = CRF.add(Constraints, *this, *NewClassConstraint); + assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " + "a state with infeasible constraints"); + State = State->set<ConstraintRange>(Constraints); } @@ -1585,6 +1917,11 @@ EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, // 4. Update disequality relations ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF); + // We are about to merge two classes but they are already known to be + // non-equal. This is a contradiction. + if (DisequalToOther.contains(*this)) + return nullptr; + if (!DisequalToOther.isEmpty()) { ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF); DisequalityInfo = DF.remove(DisequalityInfo, Other); @@ -1622,7 +1959,7 @@ EquivalenceClass::getMembersFactory(ProgramStateRef State) { return State->get_context<SymbolSet>(); } -SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) { +SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const { if (const SymbolSet *Members = State->get<ClassMembers>(*this)) return *Members; @@ -1632,34 +1969,31 @@ SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) { return F.add(F.getEmptySet(), getRepresentativeSymbol()); } -bool EquivalenceClass::isTrivial(ProgramStateRef State) { +bool EquivalenceClass::isTrivial(ProgramStateRef State) const { return State->get<ClassMembers>(*this) == nullptr; } bool EquivalenceClass::isTriviallyDead(ProgramStateRef State, - SymbolReaper &Reaper) { + SymbolReaper &Reaper) const { return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol()); } -inline ProgramStateRef EquivalenceClass::markDisequal(BasicValueFactory &VF, - RangeSet::Factory &RF, +inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, SymbolRef First, SymbolRef Second) { - return markDisequal(VF, RF, State, find(State, First), find(State, Second)); + return markDisequal(RF, State, find(State, 
First), find(State, Second)); } -inline ProgramStateRef EquivalenceClass::markDisequal(BasicValueFactory &VF, - RangeSet::Factory &RF, +inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First, EquivalenceClass Second) { - return First.markDisequal(VF, RF, State, Second); + return First.markDisequal(RF, State, Second); } inline ProgramStateRef -EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, - ProgramStateRef State, +EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass Other) const { // If we know that two classes are equal, we can only produce an infeasible // state. @@ -1672,10 +2006,14 @@ EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, // Disequality is a symmetric relation, so if we mark A as disequal to B, // we should also mark B as disequalt to A. - addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, *this, - Other); - addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, Other, - *this); + if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this, + Other) || + !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other, + *this)) + return nullptr; + + assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " + "a state with infeasible constraints"); State = State->set<DisequalityMap>(DisequalityInfo); State = State->set<ConstraintRange>(Constraints); @@ -1683,10 +2021,10 @@ EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, return State; } -inline void EquivalenceClass::addToDisequalityInfo( +inline bool EquivalenceClass::addToDisequalityInfo( DisequalityMapTy &Info, ConstraintRangeTy &Constraints, - BasicValueFactory &VF, RangeSet::Factory &RF, ProgramStateRef State, - EquivalenceClass First, EquivalenceClass Second) { + RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First, + 
EquivalenceClass Second) { // 1. Get all of the required factories. DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>(); @@ -1709,19 +2047,31 @@ inline void EquivalenceClass::addToDisequalityInfo( if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) { RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange( - VF, RF, State, First.getRepresentativeSymbol()); + RF, State, First.getRepresentativeSymbol()); + + FirstConstraint = RF.deletePoint(FirstConstraint, *Point); + + // If the First class is about to be constrained with an empty + // range-set, the state is infeasible. + if (FirstConstraint.isEmpty()) + return false; - FirstConstraint = FirstConstraint.Delete(VF, RF, *Point); Constraints = CRF.add(Constraints, First, FirstConstraint); } + + return true; } inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State, SymbolRef FirstSym, SymbolRef SecondSym) { - EquivalenceClass First = find(State, FirstSym); - EquivalenceClass Second = find(State, SecondSym); + return EquivalenceClass::areEqual(State, find(State, FirstSym), + find(State, SecondSym)); +} +inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State, + EquivalenceClass First, + EquivalenceClass Second) { // The same equivalence class => symbols are equal. if (First == Second) return true; @@ -1736,6 +2086,29 @@ inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State, return llvm::None; } +// Iterate over all symbols and try to simplify them. Once a symbol is +// simplified then we check if we can merge the simplified symbol's equivalence +// class to this class. This way, we simplify not just the symbols but the +// classes as well: we strive to keep the number of the classes to be the +// absolute minimum. 
+LLVM_NODISCARD ProgramStateRef +EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, + ProgramStateRef State, EquivalenceClass Class) { + SymbolSet ClassMembers = Class.getClassMembers(State); + for (const SymbolRef &MemberSym : ClassMembers) { + SymbolRef SimplifiedMemberSym = ento::simplify(State, MemberSym); + if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) { + // The simplified symbol should be the member of the original Class, + // however, it might be in another existing class at the moment. We + // have to merge these classes. + State = merge(F, State, MemberSym, SimplifiedMemberSym); + if (!State) + return nullptr; + } + } + return State; +} + inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State, SymbolRef Sym) { return find(State, Sym).getDisequalClasses(State); @@ -1862,7 +2235,7 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, llvm::APSInt Zero = IntType.getZeroValue(); // Check if zero is in the set of possible values. - if (Ranges->Intersect(BV, F, Zero, Zero).isEmpty()) + if (!Ranges->contains(Zero)) return false; // Zero is a possible value, but it is not the /only/ possible value. 
@@ -2017,12 +2390,13 @@ RangeConstraintManager::removeDeadBindings(ProgramStateRef State, RangeSet RangeConstraintManager::getRange(ProgramStateRef State, SymbolRef Sym) { - return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Sym); + return SymbolicRangeInferrer::inferRange(F, State, Sym); } -RangeSet RangeConstraintManager::getRange(ProgramStateRef State, - EquivalenceClass Class) { - return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Class); +ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State, + SymbolRef Sym, + RangeSet Range) { + return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range); } //===------------------------------------------------------------------------=== @@ -2047,10 +2421,10 @@ RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym, return St; llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment; + RangeSet New = getRange(St, Sym); + New = F.deletePoint(New, Point); - RangeSet New = getRange(St, Sym).Delete(getBasicVals(), F, Point); - - return trackNE(New, St, Sym, Int, Adjustment); + return setRange(St, Sym, New); } ProgramStateRef @@ -2064,9 +2438,10 @@ RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym, // [Int-Adjustment, Int-Adjustment] llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment; - RangeSet New = getRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt); + RangeSet New = getRange(St, Sym); + New = F.intersect(New, AdjInt); - return trackEQ(New, St, Sym, Int, Adjustment); + return setRange(St, Sym, New); } RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St, @@ -2094,7 +2469,8 @@ RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St, llvm::APSInt Upper = ComparisonVal - Adjustment; --Upper; - return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); + RangeSet Result = getRange(St, Sym); + return F.intersect(Result, Lower, Upper); } ProgramStateRef @@ -2102,7 +2478,7 @@ 
RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymLTRange(St, Sym, Int, Adjustment); - return trackNE(New, St, Sym, Int, Adjustment); + return setRange(St, Sym, New); } RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St, @@ -2130,7 +2506,8 @@ RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St, llvm::APSInt Upper = Max - Adjustment; ++Lower; - return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); + RangeSet SymRange = getRange(St, Sym); + return F.intersect(SymRange, Lower, Upper); } ProgramStateRef @@ -2138,7 +2515,7 @@ RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymGTRange(St, Sym, Int, Adjustment); - return trackNE(New, St, Sym, Int, Adjustment); + return setRange(St, Sym, New); } RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St, @@ -2166,7 +2543,8 @@ RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St, llvm::APSInt Lower = ComparisonVal - Adjustment; llvm::APSInt Upper = Max - Adjustment; - return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); + RangeSet SymRange = getRange(St, Sym); + return F.intersect(SymRange, Lower, Upper); } ProgramStateRef @@ -2174,7 +2552,7 @@ RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymGERange(St, Sym, Int, Adjustment); - return New.isEmpty() ? 
nullptr : setConstraint(St, Sym, New); + return setRange(St, Sym, New); } RangeSet @@ -2202,7 +2580,8 @@ RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS, llvm::APSInt Lower = Min - Adjustment; llvm::APSInt Upper = ComparisonVal - Adjustment; - return RS().Intersect(getBasicVals(), F, Lower, Upper); + RangeSet Default = RS(); + return F.intersect(Default, Lower, Upper); } RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St, @@ -2217,7 +2596,7 @@ RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymLERange(St, Sym, Int, Adjustment); - return New.isEmpty() ? nullptr : setConstraint(St, Sym, New); + return setRange(St, Sym, New); } ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange( @@ -2227,7 +2606,7 @@ ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange( if (New.isEmpty()) return nullptr; RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment); - return Out.isEmpty() ? nullptr : setConstraint(State, Sym, Out); + return setRange(State, Sym, Out); } ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( @@ -2235,8 +2614,8 @@ ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( const llvm::APSInt &To, const llvm::APSInt &Adjustment) { RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment); RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment); - RangeSet New(RangeLT.addRange(F, RangeGT)); - return New.isEmpty() ? 
nullptr : setConstraint(State, Sym, New); + RangeSet New(F.add(RangeLT, RangeGT)); + return setRange(State, Sym, New); } //===----------------------------------------------------------------------===// @@ -2246,6 +2625,23 @@ ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, const char *NL, unsigned int Space, bool IsDot) const { + printConstraints(Out, State, NL, Space, IsDot); + printEquivalenceClasses(Out, State, NL, Space, IsDot); + printDisequalities(Out, State, NL, Space, IsDot); +} + +static std::string toString(const SymbolRef &Sym) { + std::string S; + llvm::raw_string_ostream O(S); + Sym->dumpToStream(O); + return O.str(); +} + +void RangeConstraintManager::printConstraints(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + unsigned int Space, + bool IsDot) const { ConstraintRangeTy Constraints = State->get<ConstraintRange>(); Indent(Out, Space, IsDot) << "\"constraints\": "; @@ -2254,25 +2650,162 @@ void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, return; } + std::map<std::string, RangeSet> OrderedConstraints; + for (std::pair<EquivalenceClass, RangeSet> P : Constraints) { + SymbolSet ClassMembers = P.first.getClassMembers(State); + for (const SymbolRef &ClassMember : ClassMembers) { + bool insertion_took_place; + std::tie(std::ignore, insertion_took_place) = + OrderedConstraints.insert({toString(ClassMember), P.second}); + assert(insertion_took_place && + "two symbols should not have the same dump"); + } + } + ++Space; Out << '[' << NL; bool First = true; - for (std::pair<EquivalenceClass, RangeSet> P : Constraints) { - SymbolSet ClassMembers = P.first.getClassMembers(State); + for (std::pair<std::string, RangeSet> P : OrderedConstraints) { + if (First) { + First = false; + } else { + Out << ','; + Out << NL; + } + Indent(Out, Space, IsDot) + << "{ \"symbol\": \"" << P.first << "\", \"range\": \""; + 
P.second.dump(Out); + Out << "\" }"; + } + Out << NL; - // We can print the same constraint for every class member. - for (SymbolRef ClassMember : ClassMembers) { - if (First) { - First = false; - } else { - Out << ','; - Out << NL; + --Space; + Indent(Out, Space, IsDot) << "]," << NL; +} + +static std::string toString(ProgramStateRef State, EquivalenceClass Class) { + SymbolSet ClassMembers = Class.getClassMembers(State); + llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(), + ClassMembers.end()); + llvm::sort(ClassMembersSorted, + [](const SymbolRef &LHS, const SymbolRef &RHS) { + return toString(LHS) < toString(RHS); + }); + + bool FirstMember = true; + + std::string Str; + llvm::raw_string_ostream Out(Str); + Out << "[ "; + for (SymbolRef ClassMember : ClassMembersSorted) { + if (FirstMember) + FirstMember = false; + else + Out << ", "; + Out << "\"" << ClassMember << "\""; + } + Out << " ]"; + return Out.str(); +} + +void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + unsigned int Space, + bool IsDot) const { + ClassMembersTy Members = State->get<ClassMembers>(); + + Indent(Out, Space, IsDot) << "\"equivalence_classes\": "; + if (Members.isEmpty()) { + Out << "null," << NL; + return; + } + + std::set<std::string> MembersStr; + for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) + MembersStr.insert(toString(State, ClassToSymbolSet.first)); + + ++Space; + Out << '[' << NL; + bool FirstClass = true; + for (const std::string &Str : MembersStr) { + if (FirstClass) { + FirstClass = false; + } else { + Out << ','; + Out << NL; + } + Indent(Out, Space, IsDot); + Out << Str; + } + Out << NL; + + --Space; + Indent(Out, Space, IsDot) << "]," << NL; +} + +void RangeConstraintManager::printDisequalities(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + unsigned int Space, + bool IsDot) const { + DisequalityMapTy Disequalities = State->get<DisequalityMap>(); 
+ + Indent(Out, Space, IsDot) << "\"disequality_info\": "; + if (Disequalities.isEmpty()) { + Out << "null," << NL; + return; + } + + // Transform the disequality info to an ordered map of + // [string -> (ordered set of strings)] + using EqClassesStrTy = std::set<std::string>; + using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>; + DisequalityInfoStrTy DisequalityInfoStr; + for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) { + EquivalenceClass Class = ClassToDisEqSet.first; + ClassSet DisequalClasses = ClassToDisEqSet.second; + EqClassesStrTy MembersStr; + for (EquivalenceClass DisEqClass : DisequalClasses) + MembersStr.insert(toString(State, DisEqClass)); + DisequalityInfoStr.insert({toString(State, Class), MembersStr}); + } + + ++Space; + Out << '[' << NL; + bool FirstClass = true; + for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet : + DisequalityInfoStr) { + const std::string &Class = ClassToDisEqSet.first; + if (FirstClass) { + FirstClass = false; + } else { + Out << ','; + Out << NL; + } + Indent(Out, Space, IsDot) << "{" << NL; + unsigned int DisEqSpace = Space + 1; + Indent(Out, DisEqSpace, IsDot) << "\"class\": "; + Out << Class; + const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second; + if (!DisequalClasses.empty()) { + Out << "," << NL; + Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL; + unsigned int DisEqClassSpace = DisEqSpace + 1; + Indent(Out, DisEqClassSpace, IsDot); + bool FirstDisEqClass = true; + for (const std::string &DisEqClass : DisequalClasses) { + if (FirstDisEqClass) { + FirstDisEqClass = false; + } else { + Out << ',' << NL; + Indent(Out, DisEqClassSpace, IsDot); + } + Out << DisEqClass; } - Indent(Out, Space, IsDot) - << "{ \"symbol\": \"" << ClassMember << "\", \"range\": \""; - P.second.print(Out); - Out << "\" }"; + Out << "]" << NL; } + Indent(Out, Space, IsDot) << "}"; } Out << NL; diff --git a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp 
b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp index e7a03e6ed582..d227c025fb20 100644 --- a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp @@ -23,12 +23,14 @@ RangedConstraintManager::~RangedConstraintManager() {} ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, SymbolRef Sym, bool Assumption) { + Sym = simplify(State, Sym); + // Handle SymbolData. - if (isa<SymbolData>(Sym)) { + if (isa<SymbolData>(Sym)) return assumeSymUnsupported(State, Sym, Assumption); - // Handle symbolic expression. - } else if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(Sym)) { + // Handle symbolic expression. + if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(Sym)) { // We can only simplify expressions whose RHS is an integer. BinaryOperator::Opcode op = SIE->getOpcode(); @@ -93,6 +95,9 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, ProgramStateRef RangedConstraintManager::assumeSymInclusiveRange( ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, const llvm::APSInt &To, bool InRange) { + + Sym = simplify(State, Sym); + // Get the type used for calculating wraparound. 
BasicValueFactory &BVF = getBasicVals(); APSIntType WraparoundType = BVF.getAPSIntType(Sym->getType()); @@ -121,6 +126,8 @@ ProgramStateRef RangedConstraintManager::assumeSymInclusiveRange( ProgramStateRef RangedConstraintManager::assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym, bool Assumption) { + Sym = simplify(State, Sym); + BasicValueFactory &BVF = getBasicVals(); QualType T = Sym->getType(); @@ -219,6 +226,13 @@ void RangedConstraintManager::computeAdjustment(SymbolRef &Sym, } } -} // end of namespace ento +SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) { + SValBuilder &SVB = State->getStateManager().getSValBuilder(); + SVal SimplifiedVal = SVB.simplifySVal(State, SVB.makeSymbolVal(Sym)); + if (SymbolRef SimplifiedSym = SimplifiedVal.getAsSymbol()) + return SimplifiedSym; + return Sym; +} +} // end of namespace ento } // end of namespace clang diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 57fde32bc01d..4ffa1aacb41f 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -23,7 +23,6 @@ #include "clang/Basic/TargetInfo.h" #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" @@ -1479,7 +1478,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) return UnknownVal(); if (const FieldRegion* FR = dyn_cast<FieldRegion>(R)) - return CastRetrievedVal(getBindingForField(B, FR), FR, T); + return svalBuilder.evalCast(getBindingForField(B, FR), T, QualType{}); if (const ElementRegion* ER = dyn_cast<ElementRegion>(R)) { // FIXME: Here we actually perform an implicit 
conversion from the loaded @@ -1487,7 +1486,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) // more intelligently. For example, an 'element' can encompass multiple // bound regions (e.g., several bound bytes), or could be a subset of // a larger value. - return CastRetrievedVal(getBindingForElement(B, ER), ER, T); + return svalBuilder.evalCast(getBindingForElement(B, ER), T, QualType{}); } if (const ObjCIvarRegion *IVR = dyn_cast<ObjCIvarRegion>(R)) { @@ -1497,7 +1496,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) // reinterpretted, it is possible we stored a different value that could // fit within the ivar. Either we need to cast these when storing them // or reinterpret them lazily (as we do here). - return CastRetrievedVal(getBindingForObjCIvar(B, IVR), IVR, T); + return svalBuilder.evalCast(getBindingForObjCIvar(B, IVR), T, QualType{}); } if (const VarRegion *VR = dyn_cast<VarRegion>(R)) { @@ -1507,7 +1506,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) // variable is reinterpretted, it is possible we stored a different value // that could fit within the variable. Either we need to cast these when // storing them or reinterpret them lazily (as we do here). 
- return CastRetrievedVal(getBindingForVar(B, VR), VR, T); + return svalBuilder.evalCast(getBindingForVar(B, VR), T, QualType{}); } const SVal *V = B.lookup(R, BindingKey::Direct); diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 72b8ada1dfab..b459b5adb511 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -107,7 +107,7 @@ SVal SValBuilder::convertToArrayIndex(SVal val) { return val; } - return evalCastFromNonLoc(val.castAs<NonLoc>(), ArrayIndexTy); + return evalCast(val, ArrayIndexTy, QualType{}); } nonloc::ConcreteInt SValBuilder::makeBoolVal(const CXXBoolLiteralExpr *boolean){ @@ -192,12 +192,19 @@ SValBuilder::getConjuredHeapSymbolVal(const Expr *E, const LocationContext *LCtx, unsigned VisitCount) { QualType T = E->getType(); - assert(Loc::isLocType(T)); - assert(SymbolManager::canSymbolicate(T)); - if (T->isNullPtrType()) - return makeZeroVal(T); + return getConjuredHeapSymbolVal(E, LCtx, T, VisitCount); +} + +DefinedOrUnknownSVal +SValBuilder::getConjuredHeapSymbolVal(const Expr *E, + const LocationContext *LCtx, + QualType type, unsigned VisitCount) { + assert(Loc::isLocType(type)); + assert(SymbolManager::canSymbolicate(type)); + if (type->isNullPtrType()) + return makeZeroVal(type); - SymbolRef sym = SymMgr.conjureSymbol(E, LCtx, T, VisitCount); + SymbolRef sym = SymMgr.conjureSymbol(E, LCtx, type, VisitCount); return loc::MemRegionVal(MemMgr.getSymbolicHeapRegion(sym)); } @@ -268,6 +275,13 @@ DefinedSVal SValBuilder::getBlockPointer(const BlockDecl *block, return loc::MemRegionVal(BD); } +Optional<loc::MemRegionVal> +SValBuilder::getCastedMemRegionVal(const MemRegion *R, QualType Ty) { + if (auto OptR = StateMgr.getStoreManager().castRegion(R, Ty)) + return loc::MemRegionVal(*OptR); + return None; +} + /// Return a memory region for the 'this' object reference. 
loc::MemRegionVal SValBuilder::getCXXThis(const CXXMethodDecl *D, const StackFrameContext *SFC) { @@ -423,6 +437,14 @@ SVal SValBuilder::evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, return UnknownVal(); } + if (op == BinaryOperatorKind::BO_Cmp) { + // We can't reason about C++20 spaceship operator yet. + // + // FIXME: Support C++20 spaceship operator. + // The main problem here is that the result is not integer. + return UnknownVal(); + } + if (Optional<Loc> LV = lhs.getAs<Loc>()) { if (Optional<Loc> RV = rhs.getAs<Loc>()) return evalBinOpLL(state, op, *LV, *RV, type); @@ -530,108 +552,272 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val, return evalCast(val, castTy, originalTy); } -// FIXME: should rewrite according to the cast kind. -SVal SValBuilder::evalCast(SVal val, QualType castTy, QualType originalTy) { - castTy = Context.getCanonicalType(castTy); - originalTy = Context.getCanonicalType(originalTy); - if (val.isUnknownOrUndef() || castTy == originalTy) - return val; +//===----------------------------------------------------------------------===// +// Cast methods. +// `evalCast` is the main method +// `evalCastKind` and `evalCastSubKind` are helpers +//===----------------------------------------------------------------------===// - if (castTy->isBooleanType()) { - if (val.isUnknownOrUndef()) - return val; - if (val.isConstant()) - return makeTruthVal(!val.isZeroConstant(), castTy); - if (!Loc::isLocType(originalTy) && - !originalTy->isIntegralOrEnumerationType() && - !originalTy->isMemberPointerType()) - return UnknownVal(); - if (SymbolRef Sym = val.getAsSymbol(true)) { - BasicValueFactory &BVF = getBasicValueFactory(); - // FIXME: If we had a state here, we could see if the symbol is known to - // be zero, but we don't. - return makeNonLoc(Sym, BO_NE, BVF.getValue(0, Sym->getType()), castTy); - } - // Loc values are not always true, they could be weakly linked functions. 
- if (Optional<Loc> L = val.getAs<Loc>()) - return evalCastFromLoc(*L, castTy); +/// Cast a given SVal to another SVal using given QualType's. +/// \param V -- SVal that should be casted. +/// \param CastTy -- QualType that V should be casted according to. +/// \param OriginalTy -- QualType which is associated to V. It provides +/// additional information about what type the cast converts from. +/// \returns the most appropriate casted SVal. +/// Note: Many cases don't use an exact OriginalTy. It can be extracted +/// from SVal or the cast can be done unconditionally. Always pass OriginalTy! +/// It can be crucial in certain cases and generates different results. +/// FIXME: If `OriginalTy.isNull()` is true, then cast is done based on CastTy +/// only. This behavior is uncertain and should be improved. +SVal SValBuilder::evalCast(SVal V, QualType CastTy, QualType OriginalTy) { + if (CastTy.isNull()) + return V; + + CastTy = Context.getCanonicalType(CastTy); + + const bool IsUnknownOriginalType = OriginalTy.isNull(); + if (!IsUnknownOriginalType) { + OriginalTy = Context.getCanonicalType(OriginalTy); + + if (CastTy == OriginalTy) + return V; + + // FIXME: Move this check to the most appropriate + // evalCastKind/evalCastSubKind function. For const casts, casts to void, + // just propagate the value. + if (!CastTy->isVariableArrayType() && !OriginalTy->isVariableArrayType()) + if (shouldBeModeledWithNoOp(Context, Context.getPointerType(CastTy), + Context.getPointerType(OriginalTy))) + return V; + } - Loc L = val.castAs<nonloc::LocAsInteger>().getLoc(); - return evalCastFromLoc(L, castTy); + // Cast SVal according to kinds. 
+ switch (V.getBaseKind()) { + case SVal::UndefinedValKind: + return evalCastKind(V.castAs<UndefinedVal>(), CastTy, OriginalTy); + case SVal::UnknownValKind: + return evalCastKind(V.castAs<UnknownVal>(), CastTy, OriginalTy); + case SVal::LocKind: + return evalCastKind(V.castAs<Loc>(), CastTy, OriginalTy); + case SVal::NonLocKind: + return evalCastKind(V.castAs<NonLoc>(), CastTy, OriginalTy); } - // For const casts, casts to void, just propagate the value. - if (!castTy->isVariableArrayType() && !originalTy->isVariableArrayType()) - if (shouldBeModeledWithNoOp(Context, Context.getPointerType(castTy), - Context.getPointerType(originalTy))) - return val; + llvm_unreachable("Unknown SVal kind"); +} - // Check for casts from pointers to integers. - if (castTy->isIntegralOrEnumerationType() && Loc::isLocType(originalTy)) - return evalCastFromLoc(val.castAs<Loc>(), castTy); - - // Check for casts from integers to pointers. - if (Loc::isLocType(castTy) && originalTy->isIntegralOrEnumerationType()) { - if (Optional<nonloc::LocAsInteger> LV = val.getAs<nonloc::LocAsInteger>()) { - if (const MemRegion *R = LV->getLoc().getAsRegion()) { - StoreManager &storeMgr = StateMgr.getStoreManager(); - R = storeMgr.castRegion(R, castTy); - return R ? 
SVal(loc::MemRegionVal(R)) : UnknownVal(); - } - return LV->getLoc(); - } - return dispatchCast(val, castTy); +SVal SValBuilder::evalCastKind(UndefinedVal V, QualType CastTy, + QualType OriginalTy) { + return V; +} + +SVal SValBuilder::evalCastKind(UnknownVal V, QualType CastTy, + QualType OriginalTy) { + return V; +} + +SVal SValBuilder::evalCastKind(Loc V, QualType CastTy, QualType OriginalTy) { + switch (V.getSubKind()) { + case loc::ConcreteIntKind: + return evalCastSubKind(V.castAs<loc::ConcreteInt>(), CastTy, OriginalTy); + case loc::GotoLabelKind: + return evalCastSubKind(V.castAs<loc::GotoLabel>(), CastTy, OriginalTy); + case loc::MemRegionValKind: + return evalCastSubKind(V.castAs<loc::MemRegionVal>(), CastTy, OriginalTy); } - // Just pass through function and block pointers. - if (originalTy->isBlockPointerType() || originalTy->isFunctionPointerType()) { - assert(Loc::isLocType(castTy)); - return val; + llvm_unreachable("Unknown SVal kind"); +} + +SVal SValBuilder::evalCastKind(NonLoc V, QualType CastTy, QualType OriginalTy) { + switch (V.getSubKind()) { + case nonloc::CompoundValKind: + return evalCastSubKind(V.castAs<nonloc::CompoundVal>(), CastTy, OriginalTy); + case nonloc::ConcreteIntKind: + return evalCastSubKind(V.castAs<nonloc::ConcreteInt>(), CastTy, OriginalTy); + case nonloc::LazyCompoundValKind: + return evalCastSubKind(V.castAs<nonloc::LazyCompoundVal>(), CastTy, + OriginalTy); + case nonloc::LocAsIntegerKind: + return evalCastSubKind(V.castAs<nonloc::LocAsInteger>(), CastTy, + OriginalTy); + case nonloc::SymbolValKind: + return evalCastSubKind(V.castAs<nonloc::SymbolVal>(), CastTy, OriginalTy); + case nonloc::PointerToMemberKind: + return evalCastSubKind(V.castAs<nonloc::PointerToMember>(), CastTy, + OriginalTy); } - // Check for casts from array type to another type. - if (const auto *arrayT = - dyn_cast<ArrayType>(originalTy.getCanonicalType())) { - // We will always decay to a pointer. 
- QualType elemTy = arrayT->getElementType(); - val = StateMgr.ArrayToPointer(val.castAs<Loc>(), elemTy); + llvm_unreachable("Unknown SVal kind"); +} - // Are we casting from an array to a pointer? If so just pass on - // the decayed value. - if (castTy->isPointerType() || castTy->isReferenceType()) - return val; +SVal SValBuilder::evalCastSubKind(loc::ConcreteInt V, QualType CastTy, + QualType OriginalTy) { + // Pointer to bool. + if (CastTy->isBooleanType()) + return makeTruthVal(V.getValue().getBoolValue(), CastTy); + + // Pointer to integer. + if (CastTy->isIntegralOrEnumerationType()) { + llvm::APSInt Value = V.getValue(); + BasicVals.getAPSIntType(CastTy).apply(Value); + return makeIntVal(Value); + } + + // Pointer to any pointer. + if (Loc::isLocType(CastTy)) + return V; + + // Pointer to whatever else. + return UnknownVal(); +} + +SVal SValBuilder::evalCastSubKind(loc::GotoLabel V, QualType CastTy, + QualType OriginalTy) { + // Pointer to bool. + if (CastTy->isBooleanType()) + // Labels are always true. + return makeTruthVal(true, CastTy); + + // Pointer to integer. + if (CastTy->isIntegralOrEnumerationType()) { + const unsigned BitWidth = Context.getIntWidth(CastTy); + return makeLocAsInteger(V, BitWidth); + } + + const bool IsUnknownOriginalType = OriginalTy.isNull(); + if (!IsUnknownOriginalType) { + // Array to pointer. + if (isa<ArrayType>(OriginalTy)) + if (CastTy->isPointerType() || CastTy->isReferenceType()) + return UnknownVal(); + } + + // Pointer to any pointer. + if (Loc::isLocType(CastTy)) + return V; + + // Pointer to whatever else. + return UnknownVal(); +} - // Are we casting from an array to an integer? If so, cast the decayed - // pointer value to an integer. 
- assert(castTy->isIntegralOrEnumerationType()); +static bool hasSameUnqualifiedPointeeType(QualType ty1, QualType ty2) { + return ty1->getPointeeType().getCanonicalType().getTypePtr() == + ty2->getPointeeType().getCanonicalType().getTypePtr(); +} + +SVal SValBuilder::evalCastSubKind(loc::MemRegionVal V, QualType CastTy, + QualType OriginalTy) { + // Pointer to bool. + if (CastTy->isBooleanType()) { + const MemRegion *R = V.getRegion(); + if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R)) + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl())) + if (FD->isWeak()) + // FIXME: Currently we are using an extent symbol here, + // because there are no generic region address metadata + // symbols to use, only content metadata. + return nonloc::SymbolVal(SymMgr.getExtentSymbol(FTR)); + + if (const SymbolicRegion *SymR = R->getSymbolicBase()) { + SymbolRef Sym = SymR->getSymbol(); + QualType Ty = Sym->getType(); + // This change is needed for architectures with varying + // pointer widths. See the amdgcn opencl reproducer with + // this change as an example: solver-sym-simplification-ptr-bool.cl + // FIXME: We could encounter a reference here, + // try returning a concrete 'true' since it might + // be easier on the solver. + // FIXME: Cleanup remainder of `getZeroWithPtrWidth ()` + // and `getIntWithPtrWidth()` functions to prevent future + // confusion + const llvm::APSInt &Zero = Ty->isReferenceType() + ? BasicVals.getZeroWithPtrWidth() + : BasicVals.getZeroWithTypeSize(Ty); + return makeNonLoc(Sym, BO_NE, Zero, CastTy); + } + // Non-symbolic memory regions are always true. + return makeTruthVal(true, CastTy); + } - // FIXME: Keep these here for now in case we decide soon that we - // need the original decayed type. 
- // QualType elemTy = cast<ArrayType>(originalTy)->getElementType(); - // QualType pointerTy = C.getPointerType(elemTy); - return evalCastFromLoc(val.castAs<Loc>(), castTy); + const bool IsUnknownOriginalType = OriginalTy.isNull(); + // Try to cast to array + const auto *ArrayTy = + IsUnknownOriginalType + ? nullptr + : dyn_cast<ArrayType>(OriginalTy.getCanonicalType()); + + // Pointer to integer. + if (CastTy->isIntegralOrEnumerationType()) { + SVal Val = V; + // Array to integer. + if (ArrayTy) { + // We will always decay to a pointer. + QualType ElemTy = ArrayTy->getElementType(); + Val = StateMgr.ArrayToPointer(V, ElemTy); + // FIXME: Keep these here for now in case we decide soon that we + // need the original decayed type. + // QualType elemTy = cast<ArrayType>(originalTy)->getElementType(); + // QualType pointerTy = C.getPointerType(elemTy); + } + const unsigned BitWidth = Context.getIntWidth(CastTy); + return makeLocAsInteger(Val.castAs<Loc>(), BitWidth); } - // Check for casts from a region to a specific type. - if (const MemRegion *R = val.getAsRegion()) { - // Handle other casts of locations to integers. - if (castTy->isIntegralOrEnumerationType()) - return evalCastFromLoc(loc::MemRegionVal(R), castTy); - - // FIXME: We should handle the case where we strip off view layers to get - // to a desugared type. - if (!Loc::isLocType(castTy)) { - // FIXME: There can be gross cases where one casts the result of a function - // (that returns a pointer) to some other value that happens to fit - // within that pointer value. We currently have no good way to - // model such operations. When this happens, the underlying operation - // is that the caller is reasoning about bits. Conceptually we are - // layering a "view" of a location on top of those bits. Perhaps - // we need to be more lazy about mutual possible views, even on an - // SVal? This may be necessary for bit-level reasoning as well. - return UnknownVal(); + // Pointer to pointer. 
+ if (Loc::isLocType(CastTy)) { + + if (IsUnknownOriginalType) { + // When retrieving symbolic pointer and expecting a non-void pointer, + // wrap them into element regions of the expected type if necessary. + // It is necessary to make sure that the retrieved value makes sense, + // because there's no other cast in the AST that would tell us to cast + // it to the correct pointer type. We might need to do that for non-void + // pointers as well. + // FIXME: We really need a single good function to perform casts for us + // correctly every time we need it. + const MemRegion *R = V.getRegion(); + if (CastTy->isPointerType() && !CastTy->isVoidPointerType()) { + if (const auto *SR = dyn_cast<SymbolicRegion>(R)) { + QualType SRTy = SR->getSymbol()->getType(); + if (!hasSameUnqualifiedPointeeType(SRTy, CastTy)) { + if (auto OptMemRegV = getCastedMemRegionVal(SR, CastTy)) + return *OptMemRegV; + } + } + } + // Next fixes pointer dereference using type different from its initial + // one. See PR37503 and PR49007 for details. + if (const auto *ER = dyn_cast<ElementRegion>(R)) { + if (auto OptMemRegV = getCastedMemRegionVal(ER, CastTy)) + return *OptMemRegV; + } + + return V; + } + + if (OriginalTy->isIntegralOrEnumerationType() || + OriginalTy->isBlockPointerType() || OriginalTy->isFunctionPointerType()) + return V; + + // Array to pointer. + if (ArrayTy) { + // Are we casting from an array to a pointer? If so just pass on + // the decayed value. + if (CastTy->isPointerType() || CastTy->isReferenceType()) { + // We will always decay to a pointer. + QualType ElemTy = ArrayTy->getElementType(); + return StateMgr.ArrayToPointer(V, ElemTy); + } + // Are we casting from an array to an integer? If so, cast the decayed + // pointer value to an integer. + assert(CastTy->isIntegralOrEnumerationType()); } + // Other pointer to pointer. 
+ assert(Loc::isLocType(OriginalTy) || OriginalTy->isFunctionType() || + CastTy->isReferenceType()); + // We get a symbolic function pointer for a dereference of a function // pointer, but it is of function type. Example: @@ -647,17 +833,161 @@ SVal SValBuilder::evalCast(SVal val, QualType castTy, QualType originalTy) { // return bar(x)+1; // no-warning // } - assert(Loc::isLocType(originalTy) || originalTy->isFunctionType() || - originalTy->isBlockPointerType() || castTy->isReferenceType()); + // Get the result of casting a region to a different type. + const MemRegion *R = V.getRegion(); + if (auto OptMemRegV = getCastedMemRegionVal(R, CastTy)) + return *OptMemRegV; + } + + // Pointer to whatever else. + // FIXME: There can be gross cases where one casts the result of a + // function (that returns a pointer) to some other value that happens to + // fit within that pointer value. We currently have no good way to model + // such operations. When this happens, the underlying operation is that + // the caller is reasoning about bits. Conceptually we are layering a + // "view" of a location on top of those bits. Perhaps we need to be more + // lazy about mutual possible views, even on an SVal? This may be + // necessary for bit-level reasoning as well. + return UnknownVal(); +} + +SVal SValBuilder::evalCastSubKind(nonloc::CompoundVal V, QualType CastTy, + QualType OriginalTy) { + // Compound to whatever. + return UnknownVal(); +} + +SVal SValBuilder::evalCastSubKind(nonloc::ConcreteInt V, QualType CastTy, + QualType OriginalTy) { + auto CastedValue = [V, CastTy, this]() { + llvm::APSInt Value = V.getValue(); + BasicVals.getAPSIntType(CastTy).apply(Value); + return Value; + }; + + // Integer to bool. + if (CastTy->isBooleanType()) + return makeTruthVal(V.getValue().getBoolValue(), CastTy); + + // Integer to integer. + if (CastTy->isIntegralOrEnumerationType()) + return makeIntVal(CastedValue()); + + // Integer to pointer. 
+ if (Loc::isLocType(CastTy)) + return makeIntLocVal(CastedValue()); + + // Integer to whatever else. + return UnknownVal(); +} + +SVal SValBuilder::evalCastSubKind(nonloc::LazyCompoundVal V, QualType CastTy, + QualType OriginalTy) { + // Compound to whatever. + return UnknownVal(); +} + +SVal SValBuilder::evalCastSubKind(nonloc::LocAsInteger V, QualType CastTy, + QualType OriginalTy) { + Loc L = V.getLoc(); + + // Pointer as integer to bool. + if (CastTy->isBooleanType()) + // Pass to Loc function. + return evalCastKind(L, CastTy, OriginalTy); + + const bool IsUnknownOriginalType = OriginalTy.isNull(); + // Pointer as integer to pointer. + if (!IsUnknownOriginalType && Loc::isLocType(CastTy) && + OriginalTy->isIntegralOrEnumerationType()) { + if (const MemRegion *R = L.getAsRegion()) + if (auto OptMemRegV = getCastedMemRegionVal(R, CastTy)) + return *OptMemRegV; + return L; + } + + // Pointer as integer with region to integer/pointer. + const MemRegion *R = L.getAsRegion(); + if (!IsUnknownOriginalType && R) { + if (CastTy->isIntegralOrEnumerationType()) + return evalCastSubKind(loc::MemRegionVal(R), CastTy, OriginalTy); + + if (Loc::isLocType(CastTy)) { + assert(Loc::isLocType(OriginalTy) || OriginalTy->isFunctionType() || + CastTy->isReferenceType()); + // Delegate to store manager to get the result of casting a region to a + // different type. If the MemRegion* returned is NULL, this expression + // evaluates to UnknownVal. + if (auto OptMemRegV = getCastedMemRegionVal(R, CastTy)) + return *OptMemRegV; + } + } else { + if (Loc::isLocType(CastTy)) { + if (IsUnknownOriginalType) + return evalCastSubKind(loc::MemRegionVal(R), CastTy, OriginalTy); + return L; + } + + SymbolRef SE = nullptr; + if (R) { + if (const SymbolicRegion *SR = + dyn_cast<SymbolicRegion>(R->StripCasts())) { + SE = SR->getSymbol(); + } + } + + if (!CastTy->isFloatingType() || !SE || SE->getType()->isFloatingType()) { + // FIXME: Correctly support promotions/truncations. 
+ const unsigned CastSize = Context.getIntWidth(CastTy); + if (CastSize == V.getNumBits()) + return V; + + return makeLocAsInteger(L, CastSize); + } + } - StoreManager &storeMgr = StateMgr.getStoreManager(); + // Pointer as integer to whatever else. + return UnknownVal(); +} - // Delegate to store manager to get the result of casting a region to a - // different type. If the MemRegion* returned is NULL, this expression - // Evaluates to UnknownVal. - R = storeMgr.castRegion(R, castTy); - return R ? SVal(loc::MemRegionVal(R)) : UnknownVal(); +SVal SValBuilder::evalCastSubKind(nonloc::SymbolVal V, QualType CastTy, + QualType OriginalTy) { + SymbolRef SE = V.getSymbol(); + + const bool IsUnknownOriginalType = OriginalTy.isNull(); + // Symbol to bool. + if (!IsUnknownOriginalType && CastTy->isBooleanType()) { + // Non-float to bool. + if (Loc::isLocType(OriginalTy) || + OriginalTy->isIntegralOrEnumerationType() || + OriginalTy->isMemberPointerType()) { + BasicValueFactory &BVF = getBasicValueFactory(); + return makeNonLoc(SE, BO_NE, BVF.getValue(0, SE->getType()), CastTy); + } + } else { + // Symbol to integer, float. + QualType T = Context.getCanonicalType(SE->getType()); + // If types are the same or both are integers, ignore the cast. + // FIXME: Remove this hack when we support symbolic truncation/extension. + // HACK: If both castTy and T are integers, ignore the cast. This is + // not a permanent solution. Eventually we want to precisely handle + // extension/truncation of symbolic integers. This prevents us from losing + // precision when we assign 'x = y' and 'y' is symbolic and x and y are + // different integer types. + if (haveSameType(T, CastTy)) + return V; + if (!Loc::isLocType(CastTy)) + if (!IsUnknownOriginalType || !CastTy->isFloatingType() || + T->isFloatingType()) + return makeNonLoc(SE, T, CastTy); } - return dispatchCast(val, castTy); + // Symbol to pointer and whatever else. 
+ return UnknownVal(); +} + +SVal SValBuilder::evalCastSubKind(nonloc::PointerToMember V, QualType CastTy, + QualType OriginalTy) { + // Member pointer to whatever. + return V; } diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 252596887e4f..117546e43b1a 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" @@ -21,6 +22,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" #include "llvm/ADT/Optional.h" @@ -136,6 +138,63 @@ const MemRegion *SVal::getAsRegion() const { return nullptr; } +namespace { +class TypeRetrievingVisitor + : public FullSValVisitor<TypeRetrievingVisitor, QualType> { +private: + const ASTContext &Context; + +public: + TypeRetrievingVisitor(const ASTContext &Context) : Context(Context) {} + + QualType VisitLocMemRegionVal(loc::MemRegionVal MRV) { + return Visit(MRV.getRegion()); + } + QualType VisitLocGotoLabel(loc::GotoLabel GL) { + return QualType{Context.VoidPtrTy}; + } + template <class ConcreteInt> QualType VisitConcreteInt(ConcreteInt CI) { + const llvm::APSInt &Value = CI.getValue(); + return Context.getIntTypeForBitwidth(Value.getBitWidth(), Value.isSigned()); + } + QualType VisitLocConcreteInt(loc::ConcreteInt CI) { + return VisitConcreteInt(CI); + } + QualType VisitNonLocConcreteInt(nonloc::ConcreteInt CI) { + return 
VisitConcreteInt(CI); + } + QualType VisitNonLocLocAsInteger(nonloc::LocAsInteger LI) { + QualType NestedType = Visit(LI.getLoc()); + if (NestedType.isNull()) + return NestedType; + + return Context.getIntTypeForBitwidth(LI.getNumBits(), + NestedType->isSignedIntegerType()); + } + QualType VisitNonLocCompoundVal(nonloc::CompoundVal CV) { + return CV.getValue()->getType(); + } + QualType VisitNonLocLazyCompoundVal(nonloc::LazyCompoundVal LCV) { + return LCV.getRegion()->getValueType(); + } + QualType VisitNonLocSymbolVal(nonloc::SymbolVal SV) { + return Visit(SV.getSymbol()); + } + QualType VisitSymbolicRegion(const SymbolicRegion *SR) { + return Visit(SR->getSymbol()); + } + QualType VisitTypedRegion(const TypedRegion *TR) { + return TR->getLocationType(); + } + QualType VisitSymExpr(const SymExpr *SE) { return SE->getType(); } +}; +} // end anonymous namespace + +QualType SVal::getType(const ASTContext &Context) const { + TypeRetrievingVisitor TRV{Context}; + return TRV.Visit(*this); +} + const MemRegion *loc::MemRegionVal::stripCasts(bool StripBaseCasts) const { const MemRegion *R = getRegion(); return R ? 
R->StripCasts(StripBaseCasts) : nullptr; diff --git a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp index f93d04ccd61a..e1319a4c2e41 100644 --- a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Analysis/MacroExpansionContext.h" #include "clang/Analysis/PathDiagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/Version.h" @@ -48,7 +49,8 @@ public: void ento::createSarifDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, const std::string &Output, const Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { // TODO: Emit an error here. if (Output.empty()) @@ -56,7 +58,7 @@ void ento::createSarifDiagnosticConsumer( C.push_back(new SarifDiagnostics(Output, PP.getLangOpts())); createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, Output, PP, - CTU); + CTU, MacroExpansions); } static StringRef getFileName(const FileEntry &FE) { @@ -385,7 +387,7 @@ void SarifDiagnostics::FlushDiagnosticsImpl( // file can become large very quickly, so decoding into JSON to append a run // may be an expensive operation. 
std::error_code EC; - llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_Text); + llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF); if (EC) { llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; return; diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index facadaf1225f..e57d92fbcebb 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -22,11 +22,6 @@ using namespace ento; namespace { class SimpleSValBuilder : public SValBuilder { -protected: - SVal dispatchCast(SVal val, QualType castTy) override; - SVal evalCastFromNonLoc(NonLoc val, QualType castTy) override; - SVal evalCastFromLoc(Loc val, QualType castTy) override; - public: SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr) @@ -62,133 +57,6 @@ SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, } //===----------------------------------------------------------------------===// -// Transfer function for Casts. -//===----------------------------------------------------------------------===// - -SVal SimpleSValBuilder::dispatchCast(SVal Val, QualType CastTy) { - assert(Val.getAs<Loc>() || Val.getAs<NonLoc>()); - return Val.getAs<Loc>() ? evalCastFromLoc(Val.castAs<Loc>(), CastTy) - : evalCastFromNonLoc(Val.castAs<NonLoc>(), CastTy); -} - -SVal SimpleSValBuilder::evalCastFromNonLoc(NonLoc val, QualType castTy) { - bool isLocType = Loc::isLocType(castTy); - if (val.getAs<nonloc::PointerToMember>()) - return val; - - if (Optional<nonloc::LocAsInteger> LI = val.getAs<nonloc::LocAsInteger>()) { - if (isLocType) - return LI->getLoc(); - // FIXME: Correctly support promotions/truncations. 
- unsigned castSize = Context.getIntWidth(castTy); - if (castSize == LI->getNumBits()) - return val; - return makeLocAsInteger(LI->getLoc(), castSize); - } - - if (SymbolRef se = val.getAsSymbol()) { - QualType T = Context.getCanonicalType(se->getType()); - // If types are the same or both are integers, ignore the cast. - // FIXME: Remove this hack when we support symbolic truncation/extension. - // HACK: If both castTy and T are integers, ignore the cast. This is - // not a permanent solution. Eventually we want to precisely handle - // extension/truncation of symbolic integers. This prevents us from losing - // precision when we assign 'x = y' and 'y' is symbolic and x and y are - // different integer types. - if (haveSameType(T, castTy)) - return val; - - if (!isLocType) - return makeNonLoc(se, T, castTy); - return UnknownVal(); - } - - // If value is a non-integer constant, produce unknown. - if (!val.getAs<nonloc::ConcreteInt>()) - return UnknownVal(); - - // Handle casts to a boolean type. - if (castTy->isBooleanType()) { - bool b = val.castAs<nonloc::ConcreteInt>().getValue().getBoolValue(); - return makeTruthVal(b, castTy); - } - - // Only handle casts from integers to integers - if val is an integer constant - // being cast to a non-integer type, produce unknown. - if (!isLocType && !castTy->isIntegralOrEnumerationType()) - return UnknownVal(); - - llvm::APSInt i = val.castAs<nonloc::ConcreteInt>().getValue(); - BasicVals.getAPSIntType(castTy).apply(i); - - if (isLocType) - return makeIntLocVal(i); - else - return makeIntVal(i); -} - -SVal SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) { - - // Casts from pointers -> pointers, just return the lval. - // - // Casts from pointers -> references, just return the lval. These - // can be introduced by the frontend for corner cases, e.g - // casting from va_list* to __builtin_va_list&. 
- // - if (Loc::isLocType(castTy) || castTy->isReferenceType()) - return val; - - // FIXME: Handle transparent unions where a value can be "transparently" - // lifted into a union type. - if (castTy->isUnionType()) - return UnknownVal(); - - // Casting a Loc to a bool will almost always be true, - // unless this is a weak function or a symbolic region. - if (castTy->isBooleanType()) { - switch (val.getSubKind()) { - case loc::MemRegionValKind: { - const MemRegion *R = val.castAs<loc::MemRegionVal>().getRegion(); - if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R)) - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl())) - if (FD->isWeak()) - // FIXME: Currently we are using an extent symbol here, - // because there are no generic region address metadata - // symbols to use, only content metadata. - return nonloc::SymbolVal(SymMgr.getExtentSymbol(FTR)); - - if (const SymbolicRegion *SymR = R->getSymbolicBase()) - return makeNonLoc(SymR->getSymbol(), BO_NE, - BasicVals.getZeroWithPtrWidth(), castTy); - - // FALL-THROUGH - LLVM_FALLTHROUGH; - } - - case loc::GotoLabelKind: - // Labels and non-symbolic memory regions are always true. - return makeTruthVal(true, castTy); - } - } - - if (castTy->isIntegralOrEnumerationType()) { - unsigned BitWidth = Context.getIntWidth(castTy); - - if (!val.getAs<loc::ConcreteInt>()) - return makeLocAsInteger(val, BitWidth); - - llvm::APSInt i = val.castAs<loc::ConcreteInt>().getValue(); - BasicVals.getAPSIntType(castTy).apply(i); - return makeIntVal(i); - } - - // All other cases: return 'UnknownVal'. This includes casting pointers - // to floats, which is probably badness it itself, but this is a good - // intermediate solution until we do something better. - return UnknownVal(); -} - -//===----------------------------------------------------------------------===// // Transfer function for unary operators. 
//===----------------------------------------------------------------------===// @@ -276,10 +144,10 @@ SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, } // Idempotent ops (like a*1) can still change the type of an expression. - // Wrap the LHS up in a NonLoc again and let evalCastFromNonLoc do the + // Wrap the LHS up in a NonLoc again and let evalCast do the // dirty work. if (isIdempotent) - return evalCastFromNonLoc(nonloc::SymbolVal(LHS), resultTy); + return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{}); // If we reach this point, the expression cannot be simplified. // Make a SymbolVal for the entire expression, after converting the RHS. @@ -525,10 +393,11 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, case BO_Sub: if (resultTy->isIntegralOrEnumerationType()) return makeIntVal(0, resultTy); - return evalCastFromNonLoc(makeIntVal(0, /*isUnsigned=*/false), resultTy); + return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy, + QualType{}); case BO_Or: case BO_And: - return evalCastFromNonLoc(lhs, resultTy); + return evalCast(lhs, resultTy, QualType{}); } while (1) { @@ -645,13 +514,15 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, case BO_Shr: // (~0)>>a if (LHSValue.isAllOnesValue() && LHSValue.isSigned()) - return evalCastFromNonLoc(lhs, resultTy); + return evalCast(lhs, resultTy, QualType{}); LLVM_FALLTHROUGH; case BO_Shl: // 0<<a and 0>>a if (LHSValue == 0) - return evalCastFromNonLoc(lhs, resultTy); + return evalCast(lhs, resultTy, QualType{}); return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); + case BO_Div: + // 0 / x == 0 case BO_Rem: // 0 % x == 0 if (LHSValue == 0) @@ -865,7 +736,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, default: break; case BO_Sub: - return evalCastFromLoc(lhs, resultTy); + return evalCast(lhs, resultTy, QualType{}); case BO_EQ: case BO_LE: case BO_LT: @@ -902,7 +773,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, SVal ResultVal = 
lhs.castAs<loc::ConcreteInt>().evalBinOp(BasicVals, op, *rInt); if (Optional<NonLoc> Result = ResultVal.getAs<NonLoc>()) - return evalCastFromNonLoc(*Result, resultTy); + return evalCast(*Result, resultTy, QualType{}); assert(!ResultVal.getAs<Loc>() && "Loc-Loc ops should not produce Locs"); return UnknownVal(); @@ -947,11 +818,11 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, // to be non-NULL. if (rInt->isZeroConstant()) { if (op == BO_Sub) - return evalCastFromLoc(lhs, resultTy); + return evalCast(lhs, resultTy, QualType{}); if (BinaryOperator::isComparisonOp(op)) { QualType boolType = getContext().BoolTy; - NonLoc l = evalCastFromLoc(lhs, boolType).castAs<NonLoc>(); + NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>(); NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>(); return evalBinOpNN(state, op, l, r, resultTy); } @@ -1033,7 +904,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>(); if (!LeftIndex) return UnknownVal(); - LeftIndexVal = evalCastFromNonLoc(*LeftIndex, ArrayIndexTy); + LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{}); LeftIndex = LeftIndexVal.getAs<NonLoc>(); if (!LeftIndex) return UnknownVal(); @@ -1043,7 +914,7 @@ SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>(); if (!RightIndex) return UnknownVal(); - RightIndexVal = evalCastFromNonLoc(*RightIndex, ArrayIndexTy); + RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{}); RightIndex = RightIndexVal.getAs<NonLoc>(); if (!RightIndex) return UnknownVal(); diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp index ea617bbeeba1..b867b0746f90 100644 --- a/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -71,7 +71,8 @@ const ElementRegion *StoreManager::GetElementZeroRegion(const SubRegion *R, return MRMgr.getElementRegion(T, 
idx, R, Ctx); } -const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) { +Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R, + QualType CastToTy) { ASTContext &Ctx = StateMgr.getContext(); // Handle casts to Objective-C objects. @@ -88,7 +89,7 @@ const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) // We don't know what to make of it. Return a NULL region, which // will be interpreted as UnknownVal. - return nullptr; + return None; } // Now assume we are casting from pointer to pointer. Other cases should @@ -168,7 +169,7 @@ const MemRegion *StoreManager::castRegion(const MemRegion *R, QualType CastToTy) // If we cannot compute a raw offset, throw up our hands and return // a NULL MemRegion*. if (!baseR) - return nullptr; + return None; CharUnits off = rawOff.getOffset(); @@ -394,48 +395,6 @@ SVal StoreManager::attemptDownCast(SVal Base, QualType TargetType, return UnknownVal(); } -static bool hasSameUnqualifiedPointeeType(QualType ty1, QualType ty2) { - return ty1->getPointeeType().getCanonicalType().getTypePtr() == - ty2->getPointeeType().getCanonicalType().getTypePtr(); -} - -/// CastRetrievedVal - Used by subclasses of StoreManager to implement -/// implicit casts that arise from loads from regions that are reinterpreted -/// as another region. -SVal StoreManager::CastRetrievedVal(SVal V, const TypedValueRegion *R, - QualType castTy) { - if (castTy.isNull() || V.isUnknownOrUndef()) - return V; - - // The dispatchCast() call below would convert the int into a float. - // What we want, however, is a bit-by-bit reinterpretation of the int - // as a float, which usually yields nothing garbage. For now skip casts - // from ints to floats. - // TODO: What other combinations of types are affected? 
- if (castTy->isFloatingType()) { - SymbolRef Sym = V.getAsSymbol(); - if (Sym && !Sym->getType()->isFloatingType()) - return UnknownVal(); - } - - // When retrieving symbolic pointer and expecting a non-void pointer, - // wrap them into element regions of the expected type if necessary. - // SValBuilder::dispatchCast() doesn't do that, but it is necessary to - // make sure that the retrieved value makes sense, because there's no other - // cast in the AST that would tell us to cast it to the correct pointer type. - // We might need to do that for non-void pointers as well. - // FIXME: We really need a single good function to perform casts for us - // correctly every time we need it. - if (castTy->isPointerType() && !castTy->isVoidPointerType()) - if (const auto *SR = dyn_cast_or_null<SymbolicRegion>(V.getAsRegion())) { - QualType sr = SR->getSymbol()->getType(); - if (!hasSameUnqualifiedPointeeType(sr, castTy)) - return loc::MemRegionVal(castRegion(SR, castTy)); - } - - return svalBuilder.dispatchCast(V, castTy); -} - SVal StoreManager::getLValueFieldOrIvar(const Decl *D, SVal Base) { if (Base.isUnknownOrUndef()) return Base; diff --git a/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp index ae2bad7ee77c..4f3be7cae331 100644 --- a/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Analysis/MacroExpansionContext.h" #include "clang/Analysis/PathDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" @@ -138,8 +139,9 @@ public: void ento::createTextPathDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, - const std::string &Prefix, const clang::Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const std::string &Prefix, const Preprocessor &PP, + const 
cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { C.emplace_back(new TextDiagnostics(std::move(DiagOpts), PP.getDiagnostics(), PP.getLangOpts(), /*ShouldDisplayPathNotes=*/true)); @@ -147,8 +149,9 @@ void ento::createTextPathDiagnosticConsumer( void ento::createTextMinimalPathDiagnosticConsumer( PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, - const std::string &Prefix, const clang::Preprocessor &PP, - const cross_tu::CrossTranslationUnitContext &CTU) { + const std::string &Prefix, const Preprocessor &PP, + const cross_tu::CrossTranslationUnitContext &CTU, + const MacroExpansionContext &MacroExpansions) { C.emplace_back(new TextDiagnostics(std::move(DiagOpts), PP.getDiagnostics(), PP.getLangOpts(), /*ShouldDisplayPathNotes=*/false)); diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index f2a19b2ccc90..31de49033ac2 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -20,6 +20,7 @@ #include "clang/Analysis/CFG.h" #include "clang/Analysis/CallGraph.h" #include "clang/Analysis/CodeInjector.h" +#include "clang/Analysis/MacroExpansionContext.h" #include "clang/Analysis/PathDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/CrossTU/CrossTranslationUnit.h" @@ -98,6 +99,8 @@ public: /// working with a PCH file. SetOfDecls LocalTUDecls; + MacroExpansionContext MacroExpansions; + // Set of PathDiagnosticConsumers. Owned by AnalysisManager. 
PathDiagnosticConsumers PathConsumers; @@ -122,9 +125,11 @@ public: CodeInjector *injector) : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr), PP(CI.getPreprocessor()), OutDir(outdir), Opts(std::move(opts)), - Plugins(plugins), Injector(injector), CTU(CI) { + Plugins(plugins), Injector(injector), CTU(CI), + MacroExpansions(CI.getLangOpts()) { DigestAnalyzerOptions(); - if (Opts->PrintStats || Opts->ShouldSerializeStats) { + if (Opts->AnalyzerDisplayProgress || Opts->PrintStats || + Opts->ShouldSerializeStats) { AnalyzerTimers = std::make_unique<llvm::TimerGroup>( "analyzer", "Analyzer timers"); SyntaxCheckTimer = std::make_unique<llvm::Timer>( @@ -134,8 +139,14 @@ public: BugReporterTimer = std::make_unique<llvm::Timer>( "bugreporter", "Path-sensitive report post-processing time", *AnalyzerTimers); + } + + if (Opts->PrintStats || Opts->ShouldSerializeStats) { llvm::EnableStatistics(/* PrintOnExit= */ false); } + + if (Opts->ShouldDisplayMacroExpansions) + MacroExpansions.registerForPreprocessor(PP); } ~AnalysisConsumer() override { @@ -150,7 +161,8 @@ public: break; #define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATEFN) \ case PD_##NAME: \ - CREATEFN(Opts->getDiagOpts(), PathConsumers, OutDir, PP, CTU); \ + CREATEFN(Opts->getDiagOpts(), PathConsumers, OutDir, PP, CTU, \ + MacroExpansions); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: @@ -175,6 +187,14 @@ public: } } + void DisplayTime(llvm::TimeRecord &Time) { + if (!Opts->AnalyzerDisplayProgress) { + return; + } + llvm::errs() << " : " << llvm::format("%1.1f", Time.getWallTime() * 1000) + << " ms\n"; + } + void DisplayFunction(const Decl *D, AnalysisMode Mode, ExprEngine::InliningModes IMode) { if (!Opts->AnalyzerDisplayProgress) @@ -201,8 +221,8 @@ public: } else assert(Mode == (AM_Syntax | AM_Path) && "Unexpected mode!"); - llvm::errs() << ": " << Loc.getFilename() << ' ' << getFunctionName(D) - << '\n'; + llvm::errs() << ": " << Loc.getFilename() << ' ' + << 
AnalysisDeclContext::getFunctionName(D); } } @@ -560,63 +580,10 @@ void AnalysisConsumer::HandleTranslationUnit(ASTContext &C) { Mgr.reset(); } -std::string AnalysisConsumer::getFunctionName(const Decl *D) { - std::string Str; - llvm::raw_string_ostream OS(Str); - - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { - OS << FD->getQualifiedNameAsString(); - - // In C++, there are overloads. - if (Ctx->getLangOpts().CPlusPlus) { - OS << '('; - for (const auto &P : FD->parameters()) { - if (P != *FD->param_begin()) - OS << ", "; - OS << P->getType().getAsString(); - } - OS << ')'; - } - - } else if (isa<BlockDecl>(D)) { - PresumedLoc Loc = Ctx->getSourceManager().getPresumedLoc(D->getLocation()); - - if (Loc.isValid()) { - OS << "block (line: " << Loc.getLine() << ", col: " << Loc.getColumn() - << ')'; - } - - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { - - // FIXME: copy-pasted from CGDebugInfo.cpp. - OS << (OMD->isInstanceMethod() ? '-' : '+') << '['; - const DeclContext *DC = OMD->getDeclContext(); - if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) { - OS << OID->getName(); - } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) { - OS << OID->getName(); - } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) { - if (OC->IsClassExtension()) { - OS << OC->getClassInterface()->getName(); - } else { - OS << OC->getIdentifier()->getNameStart() << '(' - << OC->getIdentifier()->getNameStart() << ')'; - } - } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { - OS << OCD->getClassInterface()->getName() << '(' - << OCD->getName() << ')'; - } - OS << ' ' << OMD->getSelector().getAsString() << ']'; - - } - - return OS.str(); -} - AnalysisConsumer::AnalysisMode AnalysisConsumer::getModeForDecl(Decl *D, AnalysisMode Mode) { if (!Opts->AnalyzeSpecificFunction.empty() && - getFunctionName(D) != Opts->AnalyzeSpecificFunction) + AnalysisDeclContext::getFunctionName(D) != Opts->AnalyzeSpecificFunction) return 
AM_None; // Unless -analyze-all is specified, treat decls differently depending on @@ -653,19 +620,26 @@ void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode, if (Mgr->getAnalysisDeclContext(D)->isBodyAutosynthesized()) return; - DisplayFunction(D, Mode, IMode); CFG *DeclCFG = Mgr->getCFG(D); if (DeclCFG) MaxCFGSize.updateMax(DeclCFG->size()); + DisplayFunction(D, Mode, IMode); BugReporter BR(*Mgr); if (Mode & AM_Syntax) { - if (SyntaxCheckTimer) + llvm::TimeRecord CheckerStartTime; + if (SyntaxCheckTimer) { + CheckerStartTime = SyntaxCheckTimer->getTotalTime(); SyntaxCheckTimer->startTimer(); + } checkerMgr->runCheckersOnASTBody(D, *Mgr, BR); - if (SyntaxCheckTimer) + if (SyntaxCheckTimer) { SyntaxCheckTimer->stopTimer(); + llvm::TimeRecord CheckerEndTime = SyntaxCheckTimer->getTotalTime(); + CheckerEndTime -= CheckerStartTime; + DisplayTime(CheckerEndTime); + } } BR.FlushReports(); @@ -696,12 +670,19 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D, ExprEngine Eng(CTU, *Mgr, VisitedCallees, &FunctionSummaries, IMode); // Execute the worklist algorithm. - if (ExprEngineTimer) + llvm::TimeRecord ExprEngineStartTime; + if (ExprEngineTimer) { + ExprEngineStartTime = ExprEngineTimer->getTotalTime(); ExprEngineTimer->startTimer(); + } Eng.ExecuteWorkList(Mgr->getAnalysisDeclContextManager().getStackFrame(D), Mgr->options.MaxNodesPerTopLevelFunction); - if (ExprEngineTimer) + if (ExprEngineTimer) { ExprEngineTimer->stopTimer(); + llvm::TimeRecord ExprEngineEndTime = ExprEngineTimer->getTotalTime(); + ExprEngineEndTime -= ExprEngineStartTime; + DisplayTime(ExprEngineEndTime); + } if (!Mgr->options.DumpExplodedGraphTo.empty()) Eng.DumpGraph(Mgr->options.TrimGraph, Mgr->options.DumpExplodedGraphTo); |