diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Analysis/FlowSensitive')
21 files changed, 7252 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ASTOps.cpp new file mode 100644 index 000000000000..27d42a7b5085 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ASTOps.cpp @@ -0,0 +1,287 @@ +//===-- ASTOps.cc -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on AST nodes that are used in flow-sensitive analysis. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/AST/ComputeDependence.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include <cassert> +#include <iterator> +#include <vector> + +#define DEBUG_TYPE "dataflow" + +namespace clang::dataflow { + +const Expr &ignoreCFGOmittedNodes(const Expr &E) { + const Expr *Current = &E; + const Expr *Last = nullptr; + while (Current != Last) { + Last = Current; + if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { + Current = EWC->getSubExpr(); + assert(Current != nullptr); + } + if (auto *CE = dyn_cast<ConstantExpr>(Current)) { + Current = CE->getSubExpr(); + assert(Current != nullptr); + } + Current = Current->IgnoreParens(); + assert(Current != nullptr); + } + return *Current; +} + +const Stmt &ignoreCFGOmittedNodes(const Stmt &S) { + if (auto *E = dyn_cast<Expr>(&S)) + return ignoreCFGOmittedNodes(*E); + return S; +} + +// FIXME: Does not precisely handle non-virtual diamond inheritance. A single +// field decl will be modeled for all instances of the inherited field. +static void getFieldsFromClassHierarchy(QualType Type, FieldSet &Fields) { + if (Type->isIncompleteType() || Type->isDependentType() || + !Type->isRecordType()) + return; + + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) + Fields.insert(Field); + if (auto *CXXRecord = Type->getAsCXXRecordDecl()) + for (const CXXBaseSpecifier &Base : CXXRecord->bases()) + getFieldsFromClassHierarchy(Base.getType(), Fields); +} + +/// Gets the set of all fields in the type. +FieldSet getObjectFields(QualType Type) { + FieldSet Fields; + getFieldsFromClassHierarchy(Type, Fields); + return Fields; +} + +bool containsSameFields(const FieldSet &Fields, + const RecordStorageLocation::FieldToLoc &FieldLocs) { + if (Fields.size() != FieldLocs.size()) + return false; + for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) + if (!Fields.contains(cast_or_null<FieldDecl>(Field))) + return false; + return true; +} + +/// Returns the fields of a `RecordDecl` that are initialized by an +/// `InitListExpr` or `CXXParenListInitExpr`, in the order in which they appear +/// in `InitListExpr::inits()` / `CXXParenListInitExpr::getInitExprs()`. +/// `InitList->getType()` must be a record type. +template <class InitListT> +static std::vector<const FieldDecl *> +getFieldsForInitListExpr(const InitListT *InitList) { + const RecordDecl *RD = InitList->getType()->getAsRecordDecl(); + assert(RD != nullptr); + + std::vector<const FieldDecl *> Fields; + + if (InitList->getType()->isUnionType()) { + if (const FieldDecl *Field = InitList->getInitializedFieldInUnion()) + Fields.push_back(Field); + return Fields; + } + + // Unnamed bitfields are only used for padding and do not appear in + // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s + // field list, and we thus need to remove them before mapping inits to + // fields to avoid mapping inits to the wrongs fields. + llvm::copy_if( + RD->fields(), std::back_inserter(Fields), + [](const FieldDecl *Field) { return !Field->isUnnamedBitField(); }); + return Fields; +} + +RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) + : RecordInitListHelper(InitList->getType(), + getFieldsForInitListExpr(InitList), + InitList->inits()) {} + +RecordInitListHelper::RecordInitListHelper( + const CXXParenListInitExpr *ParenInitList) + : RecordInitListHelper(ParenInitList->getType(), + getFieldsForInitListExpr(ParenInitList), + ParenInitList->getInitExprs()) {} + +RecordInitListHelper::RecordInitListHelper( + QualType Ty, std::vector<const FieldDecl *> Fields, + ArrayRef<Expr *> Inits) { + auto *RD = Ty->getAsCXXRecordDecl(); + assert(RD != nullptr); + + // Unions initialized with an empty initializer list need special treatment. + // For structs/classes initialized with an empty initializer list, Clang + // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions, + // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. + SmallVector<Expr *> InitsForUnion; + if (Ty->isUnionType() && Inits.empty()) { + assert(Fields.size() <= 1); + if (!Fields.empty()) { + ImplicitValueInitForUnion.emplace(Fields.front()->getType()); + InitsForUnion.push_back(&*ImplicitValueInitForUnion); + } + Inits = InitsForUnion; + } + + size_t InitIdx = 0; + + assert(Fields.size() + RD->getNumBases() == Inits.size()); + for (const CXXBaseSpecifier &Base : RD->bases()) { + assert(InitIdx < Inits.size()); + Expr *Init = Inits[InitIdx++]; + BaseInits.emplace_back(&Base, Init); + } + + assert(Fields.size() == Inits.size() - InitIdx); + for (const FieldDecl *Field : Fields) { + assert(InitIdx < Inits.size()); + Expr *Init = Inits[InitIdx++]; + FieldInits.emplace_back(Field, Init); + } +} + +static void insertIfGlobal(const Decl &D, + llvm::DenseSet<const VarDecl *> &Globals) { + if (auto *V = dyn_cast<VarDecl>(&D)) + if (V->hasGlobalStorage()) + Globals.insert(V); +} + +static void insertIfFunction(const Decl &D, + llvm::DenseSet<const FunctionDecl *> &Funcs) { + if (auto *FD = dyn_cast<FunctionDecl>(&D)) + Funcs.insert(FD); +} + +static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { + // Use getCalleeDecl instead of getMethodDecl in order to handle + // pointer-to-member calls. + const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl()); + if (!MethodDecl) + return nullptr; + auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody()); + if (!Body || Body->size() != 1) + return nullptr; + if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin())) + if (auto *Return = RS->getRetValue()) + return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts()); + return nullptr; +} + +class ReferencedDeclsVisitor + : public AnalysisASTVisitor<ReferencedDeclsVisitor> { +public: + ReferencedDeclsVisitor(ReferencedDecls &Referenced) + : Referenced(Referenced) {} + + void TraverseConstructorInits(const CXXConstructorDecl *Ctor) { + for (const CXXCtorInitializer *Init : Ctor->inits()) { + if (Init->isMemberInitializer()) { + Referenced.Fields.insert(Init->getMember()); + } else if (Init->isIndirectMemberInitializer()) { + for (const auto *I : Init->getIndirectMember()->chain()) + Referenced.Fields.insert(cast<FieldDecl>(I)); + } + + Expr *InitExpr = Init->getInit(); + + // Also collect declarations referenced in `InitExpr`. + TraverseStmt(InitExpr); + + // If this is a `CXXDefaultInitExpr`, also collect declarations referenced + // within the default expression. + if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr)) + TraverseStmt(DefaultInit->getExpr()); + } + } + + bool VisitDecl(Decl *D) { + insertIfGlobal(*D, Referenced.Globals); + insertIfFunction(*D, Referenced.Functions); + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *E) { + insertIfGlobal(*E->getDecl(), Referenced.Globals); + insertIfFunction(*E->getDecl(), Referenced.Functions); + return true; + } + + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *C) { + // If this is a method that returns a member variable but does nothing else, + // model the field of the return value. + if (MemberExpr *E = getMemberForAccessor(*C)) + if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) + Referenced.Fields.insert(FD); + return true; + } + + bool VisitMemberExpr(MemberExpr *E) { + // FIXME: should we be using `E->getFoundDecl()`? + const ValueDecl *VD = E->getMemberDecl(); + insertIfGlobal(*VD, Referenced.Globals); + insertIfFunction(*VD, Referenced.Functions); + if (const auto *FD = dyn_cast<FieldDecl>(VD)) + Referenced.Fields.insert(FD); + return true; + } + + bool VisitInitListExpr(InitListExpr *InitList) { + if (InitList->getType()->isRecordType()) + for (const auto *FD : getFieldsForInitListExpr(InitList)) + Referenced.Fields.insert(FD); + return true; + } + + bool VisitCXXParenListInitExpr(CXXParenListInitExpr *ParenInitList) { + if (ParenInitList->getType()->isRecordType()) + for (const auto *FD : getFieldsForInitListExpr(ParenInitList)) + Referenced.Fields.insert(FD); + return true; + } + +private: + ReferencedDecls &Referenced; +}; + +ReferencedDecls getReferencedDecls(const FunctionDecl &FD) { + ReferencedDecls Result; + ReferencedDeclsVisitor Visitor(Result); + Visitor.TraverseStmt(FD.getBody()); + if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&FD)) + Visitor.TraverseConstructorInits(CtorDecl); + + return Result; +} + +ReferencedDecls getReferencedDecls(const Stmt &S) { + ReferencedDecls Result; + ReferencedDeclsVisitor Visitor(Result); + Visitor.TraverseStmt(const_cast<Stmt *>(&S)); + return Result; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp new file mode 100644 index 000000000000..255543021a99 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp @@ -0,0 +1,183 @@ +//===- AdornedCFG.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an `AdornedCFG` class that is used by dataflow analyses +// that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Error.h" +#include <utility> + +namespace clang { +namespace dataflow { + +/// Returns a map from statements to basic blocks that contain them. +static llvm::DenseMap<const Stmt *, const CFGBlock *> +buildStmtToBasicBlockMap(const CFG &Cfg) { + llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock; + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) { + auto Stmt = Element.getAs<CFGStmt>(); + if (!Stmt) + continue; + + StmtToBlock[Stmt->getStmt()] = Block; + } + } + // Some terminator conditions don't appear as a `CFGElement` anywhere else - + // for example, this is true if the terminator condition is a `&&` or `||` + // operator. + // We associate these conditions with the block the terminator appears in, + // but only if the condition has not already appeared as a regular + // `CFGElement`. (The `insert()` below does nothing if the key already exists + // in the map.) + for (const CFGBlock *Block : Cfg) { + if (Block != nullptr) + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + StmtToBlock.insert({TerminatorCond, Block}); + } + // Terminator statements typically don't appear as a `CFGElement` anywhere + // else, so we want to associate them with the block that they terminate. + // However, there are some important special cases: + // - The conditional operator is a type of terminator, but it also appears + // as a regular `CFGElement`, and we want to associate it with the block + // in which it appears as a `CFGElement`. + // - The `&&` and `||` operators are types of terminators, but like the + // conditional operator, they can appear as a regular `CFGElement` or + // as a terminator condition (see above). + // We process terminators last to make sure that we only associate them with + // the block they terminate if they haven't previously occurred as a regular + // `CFGElement` or as a terminator condition. + for (const CFGBlock *Block : Cfg) { + if (Block != nullptr) + if (const Stmt *TerminatorStmt = Block->getTerminatorStmt()) + StmtToBlock.insert({TerminatorStmt, Block}); + } + return StmtToBlock; +} + +static llvm::BitVector findReachableBlocks(const CFG &Cfg) { + llvm::BitVector BlockReachable(Cfg.getNumBlockIDs(), false); + + llvm::SmallVector<const CFGBlock *> BlocksToVisit; + BlocksToVisit.push_back(&Cfg.getEntry()); + while (!BlocksToVisit.empty()) { + const CFGBlock *Block = BlocksToVisit.back(); + BlocksToVisit.pop_back(); + + if (BlockReachable[Block->getBlockID()]) + continue; + + BlockReachable[Block->getBlockID()] = true; + + for (const CFGBlock *Succ : Block->succs()) + if (Succ) + BlocksToVisit.push_back(Succ); + } + + return BlockReachable; +} + +static llvm::DenseSet<const CFGBlock *> +buildContainsExprConsumedInDifferentBlock( + const CFG &Cfg, + const llvm::DenseMap<const Stmt *, const CFGBlock *> &StmtToBlock) { + llvm::DenseSet<const CFGBlock *> Result; + + auto CheckChildExprs = [&Result, &StmtToBlock](const Stmt *S, + const CFGBlock *Block) { + for (const Stmt *Child : S->children()) { + if (!isa_and_nonnull<Expr>(Child)) + continue; + const CFGBlock *ChildBlock = StmtToBlock.lookup(Child); + if (ChildBlock != Block) + Result.insert(ChildBlock); + } + }; + + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) + if (auto S = Element.getAs<CFGStmt>()) + CheckChildExprs(S->getStmt(), Block); + + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + CheckChildExprs(TerminatorCond, Block); + } + + return Result; +} + +llvm::Expected<AdornedCFG> AdornedCFG::build(const FunctionDecl &Func) { + if (!Func.doesThisDeclarationHaveABody()) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot analyze function without a body"); + + return build(Func, *Func.getBody(), Func.getASTContext()); +} + +llvm::Expected<AdornedCFG> AdornedCFG::build(const Decl &D, Stmt &S, + ASTContext &C) { + if (D.isTemplated()) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot analyze templated declarations"); + + // The shape of certain elements of the AST can vary depending on the + // language. We currently only support C++. + if (!C.getLangOpts().CPlusPlus || C.getLangOpts().ObjC) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Can only analyze C++"); + + CFG::BuildOptions Options; + Options.PruneTriviallyFalseEdges = true; + Options.AddImplicitDtors = true; + Options.AddTemporaryDtors = true; + Options.AddInitializers = true; + Options.AddCXXDefaultInitExprInCtors = true; + Options.AddLifetime = true; + + // Ensure that all sub-expressions in basic blocks are evaluated. + Options.setAllAlwaysAdd(); + + auto Cfg = CFG::buildCFG(&D, &S, &C, Options); + if (Cfg == nullptr) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "CFG::buildCFG failed"); + + llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock = + buildStmtToBasicBlockMap(*Cfg); + + llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + + llvm::DenseSet<const CFGBlock *> ContainsExprConsumedInDifferentBlock = + buildContainsExprConsumedInDifferentBlock(*Cfg, StmtToBlock); + + return AdornedCFG(D, std::move(Cfg), std::move(StmtToBlock), + std::move(BlockReachable), + std::move(ContainsExprConsumedInDifferentBlock)); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp new file mode 100644 index 000000000000..81137e8088e3 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp @@ -0,0 +1,213 @@ +//===-- Arena.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Arena.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/Support/Error.h" +#include <string> + +namespace clang::dataflow { + +static std::pair<const Formula *, const Formula *> +canonicalFormulaPair(const Formula &LHS, const Formula &RHS) { + auto Res = std::make_pair(&LHS, &RHS); + if (&RHS < &LHS) // FIXME: use a deterministic order instead + std::swap(Res.first, Res.second); + return Res; +} + +template <class Key, class ComputeFunc> +const Formula &cached(llvm::DenseMap<Key, const Formula *> &Cache, Key K, + ComputeFunc &&Compute) { + auto [It, Inserted] = Cache.try_emplace(std::forward<Key>(K)); + if (Inserted) + It->second = Compute(); + return *It->second; +} + +const Formula &Arena::makeAtomRef(Atom A) { + return cached(AtomRefs, A, [&] { + return &Formula::create(Alloc, Formula::AtomRef, {}, + static_cast<unsigned>(A)); + }); +} + +const Formula &Arena::makeAnd(const Formula &LHS, const Formula &RHS) { + return cached(Ands, canonicalFormulaPair(LHS, RHS), [&] { + if (&LHS == &RHS) + return &LHS; + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &RHS : &LHS; + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &LHS : &RHS; + + return &Formula::create(Alloc, Formula::And, {&LHS, &RHS}); + }); +} + +const Formula &Arena::makeOr(const Formula &LHS, const Formula &RHS) { + return cached(Ors, canonicalFormulaPair(LHS, RHS), [&] { + if (&LHS == &RHS) + return &LHS; + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &LHS : &RHS; + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &RHS : &LHS; + + return &Formula::create(Alloc, Formula::Or, {&LHS, &RHS}); + }); +} + +const Formula &Arena::makeNot(const Formula &Val) { + return cached(Nots, &Val, [&] { + if (Val.kind() == Formula::Not) + return Val.operands()[0]; + if (Val.kind() == Formula::Literal) + return &makeLiteral(!Val.literal()); + + return &Formula::create(Alloc, Formula::Not, {&Val}); + }); +} + +const Formula &Arena::makeImplies(const Formula &LHS, const Formula &RHS) { + return cached(Implies, std::make_pair(&LHS, &RHS), [&] { + if (&LHS == &RHS) + return &makeLiteral(true); + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &RHS : &makeLiteral(true); + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &RHS : &makeNot(LHS); + + return &Formula::create(Alloc, Formula::Implies, {&LHS, &RHS}); + }); +} + +const Formula &Arena::makeEquals(const Formula &LHS, const Formula &RHS) { + return cached(Equals, canonicalFormulaPair(LHS, RHS), [&] { + if (&LHS == &RHS) + return &makeLiteral(true); + if (LHS.kind() == Formula::Literal) + return LHS.literal() ? &RHS : &makeNot(RHS); + if (RHS.kind() == Formula::Literal) + return RHS.literal() ? &LHS : &makeNot(LHS); + + return &Formula::create(Alloc, Formula::Equal, {&LHS, &RHS}); + }); +} + +IntegerValue &Arena::makeIntLiteral(llvm::APInt Value) { + auto [It, Inserted] = IntegerLiterals.try_emplace(Value, nullptr); + + if (Inserted) + It->second = &create<IntegerValue>(); + return *It->second; +} + +BoolValue &Arena::makeBoolValue(const Formula &F) { + auto [It, Inserted] = FormulaValues.try_emplace(&F); + if (Inserted) + It->second = (F.kind() == Formula::AtomRef) + ? (BoolValue *)&create<AtomicBoolValue>(F) + : &create<FormulaBoolValue>(F); + return *It->second; +} + +namespace { +const Formula *parse(Arena &A, llvm::StringRef &In) { + auto EatSpaces = [&] { In = In.ltrim(' '); }; + EatSpaces(); + + if (In.consume_front("!")) { + if (auto *Arg = parse(A, In)) + return &A.makeNot(*Arg); + return nullptr; + } + + if (In.consume_front("(")) { + auto *Arg1 = parse(A, In); + if (!Arg1) + return nullptr; + + EatSpaces(); + decltype(&Arena::makeOr) Op; + if (In.consume_front("|")) + Op = &Arena::makeOr; + else if (In.consume_front("&")) + Op = &Arena::makeAnd; + else if (In.consume_front("=>")) + Op = &Arena::makeImplies; + else if (In.consume_front("=")) + Op = &Arena::makeEquals; + else + return nullptr; + + auto *Arg2 = parse(A, In); + if (!Arg2) + return nullptr; + + EatSpaces(); + if (!In.consume_front(")")) + return nullptr; + + return &(A.*Op)(*Arg1, *Arg2); + } + + // For now, only support unnamed variables V0, V1 etc. + // FIXME: parse e.g. "X" by allocating an atom and storing a name somewhere. + if (In.consume_front("V")) { + std::underlying_type_t<Atom> At; + if (In.consumeInteger(10, At)) + return nullptr; + return &A.makeAtomRef(static_cast<Atom>(At)); + } + + if (In.consume_front("true")) + return &A.makeLiteral(true); + if (In.consume_front("false")) + return &A.makeLiteral(false); + + return nullptr; +} + +class FormulaParseError : public llvm::ErrorInfo<FormulaParseError> { + std::string Formula; + unsigned Offset; + +public: + static char ID; + FormulaParseError(llvm::StringRef Formula, unsigned Offset) + : Formula(Formula), Offset(Offset) {} + + void log(raw_ostream &OS) const override { + OS << "bad formula at offset " << Offset << "\n"; + OS << Formula << "\n"; + OS.indent(Offset) << "^"; + } + + std::error_code convertToErrorCode() const override { + return std::make_error_code(std::errc::invalid_argument); + } +}; + +char FormulaParseError::ID = 0; + +} // namespace + +llvm::Expected<const Formula &> Arena::parseFormula(llvm::StringRef In) { + llvm::StringRef Rest = In; + auto *Result = parse(*this, Rest); + if (!Result) // parse() hit something unparseable + return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size()); + Rest = Rest.ltrim(); + if (!Rest.empty()) // parse didn't consume all the input + return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size()); + return *Result; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp new file mode 100644 index 000000000000..2410ce1e7bd6 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp @@ -0,0 +1,303 @@ +//===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A representation of a boolean formula in 3-CNF. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/CNFFormula.h" +#include "llvm/ADT/DenseSet.h" + +#include <queue> + +namespace clang { +namespace dataflow { + +namespace { + +/// Applies simplifications while building up a BooleanFormula. +/// We keep track of unit clauses, which tell us variables that must be +/// true/false in any model that satisfies the overall formula. +/// Such variables can be dropped from subsequently-added clauses, which +/// may in turn yield more unit clauses or even a contradiction. +/// The total added complexity of this preprocessing is O(N) where we +/// for every clause, we do a lookup for each unit clauses. +/// The lookup is O(1) on average. This method won't catch all +/// contradictory formulas, more passes can in principle catch +/// more cases but we leave all these and the general case to the +/// proper SAT solver. +struct CNFFormulaBuilder { + // Formula should outlive CNFFormulaBuilder. + explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {} + + /// Adds the `L1 v ... v Ln` clause to the formula. Applies + /// simplifications, based on single-literal clauses. + /// + /// Requirements: + /// + /// `Li` must not be `NullLit`. + /// + /// All literals must be distinct. + void addClause(ArrayRef<Literal> Literals) { + // We generate clauses with up to 3 literals in this file. + assert(!Literals.empty() && Literals.size() <= 3); + // Contains literals of the simplified clause. + llvm::SmallVector<Literal> Simplified; + for (auto L : Literals) { + assert(L != NullLit && + llvm::all_of(Simplified, [L](Literal S) { return S != L; })); + auto X = var(L); + if (trueVars.contains(X)) { // X must be true + if (isPosLit(L)) + return; // Omit clause `(... v X v ...)`, it is `true`. + else + continue; // Omit `!X` from `(... v !X v ...)`. + } + if (falseVars.contains(X)) { // X must be false + if (isNegLit(L)) + return; // Omit clause `(... v !X v ...)`, it is `true`. + else + continue; // Omit `X` from `(... v X v ...)`. + } + Simplified.push_back(L); + } + if (Simplified.empty()) { + // Simplification made the clause empty, which is equivalent to `false`. + // We already know that this formula is unsatisfiable. + Formula.addClause(Simplified); + return; + } + if (Simplified.size() == 1) { + // We have new unit clause. + const Literal lit = Simplified.front(); + const Variable v = var(lit); + if (isPosLit(lit)) + trueVars.insert(v); + else + falseVars.insert(v); + } + Formula.addClause(Simplified); + } + + /// Returns true if we observed a contradiction while adding clauses. + /// In this case then the formula is already known to be unsatisfiable. + bool isKnownContradictory() { return Formula.knownContradictory(); } + +private: + CNFFormula &Formula; + llvm::DenseSet<Variable> trueVars; + llvm::DenseSet<Variable> falseVars; +}; + +} // namespace + +CNFFormula::CNFFormula(Variable LargestVar) + : LargestVar(LargestVar), KnownContradictory(false) { + Clauses.push_back(0); + ClauseStarts.push_back(0); +} + +void CNFFormula::addClause(ArrayRef<Literal> lits) { + assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); + + if (lits.empty()) + KnownContradictory = true; + + const size_t S = Clauses.size(); + ClauseStarts.push_back(S); + Clauses.insert(Clauses.end(), lits.begin(), lits.end()); +} + +CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, + llvm::DenseMap<Variable, Atom> &Atomics) { + // The general strategy of the algorithm implemented below is to map each + // of the sub-values in `Vals` to a unique variable and use these variables in + // the resulting CNF expression to avoid exponential blow up. The number of + // literals in the resulting formula is guaranteed to be linear in the number + // of sub-formulas in `Vals`. + + // Map each sub-formula in `Vals` to a unique variable. + llvm::DenseMap<const Formula *, Variable> FormulaToVar; + // Store variable identifiers and Atom of atomic booleans. + Variable NextVar = 1; + { + std::queue<const Formula *> UnprocessedFormulas; + for (const Formula *F : Formulas) + UnprocessedFormulas.push(F); + while (!UnprocessedFormulas.empty()) { + Variable Var = NextVar; + const Formula *F = UnprocessedFormulas.front(); + UnprocessedFormulas.pop(); + + if (!FormulaToVar.try_emplace(F, Var).second) + continue; + ++NextVar; + + for (const Formula *Op : F->operands()) + UnprocessedFormulas.push(Op); + if (F->kind() == Formula::AtomRef) + Atomics[Var] = F->getAtom(); + } + } + + auto GetVar = [&FormulaToVar](const Formula *F) { + auto ValIt = FormulaToVar.find(F); + assert(ValIt != FormulaToVar.end()); + return ValIt->second; + }; + + CNFFormula CNF(NextVar - 1); + std::vector<bool> ProcessedSubVals(NextVar, false); + CNFFormulaBuilder builder(CNF); + + // Add a conjunct for each variable that represents a top-level conjunction + // value in `Vals`. + for (const Formula *F : Formulas) + builder.addClause(posLit(GetVar(F))); + + // Add conjuncts that represent the mapping between newly-created variables + // and their corresponding sub-formulas. + std::queue<const Formula *> UnprocessedFormulas; + for (const Formula *F : Formulas) + UnprocessedFormulas.push(F); + while (!UnprocessedFormulas.empty()) { + const Formula *F = UnprocessedFormulas.front(); + UnprocessedFormulas.pop(); + const Variable Var = GetVar(F); + + if (ProcessedSubVals[Var]) + continue; + ProcessedSubVals[Var] = true; + + switch (F->kind()) { + case Formula::AtomRef: + break; + case Formula::Literal: + CNF.addClause(F->literal() ? posLit(Var) : negLit(Var)); + break; + case Formula::And: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + } else { + // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({negLit(Var), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); + } + break; + } + case Formula::Or: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + } else { + // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS)}); + builder.addClause({posLit(Var), negLit(RHS)}); + } + break; + } + case Formula::Not: { + const Variable Operand = GetVar(F->operands()[0]); + + // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({negLit(Var), negLit(Operand)}); + builder.addClause({posLit(Var), posLit(Operand)}); + break; + } + case Formula::Implies: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + // `X <=> (A => B)` is equivalent to + // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in + // conjunctive normal form. Below we add each of the conjuncts of + // the latter expression to the result. + builder.addClause({posLit(Var), posLit(LHS)}); + builder.addClause({posLit(Var), negLit(RHS)}); + builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); + break; + } + case Formula::Equal: { + const Variable LHS = GetVar(F->operands()[0]); + const Variable RHS = GetVar(F->operands()[1]); + + if (LHS == RHS) { + // `X <=> (A <=> A)` is equivalent to `X` which is already in + // conjunctive normal form. Below we add each of the conjuncts of the + // latter expression to the result. + builder.addClause(posLit(Var)); + + // No need to visit the sub-values of `Val`. + continue; + } + // `X <=> (A <=> B)` is equivalent to + // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which + // is already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)}); + builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); + builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)}); + builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); + break; + } + } + if (builder.isKnownContradictory()) { + return CNF; + } + for (const Formula *Child : F->operands()) + UnprocessedFormulas.push(Child); + } + + // Unit clauses that were added later were not + // considered for the simplification of earlier clauses. Do a final + // pass to find more opportunities for simplification. + CNFFormula FinalCNF(NextVar - 1); + CNFFormulaBuilder FinalBuilder(FinalCNF); + + // Collect unit clauses. + for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { + if (CNF.clauseSize(C) == 1) { + FinalBuilder.addClause(CNF.clauseLiterals(C)[0]); + } + } + + // Add all clauses that were added previously, preserving the order. + for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { + FinalBuilder.addClause(CNF.clauseLiterals(C)); + if (FinalBuilder.isKnownContradictory()) { + break; + } + } + // It is possible there were new unit clauses again, but + // we stop here and leave the rest to the solver algorithm. + return FinalCNF; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp new file mode 100644 index 000000000000..4b86daa56d7b --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -0,0 +1,362 @@ +//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a DataflowAnalysisContext class that owns objects that +// encompass the state of a program and stores context that is used during +// dataflow analysis. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/AST/ExprCXX.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +static llvm::cl::opt<std::string> DataflowLog( + "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " + "log to stderr. With an arg, writes HTML logs under the " + "specified directory (one per analyzed function).")); + +namespace clang { +namespace dataflow { + +FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { + // During context-sensitive analysis, a struct may be allocated in one + // function, but its field accessed in a function lower in the stack than + // the allocation. Since we only collect fields used in the function where + // the allocation occurs, we can't apply that filter when performing + // context-sensitive analysis. But, this only applies to storage locations, + // since field access it not allowed to fail. In contrast, field *values* + // don't need this allowance, since the API allows for uninitialized fields. + if (Opts.ContextSensitiveOpts) + return getObjectFields(Type); + + return llvm::set_intersection(getObjectFields(Type), ModeledFields); +} + +void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { + ModeledFields.set_union(Fields); +} + +StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { + if (!Type.isNull() && Type->isRecordType()) { + llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; + for (const FieldDecl *Field : getModeledFields(Type)) + if (Field->getType()->isReferenceType()) + FieldLocs.insert({Field, nullptr}); + else + FieldLocs.insert({Field, &createStorageLocation( + Field->getType().getNonReferenceType())}); + + RecordStorageLocation::SyntheticFieldMap SyntheticFields; + for (const auto &Entry : getSyntheticFields(Type)) + SyntheticFields.insert( + {Entry.getKey(), + &createStorageLocation(Entry.getValue().getNonReferenceType())}); + + return createRecordStorageLocation(Type, std::move(FieldLocs), + std::move(SyntheticFields)); + } + return arena().create<ScalarStorageLocation>(Type); +} + +// Returns the keys for a given `StringMap`. +// Can't use `StringSet` as the return type as it doesn't support `operator==`. +template <typename T> +static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { + return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end()); +} + +RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( + QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, + RecordStorageLocation::SyntheticFieldMap SyntheticFields) { + assert(Type->isRecordType()); + assert(containsSameFields(getModeledFields(Type), FieldLocs)); + assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); + + RecordStorageLocationCreated = true; + return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs), + std::move(SyntheticFields)); +} + +StorageLocation & +DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { + if (auto *Loc = DeclToLoc.lookup(&D)) + return *Loc; + auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); + DeclToLoc[&D] = &Loc; + return Loc; +} + +StorageLocation & +DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { + const Expr &CanonE = ignoreCFGOmittedNodes(E); + + if (auto *Loc = ExprToLoc.lookup(&CanonE)) + return *Loc; + auto &Loc = createStorageLocation(CanonE.getType()); + ExprToLoc[&CanonE] = &Loc; + return Loc; +} + +PointerValue & +DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { + auto CanonicalPointeeType = + PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); + auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); + if (Res.second) { + auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); + Res.first->second = &arena().create<PointerValue>(PointeeLoc); + } + return *Res.first->second; +} + +void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { + if (Invariant == nullptr) + Invariant = &Constraint; + else + Invariant = &arena().makeAnd(*Invariant, Constraint); +} + +void DataflowAnalysisContext::addFlowConditionConstraint( + Atom Token, const Formula &Constraint) { + auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); + if (!Res.second) { + Res.first->second = + &arena().makeAnd(*Res.first->second, Constraint); + } +} + +Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { + Atom ForkToken = arena().makeFlowConditionToken(); + FlowConditionDeps[ForkToken].insert(Token); + addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); + return ForkToken; +} + +Atom +DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, + Atom SecondToken) { + Atom Token = arena().makeFlowConditionToken(); + FlowConditionDeps[Token].insert(FirstToken); + FlowConditionDeps[Token].insert(SecondToken); + addFlowConditionConstraint(Token, + arena().makeOr(arena().makeAtomRef(FirstToken), + arena().makeAtomRef(SecondToken))); + return Token; +} + +Solver::Result DataflowAnalysisContext::querySolver( + llvm::SetVector<const Formula *> Constraints) { + return S.solve(Constraints.getArrayRef()); +} + +bool DataflowAnalysisContext::flowConditionImplies(Atom Token, + const Formula &F) { + if (F.isLiteral(true)) + return true; + + // Returns true if and only if truth assignment of the flow condition implies + // that `F` is also true. We prove whether or not this property holds by + // reducing the problem to satisfiability checking. In other words, we attempt + // to show that assuming `F` is false makes the constraints induced by the + // flow condition unsatisfiable. + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + Constraints.insert(&arena().makeNot(F)); + addTransitiveFlowConditionConstraints(Token, Constraints); + return isUnsatisfiable(std::move(Constraints)); +} + +bool DataflowAnalysisContext::flowConditionAllows(Atom Token, + const Formula &F) { + if (F.isLiteral(false)) + return false; + + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + Constraints.insert(&F); + addTransitiveFlowConditionConstraints(Token, Constraints); + return isSatisfiable(std::move(Constraints)); +} + +bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, + const Formula &Val2) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); + return isUnsatisfiable(std::move(Constraints)); +} + +void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( + Atom Token, llvm::SetVector<const Formula *> &Constraints) { + llvm::DenseSet<Atom> AddedTokens; + std::vector<Atom> Remaining = {Token}; + + if (Invariant) + Constraints.insert(Invariant); + // Define all the flow conditions that might be referenced in constraints. + while (!Remaining.empty()) { + auto Token = Remaining.back(); + Remaining.pop_back(); + if (!AddedTokens.insert(Token).second) + continue; + + auto ConstraintsIt = FlowConditionConstraints.find(Token); + if (ConstraintsIt == FlowConditionConstraints.end()) { + Constraints.insert(&arena().makeAtomRef(Token)); + } else { + // Bind flow condition token via `iff` to its set of constraints: + // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints + Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), + *ConstraintsIt->second)); + } + + if (auto DepsIt = FlowConditionDeps.find(Token); + DepsIt != FlowConditionDeps.end()) + for (Atom A : DepsIt->second) + Remaining.push_back(A); + } +} + +static void printAtomList(const llvm::SmallVector<Atom> &Atoms, + llvm::raw_ostream &OS) { + OS << "("; + for (size_t i = 0; i < Atoms.size(); ++i) { + OS << Atoms[i]; + if (i + 1 < Atoms.size()) + OS << ", "; + } + OS << ")\n"; +} + +void DataflowAnalysisContext::dumpFlowCondition(Atom Token, + llvm::raw_ostream &OS) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + addTransitiveFlowConditionConstraints(Token, Constraints); + + OS << "Flow condition token: " << Token << "\n"; + SimplifyConstraintsInfo Info; + llvm::SetVector<const Formula *> OriginalConstraints = Constraints; + simplifyConstraints(Constraints, arena(), &Info); + if (!Constraints.empty()) { + OS << "Constraints:\n"; + for (const auto *Constraint : Constraints) { + Constraint->print(OS); + OS << "\n"; + } + } + if (!Info.TrueAtoms.empty()) { + OS << "True atoms: "; + printAtomList(Info.TrueAtoms, OS); + } + if (!Info.FalseAtoms.empty()) { + OS << "False atoms: "; + printAtomList(Info.FalseAtoms, OS); + } + if (!Info.EquivalentAtoms.empty()) { + OS << "Equivalent atoms:\n"; + for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) + printAtomList(Class, OS); + } + + OS << "\nFlow condition constraints before simplification:\n"; + for (const auto *Constraint : OriginalConstraints) { + Constraint->print(OS); + OS << "\n"; + } +} + +const AdornedCFG * +DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) { + // Canonicalize the key: + F = F->getDefinition(); + if (F == nullptr) + return nullptr; + auto It = FunctionContexts.find(F); + if (It != FunctionContexts.end()) + return &It->second; + + if (F->doesThisDeclarationHaveABody()) { + auto ACFG = AdornedCFG::build(*F); + // FIXME: Handle errors. + assert(ACFG); + auto Result = FunctionContexts.insert({F, std::move(*ACFG)}); + return &Result.first->second; + } + + return nullptr; +} + +static std::unique_ptr<Logger> makeLoggerFromCommandLine() { + if (DataflowLog.empty()) + return Logger::textual(llvm::errs()); + + llvm::StringRef Dir = DataflowLog; + if (auto EC = llvm::sys::fs::create_directories(Dir)) + llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; + // All analysis runs within a process will log to the same directory. + // Share a counter so they don't all overwrite each other's 0.html. + // (Don't share a logger, it's not threadsafe). + static std::atomic<unsigned> Counter = {0}; + auto StreamFactory = + [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { + llvm::SmallString<256> File(Dir); + llvm::sys::path::append(File, + std::to_string(Counter.fetch_add(1)) + ".html"); + std::error_code EC; + auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); + if (EC) { + llvm::errs() << "Failed to create log " << File << ": " << EC.message() + << "\n"; + return std::make_unique<llvm::raw_null_ostream>(); + } + return OS; + }; + return Logger::html(std::move(StreamFactory)); +} + +DataflowAnalysisContext::DataflowAnalysisContext( + Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts) + : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()), + Opts(Opts) { + // If the -dataflow-log command-line flag was set, synthesize a logger. + // This is ugly but provides a uniform method for ad-hoc debugging dataflow- + // based tools. + if (Opts.Log == nullptr) { + if (DataflowLog.getNumOccurrences() > 0) { + LogOwner = makeLoggerFromCommandLine(); + this->Opts.Log = LogOwner.get(); + // FIXME: if the flag is given a value, write an HTML log to a file. + } else { + this->Opts.Log = &Logger::null(); + } + } +} + +DataflowAnalysisContext::~DataflowAnalysisContext() = default; + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp new file mode 100644 index 000000000000..8d7fe1848821 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -0,0 +1,1257 @@ +//===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an Environment class that is used by dataflow analyses +// that run over Control-Flow Graphs (CFGs) to keep track of the state of the +// program at given program points. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <memory> +#include <utility> + +#define DEBUG_TYPE "dataflow" + +namespace clang { +namespace dataflow { + +// FIXME: convert these to parameters of the analysis or environment. Current +// settings have been experimentaly validated, but only for a particular +// analysis. +static constexpr int MaxCompositeValueDepth = 3; +static constexpr int MaxCompositeValueSize = 1000; + +/// Returns a map consisting of key-value entries that are present in both maps. +static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc( + const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1, + const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) { + llvm::DenseMap<const ValueDecl *, StorageLocation *> Result; + for (auto &Entry : DeclToLoc1) { + auto It = DeclToLoc2.find(Entry.first); + if (It != DeclToLoc2.end() && Entry.second == It->second) + Result.insert({Entry.first, Entry.second}); + } + return Result; +} + +// Performs a join on either `ExprToLoc` or `ExprToVal`. +// The maps must be consistent in the sense that any entries for the same +// expression must map to the same location / value. This is the case if we are +// performing a join for control flow within a full-expression (which is the +// only case when this function should be used). +template <typename MapT> MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { + MapT Result = Map1; + + for (const auto &Entry : Map2) { + [[maybe_unused]] auto [It, Inserted] = Result.insert(Entry); + // If there was an existing entry, its value should be the same as for the + // entry we were trying to insert. + assert(It->second == Entry.second); + } + + return Result; +} + +// Whether to consider equivalent two values with an unknown relation. +// +// FIXME: this function is a hack enabling unsoundness to support +// convergence. Once we have widening support for the reference/pointer and +// struct built-in models, this should be unconditionally `false` (and inlined +// as such at its call sites). +static bool equateUnknownValues(Value::Kind K) { + switch (K) { + case Value::Kind::Integer: + case Value::Kind::Pointer: + return true; + default: + return false; + } +} + +static bool compareDistinctValues(QualType Type, Value &Val1, + const Environment &Env1, Value &Val2, + const Environment &Env2, + Environment::ValueModel &Model) { + // Note: Potentially costly, but, for booleans, we could check whether both + // can be proven equivalent in their respective environments. + + // FIXME: move the reference/pointers logic from `areEquivalentValues` to here + // and implement separate, join/widen specific handling for + // reference/pointers. + switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { + case ComparisonResult::Same: + return true; + case ComparisonResult::Different: + return false; + case ComparisonResult::Unknown: + return equateUnknownValues(Val1.getKind()); + } + llvm_unreachable("All cases covered in switch"); +} + +/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`, +/// respectively, of the same type `Type`. Joining generally produces a single +/// value that (soundly) approximates the two inputs, although the actual +/// meaning depends on `Model`. +static Value *joinDistinctValues(QualType Type, Value &Val1, + const Environment &Env1, Value &Val2, + const Environment &Env2, + Environment &JoinedEnv, + Environment::ValueModel &Model) { + // Join distinct boolean values preserving information about the constraints + // in the respective path conditions. + if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { + // FIXME: Checking both values should be unnecessary, since they should have + // a consistent shape. However, right now we can end up with BoolValue's in + // integer-typed variables due to our incorrect handling of + // boolean-to-integer casts (we just propagate the BoolValue to the result + // of the cast). So, a join can encounter an integer in one branch but a + // bool in the other. + // For example: + // ``` + // std::optional<bool> o; + // int x; + // if (o.has_value()) + // x = o.value(); + // ``` + auto &Expr1 = cast<BoolValue>(Val1).formula(); + auto &Expr2 = cast<BoolValue>(Val2).formula(); + auto &A = JoinedEnv.arena(); + auto &JoinedVal = A.makeAtomRef(A.makeAtom()); + JoinedEnv.assume( + A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), + A.makeEquals(JoinedVal, Expr1)), + A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), + A.makeEquals(JoinedVal, Expr2)))); + return &A.makeBoolValue(JoinedVal); + } + + Value *JoinedVal = JoinedEnv.createValue(Type); + if (JoinedVal) + Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv); + + return JoinedVal; +} + +static WidenResult widenDistinctValues(QualType Type, Value &Prev, + const Environment &PrevEnv, + Value &Current, Environment &CurrentEnv, + Environment::ValueModel &Model) { + // Boolean-model widening. + if (isa<BoolValue>(Prev) && isa<BoolValue>(Current)) { + // FIXME: Checking both values should be unnecessary, but we can currently + // end up with `BoolValue`s in integer-typed variables. See comment in + // `joinDistinctValues()` for details. + auto &PrevBool = cast<BoolValue>(Prev); + auto &CurBool = cast<BoolValue>(Current); + + if (isa<TopBoolValue>(Prev)) + // Safe to return `Prev` here, because Top is never dependent on the + // environment. + return {&Prev, LatticeEffect::Unchanged}; + + // We may need to widen to Top, but before we do so, check whether both + // values are implied to be either true or false in the current environment. + // In that case, we can simply return a literal instead. + bool TruePrev = PrevEnv.proves(PrevBool.formula()); + bool TrueCur = CurrentEnv.proves(CurBool.formula()); + if (TruePrev && TrueCur) + return {&CurrentEnv.getBoolLiteralValue(true), LatticeEffect::Unchanged}; + if (!TruePrev && !TrueCur && + PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool.formula())) && + CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) + return {&CurrentEnv.getBoolLiteralValue(false), LatticeEffect::Unchanged}; + + return {&CurrentEnv.makeTopBoolValue(), LatticeEffect::Changed}; + } + + // FIXME: Add other built-in model widening. + + // Custom-model widening. + if (auto Result = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) + return *Result; + + return {&Current, equateUnknownValues(Prev.getKind()) + ? LatticeEffect::Unchanged + : LatticeEffect::Changed}; +} + +// Returns whether the values in `Map1` and `Map2` compare equal for those +// keys that `Map1` and `Map2` have in common. +template <typename Key> +bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, + const llvm::MapVector<Key, Value *> &Map2, + const Environment &Env1, const Environment &Env2, + Environment::ValueModel &Model) { + for (auto &Entry : Map1) { + Key K = Entry.first; + assert(K != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto It = Map2.find(K); + if (It == Map2.end()) + continue; + assert(It->second != nullptr); + + if (!areEquivalentValues(*Val, *It->second) && + !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, + Model)) + return false; + } + + return true; +} + +// Perform a join on two `LocToVal` maps. +static llvm::MapVector<const StorageLocation *, Value *> +joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal, + const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2, + const Environment &Env1, const Environment &Env2, + Environment &JoinedEnv, Environment::ValueModel &Model) { + llvm::MapVector<const StorageLocation *, Value *> Result; + for (auto &Entry : LocToVal) { + const StorageLocation *Loc = Entry.first; + assert(Loc != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto It = LocToVal2.find(Loc); + if (It == LocToVal2.end()) + continue; + assert(It->second != nullptr); + + if (Value *JoinedVal = Environment::joinValues( + Loc->getType(), Val, Env1, It->second, Env2, JoinedEnv, Model)) { + Result.insert({Loc, JoinedVal}); + } + } + + return Result; +} + +// Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either +// `const StorageLocation *` or `const Expr *`. +template <typename Key> +llvm::MapVector<Key, Value *> +widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, + const llvm::MapVector<Key, Value *> &PrevMap, + Environment &CurEnv, const Environment &PrevEnv, + Environment::ValueModel &Model, LatticeEffect &Effect) { + llvm::MapVector<Key, Value *> WidenedMap; + for (auto &Entry : CurMap) { + Key K = Entry.first; + assert(K != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto PrevIt = PrevMap.find(K); + if (PrevIt == PrevMap.end()) + continue; + assert(PrevIt->second != nullptr); + + if (areEquivalentValues(*Val, *PrevIt->second)) { + WidenedMap.insert({K, Val}); + continue; + } + + auto [WidenedVal, ValEffect] = widenDistinctValues( + K->getType(), *PrevIt->second, PrevEnv, *Val, CurEnv, Model); + WidenedMap.insert({K, WidenedVal}); + if (ValEffect == LatticeEffect::Changed) + Effect = LatticeEffect::Changed; + } + + return WidenedMap; +} + +namespace { + +// Visitor that builds a map from record prvalues to result objects. +// For each result object that it encounters, it propagates the storage location +// of the result object to all record prvalues that can initialize it. +class ResultObjectVisitor : public AnalysisASTVisitor<ResultObjectVisitor> { +public: + // `ResultObjectMap` will be filled with a map from record prvalues to result + // object. If this visitor will traverse a function that returns a record by + // value, `LocForRecordReturnVal` is the location to which this record should + // be written; otherwise, it is null. + explicit ResultObjectVisitor( + llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap, + RecordStorageLocation *LocForRecordReturnVal, + DataflowAnalysisContext &DACtx) + : ResultObjectMap(ResultObjectMap), + LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {} + + // Traverse all member and base initializers of `Ctor`. This function is not + // called by `RecursiveASTVisitor`; it should be called manually if we are + // analyzing a constructor. `ThisPointeeLoc` is the storage location that + // `this` points to. + void TraverseConstructorInits(const CXXConstructorDecl *Ctor, + RecordStorageLocation *ThisPointeeLoc) { + assert(ThisPointeeLoc != nullptr); + for (const CXXCtorInitializer *Init : Ctor->inits()) { + Expr *InitExpr = Init->getInit(); + if (FieldDecl *Field = Init->getMember(); + Field != nullptr && Field->getType()->isRecordType()) { + PropagateResultObject(InitExpr, cast<RecordStorageLocation>( + ThisPointeeLoc->getChild(*Field))); + } else if (Init->getBaseClass()) { + PropagateResultObject(InitExpr, ThisPointeeLoc); + } + + // Ensure that any result objects within `InitExpr` (e.g. temporaries) + // are also propagated to the prvalues that initialize them. + TraverseStmt(InitExpr); + + // If this is a `CXXDefaultInitExpr`, also propagate any result objects + // within the default expression. + if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr)) + TraverseStmt(DefaultInit->getExpr()); + } + } + + bool VisitVarDecl(VarDecl *VD) { + if (VD->getType()->isRecordType() && VD->hasInit()) + PropagateResultObject( + VD->getInit(), + &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*VD))); + return true; + } + + bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) { + if (MTE->getType()->isRecordType()) + PropagateResultObject( + MTE->getSubExpr(), + &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*MTE))); + return true; + } + + bool VisitReturnStmt(ReturnStmt *Return) { + Expr *RetValue = Return->getRetValue(); + if (RetValue != nullptr && RetValue->getType()->isRecordType() && + RetValue->isPRValue()) + PropagateResultObject(RetValue, LocForRecordReturnVal); + return true; + } + + bool VisitExpr(Expr *E) { + // Clang's AST can have record-type prvalues without a result object -- for + // example as full-expressions contained in a compound statement or as + // arguments of call expressions. We notice this if we get here and a + // storage location has not yet been associated with `E`. In this case, + // treat this as if it was a `MaterializeTemporaryExpr`. + if (E->isPRValue() && E->getType()->isRecordType() && + !ResultObjectMap.contains(E)) + PropagateResultObject( + E, &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*E))); + return true; + } + + void + PropagateResultObjectToRecordInitList(const RecordInitListHelper &InitList, + RecordStorageLocation *Loc) { + for (auto [Base, Init] : InitList.base_inits()) { + assert(Base->getType().getCanonicalType() == + Init->getType().getCanonicalType()); + + // Storage location for the base class is the same as that of the + // derived class because we "flatten" the object hierarchy and put all + // fields in `RecordStorageLocation` of the derived class. + PropagateResultObject(Init, Loc); + } + + for (auto [Field, Init] : InitList.field_inits()) { + // Fields of non-record type are handled in + // `TransferVisitor::VisitInitListExpr()`. + if (Field->getType()->isRecordType()) + PropagateResultObject( + Init, cast<RecordStorageLocation>(Loc->getChild(*Field))); + } + } + + // Assigns `Loc` as the result object location of `E`, then propagates the + // location to all lower-level prvalues that initialize the same object as + // `E` (or one of its base classes or member variables). + void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) { + if (!E->isPRValue() || !E->getType()->isRecordType()) { + assert(false); + // Ensure we don't propagate the result object if we hit this in a + // release build. + return; + } + + ResultObjectMap[E] = Loc; + + // The following AST node kinds are "original initializers": They are the + // lowest-level AST node that initializes a given object, and nothing + // below them can initialize the same object (or part of it). + if (isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || isa<LambdaExpr>(E) || + isa<CXXDefaultArgExpr>(E) || isa<CXXStdInitializerListExpr>(E) || + isa<AtomicExpr>(E) || + // We treat `BuiltinBitCastExpr` as an "original initializer" too as + // it may not even be casting from a record type -- and even if it is, + // the two objects are in general of unrelated type. + isa<BuiltinBitCastExpr>(E)) { + return; + } + if (auto *Op = dyn_cast<BinaryOperator>(E); + Op && Op->getOpcode() == BO_Cmp) { + // Builtin `<=>` returns a `std::strong_ordering` object. + return; + } + + if (auto *InitList = dyn_cast<InitListExpr>(E)) { + if (!InitList->isSemanticForm()) + return; + if (InitList->isTransparent()) { + PropagateResultObject(InitList->getInit(0), Loc); + return; + } + + PropagateResultObjectToRecordInitList(RecordInitListHelper(InitList), + Loc); + return; + } + + if (auto *ParenInitList = dyn_cast<CXXParenListInitExpr>(E)) { + PropagateResultObjectToRecordInitList(RecordInitListHelper(ParenInitList), + Loc); + return; + } + + if (auto *Op = dyn_cast<BinaryOperator>(E); Op && Op->isCommaOp()) { + PropagateResultObject(Op->getRHS(), Loc); + return; + } + + if (auto *Cond = dyn_cast<AbstractConditionalOperator>(E)) { + PropagateResultObject(Cond->getTrueExpr(), Loc); + PropagateResultObject(Cond->getFalseExpr(), Loc); + return; + } + + if (auto *SE = dyn_cast<StmtExpr>(E)) { + PropagateResultObject(cast<Expr>(SE->getSubStmt()->body_back()), Loc); + return; + } + + if (auto *DIE = dyn_cast<CXXDefaultInitExpr>(E)) { + PropagateResultObject(DIE->getExpr(), Loc); + return; + } + + // All other expression nodes that propagate a record prvalue should have + // exactly one child. + SmallVector<Stmt *, 1> Children(E->child_begin(), E->child_end()); + LLVM_DEBUG({ + if (Children.size() != 1) + E->dump(); + }); + assert(Children.size() == 1); + for (Stmt *S : Children) + PropagateResultObject(cast<Expr>(S), Loc); + } + +private: + llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap; + RecordStorageLocation *LocForRecordReturnVal; + DataflowAnalysisContext &DACtx; +}; + +} // namespace + +void Environment::initialize() { + if (InitialTargetStmt == nullptr) + return; + + if (InitialTargetFunc == nullptr) { + initFieldsGlobalsAndFuncs(getReferencedDecls(*InitialTargetStmt)); + ResultObjectMap = + std::make_shared<PrValueToResultObject>(buildResultObjectMap( + DACtx, InitialTargetStmt, getThisPointeeStorageLocation(), + /*LocForRecordReturnValue=*/nullptr)); + return; + } + + initFieldsGlobalsAndFuncs(getReferencedDecls(*InitialTargetFunc)); + + for (const auto *ParamDecl : InitialTargetFunc->parameters()) { + assert(ParamDecl != nullptr); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); + } + + if (InitialTargetFunc->getReturnType()->isRecordType()) + LocForRecordReturnVal = &cast<RecordStorageLocation>( + createStorageLocation(InitialTargetFunc->getReturnType())); + + if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(InitialTargetFunc)) { + auto *Parent = MethodDecl->getParent(); + assert(Parent != nullptr); + + if (Parent->isLambda()) { + for (const auto &Capture : Parent->captures()) { + if (Capture.capturesVariable()) { + const auto *VarDecl = Capture.getCapturedVar(); + assert(VarDecl != nullptr); + setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr)); + } else if (Capture.capturesThis()) { + if (auto *Ancestor = InitialTargetFunc->getNonClosureAncestor()) { + const auto *SurroundingMethodDecl = cast<CXXMethodDecl>(Ancestor); + QualType ThisPointeeType = + SurroundingMethodDecl->getFunctionObjectParameterType(); + setThisPointeeStorageLocation( + cast<RecordStorageLocation>(createObject(ThisPointeeType))); + } else if (auto *FieldBeingInitialized = + dyn_cast<FieldDecl>(Parent->getLambdaContextDecl())) { + // This is in a field initializer, rather than a method. + setThisPointeeStorageLocation( + cast<RecordStorageLocation>(createObject(QualType( + FieldBeingInitialized->getParent()->getTypeForDecl(), 0)))); + } else { + assert(false && "Unexpected this-capturing lambda context."); + } + } + } + } else if (MethodDecl->isImplicitObjectMemberFunction()) { + QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); + auto &ThisLoc = + cast<RecordStorageLocation>(createStorageLocation(ThisPointeeType)); + setThisPointeeStorageLocation(ThisLoc); + // Initialize fields of `*this` with values, but only if we're not + // analyzing a constructor; after all, it's the constructor's job to do + // this (and we want to be able to test that). + if (!isa<CXXConstructorDecl>(MethodDecl)) + initializeFieldsWithValues(ThisLoc); + } + } + + // We do this below the handling of `CXXMethodDecl` above so that we can + // be sure that the storage location for `this` has been set. + ResultObjectMap = + std::make_shared<PrValueToResultObject>(buildResultObjectMap( + DACtx, InitialTargetFunc, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); +} + +// FIXME: Add support for resetting globals after function calls to enable the +// implementation of sound analyses. + +void Environment::initFieldsGlobalsAndFuncs(const ReferencedDecls &Referenced) { + // These have to be added before the lines that follow to ensure that + // `create*` work correctly for structs. + DACtx->addModeledFields(Referenced.Fields); + + for (const VarDecl *D : Referenced.Globals) { + if (getStorageLocation(*D) != nullptr) + continue; + + // We don't run transfer functions on the initializers of global variables, + // so they won't be associated with a value or storage location. We + // therefore intentionally don't pass an initializer to `createObject()`; in + // particular, this ensures that `createObject()` will initialize the fields + // of record-type variables with values. + setStorageLocation(*D, createObject(*D, nullptr)); + } + + for (const FunctionDecl *FD : Referenced.Functions) { + if (getStorageLocation(*FD) != nullptr) + continue; + auto &Loc = createStorageLocation(*FD); + setStorageLocation(*FD, Loc); + } +} + +Environment Environment::fork() const { + Environment Copy(*this); + Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); + return Copy; +} + +bool Environment::canDescend(unsigned MaxDepth, + const FunctionDecl *Callee) const { + return CallStack.size() < MaxDepth && !llvm::is_contained(CallStack, Callee); +} + +Environment Environment::pushCall(const CallExpr *Call) const { + Environment Env(*this); + + if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { + if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { + if (!isa<CXXThisExpr>(Arg)) + Env.ThisPointeeLoc = + cast<RecordStorageLocation>(getStorageLocation(*Arg)); + // Otherwise (when the argument is `this`), retain the current + // environment's `ThisPointeeLoc`. + } + } + + if (Call->getType()->isRecordType() && Call->isPRValue()) + Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); + + Env.pushCallInternal(Call->getDirectCallee(), + llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); + + return Env; +} + +Environment Environment::pushCall(const CXXConstructExpr *Call) const { + Environment Env(*this); + + Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); + Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); + + Env.pushCallInternal(Call->getConstructor(), + llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); + + return Env; +} + +void Environment::pushCallInternal(const FunctionDecl *FuncDecl, + ArrayRef<const Expr *> Args) { + // Canonicalize to the definition of the function. This ensures that we're + // putting arguments into the same `ParamVarDecl`s` that the callee will later + // be retrieving them from. + assert(FuncDecl->getDefinition() != nullptr); + FuncDecl = FuncDecl->getDefinition(); + + CallStack.push_back(FuncDecl); + + initFieldsGlobalsAndFuncs(getReferencedDecls(*FuncDecl)); + + const auto *ParamIt = FuncDecl->param_begin(); + + // FIXME: Parameters don't always map to arguments 1:1; examples include + // overloaded operators implemented as member functions, and parameter packs. + for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { + assert(ParamIt != FuncDecl->param_end()); + const VarDecl *Param = *ParamIt; + setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); + } + + ResultObjectMap = std::make_shared<PrValueToResultObject>( + buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), + LocForRecordReturnVal)); +} + +void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { + // We ignore some entries of `CalleeEnv`: + // - `DACtx` because is already the same in both + // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or + // `ThisPointeeLoc` because they don't apply to us. + // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the + // callee's local scope, so when popping that scope, we do not propagate + // the maps. + this->LocToVal = std::move(CalleeEnv.LocToVal); + this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); + + if (Call->isGLValue()) { + if (CalleeEnv.ReturnLoc != nullptr) + setStorageLocation(*Call, *CalleeEnv.ReturnLoc); + } else if (!Call->getType()->isVoidType()) { + if (CalleeEnv.ReturnVal != nullptr) + setValue(*Call, *CalleeEnv.ReturnVal); + } +} + +void Environment::popCall(const CXXConstructExpr *Call, + const Environment &CalleeEnv) { + // See also comment in `popCall(const CallExpr *, const Environment &)` above. + this->LocToVal = std::move(CalleeEnv.LocToVal); + this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); +} + +bool Environment::equivalentTo(const Environment &Other, + Environment::ValueModel &Model) const { + assert(DACtx == Other.DACtx); + + if (ReturnVal != Other.ReturnVal) + return false; + + if (ReturnLoc != Other.ReturnLoc) + return false; + + if (LocForRecordReturnVal != Other.LocForRecordReturnVal) + return false; + + if (ThisPointeeLoc != Other.ThisPointeeLoc) + return false; + + if (DeclToLoc != Other.DeclToLoc) + return false; + + if (ExprToLoc != Other.ExprToLoc) + return false; + + if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) + return false; + + if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) + return false; + + return true; +} + +LatticeEffect Environment::widen(const Environment &PrevEnv, + Environment::ValueModel &Model) { + assert(DACtx == PrevEnv.DACtx); + assert(ReturnVal == PrevEnv.ReturnVal); + assert(ReturnLoc == PrevEnv.ReturnLoc); + assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal); + assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); + assert(CallStack == PrevEnv.CallStack); + assert(ResultObjectMap == PrevEnv.ResultObjectMap); + assert(InitialTargetFunc == PrevEnv.InitialTargetFunc); + assert(InitialTargetStmt == PrevEnv.InitialTargetStmt); + + auto Effect = LatticeEffect::Unchanged; + + // By the API, `PrevEnv` is a previous version of the environment for the same + // block, so we have some guarantees about its shape. In particular, it will + // be the result of a join or widen operation on previous values for this + // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that + // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain + // this property here, we don't need change their current values to widen. + assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); + assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); + assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); + + ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, + Model, Effect); + + LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, + Model, Effect); + if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || + ExprToLoc.size() != PrevEnv.ExprToLoc.size() || + ExprToVal.size() != PrevEnv.ExprToVal.size() || + LocToVal.size() != PrevEnv.LocToVal.size()) + Effect = LatticeEffect::Changed; + + return Effect; +} + +Environment Environment::join(const Environment &EnvA, const Environment &EnvB, + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior) { + assert(EnvA.DACtx == EnvB.DACtx); + assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal); + assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); + assert(EnvA.CallStack == EnvB.CallStack); + assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap); + assert(EnvA.InitialTargetFunc == EnvB.InitialTargetFunc); + assert(EnvA.InitialTargetStmt == EnvB.InitialTargetStmt); + + Environment JoinedEnv(*EnvA.DACtx); + + JoinedEnv.CallStack = EnvA.CallStack; + JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap; + JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal; + JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; + JoinedEnv.InitialTargetFunc = EnvA.InitialTargetFunc; + JoinedEnv.InitialTargetStmt = EnvA.InitialTargetStmt; + + const FunctionDecl *Func = EnvA.getCurrentFunc(); + if (!Func) { + JoinedEnv.ReturnVal = nullptr; + } else { + JoinedEnv.ReturnVal = + joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal, + EnvB, JoinedEnv, Model); + } + + if (EnvA.ReturnLoc == EnvB.ReturnLoc) + JoinedEnv.ReturnLoc = EnvA.ReturnLoc; + else + JoinedEnv.ReturnLoc = nullptr; + + JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc); + + // FIXME: update join to detect backedges and simplify the flow condition + // accordingly. + JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( + EnvA.FlowConditionToken, EnvB.FlowConditionToken); + + JoinedEnv.LocToVal = + joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); + + if (ExprBehavior == KeepExprState) { + JoinedEnv.ExprToVal = joinExprMaps(EnvA.ExprToVal, EnvB.ExprToVal); + JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); + } + + return JoinedEnv; +} + +Value *Environment::joinValues(QualType Ty, Value *Val1, + const Environment &Env1, Value *Val2, + const Environment &Env2, Environment &JoinedEnv, + Environment::ValueModel &Model) { + if (Val1 == nullptr || Val2 == nullptr) + // We can't say anything about the joined value -- even if one of the values + // is non-null, we don't want to simply propagate it, because it would be + // too specific: Because the other value is null, that means we have no + // information at all about the value (i.e. the value is unconstrained). + return nullptr; + + if (areEquivalentValues(*Val1, *Val2)) + // Arbitrarily return one of the two values. + return Val1; + + return joinDistinctValues(Ty, *Val1, Env1, *Val2, Env2, JoinedEnv, Model); +} + +StorageLocation &Environment::createStorageLocation(QualType Type) { + return DACtx->createStorageLocation(Type); +} + +StorageLocation &Environment::createStorageLocation(const ValueDecl &D) { + // Evaluated declarations are always assigned the same storage locations to + // ensure that the environment stabilizes across loop iterations. Storage + // locations for evaluated declarations are stored in the analysis context. + return DACtx->getStableStorageLocation(D); +} + +StorageLocation &Environment::createStorageLocation(const Expr &E) { + // Evaluated expressions are always assigned the same storage locations to + // ensure that the environment stabilizes across loop iterations. Storage + // locations for evaluated expressions are stored in the analysis context. + return DACtx->getStableStorageLocation(E); +} + +void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { + assert(!DeclToLoc.contains(&D)); + // The only kinds of declarations that may have a "variable" storage location + // are declarations of reference type and `BindingDecl`. For all other + // declaration, the storage location should be the stable storage location + // returned by `createStorageLocation()`. + assert(D.getType()->isReferenceType() || isa<BindingDecl>(D) || + &Loc == &createStorageLocation(D)); + DeclToLoc[&D] = &Loc; +} + +StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { + auto It = DeclToLoc.find(&D); + if (It == DeclToLoc.end()) + return nullptr; + + StorageLocation *Loc = It->second; + + return Loc; +} + +void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); } + +void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { + // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, + // but we still want to be able to associate a `StorageLocation` with them, + // so allow these as an exception. + assert(E.isGLValue() || + E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); + const Expr &CanonE = ignoreCFGOmittedNodes(E); + assert(!ExprToLoc.contains(&CanonE)); + ExprToLoc[&CanonE] = &Loc; +} + +StorageLocation *Environment::getStorageLocation(const Expr &E) const { + // See comment in `setStorageLocation()`. + assert(E.isGLValue() || + E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); + auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); + return It == ExprToLoc.end() ? nullptr : &*It->second; +} + +RecordStorageLocation & +Environment::getResultObjectLocation(const Expr &RecordPRValue) const { + assert(RecordPRValue.getType()->isRecordType()); + assert(RecordPRValue.isPRValue()); + + assert(ResultObjectMap != nullptr); + RecordStorageLocation *Loc = ResultObjectMap->lookup(&RecordPRValue); + assert(Loc != nullptr); + // In release builds, use the "stable" storage location if the map lookup + // failed. + if (Loc == nullptr) + return cast<RecordStorageLocation>( + DACtx->getStableStorageLocation(RecordPRValue)); + return *Loc; +} + +PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { + return DACtx->getOrCreateNullPointerValue(PointeeType); +} + +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type) { + llvm::DenseSet<QualType> Visited; + int CreatedValuesCount = 0; + initializeFieldsWithValues(Loc, Type, Visited, 0, CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; + } +} + +void Environment::setValue(const StorageLocation &Loc, Value &Val) { + // Records should not be associated with values. + assert(!isa<RecordStorageLocation>(Loc)); + LocToVal[&Loc] = &Val; +} + +void Environment::setValue(const Expr &E, Value &Val) { + const Expr &CanonE = ignoreCFGOmittedNodes(E); + + assert(CanonE.isPRValue()); + // Records should not be associated with values. + assert(!CanonE.getType()->isRecordType()); + ExprToVal[&CanonE] = &Val; +} + +Value *Environment::getValue(const StorageLocation &Loc) const { + // Records should not be associated with values. + assert(!isa<RecordStorageLocation>(Loc)); + return LocToVal.lookup(&Loc); +} + +Value *Environment::getValue(const ValueDecl &D) const { + auto *Loc = getStorageLocation(D); + if (Loc == nullptr) + return nullptr; + return getValue(*Loc); +} + +Value *Environment::getValue(const Expr &E) const { + // Records should not be associated with values. + assert(!E.getType()->isRecordType()); + + if (E.isPRValue()) { + auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); + return It == ExprToVal.end() ? nullptr : It->second; + } + + auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); + if (It == ExprToLoc.end()) + return nullptr; + return getValue(*It->second); +} + +Value *Environment::createValue(QualType Type) { + llvm::DenseSet<QualType> Visited; + int CreatedValuesCount = 0; + Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, + CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; + } + return Val; +} + +Value *Environment::createValueUnlessSelfReferential( + QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, + int &CreatedValuesCount) { + assert(!Type.isNull()); + assert(!Type->isReferenceType()); + assert(!Type->isRecordType()); + + // Allow unlimited fields at depth 1; only cap at deeper nesting levels. + if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || + Depth > MaxCompositeValueDepth) + return nullptr; + + if (Type->isBooleanType()) { + CreatedValuesCount++; + return &makeAtomicBoolValue(); + } + + if (Type->isIntegerType()) { + // FIXME: consider instead `return nullptr`, given that we do nothing useful + // with integers, and so distinguishing them serves no purpose, but could + // prevent convergence. + CreatedValuesCount++; + return &arena().create<IntegerValue>(); + } + + if (Type->isPointerType()) { + CreatedValuesCount++; + QualType PointeeType = Type->getPointeeType(); + StorageLocation &PointeeLoc = + createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); + + return &arena().create<PointerValue>(PointeeLoc); + } + + return nullptr; +} + +StorageLocation & +Environment::createLocAndMaybeValue(QualType Ty, + llvm::DenseSet<QualType> &Visited, + int Depth, int &CreatedValuesCount) { + if (!Visited.insert(Ty.getCanonicalType()).second) + return createStorageLocation(Ty.getNonReferenceType()); + auto EraseVisited = llvm::make_scope_exit( + [&Visited, Ty] { Visited.erase(Ty.getCanonicalType()); }); + + Ty = Ty.getNonReferenceType(); + + if (Ty->isRecordType()) { + auto &Loc = cast<RecordStorageLocation>(createStorageLocation(Ty)); + initializeFieldsWithValues(Loc, Ty, Visited, Depth, CreatedValuesCount); + return Loc; + } + + StorageLocation &Loc = createStorageLocation(Ty); + + if (Value *Val = createValueUnlessSelfReferential(Ty, Visited, Depth, + CreatedValuesCount)) + setValue(Loc, *Val); + + return Loc; +} + +void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, + QualType Type, + llvm::DenseSet<QualType> &Visited, + int Depth, + int &CreatedValuesCount) { + auto initField = [&](QualType FieldType, StorageLocation &FieldLoc) { + if (FieldType->isRecordType()) { + auto &FieldRecordLoc = cast<RecordStorageLocation>(FieldLoc); + initializeFieldsWithValues(FieldRecordLoc, FieldRecordLoc.getType(), + Visited, Depth + 1, CreatedValuesCount); + } else { + if (getValue(FieldLoc) != nullptr) + return; + if (!Visited.insert(FieldType.getCanonicalType()).second) + return; + if (Value *Val = createValueUnlessSelfReferential( + FieldType, Visited, Depth + 1, CreatedValuesCount)) + setValue(FieldLoc, *Val); + Visited.erase(FieldType.getCanonicalType()); + } + }; + + for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { + assert(Field != nullptr); + QualType FieldType = Field->getType(); + + if (FieldType->isReferenceType()) { + Loc.setChild(*Field, + &createLocAndMaybeValue(FieldType, Visited, Depth + 1, + CreatedValuesCount)); + } else { + StorageLocation *FieldLoc = Loc.getChild(*Field); + assert(FieldLoc != nullptr); + initField(FieldType, *FieldLoc); + } + } + for (const auto &[FieldName, FieldType] : DACtx->getSyntheticFields(Type)) { + // Synthetic fields cannot have reference type, so we don't need to deal + // with this case. + assert(!FieldType->isReferenceType()); + initField(FieldType, Loc.getSyntheticField(FieldName)); + } +} + +StorageLocation &Environment::createObjectInternal(const ValueDecl *D, + QualType Ty, + const Expr *InitExpr) { + if (Ty->isReferenceType()) { + // Although variables of reference type always need to be initialized, it + // can happen that we can't see the initializer, so `InitExpr` may still + // be null. + if (InitExpr) { + if (auto *InitExprLoc = getStorageLocation(*InitExpr)) + return *InitExprLoc; + } + + // Even though we have an initializer, we might not get an + // InitExprLoc, for example if the InitExpr is a CallExpr for which we + // don't have a function body. In this case, we just invent a storage + // location and value -- it's the best we can do. + return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); + } + + StorageLocation &Loc = + D ? createStorageLocation(*D) : createStorageLocation(Ty); + + if (Ty->isRecordType()) { + auto &RecordLoc = cast<RecordStorageLocation>(Loc); + if (!InitExpr) + initializeFieldsWithValues(RecordLoc); + } else { + Value *Val = nullptr; + if (InitExpr) + // In the (few) cases where an expression is intentionally + // "uninterpreted", `InitExpr` is not associated with a value. There are + // two ways to handle this situation: propagate the status, so that + // uninterpreted initializers result in uninterpreted variables, or + // provide a default value. We choose the latter so that later refinements + // of the variable can be used for reasoning about the surrounding code. + // For this reason, we let this case be handled by the `createValue()` + // call below. + // + // FIXME. If and when we interpret all language cases, change this to + // assert that `InitExpr` is interpreted, rather than supplying a + // default value (assuming we don't update the environment API to return + // references). + Val = getValue(*InitExpr); + if (!Val) + Val = createValue(Ty); + if (Val) + setValue(Loc, *Val); + } + + return Loc; +} + +void Environment::assume(const Formula &F) { + DACtx->addFlowConditionConstraint(FlowConditionToken, F); +} + +bool Environment::proves(const Formula &F) const { + return DACtx->flowConditionImplies(FlowConditionToken, F); +} + +bool Environment::allows(const Formula &F) const { + return DACtx->flowConditionAllows(FlowConditionToken, F); +} + +void Environment::dump(raw_ostream &OS) const { + llvm::DenseMap<const StorageLocation *, std::string> LocToName; + if (LocForRecordReturnVal != nullptr) + LocToName[LocForRecordReturnVal] = "(returned record)"; + if (ThisPointeeLoc != nullptr) + LocToName[ThisPointeeLoc] = "this"; + + OS << "DeclToLoc:\n"; + for (auto [D, L] : DeclToLoc) { + auto Iter = LocToName.insert({L, D->getNameAsString()}).first; + OS << " [" << Iter->second << ", " << L << "]\n"; + } + OS << "ExprToLoc:\n"; + for (auto [E, L] : ExprToLoc) + OS << " [" << E << ", " << L << "]\n"; + + OS << "ExprToVal:\n"; + for (auto [E, V] : ExprToVal) + OS << " [" << E << ", " << V << ": " << *V << "]\n"; + + OS << "LocToVal:\n"; + for (auto [L, V] : LocToVal) { + OS << " [" << L; + if (auto Iter = LocToName.find(L); Iter != LocToName.end()) + OS << " (" << Iter->second << ")"; + OS << ", " << V << ": " << *V << "]\n"; + } + + if (const FunctionDecl *Func = getCurrentFunc()) { + if (Func->getReturnType()->isReferenceType()) { + OS << "ReturnLoc: " << ReturnLoc; + if (auto Iter = LocToName.find(ReturnLoc); Iter != LocToName.end()) + OS << " (" << Iter->second << ")"; + OS << "\n"; + } else if (Func->getReturnType()->isRecordType() || + isa<CXXConstructorDecl>(Func)) { + OS << "LocForRecordReturnVal: " << LocForRecordReturnVal << "\n"; + } else if (!Func->getReturnType()->isVoidType()) { + if (ReturnVal == nullptr) + OS << "ReturnVal: nullptr\n"; + else + OS << "ReturnVal: " << *ReturnVal << "\n"; + } + + if (isa<CXXMethodDecl>(Func)) { + OS << "ThisPointeeLoc: " << ThisPointeeLoc << "\n"; + } + } + + OS << "\n"; + DACtx->dumpFlowCondition(FlowConditionToken, OS); +} + +void Environment::dump() const { dump(llvm::dbgs()); } + +Environment::PrValueToResultObject Environment::buildResultObjectMap( + DataflowAnalysisContext *DACtx, const FunctionDecl *FuncDecl, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal) { + assert(FuncDecl->doesThisDeclarationHaveABody()); + + PrValueToResultObject Map = buildResultObjectMap( + DACtx, FuncDecl->getBody(), ThisPointeeLoc, LocForRecordReturnVal); + + ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); + if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(FuncDecl)) + Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc); + + return Map; +} + +Environment::PrValueToResultObject Environment::buildResultObjectMap( + DataflowAnalysisContext *DACtx, Stmt *S, + RecordStorageLocation *ThisPointeeLoc, + RecordStorageLocation *LocForRecordReturnVal) { + PrValueToResultObject Map; + ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); + Visitor.TraverseStmt(S); + return Map; +} + +RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, + const Environment &Env) { + Expr *ImplicitObject = MCE.getImplicitObjectArgument(); + if (ImplicitObject == nullptr) + return nullptr; + if (ImplicitObject->getType()->isPointerType()) { + if (auto *Val = Env.get<PointerValue>(*ImplicitObject)) + return &cast<RecordStorageLocation>(Val->getPointeeLoc()); + return nullptr; + } + return cast_or_null<RecordStorageLocation>( + Env.getStorageLocation(*ImplicitObject)); +} + +RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, + const Environment &Env) { + Expr *Base = ME.getBase(); + if (Base == nullptr) + return nullptr; + if (ME.isArrow()) { + if (auto *Val = Env.get<PointerValue>(*Base)) + return &cast<RecordStorageLocation>(Val->getPointeeLoc()); + return nullptr; + } + return Env.get<RecordStorageLocation>(*Base); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp new file mode 100644 index 000000000000..d40aab7a7f10 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp @@ -0,0 +1,77 @@ +//===- DebugSupport.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions which generate more readable forms of data +// structures used in the dataflow analyses, for debugging purposes. +// +//===----------------------------------------------------------------------===// + +#include <utility> + +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Solver.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace dataflow { + +llvm::StringRef debugString(Value::Kind Kind) { + switch (Kind) { + case Value::Kind::Integer: + return "Integer"; + case Value::Kind::Pointer: + return "Pointer"; + case Value::Kind::AtomicBool: + return "AtomicBool"; + case Value::Kind::TopBool: + return "TopBool"; + case Value::Kind::FormulaBool: + return "FormulaBool"; + } + llvm_unreachable("Unhandled value kind"); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + Solver::Result::Assignment Assignment) { + switch (Assignment) { + case Solver::Result::Assignment::AssignedFalse: + return OS << "False"; + case Solver::Result::Assignment::AssignedTrue: + return OS << "True"; + } + llvm_unreachable("Booleans can only be assigned true/false"); +} + +llvm::StringRef debugString(Solver::Result::Status Status) { + switch (Status) { + case Solver::Result::Status::Satisfiable: + return "Satisfiable"; + case Solver::Result::Status::Unsatisfiable: + return "Unsatisfiable"; + case Solver::Result::Status::TimedOut: + return "TimedOut"; + } + llvm_unreachable("Unhandled SAT check result status"); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Solver::Result &R) { + OS << debugString(R.getStatus()) << "\n"; + if (auto Solution = R.getSolution()) { + std::vector<std::pair<Atom, Solver::Result::Assignment>> Sorted = { + Solution->begin(), Solution->end()}; + llvm::sort(Sorted); + for (const auto &Entry : Sorted) + OS << Entry.first << " = " << Entry.second << "\n"; + } + return OS; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp new file mode 100644 index 000000000000..ef7d23ff6c56 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp @@ -0,0 +1,94 @@ +//===- Formula.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <type_traits> + +namespace clang::dataflow { + +const Formula &Formula::create(llvm::BumpPtrAllocator &Alloc, Kind K, + ArrayRef<const Formula *> Operands, + unsigned Value) { + assert(Operands.size() == numOperands(K)); + if (Value != 0) // Currently, formulas have values or operands, not both. + assert(numOperands(K) == 0); + void *Mem = Alloc.Allocate(sizeof(Formula) + + Operands.size() * sizeof(Operands.front()), + alignof(Formula)); + Formula *Result = new (Mem) Formula(); + Result->FormulaKind = K; + Result->Value = Value; + // Operands are stored as `const Formula *`s after the formula itself. + // We don't need to construct an object as pointers are trivial types. + // Formula is alignas(const Formula *), so alignment is satisfied. + llvm::copy(Operands, reinterpret_cast<const Formula **>(Result + 1)); + return *Result; +} + +static llvm::StringLiteral sigil(Formula::Kind K) { + switch (K) { + case Formula::AtomRef: + case Formula::Literal: + return ""; + case Formula::Not: + return "!"; + case Formula::And: + return " & "; + case Formula::Or: + return " | "; + case Formula::Implies: + return " => "; + case Formula::Equal: + return " = "; + } + llvm_unreachable("unhandled formula kind"); +} + +void Formula::print(llvm::raw_ostream &OS, const AtomNames *Names) const { + if (Names && kind() == AtomRef) + if (auto It = Names->find(getAtom()); It != Names->end()) { + OS << It->second; + return; + } + + switch (numOperands(kind())) { + case 0: + switch (kind()) { + case AtomRef: + OS << getAtom(); + break; + case Literal: + OS << (literal() ? "true" : "false"); + break; + default: + llvm_unreachable("unhandled formula kind"); + } + break; + case 1: + OS << sigil(kind()); + operands()[0]->print(OS, Names); + break; + case 2: + OS << '('; + operands()[0]->print(OS, Names); + OS << sigil(kind()); + operands()[1]->print(OS, Names); + OS << ')'; + break; + default: + llvm_unreachable("unhandled formula arity"); + } +} + +} // namespace clang::dataflow
\ No newline at end of file diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp new file mode 100644 index 000000000000..a36cb41a63df --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -0,0 +1,584 @@ +//===-- HTMLLogger.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the HTML logger. Given a directory dir/, we write +// dir/0.html for the first analysis, etc. +// These files contain a visualization that allows inspecting the CFG and the +// state of the analysis at each point. +// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded +// so each output file is self-contained. +// +// VIEWS +// +// The timeline and function view are always shown. These allow selecting basic +// blocks, statements within them, and processing iterations (BBs are visited +// multiple times when e.g. loops are involved). +// These are written directly into the HTML body. +// +// There are also listings of particular basic blocks, and dumps of the state +// at particular analysis points (i.e. BB2 iteration 3 statement 2). +// These are only shown when the relevant BB/analysis point is *selected*. +// +// DATA AND TEMPLATES +// +// The HTML proper is mostly static. +// The analysis data is in a JSON object HTMLLoggerData which is embedded as +// a <script> in the <head>. +// This gets rendered into DOM by a simple template processor which substitutes +// the data into <template> tags embedded in the HTML. (see inflate() in JS). +// +// SELECTION +// +// This is the only real interactive mechanism. +// +// At any given time, there are several named selections, e.g.: +// bb: B2 (basic block 0 is selected) +// elt: B2.4 (statement 4 is selected) +// iter: B2:1 (iteration 1 of the basic block is selected) +// hover: B3 (hovering over basic block 3) +// +// The selection is updated by mouse events: hover by moving the mouse and +// others by clicking. Elements that are click targets generally have attributes +// (id or data-foo) that define what they should select. +// See watchSelection() in JS for the exact logic. +// +// When the "bb" selection is set to "B2": +// - sections <section data-selection="bb"> get shown +// - templates under such sections get re-rendered +// - elements with class/id "B2" get class "bb-select" +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +// Defines assets: HTMLLogger_{html_js,css} +#include "HTMLLogger.inc" + +namespace clang::dataflow { +namespace { + +// Render a graphviz graph specification to SVG using the `dot` tool. +llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); + +using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; + +// Recursively dumps Values/StorageLocations as JSON +class ModelDumper { +public: + ModelDumper(llvm::json::OStream &JOS, const Environment &Env) + : JOS(JOS), Env(Env) {} + + void dump(Value &V) { + JOS.attribute("value_id", llvm::to_string(&V)); + if (!Visited.insert(&V).second) + return; + + JOS.attribute("kind", debugString(V.getKind())); + + switch (V.getKind()) { + case Value::Kind::Integer: + case Value::Kind::TopBool: + case Value::Kind::AtomicBool: + case Value::Kind::FormulaBool: + break; + case Value::Kind::Pointer: + JOS.attributeObject( + "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); + break; + } + + for (const auto& Prop : V.properties()) + JOS.attributeObject(("p:" + Prop.first()).str(), + [&] { dump(*Prop.second); }); + + // Running the SAT solver is expensive, but knowing which booleans are + // guaranteed true/false here is valuable and hard to determine by hand. + if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { + JOS.attribute("formula", llvm::to_string(B->formula())); + JOS.attribute("truth", Env.proves(B->formula()) ? "true" + : Env.proves(Env.arena().makeNot(B->formula())) + ? "false" + : "unknown"); + } + } + void dump(const StorageLocation &L) { + JOS.attribute("location", llvm::to_string(&L)); + if (!Visited.insert(&L).second) + return; + + JOS.attribute("type", L.getType().getAsString()); + if (!L.getType()->isRecordType()) + if (auto *V = Env.getValue(L)) + dump(*V); + + if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) { + for (const auto &Child : RLoc->children()) + JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { + if (Child.second) + if (Value *Val = Env.getValue(*Child.second)) + dump(*Val); + }); + + for (const auto &SyntheticField : RLoc->synthetic_fields()) + JOS.attributeObject(("sf:" + SyntheticField.first()).str(), + [&] { dump(*SyntheticField.second); }); + } + } + + llvm::DenseSet<const void*> Visited; + llvm::json::OStream &JOS; + const Environment &Env; +}; + +class HTMLLogger : public Logger { + struct Iteration { + const CFGBlock *Block; + unsigned Iter; + bool PostVisit; + bool Converged; + }; + + StreamFactory Streams; + std::unique_ptr<llvm::raw_ostream> OS; + std::string JSON; + llvm::raw_string_ostream JStringStream{JSON}; + llvm::json::OStream JOS{JStringStream, /*Indent=*/2}; + + const AdornedCFG *ACFG; + // Timeline of iterations of CFG block visitation. + std::vector<Iteration> Iters; + // Indexes in `Iters` of the iterations for each block. + llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters; + // For a given block ID, did the block converge (on the last iteration)? + llvm::BitVector BlockConverged; + // The messages logged in the current context but not yet written. + std::string ContextLogs; + // The number of elements we have visited within the current CFG block. + unsigned ElementIndex; + +public: + explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} + void beginAnalysis(const AdornedCFG &ACFG, + TypeErasedDataflowAnalysis &A) override { + OS = Streams(); + this->ACFG = &ACFG; + *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; + + BlockConverged.resize(ACFG.getCFG().getNumBlockIDs()); + + const auto &D = ACFG.getDecl(); + const auto &SM = A.getASTContext().getSourceManager(); + *OS << "<title>"; + if (const auto *ND = dyn_cast<NamedDecl>(&D)) + *OS << ND->getNameAsString() << " at "; + *OS << SM.getFilename(D.getLocation()) << ":" + << SM.getSpellingLineNumber(D.getLocation()); + *OS << "</title>\n"; + + *OS << "<style>" << HTMLLogger_css << "</style>\n"; + *OS << "<script>" << HTMLLogger_js << "</script>\n"; + + writeCode(); + JOS.objectBegin(); + JOS.attributeBegin("states"); + JOS.objectBegin(); + } + // Between beginAnalysis() and endAnalysis() we write all the states for + // particular analysis points into the `timeline` array. + void endAnalysis() override { + JOS.objectEnd(); + JOS.attributeEnd(); + + JOS.attributeArray("timeline", [&] { + for (const auto &E : Iters) { + JOS.object([&] { + JOS.attribute("block", blockID(E.Block->getBlockID())); + JOS.attribute("iter", E.Iter); + JOS.attribute("post_visit", E.PostVisit); + JOS.attribute("converged", E.Converged); + }); + } + }); + JOS.attributeObject("cfg", [&] { + for (const auto &E : BlockIters) + writeBlock(*E.first, E.second); + }); + + JOS.objectEnd(); + + writeCFG(); + + *OS << "<script>var HTMLLoggerData = \n"; + *OS << JSON; + *OS << ";\n</script>\n"; + *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; + } + + void enterBlock(const CFGBlock &B, bool PostVisit) override { + llvm::SmallVector<size_t> &BIter = BlockIters[&B]; + unsigned IterNum = BIter.size() + 1; + BIter.push_back(Iters.size()); + Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); + if (!PostVisit) + BlockConverged[B.getBlockID()] = false; + ElementIndex = 0; + } + void enterElement(const CFGElement &E) override { + ++ElementIndex; + } + + static std::string blockID(unsigned Block) { + return llvm::formatv("B{0}", Block); + } + static std::string eltID(unsigned Block, unsigned Element) { + return llvm::formatv("B{0}.{1}", Block, Element); + } + static std::string iterID(unsigned Block, unsigned Iter) { + return llvm::formatv("B{0}:{1}", Block, Iter); + } + static std::string elementIterID(unsigned Block, unsigned Iter, + unsigned Element) { + return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); + } + + // Write the analysis state associated with a particular analysis point. + // FIXME: this dump is fairly opaque. We should show: + // - values associated with the current Stmt + // - values associated with its children + // - meaningful names for values + // - which boolean values are implied true/false by the flow condition + void recordState(TypeErasedDataflowAnalysisState &State) override { + unsigned Block = Iters.back().Block->getBlockID(); + unsigned Iter = Iters.back().Iter; + bool PostVisit = Iters.back().PostVisit; + JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { + JOS.attribute("block", blockID(Block)); + JOS.attribute("iter", Iter); + JOS.attribute("post_visit", PostVisit); + JOS.attribute("element", ElementIndex); + + // If this state immediately follows an Expr, show its built-in model. + if (ElementIndex > 0) { + auto S = + Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>(); + if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) { + if (E->isPRValue()) { + if (!E->getType()->isRecordType()) + if (auto *V = State.Env.getValue(*E)) + JOS.attributeObject( + "value", [&] { ModelDumper(JOS, State.Env).dump(*V); }); + } else { + if (auto *Loc = State.Env.getStorageLocation(*E)) + JOS.attributeObject( + "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); }); + } + } + } + if (!ContextLogs.empty()) { + JOS.attribute("logs", ContextLogs); + ContextLogs.clear(); + } + { + std::string BuiltinLattice; + llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); + State.Env.dump(BuiltinLatticeS); + JOS.attribute("builtinLattice", BuiltinLattice); + } + }); + } + void blockConverged() override { + Iters.back().Converged = true; + BlockConverged[Iters.back().Block->getBlockID()] = true; + } + + void logText(llvm::StringRef S) override { + ContextLogs.append(S.begin(), S.end()); + ContextLogs.push_back('\n'); + } + +private: + // Write the CFG block details. + // Currently this is just the list of elements in execution order. + // FIXME: an AST dump would be a useful view, too. + void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) { + JOS.attributeObject(blockID(B.getBlockID()), [&] { + JOS.attributeArray("iters", [&] { + for (size_t IterIdx : ItersForB) { + const Iteration &Iter = Iters[IterIdx]; + JOS.object([&] { + JOS.attribute("iter", Iter.Iter); + JOS.attribute("post_visit", Iter.PostVisit); + JOS.attribute("converged", Iter.Converged); + }); + } + }); + JOS.attributeArray("elements", [&] { + for (const auto &Elt : B.Elements) { + std::string Dump; + llvm::raw_string_ostream DumpS(Dump); + Elt.dumpToStream(DumpS); + JOS.value(Dump); + } + }); + }); + } + + // Write the code of function being examined. + // We want to overlay the code with <span>s that mark which BB particular + // tokens are associated with, and even which BB element (so that clicking + // can select the right element). + void writeCode() { + const auto &AST = ACFG->getDecl().getASTContext(); + bool Invalid = false; + + // Extract the source code from the original file. + // Pretty-printing from the AST would probably be nicer (no macros or + // indentation to worry about), but we need the boundaries of particular + // AST nodes and the printer doesn't provide this. + auto Range = clang::Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()), + AST.getSourceManager(), AST.getLangOpts()); + if (Range.isInvalid()) + return; + llvm::StringRef Code = clang::Lexer::getSourceText( + Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); + if (Invalid) + return; + + // TokenInfo stores the BB and set of elements that a token is part of. + struct TokenInfo { + enum : unsigned { Missing = static_cast<unsigned>(-1) }; + + // The basic block this is part of. + // This is the BB of the stmt with the smallest containing range. + unsigned BB = Missing; + unsigned BBPriority = 0; + // The most specific stmt this is part of (smallest range). + unsigned Elt = Missing; + unsigned EltPriority = 0; + // All stmts this is part of. + SmallVector<unsigned> Elts; + + // Mark this token as being part of BB.Elt. + // RangeLen is the character length of the element's range, used to + // distinguish inner vs outer statements. + // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". + // However "a" has a smaller range, so is more specific. Clicking on the + // token "a" should select the stmt "a". + void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { + // A worse BB (larger range) => ignore. + if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) + return; + if (BB != this->BB) { + this->BB = BB; + Elts.clear(); + BBPriority = RangeLen; + } + BBPriority = std::min(BBPriority, RangeLen); + Elts.push_back(Elt); + if (this->Elt == Missing || EltPriority > RangeLen) + this->Elt = Elt; + } + bool operator==(const TokenInfo &Other) const { + return std::tie(BB, Elt, Elts) == + std::tie(Other.BB, Other.Elt, Other.Elts); + } + // Write the attributes for the <span> on this token. + void write(llvm::raw_ostream &OS) const { + OS << "class='c"; + if (BB != Missing) + OS << " " << blockID(BB); + for (unsigned Elt : Elts) + OS << " " << eltID(BB, Elt); + OS << "'"; + + if (Elt != Missing) + OS << " data-elt='" << eltID(BB, Elt) << "'"; + if (BB != Missing) + OS << " data-bb='" << blockID(BB) << "'"; + } + }; + + // Construct one TokenInfo per character in a flat array. + // This is inefficient (chars in a token all have the same info) but simple. + std::vector<TokenInfo> State(Code.size()); + for (const auto *Block : ACFG->getCFG()) { + unsigned EltIndex = 0; + for (const auto& Elt : *Block) { + ++EltIndex; + if (const auto S = Elt.getAs<CFGStmt>()) { + auto EltRange = clang::Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), + AST.getSourceManager(), AST.getLangOpts()); + if (EltRange.isInvalid()) + continue; + if (EltRange.getBegin() < Range.getBegin() || + EltRange.getEnd() >= Range.getEnd() || + EltRange.getEnd() < Range.getBegin() || + EltRange.getEnd() >= Range.getEnd()) + continue; + unsigned Off = EltRange.getBegin().getRawEncoding() - + Range.getBegin().getRawEncoding(); + unsigned Len = EltRange.getEnd().getRawEncoding() - + EltRange.getBegin().getRawEncoding(); + for (unsigned I = 0; I < Len; ++I) + State[Off + I].assign(Block->getBlockID(), EltIndex, Len); + } + } + } + + // Finally, write the code with the correct <span>s. + unsigned Line = + AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); + *OS << "<template data-copy='code'>\n"; + *OS << "<code class='filename'>"; + llvm::printHTMLEscaped( + llvm::sys::path::filename( + AST.getSourceManager().getFilename(Range.getBegin())), + *OS); + *OS << "</code>"; + *OS << "<code class='line' data-line='" << Line++ << "'>"; + for (unsigned I = 0; I < Code.size(); ++I) { + // Don't actually write a <span> around each character, only break spans + // when the TokenInfo changes. + bool NeedOpen = I == 0 || !(State[I] == State[I-1]); + bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); + if (NeedOpen) { + *OS << "<span "; + State[I].write(*OS); + *OS << ">"; + } + if (Code[I] == '\n') + *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; + else + llvm::printHTMLEscaped(Code.substr(I, 1), *OS); + if (NeedClose) *OS << "</span>"; + } + *OS << "</code>\n"; + *OS << "</template>"; + } + + // Write the CFG diagram, a graph of basic blocks. + // Laying out graphs is hard, so we construct a graphviz description and shell + // out to `dot` to turn it into an SVG. + void writeCFG() { + *OS << "<template data-copy='cfg'>\n"; + if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG()))) + *OS << *SVG; + else + *OS << "Can't draw CFG: " << toString(SVG.takeError()); + *OS << "</template>\n"; + } + + // Produce a graphviz description of a CFG. + std::string buildCFGDot(const clang::CFG &CFG) { + std::string Graph; + llvm::raw_string_ostream GraphS(Graph); + // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. + GraphS << R"(digraph { + tooltip=" " + node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] + edge[tooltip = " "] +)"; + for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { + std::string Name = blockID(I); + // Rightwards arrow, vertical line + const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c"; + if (BlockConverged[I]) + Name += ConvergenceMarker; + GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" + << Name << "\"]\n"; + } + for (const auto *Block : CFG) { + for (const auto &Succ : Block->succs()) { + if (Succ.getReachableBlock()) + GraphS << " " << blockID(Block->getBlockID()) << " -> " + << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; + } + } + GraphS << "}\n"; + return Graph; + } +}; + +// Nothing interesting here, just subprocess/temp-file plumbing. +llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { + std::string DotPath; + if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) + DotPath = FromEnv; + else { + auto FromPath = llvm::sys::findProgramByName("dot"); + if (!FromPath) + return llvm::createStringError(FromPath.getError(), + "'dot' not found on PATH"); + DotPath = FromPath.get(); + } + + // Create input and output files for `dot` subprocess. + // (We create the output file as empty, to reserve the temp filename). + llvm::SmallString<256> Input, Output; + int InputFD; + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, + Input)) + return llvm::createStringError(EC, "failed to create `dot` temp input"); + llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; + auto DeleteInput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) + return llvm::createStringError(EC, "failed to create `dot` temp output"); + auto DeleteOutput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); + + std::vector<std::optional<llvm::StringRef>> Redirects = { + Input, Output, + /*stderr=*/std::nullopt}; + std::string ErrMsg; + int Code = llvm::sys::ExecuteAndWait( + DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); + if (!ErrMsg.empty()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed: " + ErrMsg); + if (Code != 0) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed (" + llvm::Twine(Code) + ")"); + + auto Buf = llvm::MemoryBuffer::getFile(Output); + if (!Buf) + return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); + + // Output has <?xml> prefix we don't want. Skip to <svg> tag. + llvm::StringRef Result = Buf.get()->getBuffer(); + auto Pos = Result.find("<svg"); + if (Pos == llvm::StringRef::npos) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Can't find <svg> tag in `dot` output"); + return Result.substr(Pos).str(); +} + +} // namespace + +std::unique_ptr<Logger> +Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { + return std::make_unique<HTMLLogger>(std::move(Streams)); +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css new file mode 100644 index 000000000000..e25270430efc --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css @@ -0,0 +1,169 @@ +/*===-- HTMLLogger.css ----------------------------------------------------=== +* +* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +* See https://llvm.org/LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +* +*===----------------------------------------------------------------------===*/ +html { font-family: sans-serif; } +body { margin: 0; display: flex; justify-content: left; } +body > * { box-sizing: border-box; } +body > section { + border: 1px solid black; + min-width: 20em; + overflow: auto; + max-height: 100vh; +} +section header { + background-color: #008; + color: white; + font-weight: bold; + font-size: large; + padding-right: 0.5em; +} +section h2 { + font-size: medium; + margin-bottom: 0.5em; + padding-top: 0.5em; + border-top: 1px solid #aaa; +} +#timeline { + min-width: max-content; + counter-reset: entry_counter; +} +#timeline .entry .counter::before { + counter-increment: entry_counter; + content: counter(entry_counter) ":"; +} +#timeline .entry .counter { + display: inline-block; + min-width: 2em; /* Enough space for two digits and a colon */ + text-align: right; +} +#timeline .entry.hover { + background-color: #aaa; +} +#timeline .entry.iter-select { + background-color: #aac; +} + +#bb-elements { + font-family: monospace; + font-size: x-small; + border-collapse: collapse; +} +#bb-elements td:nth-child(1) { + text-align: right; + width: 4em; + border-right: 1px solid #008; + padding: 0.3em 0.5em; + + font-weight: bold; + color: #888; +}; +#bb-elements tr.hover { + background-color: #abc; +} +#bb-elements tr.elt-select { + background-color: #acf; +} +#iterations { + display: flex; +} +#iterations .chooser { + flex-grow: 1; + text-align: center; + padding-left: 0.2em; +} +#iterations .chooser :last-child { + padding-right: 0.2em; +} +#iterations .chooser:not(.iter-select).hover { + background-color: #ddd; +} +#iterations .iter-select { + font-weight: bold; +} +#iterations .chooser:not(.iter-select) { + text-decoration: underline; + color: blue; + cursor: pointer; + background-color: #ccc; +} + +code.filename { + font-weight: bold; + color: black; + background-color: #ccc; + display: block; + text-align: center; +} +code.line { + display: block; + white-space: pre; +} +code.line:before { /* line numbers */ + content: attr(data-line); + display: inline-block; + width: 2em; + text-align: right; + padding-right: 2px; + background-color: #ccc; + border-right: 1px solid #888; + margin-right: 8px; +} +code.line:has(.bb-select):before { + border-right: 4px solid black; + margin-right: 5px; +} +.c.hover, .bb.hover { + filter: saturate(200%) brightness(90%); +} +.c.elt-select { + box-shadow: inset 0 -4px 2px -2px #a00; +} +.bb.bb-select polygon { + stroke-width: 4px; + filter: brightness(70%) saturate(150%); +} +.bb { user-select: none; } +.bb polygon { fill: white; } +#cfg { + position: relative; + margin-left: 0.5em; +} + +.value { + border: 1px solid #888; + font-size: x-small; + flex-grow: 1; +} +.value > summary { + background-color: #ace; + display: flex; + cursor: pointer; +} +.value > summary::before { + content: '\25ba'; /* Black Right-Pointing Pointer */ + margin-right: 0.5em; + font-size: 0.9em; +} +.value[open] > summary::before { + content: '\25bc'; /* Black Down-Pointing Triangle */ +} +.value > summary > .location { + margin-left: auto; +} +.value .address { + font-size: xx-small; + font-family: monospace; + color: #888; +} +.value .property { + display: flex; + margin-top: 0.5em; +} +.value .property .key { + font-weight: bold; + min-width: 5em; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html new file mode 100644 index 000000000000..be173e8b2854 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html @@ -0,0 +1,119 @@ +<!doctype html> +<html> +<!-- HTMLLogger.cpp ---------------------------------------------------- + + Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + See https://llvm.org/LICENSE.txt for license information. + SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===------------------------------------------------------------------------> + +<head> +<?INJECT?> + +<template id="value-template"> + <details class="value" open> + <summary> + <span>{{v.kind}} + <template data-if="v.value_id"><span class="address">#{{v.value_id}}</span></template> + </span> + <template data-if="v.location"> + <span class="location">{{v.type}} <span class="address">@{{v.location}}</span></span> + </template> + </summary> + <template + data-for="kv in Object.entries(v)" + data-if="['kind', 'value_id', 'type', 'location'].indexOf(kv[0]) < 0"> + <div class="property"><span class="key">{{kv[0]}}</span> + <template data-if="typeof(kv[1]) != 'object'">{{kv[1]}}</template> + <template data-if="typeof(kv[1]) == 'object'" data-let="v = kv[1]"> + <template data-use="value-template"></template> + </template> + </div> + </template> + </details> +</template> + +</head> + +<body> + +<section id="timeline" data-selection=""> +<header>Timeline</header> +<template data-for="entry in timeline"> + <div id="{{entry.block}}:{{entry.iter}}" data-bb="{{entry.block}}" class="entry"> + <span class="counter"></span> + {{entry.block}} + <template data-if="entry.post_visit">(post-visit)</template> + <template data-if="!entry.post_visit">({{entry.iter}})</template> + <template data-if="entry.converged"> →|<!--Rightwards arrow, vertical line--></template> + </div> +</template> +</section> + +<section id="function" data-selection=""> +<header>Function</header> +<div id="code"></div> +<div id="cfg"></div> +</section> + +<section id="block" data-selection="bb"> +<header><template>Block {{selection.bb}}</template></header> +<div id="iterations"> + <template data-for="iter in cfg[selection.bb].iters"> + <a class="chooser {{selection.bb}}:{{iter.iter}}" data-iter="{{selection.bb}}:{{iter.iter}}"> + <template data-if="iter.post_visit">Post-visit</template> + <template data-if="!iter.post_visit">{{iter.iter}}</template> + <template data-if="iter.converged"> →|<!--Rightwards arrow, vertical line--></template> + </a> + </template> +</div> +<table id="bb-elements"> +<template> + <tr id="{{selection.bb}}.0"> + <td class="{{selection.bb}}">{{selection.bb}}.0</td> + <td>(initial state)</td> + </tr> +</template> +<template data-for="elt in cfg[selection.bb].elements"> + <tr id="{{selection.bb}}.{{elt_index+1}}"> + <td class="{{selection.bb}}">{{selection.bb}}.{{elt_index+1}}</td> + <td>{{elt}}</td> + </tr> +</template> +</table> +</section> + +<section id="element" data-selection="iter,elt"> +<template data-let="state = states[selection.iter + '_' + selection.elt]"> +<header> + <template data-if="state.element == 0">{{state.block}} initial state</template> + <template data-if="state.element != 0">Element {{selection.elt}}</template> + <template data-if="state.post_visit"> (post-visit)</template> + <template data-if="!state.post_visit"> (iteration {{state.iter}})</template> +</header> +<template data-if="state.value" data-let="v = state.value"> + <h2>Value</h2> + <template data-use="value-template"></template> +</template> +<template data-if="state.logs"> + <h2>Logs</h2> + <pre>{{state.logs}}</pre> +</template> +<h2>Built-in lattice</h2> +<pre>{{state.builtinLattice}}</pre> +</template> +</section> + +<script> +addBBColors(Object.keys(HTMLLoggerData.cfg).length); +watchSelection(HTMLLoggerData); +updateSelection({}, HTMLLoggerData); +// Copy code and cfg from <template>s into the body. +for (tmpl of document.querySelectorAll('template[data-copy]')) + document.getElementById(tmpl.dataset.copy).replaceChildren( + ...tmpl.content.cloneNode(/*deep=*/true).childNodes); +</script> + +</body> +</html> diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js new file mode 100644 index 000000000000..6e04bc00f663 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js @@ -0,0 +1,219 @@ +//===-- HTMLLogger.js -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Based on selected objects, hide/show sections & populate data from templates. +// +// For example, if the selection is {bb="BB4", elt="BB4.6" iter="BB4:2"}: +// - show the "block" and "element" sections +// - re-render templates within these sections (if selection changed) +// - apply "bb-select" to items with class class "BB4", etc +let selection = {}; +function updateSelection(changes, data) { + Object.assign(selection, changes); + + data = Object.create(data); + data.selection = selection; + for (root of document.querySelectorAll('[data-selection]')) + updateSection(root, data); + + for (var k in changes) + applyClassIf(k + '-select', classSelector(changes[k])); +} + +// Given <section data-selection="x,y">: +// - hide section if selections x or y are null +// - re-render templates if x or y have changed +function updateSection(root, data) { + let changed = root.selection == null; + root.selection ||= {}; + for (key of root.dataset.selection.split(',')) { + if (!key) continue; + if (data.selection[key] != root.selection[key]) { + root.selection[key] = data.selection[key]; + changed = true; + } + if (data.selection[key] == null) { + root.hidden = true; + return; + } + } + if (changed) { + root.hidden = false; + for (tmpl of root.getElementsByTagName('template')) + reinflate(tmpl, data); + } +} + +// Expands template `tmpl` based on input `data`: +// - interpolates {{expressions}} in text and attributes +// - <template> tags can modify expansion: if, for etc +// Outputs to `parent` element, inserting before `next`. +function inflate(tmpl, data, parent, next) { + // We use eval() as our expression language in templates! + // The templates are static and trusted. + let evalExpr = (expr, data) => eval('with (data) { ' + expr + ' }'); + let interpolate = (str, data) => + str.replace(/\{\{(.*?)\}\}/g, (_, expr) => evalExpr(expr, data)) + // Anything other than <template> tag: copy, interpolate, recursively inflate. + if (tmpl.nodeName != 'TEMPLATE') { + let clone = tmpl.cloneNode(); + clone.inflated = true; + if (clone instanceof Text) + clone.textContent = interpolate(clone.textContent, data); + if (clone instanceof Element) { + for (attr of clone.attributes) + attr.value = interpolate(attr.value, data); + for (c of tmpl.childNodes) + inflate(c, data, clone, /*next=*/null); + } + return parent.insertBefore(clone, next); + } + // data-use="xyz": use <template id="xyz"> instead. (Allows recursion.) + if ('use' in tmpl.dataset) + return inflate(document.getElementById(tmpl.dataset.use), data, parent, next); + // <template> tag handling. Base case: recursively inflate. + function handle(data) { + for (c of tmpl.content.childNodes) + inflate(c, data, parent, next); + } + // Directives on <template> tags modify behavior. + const directives = { + // data-for="x in expr": expr is enumerable, bind x to each in turn + 'for': (nameInExpr, data, proceed) => { + let [name, expr] = nameInExpr.split(' in '); + let newData = Object.create(data); + let index = 0; + for (val of evalExpr(expr, data) || []) { + newData[name] = val; + newData[name + '_index'] = index++; + proceed(newData); + } + }, + // data-if="expr": only include contents if expression is truthy + 'if': (expr, data, proceed) => { if (evalExpr(expr, data)) proceed(data); }, + // data-let="x = expr": bind x to value of expr + 'let': (nameEqExpr, data, proceed) => { + let [name, expr] = nameEqExpr.split(' = '); + let newData = Object.create(data); + newData[name] = evalExpr(expr, data); + proceed(newData); + }, + } + // Compose directive handlers on top of the base handler. + for (let [dir, value] of Object.entries(tmpl.dataset).reverse()) { + if (dir in directives) { + let proceed = handle; + handle = (data) => directives[dir](value, data, proceed); + } + } + handle(data); +} +// Expand a template, after first removing any prior expansion of it. +function reinflate(tmpl, data) { + // Clear previously rendered template contents. + while (tmpl.nextSibling && tmpl.nextSibling.inflated) + tmpl.parentNode.removeChild(tmpl.nextSibling); + inflate(tmpl, data, tmpl.parentNode, tmpl.nextSibling); +} + +// Handle a mouse event on a region containing selectable items. +// This might end up changing the hover state or the selection state. +// +// targetSelector describes what target HTML element is selectable. +// targetToID specifies how to determine the selection from it: +// hover: a function from target to the class name to highlight +// bb: a function from target to the basic-block name to select (BB4) +// elt: a function from target to the CFG element name to select (BB4.5) +// iter: a function from target to the BB iteration to select (BB4:2) +// If an entry is missing, the selection is unmodified. +// If an entry is null, the selection is always cleared. +function mouseEventHandler(event, targetSelector, targetToID, data) { + var target = event.type == "mouseout" ? null : event.target.closest(targetSelector); + let selTarget = k => (target && targetToID[k]) ? targetToID[k](target) : null; + if (event.type == "click") { + let newSel = {}; + for (var k in targetToID) { + if (k == 'hover') continue; + let t = selTarget(k); + newSel[k] = t; + } + updateSelection(newSel, data); + } else if ("hover" in targetToID) { + applyClassIf("hover", classSelector(selTarget("hover"))); + } +} +function watch(rootSelector, targetSelector, targetToID, data) { + var root = document.querySelector(rootSelector); + for (event of ['mouseout', 'mousemove', 'click']) + root.addEventListener(event, e => mouseEventHandler(e, targetSelector, targetToID, data)); +} +function watchSelection(data) { + let lastIter = (bb) => `${bb}:${data.cfg[bb].iters}`; + watch('#code', '.c', { + hover: e => e.dataset.elt, + bb: e => e.dataset.bb, + elt: e => e.dataset.elt, + // If we're already viewing an iteration of this BB, stick with the same. + iter: e => (selection.iter && selection.bb == e.dataset.bb) ? selection.iter : lastIter(e.dataset.bb), + }, data); + watch('#cfg', '.bb', { + hover: e => e.id, + bb: e => e.id, + elt: e => e.id + ".0", + iter: e => lastIter(e.id), + }, data); + watch('#timeline', '.entry', { + hover: e => [e.id, e.dataset.bb], + bb: e => e.dataset.bb, + elt: e => e.dataset.bb + ".0", + iter: e => e.id, + }, data); + watch('#bb-elements', 'tr', { + hover: e => e.id, + elt: e => e.id, + }, data); + watch('#iterations', '.chooser', { + hover: e => e.dataset.iter, + iter: e => e.dataset.iter, + }, data); + updateSelection({}, data); +} +function applyClassIf(cls, query) { + document.querySelectorAll('.' + cls).forEach(elt => elt.classList.remove(cls)); + document.querySelectorAll(query).forEach(elt => elt.classList.add(cls)); +} +// Turns a class name into a CSS selector matching it, with some wrinkles: +// - we treat id="foo" just like class="foo" to avoid repetition in the HTML +// - cls can be an array of strings, we match them all +function classSelector(cls) { + if (cls == null) return null; + if (Array.isArray(cls)) return cls.map(classSelector).join(', '); + var escaped = cls.replace('.', '\\.').replace(':', '\\:'); + // don't require id="foo" class="foo" + return '.' + escaped + ", #" + escaped; +} + +// Add a stylesheet defining colors for n basic blocks. +function addBBColors(n) { + let sheet = new CSSStyleSheet(); + // hex values to subtract from fff to get a base color + options = [0x001, 0x010, 0x011, 0x100, 0x101, 0x110, 0x111]; + function color(hex) { + return "#" + hex.toString(16).padStart(3, "0"); + } + function add(selector, property, hex) { + sheet.insertRule(`${selector} { ${property}: ${color(hex)}; }`) + } + for (var i = 0; i < n; ++i) { + let opt = options[i%options.length]; + add(`.B${i}`, 'background-color', 0xfff - 2*opt); + add(`#B${i} polygon`, 'fill', 0xfff - 2*opt); + add(`#B${i} polygon`, 'stroke', 0x888 - 4*opt); + } + document.adoptedStyleSheets.push(sheet); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp new file mode 100644 index 000000000000..8f40768171c9 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp @@ -0,0 +1,111 @@ +//===-- Logger.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/Support/WithColor.h" + +namespace clang::dataflow { + +Logger &Logger::null() { + struct NullLogger final : Logger {}; + static auto *Instance = new NullLogger(); + return *Instance; +} + +namespace { +struct TextualLogger final : Logger { + llvm::raw_ostream &OS; + const CFG *CurrentCFG; + const CFGBlock *CurrentBlock; + const CFGElement *CurrentElement; + unsigned CurrentElementIndex; + bool ShowColors; + llvm::DenseMap<const CFGBlock *, unsigned> VisitCount; + TypeErasedDataflowAnalysis *CurrentAnalysis; + + TextualLogger(llvm::raw_ostream &OS) + : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {} + + virtual void beginAnalysis(const AdornedCFG &ACFG, + TypeErasedDataflowAnalysis &Analysis) override { + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Beginning data flow analysis ===\n"; + } + auto &D = ACFG.getDecl(); + D.print(OS); + OS << "\n"; + D.dump(OS); + CurrentCFG = &ACFG.getCFG(); + CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors); + CurrentAnalysis = &Analysis; + } + virtual void endAnalysis() override { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + unsigned Blocks = 0, Steps = 0; + for (const auto &E : VisitCount) { + ++Blocks; + Steps += E.second; + } + llvm::errs() << "=== Finished analysis: " << Blocks << " blocks in " + << Steps << " total steps ===\n"; + } + virtual void enterBlock(const CFGBlock &Block, bool PostVisit) override { + unsigned Count = ++VisitCount[&Block]; + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Entering block B" << Block.getBlockID(); + if (PostVisit) + OS << " (post-visit)"; + else + OS << " (iteration " << Count << ")"; + OS << " ===\n"; + } + Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(), + ShowColors); + CurrentBlock = &Block; + CurrentElement = nullptr; + CurrentElementIndex = 0; + } + virtual void enterElement(const CFGElement &Element) override { + ++CurrentElementIndex; + CurrentElement = ∈ + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Processing element B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ": "; + Element.dumpToStream(OS); + } + } + void recordState(TypeErasedDataflowAnalysisState &State) override { + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Computed state for B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ":\n"; + } + // FIXME: currently the environment dump is verbose and unenlightening. + // FIXME: dump the user-defined lattice, too. + State.Env.dump(OS); + OS << "\n"; + } + void blockConverged() override { + OS << "B" << CurrentBlock->getBlockID() << " has converged!\n"; + } + virtual void logText(llvm::StringRef S) override { OS << S << "\n"; } +}; +} // namespace + +std::unique_ptr<Logger> Logger::textual(llvm::raw_ostream &OS) { + return std::make_unique<TextualLogger>(OS); +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp new file mode 100644 index 000000000000..5ac71e1d6bf6 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp @@ -0,0 +1,71 @@ +//===-- ChromiumCheckModel.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Models/ChromiumCheckModel.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "llvm/ADT/DenseSet.h" + +namespace clang { +namespace dataflow { + +/// Determines whether `D` is one of the methods used to implement Chromium's +/// `CHECK` macros. Populates `CheckDecls`, if empty. +bool isCheckLikeMethod(llvm::SmallDenseSet<const CXXMethodDecl *> &CheckDecls, + const CXXMethodDecl &D) { + // All of the methods of interest are static, so avoid any lookup for + // non-static methods (the common case). + if (!D.isStatic()) + return false; + + if (CheckDecls.empty()) { + // Attempt to initialize `CheckDecls` with the methods in class + // `CheckError`. + const CXXRecordDecl *ParentClass = D.getParent(); + if (ParentClass == nullptr || !ParentClass->getDeclName().isIdentifier() || + ParentClass->getName() != "CheckError") + return false; + + // Check whether namespace is "logging". + const auto *N = + dyn_cast_or_null<NamespaceDecl>(ParentClass->getDeclContext()); + if (N == nullptr || !N->getDeclName().isIdentifier() || + N->getName() != "logging") + return false; + + // Check whether "logging" is a top-level namespace. + if (N->getParent() == nullptr || !N->getParent()->isTranslationUnit()) + return false; + + for (const CXXMethodDecl *M : ParentClass->methods()) + if (M->getDeclName().isIdentifier() && M->getName().ends_with("Check")) + CheckDecls.insert(M); + } + + return CheckDecls.contains(&D); +} + +bool ChromiumCheckModel::transfer(const CFGElement &Element, Environment &Env) { + auto CS = Element.getAs<CFGStmt>(); + if (!CS) + return false; + auto Stmt = CS->getStmt(); + if (const auto *Call = dyn_cast<CallExpr>(Stmt)) { + if (const auto *M = dyn_cast<CXXMethodDecl>(Call->getDirectCallee())) { + if (isCheckLikeMethod(CheckDecls, *M)) { + // Mark this branch as unreachable. + Env.assume(Env.arena().makeLiteral(false)); + return true; + } + } + } + return false; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp new file mode 100644 index 000000000000..0707aa662e4c --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -0,0 +1,944 @@ +//===-- UncheckedOptionalAccessModel.cpp ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a dataflow analysis that detects unsafe uses of optional +// values. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersMacros.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <memory> +#include <optional> +#include <utility> + +namespace clang { +namespace dataflow { + +static bool isTopLevelNamespaceWithName(const NamespaceDecl &NS, + llvm::StringRef Name) { + return NS.getDeclName().isIdentifier() && NS.getName() == Name && + NS.getParent() != nullptr && NS.getParent()->isTranslationUnit(); +} + +static bool hasOptionalClassName(const CXXRecordDecl &RD) { + if (!RD.getDeclName().isIdentifier()) + return false; + + if (RD.getName() == "optional") { + if (const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext())) + return N->isStdNamespace() || isTopLevelNamespaceWithName(*N, "absl"); + return false; + } + + if (RD.getName() == "Optional") { + // Check whether namespace is "::base" or "::folly". + const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext()); + return N != nullptr && (isTopLevelNamespaceWithName(*N, "base") || + isTopLevelNamespaceWithName(*N, "folly")); + } + + return false; +} + +static const CXXRecordDecl *getOptionalBaseClass(const CXXRecordDecl *RD) { + if (RD == nullptr) + return nullptr; + if (hasOptionalClassName(*RD)) + return RD; + + if (!RD->hasDefinition()) + return nullptr; + + for (const CXXBaseSpecifier &Base : RD->bases()) + if (const CXXRecordDecl *BaseClass = + getOptionalBaseClass(Base.getType()->getAsCXXRecordDecl())) + return BaseClass; + + return nullptr; +} + +namespace { + +using namespace ::clang::ast_matchers; +using LatticeTransferState = TransferState<NoopLattice>; + +AST_MATCHER(CXXRecordDecl, optionalClass) { return hasOptionalClassName(Node); } + +AST_MATCHER(CXXRecordDecl, optionalOrDerivedClass) { + return getOptionalBaseClass(&Node) != nullptr; +} + +auto desugarsToOptionalType() { + return hasUnqualifiedDesugaredType( + recordType(hasDeclaration(cxxRecordDecl(optionalClass())))); +} + +auto desugarsToOptionalOrDerivedType() { + return hasUnqualifiedDesugaredType( + recordType(hasDeclaration(cxxRecordDecl(optionalOrDerivedClass())))); +} + +auto hasOptionalType() { return hasType(desugarsToOptionalType()); } + +/// Matches any of the spellings of the optional types and sugar, aliases, +/// derived classes, etc. +auto hasOptionalOrDerivedType() { + return hasType(desugarsToOptionalOrDerivedType()); +} + +QualType getPublicType(const Expr *E) { + auto *Cast = dyn_cast<ImplicitCastExpr>(E->IgnoreParens()); + if (Cast == nullptr || Cast->getCastKind() != CK_UncheckedDerivedToBase) { + QualType Ty = E->getType(); + if (Ty->isPointerType()) + return Ty->getPointeeType(); + return Ty; + } + + // Is the derived type that we're casting from the type of `*this`? In this + // special case, we can upcast to the base class even if the base is + // non-public. + bool CastingFromThis = isa<CXXThisExpr>(Cast->getSubExpr()); + + // Find the least-derived type in the path (i.e. the last entry in the list) + // that we can access. + const CXXBaseSpecifier *PublicBase = nullptr; + for (const CXXBaseSpecifier *Base : Cast->path()) { + if (Base->getAccessSpecifier() != AS_public && !CastingFromThis) + break; + PublicBase = Base; + CastingFromThis = false; + } + + if (PublicBase != nullptr) + return PublicBase->getType(); + + // We didn't find any public type that we could cast to. There may be more + // casts in `getSubExpr()`, so recurse. (If there aren't any more casts, this + // will return the type of `getSubExpr()`.) + return getPublicType(Cast->getSubExpr()); +} + +// Returns the least-derived type for the receiver of `MCE` that +// `MCE.getImplicitObjectArgument()->IgnoreParentImpCasts()` can be downcast to. +// Effectively, we upcast until we reach a non-public base class, unless that +// base is a base of `*this`. +// +// This is needed to correctly match methods called on types derived from +// `std::optional`. +// +// Say we have a `struct Derived : public std::optional<int> {} d;` For a call +// `d.has_value()`, the `getImplicitObjectArgument()` looks like this: +// +// ImplicitCastExpr 'const std::__optional_storage_base<int>' lvalue +// | <UncheckedDerivedToBase (optional -> __optional_storage_base)> +// `-DeclRefExpr 'Derived' lvalue Var 'd' 'Derived' +// +// The type of the implicit object argument is `__optional_storage_base` +// (since this is the internal type that `has_value()` is declared on). If we +// call `IgnoreParenImpCasts()` on the implicit object argument, we get the +// `DeclRefExpr`, which has type `Derived`. Neither of these types is +// `optional`, and hence neither is sufficient for querying whether we are +// calling a method on `optional`. +// +// Instead, starting with the most derived type, we need to follow the chain of +// casts +QualType getPublicReceiverType(const CXXMemberCallExpr &MCE) { + return getPublicType(MCE.getImplicitObjectArgument()); +} + +AST_MATCHER_P(CXXMemberCallExpr, publicReceiverType, + ast_matchers::internal::Matcher<QualType>, InnerMatcher) { + return InnerMatcher.matches(getPublicReceiverType(Node), Finder, Builder); +} + +auto isOptionalMemberCallWithNameMatcher( + ast_matchers::internal::Matcher<NamedDecl> matcher, + const std::optional<StatementMatcher> &Ignorable = std::nullopt) { + return cxxMemberCallExpr(Ignorable ? on(expr(unless(*Ignorable))) + : anything(), + publicReceiverType(desugarsToOptionalType()), + callee(cxxMethodDecl(matcher))); +} + +auto isOptionalOperatorCallWithName( + llvm::StringRef operator_name, + const std::optional<StatementMatcher> &Ignorable = std::nullopt) { + return cxxOperatorCallExpr( + hasOverloadedOperatorName(operator_name), + callee(cxxMethodDecl(ofClass(optionalClass()))), + Ignorable ? callExpr(unless(hasArgument(0, *Ignorable))) : callExpr()); +} + +auto isMakeOptionalCall() { + return callExpr(callee(functionDecl(hasAnyName( + "std::make_optional", "base::make_optional", + "absl::make_optional", "folly::make_optional"))), + hasOptionalType()); +} + +auto nulloptTypeDecl() { + return namedDecl(hasAnyName("std::nullopt_t", "absl::nullopt_t", + "base::nullopt_t", "folly::None")); +} + +auto hasNulloptType() { return hasType(nulloptTypeDecl()); } + +auto inPlaceClass() { + return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t", + "base::in_place_t", "folly::in_place_t")); +} + +auto isOptionalNulloptConstructor() { + return cxxConstructExpr( + hasDeclaration(cxxConstructorDecl(parameterCountIs(1), + hasParameter(0, hasNulloptType()))), + hasOptionalOrDerivedType()); +} + +auto isOptionalInPlaceConstructor() { + return cxxConstructExpr(hasArgument(0, hasType(inPlaceClass())), + hasOptionalOrDerivedType()); +} + +auto isOptionalValueOrConversionConstructor() { + return cxxConstructExpr( + unless(hasDeclaration( + cxxConstructorDecl(anyOf(isCopyConstructor(), isMoveConstructor())))), + argumentCountIs(1), hasArgument(0, unless(hasNulloptType())), + hasOptionalOrDerivedType()); +} + +auto isOptionalValueOrConversionAssignment() { + return cxxOperatorCallExpr( + hasOverloadedOperatorName("="), + callee(cxxMethodDecl(ofClass(optionalOrDerivedClass()))), + unless(hasDeclaration(cxxMethodDecl( + anyOf(isCopyAssignmentOperator(), isMoveAssignmentOperator())))), + argumentCountIs(2), hasArgument(1, unless(hasNulloptType()))); +} + +auto isOptionalNulloptAssignment() { + return cxxOperatorCallExpr( + hasOverloadedOperatorName("="), + callee(cxxMethodDecl(ofClass(optionalOrDerivedClass()))), + argumentCountIs(2), hasArgument(1, hasNulloptType())); +} + +auto isStdSwapCall() { + return callExpr(callee(functionDecl(hasName("std::swap"))), + argumentCountIs(2), + hasArgument(0, hasOptionalOrDerivedType()), + hasArgument(1, hasOptionalOrDerivedType())); +} + +auto isStdForwardCall() { + return callExpr(callee(functionDecl(hasName("std::forward"))), + argumentCountIs(1), + hasArgument(0, hasOptionalOrDerivedType())); +} + +constexpr llvm::StringLiteral ValueOrCallID = "ValueOrCall"; + +auto isValueOrStringEmptyCall() { + // `opt.value_or("").empty()` + return cxxMemberCallExpr( + callee(cxxMethodDecl(hasName("empty"))), + onImplicitObjectArgument(ignoringImplicit( + cxxMemberCallExpr(on(expr(unless(cxxThisExpr()))), + callee(cxxMethodDecl(hasName("value_or"), + ofClass(optionalClass()))), + hasArgument(0, stringLiteral(hasSize(0)))) + .bind(ValueOrCallID)))); +} + +auto isValueOrNotEqX() { + auto ComparesToSame = [](ast_matchers::internal::Matcher<Stmt> Arg) { + return hasOperands( + ignoringImplicit( + cxxMemberCallExpr(on(expr(unless(cxxThisExpr()))), + callee(cxxMethodDecl(hasName("value_or"), + ofClass(optionalClass()))), + hasArgument(0, Arg)) + .bind(ValueOrCallID)), + ignoringImplicit(Arg)); + }; + + // `opt.value_or(X) != X`, for X is `nullptr`, `""`, or `0`. Ideally, we'd + // support this pattern for any expression, but the AST does not have a + // generic expression comparison facility, so we specialize to common cases + // seen in practice. FIXME: define a matcher that compares values across + // nodes, which would let us generalize this to any `X`. + return binaryOperation(hasOperatorName("!="), + anyOf(ComparesToSame(cxxNullPtrLiteralExpr()), + ComparesToSame(stringLiteral(hasSize(0))), + ComparesToSame(integerLiteral(equals(0))))); +} + +auto isCallReturningOptional() { + return callExpr(hasType(qualType( + anyOf(desugarsToOptionalOrDerivedType(), + referenceType(pointee(desugarsToOptionalOrDerivedType())))))); +} + +template <typename L, typename R> +auto isComparisonOperatorCall(L lhs_arg_matcher, R rhs_arg_matcher) { + return cxxOperatorCallExpr( + anyOf(hasOverloadedOperatorName("=="), hasOverloadedOperatorName("!=")), + argumentCountIs(2), hasArgument(0, lhs_arg_matcher), + hasArgument(1, rhs_arg_matcher)); +} + +/// Ensures that `Expr` is mapped to a `BoolValue` and returns its formula. +const Formula &forceBoolValue(Environment &Env, const Expr &Expr) { + auto *Value = Env.get<BoolValue>(Expr); + if (Value != nullptr) + return Value->formula(); + + Value = &Env.makeAtomicBoolValue(); + Env.setValue(Expr, *Value); + return Value->formula(); +} + +StorageLocation &locForHasValue(const RecordStorageLocation &OptionalLoc) { + return OptionalLoc.getSyntheticField("has_value"); +} + +StorageLocation &locForValue(const RecordStorageLocation &OptionalLoc) { + return OptionalLoc.getSyntheticField("value"); +} + +/// Sets `HasValueVal` as the symbolic value that represents the "has_value" +/// property of the optional at `OptionalLoc`. +void setHasValue(RecordStorageLocation &OptionalLoc, BoolValue &HasValueVal, + Environment &Env) { + Env.setValue(locForHasValue(OptionalLoc), HasValueVal); +} + +/// Returns the symbolic value that represents the "has_value" property of the +/// optional at `OptionalLoc`. Returns null if `OptionalLoc` is null. +BoolValue *getHasValue(Environment &Env, RecordStorageLocation *OptionalLoc) { + if (OptionalLoc == nullptr) + return nullptr; + StorageLocation &HasValueLoc = locForHasValue(*OptionalLoc); + auto *HasValueVal = Env.get<BoolValue>(HasValueLoc); + if (HasValueVal == nullptr) { + HasValueVal = &Env.makeAtomicBoolValue(); + Env.setValue(HasValueLoc, *HasValueVal); + } + return HasValueVal; +} + +QualType valueTypeFromOptionalDecl(const CXXRecordDecl &RD) { + auto &CTSD = cast<ClassTemplateSpecializationDecl>(RD); + return CTSD.getTemplateArgs()[0].getAsType(); +} + +/// Returns the number of optional wrappers in `Type`. +/// +/// For example, if `Type` is `optional<optional<int>>`, the result of this +/// function will be 2. +int countOptionalWrappers(const ASTContext &ASTCtx, QualType Type) { + const CXXRecordDecl *Optional = + getOptionalBaseClass(Type->getAsCXXRecordDecl()); + if (Optional == nullptr) + return 0; + return 1 + countOptionalWrappers( + ASTCtx, + valueTypeFromOptionalDecl(*Optional).getDesugaredType(ASTCtx)); +} + +StorageLocation *getLocBehindPossiblePointer(const Expr &E, + const Environment &Env) { + if (E.isPRValue()) { + if (auto *PointerVal = dyn_cast_or_null<PointerValue>(Env.getValue(E))) + return &PointerVal->getPointeeLoc(); + return nullptr; + } + return Env.getStorageLocation(E); +} + +void transferUnwrapCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalLoc = cast_or_null<RecordStorageLocation>( + getLocBehindPossiblePointer(*ObjectExpr, State.Env))) { + if (State.Env.getStorageLocation(*UnwrapExpr) == nullptr) + State.Env.setStorageLocation(*UnwrapExpr, locForValue(*OptionalLoc)); + } +} + +void transferArrowOpCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalLoc = cast_or_null<RecordStorageLocation>( + getLocBehindPossiblePointer(*ObjectExpr, State.Env))) + State.Env.setValue( + *UnwrapExpr, State.Env.create<PointerValue>(locForValue(*OptionalLoc))); +} + +void transferMakeOptionalCall(const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + setHasValue(State.Env.getResultObjectLocation(*E), + State.Env.getBoolLiteralValue(true), State.Env); +} + +void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (auto *HasValueVal = getHasValue( + State.Env, getImplicitObjectLocation(*CallExpr, State.Env))) { + State.Env.setValue(*CallExpr, *HasValueVal); + } +} + +/// `ModelPred` builds a logical formula relating the predicate in +/// `ValueOrPredExpr` to the optional's `has_value` property. +void transferValueOrImpl( + const clang::Expr *ValueOrPredExpr, const MatchFinder::MatchResult &Result, + LatticeTransferState &State, + const Formula &(*ModelPred)(Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal)) { + auto &Env = State.Env; + + const auto *MCE = + Result.Nodes.getNodeAs<clang::CXXMemberCallExpr>(ValueOrCallID); + + auto *HasValueVal = + getHasValue(State.Env, getImplicitObjectLocation(*MCE, State.Env)); + if (HasValueVal == nullptr) + return; + + Env.assume(ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr), + HasValueVal->formula())); +} + +void transferValueOrStringEmptyCall(const clang::Expr *ComparisonExpr, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + return transferValueOrImpl(ComparisonExpr, Result, State, + [](Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal) -> const Formula & { + auto &A = Env.arena(); + // If the result is *not* empty, then we know the + // optional must have been holding a value. If + // `ExprVal` is true, though, we don't learn + // anything definite about `has_value`, so we + // don't add any corresponding implications to + // the flow condition. + return A.makeImplies(A.makeNot(ExprVal), + HasValueVal); + }); +} + +void transferValueOrNotEqX(const Expr *ComparisonExpr, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + transferValueOrImpl(ComparisonExpr, Result, State, + [](Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal) -> const Formula & { + auto &A = Env.arena(); + // We know that if `(opt.value_or(X) != X)` then + // `opt.hasValue()`, even without knowing further + // details about the contents of `opt`. + return A.makeImplies(ExprVal, HasValueVal); + }); +} + +void transferCallReturningOptional(const CallExpr *E, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + RecordStorageLocation *Loc = nullptr; + if (E->isPRValue()) { + Loc = &State.Env.getResultObjectLocation(*E); + } else { + Loc = State.Env.get<RecordStorageLocation>(*E); + if (Loc == nullptr) { + Loc = &cast<RecordStorageLocation>(State.Env.createStorageLocation(*E)); + State.Env.setStorageLocation(*E, *Loc); + } + } + + if (State.Env.getValue(locForHasValue(*Loc)) != nullptr) + return; + + setHasValue(*Loc, State.Env.makeAtomicBoolValue(), State.Env); +} + +void constructOptionalValue(const Expr &E, Environment &Env, + BoolValue &HasValueVal) { + RecordStorageLocation &Loc = Env.getResultObjectLocation(E); + setHasValue(Loc, HasValueVal, Env); +} + +/// Returns a symbolic value for the "has_value" property of an `optional<T>` +/// value that is constructed/assigned from a value of type `U` or `optional<U>` +/// where `T` is constructible from `U`. +BoolValue &valueOrConversionHasValue(QualType DestType, const Expr &E, + const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + const int DestTypeOptionalWrappersCount = + countOptionalWrappers(*MatchRes.Context, DestType); + const int ArgTypeOptionalWrappersCount = countOptionalWrappers( + *MatchRes.Context, E.getType().getNonReferenceType()); + + // Is this an constructor of the form `template<class U> optional(U &&)` / + // assignment of the form `template<class U> optional& operator=(U &&)` + // (where `T` is assignable / constructible from `U`)? + // We recognize this because the number of optionals in the optional being + // assigned to is different from the function argument type. + if (DestTypeOptionalWrappersCount != ArgTypeOptionalWrappersCount) + return State.Env.getBoolLiteralValue(true); + + // Otherwise, this must be a constructor of the form + // `template <class U> optional<optional<U> &&)` / assignment of the form + // `template <class U> optional& operator=(optional<U> &&) + // (where, again, `T` is assignable / constructible from `U`). + auto *Loc = State.Env.get<RecordStorageLocation>(E); + if (auto *HasValueVal = getHasValue(State.Env, Loc)) + return *HasValueVal; + return State.Env.makeAtomicBoolValue(); +} + +void transferValueOrConversionConstructor( + const CXXConstructExpr *E, const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + assert(E->getNumArgs() > 0); + + constructOptionalValue( + *E, State.Env, + valueOrConversionHasValue( + E->getConstructor()->getThisType()->getPointeeType(), *E->getArg(0), + MatchRes, State)); +} + +void transferAssignment(const CXXOperatorCallExpr *E, BoolValue &HasValueVal, + LatticeTransferState &State) { + assert(E->getNumArgs() > 0); + + if (auto *Loc = State.Env.get<RecordStorageLocation>(*E->getArg(0))) { + setHasValue(*Loc, HasValueVal, State.Env); + + // Assign a storage location for the whole expression. + State.Env.setStorageLocation(*E, *Loc); + } +} + +void transferValueOrConversionAssignment( + const CXXOperatorCallExpr *E, const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + assert(E->getNumArgs() > 1); + transferAssignment( + E, + valueOrConversionHasValue(E->getArg(0)->getType().getNonReferenceType(), + *E->getArg(1), MatchRes, State), + State); +} + +void transferNulloptAssignment(const CXXOperatorCallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferAssignment(E, State.Env.getBoolLiteralValue(false), State); +} + +void transferSwap(RecordStorageLocation *Loc1, RecordStorageLocation *Loc2, + Environment &Env) { + // We account for cases where one or both of the optionals are not modeled, + // either lacking associated storage locations, or lacking values associated + // to such storage locations. + + if (Loc1 == nullptr) { + if (Loc2 != nullptr) + setHasValue(*Loc2, Env.makeAtomicBoolValue(), Env); + return; + } + if (Loc2 == nullptr) { + setHasValue(*Loc1, Env.makeAtomicBoolValue(), Env); + return; + } + + // Both expressions have locations, though they may not have corresponding + // values. In that case, we create a fresh value at this point. Note that if + // two branches both do this, they will not share the value, but it at least + // allows for local reasoning about the value. To avoid the above, we would + // need *lazy* value allocation. + // FIXME: allocate values lazily, instead of just creating a fresh value. + BoolValue *BoolVal1 = getHasValue(Env, Loc1); + if (BoolVal1 == nullptr) + BoolVal1 = &Env.makeAtomicBoolValue(); + + BoolValue *BoolVal2 = getHasValue(Env, Loc2); + if (BoolVal2 == nullptr) + BoolVal2 = &Env.makeAtomicBoolValue(); + + setHasValue(*Loc1, *BoolVal2, Env); + setHasValue(*Loc2, *BoolVal1, Env); +} + +void transferSwapCall(const CXXMemberCallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 1); + auto *OtherLoc = State.Env.get<RecordStorageLocation>(*E->getArg(0)); + transferSwap(getImplicitObjectLocation(*E, State.Env), OtherLoc, State.Env); +} + +void transferStdSwapCall(const CallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 2); + auto *Arg0Loc = State.Env.get<RecordStorageLocation>(*E->getArg(0)); + auto *Arg1Loc = State.Env.get<RecordStorageLocation>(*E->getArg(1)); + transferSwap(Arg0Loc, Arg1Loc, State.Env); +} + +void transferStdForwardCall(const CallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 1); + + if (auto *Loc = State.Env.getStorageLocation(*E->getArg(0))) + State.Env.setStorageLocation(*E, *Loc); +} + +const Formula &evaluateEquality(Arena &A, const Formula &EqVal, + const Formula &LHS, const Formula &RHS) { + // Logically, an optional<T> object is composed of two values - a `has_value` + // bit and a value of type T. Equality of optional objects compares both + // values. Therefore, merely comparing the `has_value` bits isn't sufficient: + // when two optional objects are engaged, the equality of their respective + // values of type T matters. Since we only track the `has_value` bits, we + // can't make any conclusions about equality when we know that two optional + // objects are engaged. + // + // We express this as two facts about the equality: + // a) EqVal => (LHS & RHS) v (!RHS & !LHS) + // If they are equal, then either both are set or both are unset. + // b) (!LHS & !RHS) => EqVal + // If neither is set, then they are equal. + // We rewrite b) as !EqVal => (LHS v RHS), for a more compact formula. + return A.makeAnd( + A.makeImplies(EqVal, A.makeOr(A.makeAnd(LHS, RHS), + A.makeAnd(A.makeNot(LHS), A.makeNot(RHS)))), + A.makeImplies(A.makeNot(EqVal), A.makeOr(LHS, RHS))); +} + +void transferOptionalAndOptionalCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + Environment &Env = State.Env; + auto &A = Env.arena(); + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + auto *Arg0Loc = Env.get<RecordStorageLocation>(*CmpExpr->getArg(0)); + if (auto *LHasVal = getHasValue(Env, Arg0Loc)) { + auto *Arg1Loc = Env.get<RecordStorageLocation>(*CmpExpr->getArg(1)); + if (auto *RHasVal = getHasValue(Env, Arg1Loc)) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &A.makeNot(*CmpValue); + Env.assume(evaluateEquality(A, *CmpValue, LHasVal->formula(), + RHasVal->formula())); + } + } +} + +void transferOptionalAndValueCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const clang::Expr *E, Environment &Env) { + auto &A = Env.arena(); + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + auto *Loc = Env.get<RecordStorageLocation>(*E); + if (auto *HasVal = getHasValue(Env, Loc)) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &A.makeNot(*CmpValue); + Env.assume( + evaluateEquality(A, *CmpValue, HasVal->formula(), A.makeLiteral(true))); + } +} + +void transferOptionalAndNulloptCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const clang::Expr *E, Environment &Env) { + auto &A = Env.arena(); + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + auto *Loc = Env.get<RecordStorageLocation>(*E); + if (auto *HasVal = getHasValue(Env, Loc)) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &A.makeNot(*CmpValue); + Env.assume(evaluateEquality(A, *CmpValue, HasVal->formula(), + A.makeLiteral(false))); + } +} + +std::optional<StatementMatcher> +ignorableOptional(const UncheckedOptionalAccessModelOptions &Options) { + if (Options.IgnoreSmartPointerDereference) { + auto SmartPtrUse = expr(ignoringParenImpCasts(cxxOperatorCallExpr( + anyOf(hasOverloadedOperatorName("->"), hasOverloadedOperatorName("*")), + unless(hasArgument(0, expr(hasOptionalType())))))); + return expr( + anyOf(SmartPtrUse, memberExpr(hasObjectExpression(SmartPtrUse)))); + } + return std::nullopt; +} + +StatementMatcher +valueCall(const std::optional<StatementMatcher> &IgnorableOptional) { + return isOptionalMemberCallWithNameMatcher(hasName("value"), + IgnorableOptional); +} + +StatementMatcher +valueOperatorCall(const std::optional<StatementMatcher> &IgnorableOptional) { + return expr(anyOf(isOptionalOperatorCallWithName("*", IgnorableOptional), + isOptionalOperatorCallWithName("->", IgnorableOptional))); +} + +auto buildTransferMatchSwitch() { + // FIXME: Evaluate the efficiency of matchers. If using matchers results in a + // lot of duplicated work (e.g. string comparisons), consider providing APIs + // that avoid it through memoization. + return CFGMatchSwitchBuilder<LatticeTransferState>() + // make_optional + .CaseOfCFGStmt<CallExpr>(isMakeOptionalCall(), transferMakeOptionalCall) + + // optional::optional (in place) + .CaseOfCFGStmt<CXXConstructExpr>( + isOptionalInPlaceConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(true)); + }) + // optional::optional(nullopt_t) + .CaseOfCFGStmt<CXXConstructExpr>( + isOptionalNulloptConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(false)); + }) + // optional::optional (value/conversion) + .CaseOfCFGStmt<CXXConstructExpr>(isOptionalValueOrConversionConstructor(), + transferValueOrConversionConstructor) + + // optional::operator= + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isOptionalValueOrConversionAssignment(), + transferValueOrConversionAssignment) + .CaseOfCFGStmt<CXXOperatorCallExpr>(isOptionalNulloptAssignment(), + transferNulloptAssignment) + + // optional::value + .CaseOfCFGStmt<CXXMemberCallExpr>( + valueCall(std::nullopt), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferUnwrapCall(E, E->getImplicitObjectArgument(), State); + }) + + // optional::operator* + .CaseOfCFGStmt<CallExpr>(isOptionalOperatorCallWithName("*"), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferUnwrapCall(E, E->getArg(0), State); + }) + + // optional::operator-> + .CaseOfCFGStmt<CallExpr>(isOptionalOperatorCallWithName("->"), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferArrowOpCall(E, E->getArg(0), State); + }) + + // optional::has_value, optional::hasValue + // Of the supported optionals only folly::Optional uses hasValue, but this + // will also pass for other types + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher( + hasAnyName("has_value", "hasValue")), + transferOptionalHasValueCall) + + // optional::operator bool + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("operator bool")), + transferOptionalHasValueCall) + + // optional::emplace + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("emplace")), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (RecordStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + setHasValue(*Loc, State.Env.getBoolLiteralValue(true), State.Env); + } + }) + + // optional::reset + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("reset")), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (RecordStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + setHasValue(*Loc, State.Env.getBoolLiteralValue(false), + State.Env); + } + }) + + // optional::swap + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("swap")), + transferSwapCall) + + // std::swap + .CaseOfCFGStmt<CallExpr>(isStdSwapCall(), transferStdSwapCall) + + // std::forward + .CaseOfCFGStmt<CallExpr>(isStdForwardCall(), transferStdForwardCall) + + // opt.value_or("").empty() + .CaseOfCFGStmt<Expr>(isValueOrStringEmptyCall(), + transferValueOrStringEmptyCall) + + // opt.value_or(X) != X + .CaseOfCFGStmt<Expr>(isValueOrNotEqX(), transferValueOrNotEqX) + + // Comparisons (==, !=): + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasOptionalType(), hasOptionalType()), + transferOptionalAndOptionalCmp) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasOptionalType(), hasNulloptType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndNulloptCmp(Cmp, Cmp->getArg(0), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasNulloptType(), hasOptionalType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndNulloptCmp(Cmp, Cmp->getArg(1), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall( + hasOptionalType(), + unless(anyOf(hasOptionalType(), hasNulloptType()))), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndValueCmp(Cmp, Cmp->getArg(0), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall( + unless(anyOf(hasOptionalType(), hasNulloptType())), + hasOptionalType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndValueCmp(Cmp, Cmp->getArg(1), State.Env); + }) + + // returns optional + .CaseOfCFGStmt<CallExpr>(isCallReturningOptional(), + transferCallReturningOptional) + + .Build(); +} + +llvm::SmallVector<SourceLocation> diagnoseUnwrapCall(const Expr *ObjectExpr, + const Environment &Env) { + if (auto *OptionalLoc = cast_or_null<RecordStorageLocation>( + getLocBehindPossiblePointer(*ObjectExpr, Env))) { + auto *Prop = Env.getValue(locForHasValue(*OptionalLoc)); + if (auto *HasValueVal = cast_or_null<BoolValue>(Prop)) { + if (Env.proves(HasValueVal->formula())) + return {}; + } + } + + // Record that this unwrap is *not* provably safe. + // FIXME: include either the name of the optional (if applicable) or a source + // range of the access for easier interpretation of the result. + return {ObjectExpr->getBeginLoc()}; +} + +auto buildDiagnoseMatchSwitch( + const UncheckedOptionalAccessModelOptions &Options) { + // FIXME: Evaluate the efficiency of matchers. If using matchers results in a + // lot of duplicated work (e.g. string comparisons), consider providing APIs + // that avoid it through memoization. + auto IgnorableOptional = ignorableOptional(Options); + return CFGMatchSwitchBuilder<const Environment, + llvm::SmallVector<SourceLocation>>() + // optional::value + .CaseOfCFGStmt<CXXMemberCallExpr>( + valueCall(IgnorableOptional), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E->getImplicitObjectArgument(), Env); + }) + + // optional::operator*, optional::operator-> + .CaseOfCFGStmt<CallExpr>(valueOperatorCall(IgnorableOptional), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E->getArg(0), Env); + }) + .Build(); +} + +} // namespace + +ast_matchers::DeclarationMatcher +UncheckedOptionalAccessModel::optionalClassDecl() { + return cxxRecordDecl(optionalClass()); +} + +UncheckedOptionalAccessModel::UncheckedOptionalAccessModel(ASTContext &Ctx, + Environment &Env) + : DataflowAnalysis<UncheckedOptionalAccessModel, NoopLattice>(Ctx), + TransferMatchSwitch(buildTransferMatchSwitch()) { + Env.getDataflowAnalysisContext().setSyntheticFieldCallback( + [&Ctx](QualType Ty) -> llvm::StringMap<QualType> { + const CXXRecordDecl *Optional = + getOptionalBaseClass(Ty->getAsCXXRecordDecl()); + if (Optional == nullptr) + return {}; + return {{"value", valueTypeFromOptionalDecl(*Optional)}, + {"has_value", Ctx.BoolTy}}; + }); +} + +void UncheckedOptionalAccessModel::transfer(const CFGElement &Elt, + NoopLattice &L, Environment &Env) { + LatticeTransferState State(L, Env); + TransferMatchSwitch(Elt, getASTContext(), State); +} + +UncheckedOptionalAccessDiagnoser::UncheckedOptionalAccessDiagnoser( + UncheckedOptionalAccessModelOptions Options) + : DiagnoseMatchSwitch(buildDiagnoseMatchSwitch(Options)) {} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp new file mode 100644 index 000000000000..b8401230a83d --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp @@ -0,0 +1,133 @@ +//===-- RecordOps.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on records (structs, classes, and unions). +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/RecordOps.h" + +#define DEBUG_TYPE "dataflow" + +namespace clang::dataflow { + +static void copyField(const ValueDecl &Field, StorageLocation *SrcFieldLoc, + StorageLocation *DstFieldLoc, RecordStorageLocation &Dst, + Environment &Env) { + assert(Field.getType()->isReferenceType() || + (SrcFieldLoc != nullptr && DstFieldLoc != nullptr)); + + if (Field.getType()->isRecordType()) { + copyRecord(cast<RecordStorageLocation>(*SrcFieldLoc), + cast<RecordStorageLocation>(*DstFieldLoc), Env); + } else if (Field.getType()->isReferenceType()) { + Dst.setChild(Field, SrcFieldLoc); + } else { + if (Value *Val = Env.getValue(*SrcFieldLoc)) + Env.setValue(*DstFieldLoc, *Val); + else + Env.clearValue(*DstFieldLoc); + } +} + +static void copySyntheticField(QualType FieldType, StorageLocation &SrcFieldLoc, + StorageLocation &DstFieldLoc, Environment &Env) { + if (FieldType->isRecordType()) { + copyRecord(cast<RecordStorageLocation>(SrcFieldLoc), + cast<RecordStorageLocation>(DstFieldLoc), Env); + } else { + if (Value *Val = Env.getValue(SrcFieldLoc)) + Env.setValue(DstFieldLoc, *Val); + else + Env.clearValue(DstFieldLoc); + } +} + +void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, + Environment &Env) { + auto SrcType = Src.getType().getCanonicalType().getUnqualifiedType(); + auto DstType = Dst.getType().getCanonicalType().getUnqualifiedType(); + + auto SrcDecl = SrcType->getAsCXXRecordDecl(); + auto DstDecl = DstType->getAsCXXRecordDecl(); + + [[maybe_unused]] bool compatibleTypes = + SrcType == DstType || + (SrcDecl != nullptr && DstDecl != nullptr && + (SrcDecl->isDerivedFrom(DstDecl) || DstDecl->isDerivedFrom(SrcDecl))); + + LLVM_DEBUG({ + if (!compatibleTypes) { + llvm::dbgs() << "Source type " << Src.getType() << "\n"; + llvm::dbgs() << "Destination type " << Dst.getType() << "\n"; + } + }); + assert(compatibleTypes); + + if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr && + SrcDecl->isDerivedFrom(DstDecl))) { + for (auto [Field, DstFieldLoc] : Dst.children()) + copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env); + for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields()) + copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), + *DstFieldLoc, Env); + } else { + for (auto [Field, SrcFieldLoc] : Src.children()) + copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); + for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) + copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, + Dst.getSyntheticField(Name), Env); + } +} + +bool recordsEqual(const RecordStorageLocation &Loc1, const Environment &Env1, + const RecordStorageLocation &Loc2, const Environment &Env2) { + LLVM_DEBUG({ + if (Loc2.getType().getCanonicalType().getUnqualifiedType() != + Loc1.getType().getCanonicalType().getUnqualifiedType()) { + llvm::dbgs() << "Loc1 type " << Loc1.getType() << "\n"; + llvm::dbgs() << "Loc2 type " << Loc2.getType() << "\n"; + } + }); + assert(Loc2.getType().getCanonicalType().getUnqualifiedType() == + Loc1.getType().getCanonicalType().getUnqualifiedType()); + + for (auto [Field, FieldLoc1] : Loc1.children()) { + StorageLocation *FieldLoc2 = Loc2.getChild(*Field); + + assert(Field->getType()->isReferenceType() || + (FieldLoc1 != nullptr && FieldLoc2 != nullptr)); + + if (Field->getType()->isRecordType()) { + if (!recordsEqual(cast<RecordStorageLocation>(*FieldLoc1), Env1, + cast<RecordStorageLocation>(*FieldLoc2), Env2)) + return false; + } else if (Field->getType()->isReferenceType()) { + if (FieldLoc1 != FieldLoc2) + return false; + } else if (Env1.getValue(*FieldLoc1) != Env2.getValue(*FieldLoc2)) { + return false; + } + } + + for (const auto &[Name, SynthFieldLoc1] : Loc1.synthetic_fields()) { + if (SynthFieldLoc1->getType()->isRecordType()) { + if (!recordsEqual( + *cast<RecordStorageLocation>(SynthFieldLoc1), Env1, + cast<RecordStorageLocation>(Loc2.getSyntheticField(Name)), Env2)) + return false; + } else if (Env1.getValue(*SynthFieldLoc1) != + Env2.getValue(Loc2.getSyntheticField(Name))) { + return false; + } + } + + return true; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/SimplifyConstraints.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/SimplifyConstraints.cpp new file mode 100644 index 000000000000..cc20202768b9 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/SimplifyConstraints.cpp @@ -0,0 +1,180 @@ +//===-- SimplifyConstraints.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" +#include "llvm/ADT/EquivalenceClasses.h" + +namespace clang { +namespace dataflow { + +// Substitutes all occurrences of a given atom in `F` by a given formula and +// returns the resulting formula. +static const Formula & +substitute(const Formula &F, + const llvm::DenseMap<Atom, const Formula *> &Substitutions, + Arena &arena) { + switch (F.kind()) { + case Formula::AtomRef: + if (auto iter = Substitutions.find(F.getAtom()); + iter != Substitutions.end()) + return *iter->second; + return F; + case Formula::Literal: + return F; + case Formula::Not: + return arena.makeNot(substitute(*F.operands()[0], Substitutions, arena)); + case Formula::And: + return arena.makeAnd(substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + case Formula::Or: + return arena.makeOr(substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + case Formula::Implies: + return arena.makeImplies( + substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + case Formula::Equal: + return arena.makeEquals(substitute(*F.operands()[0], Substitutions, arena), + substitute(*F.operands()[1], Substitutions, arena)); + } + llvm_unreachable("Unknown formula kind"); +} + +// Returns the result of replacing atoms in `Atoms` with the leader of their +// equivalence class in `EquivalentAtoms`. +// Atoms that don't have an equivalence class in `EquivalentAtoms` are inserted +// into it as single-member equivalence classes. +static llvm::DenseSet<Atom> +projectToLeaders(const llvm::DenseSet<Atom> &Atoms, + llvm::EquivalenceClasses<Atom> &EquivalentAtoms) { + llvm::DenseSet<Atom> Result; + + for (Atom Atom : Atoms) + Result.insert(EquivalentAtoms.getOrInsertLeaderValue(Atom)); + + return Result; +} + +// Returns the atoms in the equivalence class for the leader identified by +// `LeaderIt`. +static llvm::SmallVector<Atom> +atomsInEquivalenceClass(const llvm::EquivalenceClasses<Atom> &EquivalentAtoms, + llvm::EquivalenceClasses<Atom>::iterator LeaderIt) { + llvm::SmallVector<Atom> Result; + for (auto MemberIt = EquivalentAtoms.member_begin(LeaderIt); + MemberIt != EquivalentAtoms.member_end(); ++MemberIt) + Result.push_back(*MemberIt); + return Result; +} + +void simplifyConstraints(llvm::SetVector<const Formula *> &Constraints, + Arena &arena, SimplifyConstraintsInfo *Info) { + auto contradiction = [&]() { + Constraints.clear(); + Constraints.insert(&arena.makeLiteral(false)); + }; + + llvm::EquivalenceClasses<Atom> EquivalentAtoms; + llvm::DenseSet<Atom> TrueAtoms; + llvm::DenseSet<Atom> FalseAtoms; + + while (true) { + for (const auto *Constraint : Constraints) { + switch (Constraint->kind()) { + case Formula::AtomRef: + TrueAtoms.insert(Constraint->getAtom()); + break; + case Formula::Not: + if (Constraint->operands()[0]->kind() == Formula::AtomRef) + FalseAtoms.insert(Constraint->operands()[0]->getAtom()); + break; + case Formula::Equal: { + ArrayRef<const Formula *> operands = Constraint->operands(); + if (operands[0]->kind() == Formula::AtomRef && + operands[1]->kind() == Formula::AtomRef) { + EquivalentAtoms.unionSets(operands[0]->getAtom(), + operands[1]->getAtom()); + } + break; + } + default: + break; + } + } + + TrueAtoms = projectToLeaders(TrueAtoms, EquivalentAtoms); + FalseAtoms = projectToLeaders(FalseAtoms, EquivalentAtoms); + + llvm::DenseMap<Atom, const Formula *> Substitutions; + for (auto It = EquivalentAtoms.begin(); It != EquivalentAtoms.end(); ++It) { + Atom TheAtom = It->getData(); + Atom Leader = EquivalentAtoms.getLeaderValue(TheAtom); + if (TrueAtoms.contains(Leader)) { + if (FalseAtoms.contains(Leader)) { + contradiction(); + return; + } + Substitutions.insert({TheAtom, &arena.makeLiteral(true)}); + } else if (FalseAtoms.contains(Leader)) { + Substitutions.insert({TheAtom, &arena.makeLiteral(false)}); + } else if (TheAtom != Leader) { + Substitutions.insert({TheAtom, &arena.makeAtomRef(Leader)}); + } + } + + llvm::SetVector<const Formula *> NewConstraints; + for (const auto *Constraint : Constraints) { + const Formula &NewConstraint = + substitute(*Constraint, Substitutions, arena); + if (NewConstraint.isLiteral(true)) + continue; + if (NewConstraint.isLiteral(false)) { + contradiction(); + return; + } + if (NewConstraint.kind() == Formula::And) { + NewConstraints.insert(NewConstraint.operands()[0]); + NewConstraints.insert(NewConstraint.operands()[1]); + continue; + } + NewConstraints.insert(&NewConstraint); + } + + if (NewConstraints == Constraints) + break; + Constraints = std::move(NewConstraints); + } + + if (Info) { + for (auto It = EquivalentAtoms.begin(), End = EquivalentAtoms.end(); + It != End; ++It) { + if (!It->isLeader()) + continue; + Atom At = *EquivalentAtoms.findLeader(It); + if (TrueAtoms.contains(At) || FalseAtoms.contains(At)) + continue; + llvm::SmallVector<Atom> Atoms = + atomsInEquivalenceClass(EquivalentAtoms, It); + if (Atoms.size() == 1) + continue; + std::sort(Atoms.begin(), Atoms.end()); + Info->EquivalentAtoms.push_back(std::move(Atoms)); + } + for (Atom At : TrueAtoms) + Info->TrueAtoms.append(atomsInEquivalenceClass( + EquivalentAtoms, EquivalentAtoms.findValue(At))); + std::sort(Info->TrueAtoms.begin(), Info->TrueAtoms.end()); + for (Atom At : FalseAtoms) + Info->FalseAtoms.append(atomsInEquivalenceClass( + EquivalentAtoms, EquivalentAtoms.findValue(At))); + std::sort(Info->FalseAtoms.begin(), Info->FalseAtoms.end()); + } +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp new file mode 100644 index 000000000000..3c896d373a21 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -0,0 +1,897 @@ +//===-- Transfer.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines transfer functions that evaluate program statements and +// update an environment accordingly. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Transfer.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/Analysis/FlowSensitive/AdornedCFG.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/NoopAnalysis.h" +#include "clang/Analysis/FlowSensitive/RecordOps.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/OperatorKinds.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include <assert.h> +#include <cassert> + +#define DEBUG_TYPE "dataflow" + +namespace clang { +namespace dataflow { + +const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const { + auto BlockIt = ACFG.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); + if (BlockIt == ACFG.getStmtToBlock().end()) { + assert(false); + // Return null to avoid dereferencing the end iterator in non-assert builds. + return nullptr; + } + if (!ACFG.isBlockReachable(*BlockIt->getSecond())) + return nullptr; + if (BlockIt->getSecond()->getBlockID() == CurBlockID) + return &CurState.Env; + const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; + if (!(State)) + return nullptr; + return &State->Env; +} + +static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS, + Environment &Env) { + Value *LHSValue = Env.getValue(LHS); + Value *RHSValue = Env.getValue(RHS); + + if (LHSValue == RHSValue) + return Env.getBoolLiteralValue(true); + + if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue)) + if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue)) + return Env.makeIff(*LHSBool, *RHSBool); + + if (auto *LHSPtr = dyn_cast_or_null<PointerValue>(LHSValue)) + if (auto *RHSPtr = dyn_cast_or_null<PointerValue>(RHSValue)) + // If the storage locations are the same, the pointers definitely compare + // the same. If the storage locations are different, they may still alias, + // so we fall through to the case below that returns an atom. + if (&LHSPtr->getPointeeLoc() == &RHSPtr->getPointeeLoc()) + return Env.getBoolLiteralValue(true); + + return Env.makeAtomicBoolValue(); +} + +static BoolValue &unpackValue(BoolValue &V, Environment &Env) { + if (auto *Top = llvm::dyn_cast<TopBoolValue>(&V)) { + auto &A = Env.getDataflowAnalysisContext().arena(); + return A.makeBoolValue(A.makeAtomRef(Top->getAtom())); + } + return V; +} + +// Unpacks the value (if any) associated with `E` and updates `E` to the new +// value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion, +// by skipping past the reference. +static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) { + auto *Loc = Env.getStorageLocation(E); + if (Loc == nullptr) + return nullptr; + auto *Val = Env.getValue(*Loc); + + auto *B = dyn_cast_or_null<BoolValue>(Val); + if (B == nullptr) + return Val; + + auto &UnpackedVal = unpackValue(*B, Env); + if (&UnpackedVal == Val) + return Val; + Env.setValue(*Loc, UnpackedVal); + return &UnpackedVal; +} + +static void propagateValue(const Expr &From, const Expr &To, Environment &Env) { + if (From.getType()->isRecordType()) + return; + if (auto *Val = Env.getValue(From)) + Env.setValue(To, *Val); +} + +static void propagateStorageLocation(const Expr &From, const Expr &To, + Environment &Env) { + if (auto *Loc = Env.getStorageLocation(From)) + Env.setStorageLocation(To, *Loc); +} + +// Propagates the value or storage location of `From` to `To` in cases where +// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff +// `From` is a glvalue. +static void propagateValueOrStorageLocation(const Expr &From, const Expr &To, + Environment &Env) { + assert(From.isGLValue() == To.isGLValue()); + if (From.isGLValue()) + propagateStorageLocation(From, To, Env); + else + propagateValue(From, To, Env); +} + +namespace { + +class TransferVisitor : public ConstStmtVisitor<TransferVisitor> { +public: + TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env, + Environment::ValueModel &Model) + : StmtToEnv(StmtToEnv), Env(Env), Model(Model) {} + + void VisitBinaryOperator(const BinaryOperator *S) { + const Expr *LHS = S->getLHS(); + assert(LHS != nullptr); + + const Expr *RHS = S->getRHS(); + assert(RHS != nullptr); + + // Do compound assignments up-front, as there are so many of them and we + // don't want to list all of them in the switch statement below. + // To avoid generating unnecessary values, we don't create a new value but + // instead leave it to the specific analysis to do this if desired. + if (S->isCompoundAssignmentOp()) + propagateStorageLocation(*S->getLHS(), *S, Env); + + switch (S->getOpcode()) { + case BO_Assign: { + auto *LHSLoc = Env.getStorageLocation(*LHS); + if (LHSLoc == nullptr) + break; + + auto *RHSVal = Env.getValue(*RHS); + if (RHSVal == nullptr) + break; + + // Assign a value to the storage location of the left-hand side. + Env.setValue(*LHSLoc, *RHSVal); + + // Assign a storage location for the whole expression. + Env.setStorageLocation(*S, *LHSLoc); + break; + } + case BO_LAnd: + case BO_LOr: { + BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS); + BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS); + + if (S->getOpcode() == BO_LAnd) + Env.setValue(*S, Env.makeAnd(LHSVal, RHSVal)); + else + Env.setValue(*S, Env.makeOr(LHSVal, RHSVal)); + break; + } + case BO_NE: + case BO_EQ: { + auto &LHSEqRHSValue = evaluateBooleanEquality(*LHS, *RHS, Env); + Env.setValue(*S, S->getOpcode() == BO_EQ ? LHSEqRHSValue + : Env.makeNot(LHSEqRHSValue)); + break; + } + case BO_Comma: { + propagateValueOrStorageLocation(*RHS, *S, Env); + break; + } + default: + break; + } + } + + void VisitDeclRefExpr(const DeclRefExpr *S) { + const ValueDecl *VD = S->getDecl(); + assert(VD != nullptr); + + // Some `DeclRefExpr`s aren't glvalues, so we can't associate them with a + // `StorageLocation`, and there's also no sensible `Value` that we can + // assign to them. Examples: + // - Non-static member variables + // - Non static member functions + // Note: Member operators are an exception to this, but apparently only + // if the `DeclRefExpr` is used within the callee of a + // `CXXOperatorCallExpr`. In other cases, for example when applying the + // address-of operator, the `DeclRefExpr` is a prvalue. + if (!S->isGLValue()) + return; + + auto *DeclLoc = Env.getStorageLocation(*VD); + if (DeclLoc == nullptr) + return; + + Env.setStorageLocation(*S, *DeclLoc); + } + + void VisitDeclStmt(const DeclStmt *S) { + // Group decls are converted into single decls in the CFG so the cast below + // is safe. + const auto &D = *cast<VarDecl>(S->getSingleDecl()); + + ProcessVarDecl(D); + } + + void ProcessVarDecl(const VarDecl &D) { + // Static local vars are already initialized in `Environment`. + if (D.hasGlobalStorage()) + return; + + // If this is the holding variable for a `BindingDecl`, we may already + // have a storage location set up -- so check. (See also explanation below + // where we process the `BindingDecl`.) + if (D.getType()->isReferenceType() && Env.getStorageLocation(D) != nullptr) + return; + + assert(Env.getStorageLocation(D) == nullptr); + + Env.setStorageLocation(D, Env.createObject(D)); + + // `DecompositionDecl` must be handled after we've interpreted the loc + // itself, because the binding expression refers back to the + // `DecompositionDecl` (even though it has no written name). + if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) { + // If VarDecl is a DecompositionDecl, evaluate each of its bindings. This + // needs to be evaluated after initializing the values in the storage for + // VarDecl, as the bindings refer to them. + // FIXME: Add support for ArraySubscriptExpr. + // FIXME: Consider adding AST nodes used in BindingDecls to the CFG. + for (const auto *B : Decomp->bindings()) { + if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) { + auto *DE = dyn_cast_or_null<DeclRefExpr>(ME->getBase()); + if (DE == nullptr) + continue; + + // ME and its base haven't been visited because they aren't included + // in the statements of the CFG basic block. + VisitDeclRefExpr(DE); + VisitMemberExpr(ME); + + if (auto *Loc = Env.getStorageLocation(*ME)) + Env.setStorageLocation(*B, *Loc); + } else if (auto *VD = B->getHoldingVar()) { + // Holding vars are used to back the `BindingDecl`s of tuple-like + // types. The holding var declarations appear after the + // `DecompositionDecl`, so we have to explicitly process them here + // to know their storage location. They will be processed a second + // time when we visit their `VarDecl`s, so we have code that protects + // against this above. + ProcessVarDecl(*VD); + auto *VDLoc = Env.getStorageLocation(*VD); + assert(VDLoc != nullptr); + Env.setStorageLocation(*B, *VDLoc); + } + } + } + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + switch (S->getCastKind()) { + case CK_IntegralToBoolean: { + // This cast creates a new, boolean value from the integral value. We + // model that with a fresh value in the environment, unless it's already a + // boolean. + if (auto *SubExprVal = + dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr))) + Env.setValue(*S, *SubExprVal); + else + // FIXME: If integer modeling is added, then update this code to create + // the boolean based on the integer model. + Env.setValue(*S, Env.makeAtomicBoolValue()); + break; + } + + case CK_LValueToRValue: { + // When an L-value is used as an R-value, it may result in sharing, so we + // need to unpack any nested `Top`s. + auto *SubExprVal = maybeUnpackLValueExpr(*SubExpr, Env); + if (SubExprVal == nullptr) + break; + + Env.setValue(*S, *SubExprVal); + break; + } + + case CK_IntegralCast: + // FIXME: This cast creates a new integral value from the + // subexpression. But, because we don't model integers, we don't + // distinguish between this new value and the underlying one. If integer + // modeling is added, then update this code to create a fresh location and + // value. + case CK_UncheckedDerivedToBase: + case CK_ConstructorConversion: + case CK_UserDefinedConversion: + // FIXME: Add tests that excercise CK_UncheckedDerivedToBase, + // CK_ConstructorConversion, and CK_UserDefinedConversion. + case CK_NoOp: { + // FIXME: Consider making `Environment::getStorageLocation` skip noop + // expressions (this and other similar expressions in the file) instead + // of assigning them storage locations. + propagateValueOrStorageLocation(*SubExpr, *S, Env); + break; + } + case CK_NullToPointer: { + auto &NullPointerVal = + Env.getOrCreateNullPointerValue(S->getType()->getPointeeType()); + Env.setValue(*S, NullPointerVal); + break; + } + case CK_NullToMemberPointer: + // FIXME: Implement pointers to members. For now, don't associate a value + // with this expression. + break; + case CK_FunctionToPointerDecay: { + StorageLocation *PointeeLoc = Env.getStorageLocation(*SubExpr); + if (PointeeLoc == nullptr) + break; + + Env.setValue(*S, Env.create<PointerValue>(*PointeeLoc)); + break; + } + case CK_BuiltinFnToFnPtr: + // Despite its name, the result type of `BuiltinFnToFnPtr` is a function, + // not a function pointer. In addition, builtin functions can only be + // called directly; it is not legal to take their address. We therefore + // don't need to create a value or storage location for them. + break; + default: + break; + } + } + + void VisitUnaryOperator(const UnaryOperator *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + switch (S->getOpcode()) { + case UO_Deref: { + const auto *SubExprVal = Env.get<PointerValue>(*SubExpr); + if (SubExprVal == nullptr) + break; + + Env.setStorageLocation(*S, SubExprVal->getPointeeLoc()); + break; + } + case UO_AddrOf: { + // FIXME: Model pointers to members. + if (S->getType()->isMemberPointerType()) + break; + + if (StorageLocation *PointeeLoc = Env.getStorageLocation(*SubExpr)) + Env.setValue(*S, Env.create<PointerValue>(*PointeeLoc)); + break; + } + case UO_LNot: { + auto *SubExprVal = dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr)); + if (SubExprVal == nullptr) + break; + + Env.setValue(*S, Env.makeNot(*SubExprVal)); + break; + } + case UO_PreInc: + case UO_PreDec: + // Propagate the storage location and clear out any value associated with + // it (to represent the fact that the value has definitely changed). + // To avoid generating unnecessary values, we leave it to the specific + // analysis to create a new value if desired. + propagateStorageLocation(*S->getSubExpr(), *S, Env); + if (StorageLocation *Loc = Env.getStorageLocation(*S->getSubExpr())) + Env.clearValue(*Loc); + break; + case UO_PostInc: + case UO_PostDec: + // Propagate the old value, then clear out any value associated with the + // storage location (to represent the fact that the value has definitely + // changed). See above for rationale. + propagateValue(*S->getSubExpr(), *S, Env); + if (StorageLocation *Loc = Env.getStorageLocation(*S->getSubExpr())) + Env.clearValue(*Loc); + break; + default: + break; + } + } + + void VisitCXXThisExpr(const CXXThisExpr *S) { + auto *ThisPointeeLoc = Env.getThisPointeeStorageLocation(); + if (ThisPointeeLoc == nullptr) + // Unions are not supported yet, and will not have a location for the + // `this` expression's pointee. + return; + + Env.setValue(*S, Env.create<PointerValue>(*ThisPointeeLoc)); + } + + void VisitCXXNewExpr(const CXXNewExpr *S) { + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(*S, *Val); + } + + void VisitCXXDeleteExpr(const CXXDeleteExpr *S) { + // Empty method. + // We consciously don't do anything on deletes. Diagnosing double deletes + // (for example) should be done by a specific analysis, not by the + // framework. + } + + void VisitReturnStmt(const ReturnStmt *S) { + if (!Env.getDataflowAnalysisContext().getOptions().ContextSensitiveOpts) + return; + + auto *Ret = S->getRetValue(); + if (Ret == nullptr) + return; + + if (Ret->isPRValue()) { + if (Ret->getType()->isRecordType()) + return; + + auto *Val = Env.getValue(*Ret); + if (Val == nullptr) + return; + + // FIXME: Model NRVO. + Env.setReturnValue(Val); + } else { + auto *Loc = Env.getStorageLocation(*Ret); + if (Loc == nullptr) + return; + + // FIXME: Model NRVO. + Env.setReturnStorageLocation(Loc); + } + } + + void VisitMemberExpr(const MemberExpr *S) { + ValueDecl *Member = S->getMemberDecl(); + assert(Member != nullptr); + + // FIXME: Consider assigning pointer values to function member expressions. + if (Member->isFunctionOrFunctionTemplate()) + return; + + // FIXME: if/when we add support for modeling enums, use that support here. + if (isa<EnumConstantDecl>(Member)) + return; + + if (auto *D = dyn_cast<VarDecl>(Member)) { + if (D->hasGlobalStorage()) { + auto *VarDeclLoc = Env.getStorageLocation(*D); + if (VarDeclLoc == nullptr) + return; + + Env.setStorageLocation(*S, *VarDeclLoc); + return; + } + } + + RecordStorageLocation *BaseLoc = getBaseObjectLocation(*S, Env); + if (BaseLoc == nullptr) + return; + + auto *MemberLoc = BaseLoc->getChild(*Member); + if (MemberLoc == nullptr) + return; + Env.setStorageLocation(*S, *MemberLoc); + } + + void VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *S) { + const Expr *ArgExpr = S->getExpr(); + assert(ArgExpr != nullptr); + propagateValueOrStorageLocation(*ArgExpr, *S, Env); + + if (S->isPRValue() && S->getType()->isRecordType()) { + auto &Loc = Env.getResultObjectLocation(*S); + Env.initializeFieldsWithValues(Loc); + } + } + + void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) { + const Expr *InitExpr = S->getExpr(); + assert(InitExpr != nullptr); + + // If this is a prvalue of record type, the handler for `*InitExpr` (if one + // exists) will initialize the result object; there is no value to propgate + // here. + if (S->getType()->isRecordType() && S->isPRValue()) + return; + + propagateValueOrStorageLocation(*InitExpr, *S, Env); + } + + void VisitCXXConstructExpr(const CXXConstructExpr *S) { + const CXXConstructorDecl *ConstructorDecl = S->getConstructor(); + assert(ConstructorDecl != nullptr); + + // `CXXConstructExpr` can have array type if default-initializing an array + // of records. We don't handle this specifically beyond potentially inlining + // the call. + if (!S->getType()->isRecordType()) { + transferInlineCall(S, ConstructorDecl); + return; + } + + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + + if (ConstructorDecl->isCopyOrMoveConstructor()) { + // It is permissible for a copy/move constructor to have additional + // parameters as long as they have default arguments defined for them. + assert(S->getNumArgs() != 0); + + const Expr *Arg = S->getArg(0); + assert(Arg != nullptr); + + auto *ArgLoc = Env.get<RecordStorageLocation>(*Arg); + if (ArgLoc == nullptr) + return; + + // Even if the copy/move constructor call is elidable, we choose to copy + // the record in all cases (which isn't wrong, just potentially not + // optimal). + copyRecord(*ArgLoc, Loc, Env); + return; + } + + Env.initializeFieldsWithValues(Loc, S->getType()); + + transferInlineCall(S, ConstructorDecl); + } + + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) { + if (S->getOperator() == OO_Equal) { + assert(S->getNumArgs() == 2); + + const Expr *Arg0 = S->getArg(0); + assert(Arg0 != nullptr); + + const Expr *Arg1 = S->getArg(1); + assert(Arg1 != nullptr); + + // Evaluate only copy and move assignment operators. + const auto *Method = + dyn_cast_or_null<CXXMethodDecl>(S->getDirectCallee()); + if (!Method) + return; + if (!Method->isCopyAssignmentOperator() && + !Method->isMoveAssignmentOperator()) + return; + + RecordStorageLocation *LocSrc = nullptr; + if (Arg1->isPRValue()) { + LocSrc = &Env.getResultObjectLocation(*Arg1); + } else { + LocSrc = Env.get<RecordStorageLocation>(*Arg1); + } + auto *LocDst = Env.get<RecordStorageLocation>(*Arg0); + + if (LocSrc == nullptr || LocDst == nullptr) + return; + + copyRecord(*LocSrc, *LocDst, Env); + + // The assignment operator can have an arbitrary return type. We model the + // return value only if the return type is the same as or a base class of + // the destination type. + if (S->getType().getCanonicalType().getUnqualifiedType() != + LocDst->getType().getCanonicalType().getUnqualifiedType()) { + auto ReturnDecl = S->getType()->getAsCXXRecordDecl(); + auto DstDecl = LocDst->getType()->getAsCXXRecordDecl(); + if (ReturnDecl == nullptr || DstDecl == nullptr) + return; + if (!DstDecl->isDerivedFrom(ReturnDecl)) + return; + } + + if (S->isGLValue()) + Env.setStorageLocation(*S, *LocDst); + else + copyRecord(*LocDst, Env.getResultObjectLocation(*S), Env); + + return; + } + + // `CXXOperatorCallExpr` can be a prvalue. Call `VisitCallExpr`() to + // initialize the prvalue's fields with values. + VisitCallExpr(S); + } + + void VisitCXXRewrittenBinaryOperator(const CXXRewrittenBinaryOperator *RBO) { + propagateValue(*RBO->getSemanticForm(), *RBO, Env); + } + + void VisitCallExpr(const CallExpr *S) { + // Of clang's builtins, only `__builtin_expect` is handled explicitly, since + // others (like trap, debugtrap, and unreachable) are handled by CFG + // construction. + if (S->isCallToStdMove()) { + assert(S->getNumArgs() == 1); + + const Expr *Arg = S->getArg(0); + assert(Arg != nullptr); + + auto *ArgLoc = Env.getStorageLocation(*Arg); + if (ArgLoc == nullptr) + return; + + Env.setStorageLocation(*S, *ArgLoc); + } else if (S->getDirectCallee() != nullptr && + S->getDirectCallee()->getBuiltinID() == + Builtin::BI__builtin_expect) { + assert(S->getNumArgs() > 0); + assert(S->getArg(0) != nullptr); + auto *ArgVal = Env.getValue(*S->getArg(0)); + if (ArgVal == nullptr) + return; + Env.setValue(*S, *ArgVal); + } else if (const FunctionDecl *F = S->getDirectCallee()) { + transferInlineCall(S, F); + + // If this call produces a prvalue of record type, initialize its fields + // with values. + if (S->getType()->isRecordType() && S->isPRValue()) { + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + Env.initializeFieldsWithValues(Loc); + } + } + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + StorageLocation &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + + if (SubExpr->getType()->isRecordType()) + // Nothing else left to do -- we initialized the record when transferring + // `SubExpr`. + return; + + if (Value *SubExprVal = Env.getValue(*SubExpr)) + Env.setValue(Loc, *SubExprVal); + } + + void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + propagateValue(*SubExpr, *S, Env); + } + + void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) { + if (S->getCastKind() == CK_NoOp) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + propagateValueOrStorageLocation(*SubExpr, *S, Env); + } + } + + void VisitConditionalOperator(const ConditionalOperator *S) { + const Environment *TrueEnv = StmtToEnv.getEnvironment(*S->getTrueExpr()); + const Environment *FalseEnv = StmtToEnv.getEnvironment(*S->getFalseExpr()); + + if (TrueEnv == nullptr || FalseEnv == nullptr) { + // If the true or false branch is dead, we may not have an environment for + // it. We could handle this specifically by forwarding the value or + // location of the live branch, but this case is rare enough that this + // probably isn't worth the additional complexity. + return; + } + + if (S->isGLValue()) { + StorageLocation *TrueLoc = TrueEnv->getStorageLocation(*S->getTrueExpr()); + StorageLocation *FalseLoc = + FalseEnv->getStorageLocation(*S->getFalseExpr()); + if (TrueLoc == FalseLoc && TrueLoc != nullptr) + Env.setStorageLocation(*S, *TrueLoc); + } else if (!S->getType()->isRecordType()) { + // The conditional operator can evaluate to either of the values of the + // two branches. To model this, join these two values together to yield + // the result of the conditional operator. + // Note: Most joins happen in `computeBlockInputState()`, but this case is + // different: + // - `computeBlockInputState()` (which in turn calls `Environment::join()` + // joins values associated with the _same_ expression or storage + // location, then associates the joined value with that expression or + // storage location. This join has nothing to do with transfer -- + // instead, it joins together the results of performing transfer on two + // different blocks. + // - Here, we join values associated with _different_ expressions (the + // true and false branch), then associate the joined value with a third + // expression (the conditional operator itself). This join is what it + // means to perform transfer on the conditional operator. + if (Value *Val = Environment::joinValues( + S->getType(), TrueEnv->getValue(*S->getTrueExpr()), *TrueEnv, + FalseEnv->getValue(*S->getFalseExpr()), *FalseEnv, Env, Model)) + Env.setValue(*S, *Val); + } + } + + void VisitInitListExpr(const InitListExpr *S) { + QualType Type = S->getType(); + + if (!Type->isRecordType()) { + // Until array initialization is implemented, we skip arrays and don't + // need to care about cases where `getNumInits() > 1`. + if (!Type->isArrayType() && S->getNumInits() == 1) + propagateValueOrStorageLocation(*S->getInit(0), *S, Env); + return; + } + + // If the initializer list is transparent, there's nothing to do. + if (S->isSemanticForm() && S->isTransparent()) + return; + + RecordStorageLocation &Loc = Env.getResultObjectLocation(*S); + + // Initialization of base classes and fields of record type happens when we + // visit the nested `CXXConstructExpr` or `InitListExpr` for that base class + // or field. We therefore only need to deal with fields of non-record type + // here. + + RecordInitListHelper InitListHelper(S); + + for (auto [Field, Init] : InitListHelper.field_inits()) { + if (Field->getType()->isRecordType()) + continue; + if (Field->getType()->isReferenceType()) { + assert(Field->getType().getCanonicalType()->getPointeeType() == + Init->getType().getCanonicalType()); + Loc.setChild(*Field, &Env.createObject(Field->getType(), Init)); + continue; + } + assert(Field->getType().getCanonicalType().getUnqualifiedType() == + Init->getType().getCanonicalType().getUnqualifiedType()); + StorageLocation *FieldLoc = Loc.getChild(*Field); + // Locations for non-reference fields must always be non-null. + assert(FieldLoc != nullptr); + Value *Val = Env.getValue(*Init); + if (Val == nullptr && isa<ImplicitValueInitExpr>(Init) && + Init->getType()->isPointerType()) + Val = + &Env.getOrCreateNullPointerValue(Init->getType()->getPointeeType()); + if (Val == nullptr) + Val = Env.createValue(Field->getType()); + if (Val != nullptr) + Env.setValue(*FieldLoc, *Val); + } + + for (const auto &[FieldName, FieldLoc] : Loc.synthetic_fields()) { + QualType FieldType = FieldLoc->getType(); + if (FieldType->isRecordType()) { + Env.initializeFieldsWithValues(*cast<RecordStorageLocation>(FieldLoc)); + } else { + if (Value *Val = Env.createValue(FieldType)) + Env.setValue(*FieldLoc, *Val); + } + } + + // FIXME: Implement array initialization. + } + + void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) { + Env.setValue(*S, Env.getBoolLiteralValue(S->getValue())); + } + + void VisitIntegerLiteral(const IntegerLiteral *S) { + Env.setValue(*S, Env.getIntLiteralValue(S->getValue())); + } + + void VisitParenExpr(const ParenExpr *S) { + // The CFG does not contain `ParenExpr` as top-level statements in basic + // blocks, however manual traversal to sub-expressions may encounter them. + // Redirect to the sub-expression. + auto *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + Visit(SubExpr); + } + + void VisitExprWithCleanups(const ExprWithCleanups *S) { + // The CFG does not contain `ExprWithCleanups` as top-level statements in + // basic blocks, however manual traversal to sub-expressions may encounter + // them. Redirect to the sub-expression. + auto *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + Visit(SubExpr); + } + +private: + /// Returns the value for the sub-expression `SubExpr` of a logic operator. + BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) { + // `SubExpr` and its parent logic operator might be part of different basic + // blocks. We try to access the value that is assigned to `SubExpr` in the + // corresponding environment. + if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) + if (auto *Val = + dyn_cast_or_null<BoolValue>(SubExprEnv->getValue(SubExpr))) + return *Val; + + // The sub-expression may lie within a basic block that isn't reachable, + // even if we need it to evaluate the current (reachable) expression + // (see https://discourse.llvm.org/t/70775). In this case, visit `SubExpr` + // within the current environment and then try to get the value that gets + // assigned to it. + if (Env.getValue(SubExpr) == nullptr) + Visit(&SubExpr); + if (auto *Val = dyn_cast_or_null<BoolValue>(Env.getValue(SubExpr))) + return *Val; + + // If the value of `SubExpr` is still unknown, we create a fresh symbolic + // boolean value for it. + return Env.makeAtomicBoolValue(); + } + + // If context sensitivity is enabled, try to analyze the body of the callee + // `F` of `S`. The type `E` must be either `CallExpr` or `CXXConstructExpr`. + template <typename E> + void transferInlineCall(const E *S, const FunctionDecl *F) { + const auto &Options = Env.getDataflowAnalysisContext().getOptions(); + if (!(Options.ContextSensitiveOpts && + Env.canDescend(Options.ContextSensitiveOpts->Depth, F))) + return; + + const AdornedCFG *ACFG = Env.getDataflowAnalysisContext().getAdornedCFG(F); + if (!ACFG) + return; + + // FIXME: We don't support context-sensitive analysis of recursion, so + // we should return early here if `F` is the same as the `FunctionDecl` + // holding `S` itself. + + auto ExitBlock = ACFG->getCFG().getExit().getBlockID(); + + auto CalleeEnv = Env.pushCall(S); + + // FIXME: Use the same analysis as the caller for the callee. Note, + // though, that doing so would require support for changing the analysis's + // ASTContext. + auto Analysis = NoopAnalysis(ACFG->getDecl().getASTContext(), + DataflowAnalysisOptions{Options}); + + auto BlockToOutputState = + dataflow::runDataflowAnalysis(*ACFG, Analysis, CalleeEnv); + assert(BlockToOutputState); + assert(ExitBlock < BlockToOutputState->size()); + + auto &ExitState = (*BlockToOutputState)[ExitBlock]; + assert(ExitState); + + Env.popCall(S, ExitState->Env); + } + + const StmtToEnvMap &StmtToEnv; + Environment &Env; + Environment::ValueModel &Model; +}; + +} // namespace + +void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env, + Environment::ValueModel &Model) { + TransferVisitor(StmtToEnv, Env, Model).Visit(&S); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp new file mode 100644 index 000000000000..200682faafd6 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -0,0 +1,571 @@ +//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines type-erased base types and functions for building dataflow +// analyses that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include <optional> +#include <system_error> +#include <utility> +#include <vector> + +#include "clang/AST/ASTDumper.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Analysis/FlowSensitive/RecordOps.h" +#include "clang/Analysis/FlowSensitive/Transfer.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" + +#define DEBUG_TYPE "clang-dataflow" + +namespace clang { +namespace dataflow { + +/// Returns the index of `Block` in the successors of `Pred`. +static int blockIndexInPredecessor(const CFGBlock &Pred, + const CFGBlock &Block) { + auto BlockPos = llvm::find_if( + Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { + return Succ && Succ->getBlockID() == Block.getBlockID(); + }); + return BlockPos - Pred.succ_begin(); +} + +// A "backedge" node is a block introduced in the CFG exclusively to indicate a +// loop backedge. They are exactly identified by the presence of a non-null +// pointer to the entry block of the loop condition. Note that this is not +// necessarily the block with the loop statement as terminator, because +// short-circuit operators will result in multiple blocks encoding the loop +// condition, only one of which will contain the loop statement as terminator. +static bool isBackedgeNode(const CFGBlock &B) { + return B.getLoopTarget() != nullptr; +} + +namespace { + +/// Extracts the terminator's condition expression. +class TerminatorVisitor + : public ConstStmtVisitor<TerminatorVisitor, const Expr *> { +public: + TerminatorVisitor() = default; + const Expr *VisitIfStmt(const IfStmt *S) { return S->getCond(); } + const Expr *VisitWhileStmt(const WhileStmt *S) { return S->getCond(); } + const Expr *VisitDoStmt(const DoStmt *S) { return S->getCond(); } + const Expr *VisitForStmt(const ForStmt *S) { return S->getCond(); } + const Expr *VisitCXXForRangeStmt(const CXXForRangeStmt *) { + // Don't do anything special for CXXForRangeStmt, because the condition + // (being implicitly generated) isn't visible from the loop body. + return nullptr; + } + const Expr *VisitBinaryOperator(const BinaryOperator *S) { + assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); + return S->getLHS(); + } + const Expr *VisitConditionalOperator(const ConditionalOperator *S) { + return S->getCond(); + } +}; + +/// Holds data structures required for running dataflow analysis. +struct AnalysisContext { + AnalysisContext(const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv, + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> + BlockStates) + : ACFG(ACFG), Analysis(Analysis), InitEnv(InitEnv), + Log(*InitEnv.getDataflowAnalysisContext().getOptions().Log), + BlockStates(BlockStates) { + Log.beginAnalysis(ACFG, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } + + /// Contains the CFG being analyzed. + const AdornedCFG &ACFG; + /// The analysis to be run. + TypeErasedDataflowAnalysis &Analysis; + /// Initial state to start the analysis. + const Environment &InitEnv; + Logger &Log; + /// Stores the state of a CFG block if it has been evaluated by the analysis. + /// The indices correspond to the block IDs. + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates; +}; + +class PrettyStackTraceAnalysis : public llvm::PrettyStackTraceEntry { +public: + PrettyStackTraceAnalysis(const AdornedCFG &ACFG, const char *Message) + : ACFG(ACFG), Message(Message) {} + + void print(raw_ostream &OS) const override { + OS << Message << "\n"; + OS << "Decl:\n"; + ACFG.getDecl().dump(OS); + OS << "CFG:\n"; + ACFG.getCFG().print(OS, LangOptions(), false); + } + +private: + const AdornedCFG &ACFG; + const char *Message; +}; + +class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { +public: + PrettyStackTraceCFGElement(const CFGElement &Element, int BlockIdx, + int ElementIdx, const char *Message) + : Element(Element), BlockIdx(BlockIdx), ElementIdx(ElementIdx), + Message(Message) {} + + void print(raw_ostream &OS) const override { + OS << Message << ": Element [B" << BlockIdx << "." << ElementIdx << "]\n"; + if (auto Stmt = Element.getAs<CFGStmt>()) { + OS << "Stmt:\n"; + ASTDumper Dumper(OS, false); + Dumper.Visit(Stmt->getStmt()); + } + } + +private: + const CFGElement ∈ + int BlockIdx; + int ElementIdx; + const char *Message; +}; + +// Builds a joined TypeErasedDataflowAnalysisState from 0 or more sources, +// each of which may be owned (built as part of the join) or external (a +// reference to an Environment that will outlive the builder). +// Avoids unneccesary copies of the environment. +class JoinedStateBuilder { + AnalysisContext &AC; + Environment::ExprJoinBehavior JoinBehavior; + std::vector<const TypeErasedDataflowAnalysisState *> All; + std::deque<TypeErasedDataflowAnalysisState> Owned; + + TypeErasedDataflowAnalysisState + join(const TypeErasedDataflowAnalysisState &L, + const TypeErasedDataflowAnalysisState &R) { + return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice), + Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)}; + } + +public: + JoinedStateBuilder(AnalysisContext &AC, + Environment::ExprJoinBehavior JoinBehavior) + : AC(AC), JoinBehavior(JoinBehavior) {} + + void addOwned(TypeErasedDataflowAnalysisState State) { + Owned.push_back(std::move(State)); + All.push_back(&Owned.back()); + } + void addUnowned(const TypeErasedDataflowAnalysisState &State) { + All.push_back(&State); + } + TypeErasedDataflowAnalysisState take() && { + if (All.empty()) + // FIXME: Consider passing `Block` to Analysis.typeErasedInitialElement + // to enable building analyses like computation of dominators that + // initialize the state of each basic block differently. + return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()}; + if (All.size() == 1) + // Join the environment with itself so that we discard expression state if + // desired. + // FIXME: We could consider writing special-case code for this that only + // does the discarding, but it's not clear if this is worth it. + return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env, + AC.Analysis, JoinBehavior)}; + + auto Result = join(*All[0], *All[1]); + for (unsigned I = 2; I < All.size(); ++I) + Result = join(Result, *All[I]); + return Result; + } +}; +} // namespace + +static const Expr *getTerminatorCondition(const Stmt *TerminatorStmt) { + return TerminatorStmt == nullptr ? nullptr + : TerminatorVisitor().Visit(TerminatorStmt); +} + +/// Computes the input state for a given basic block by joining the output +/// states of its predecessors. +/// +/// Requirements: +/// +/// All predecessors of `Block` except those with loop back edges must have +/// already been transferred. States in `AC.BlockStates` that are set to +/// `std::nullopt` represent basic blocks that are not evaluated yet. +static TypeErasedDataflowAnalysisState +computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { + std::vector<const CFGBlock *> Preds(Block.pred_begin(), Block.pred_end()); + if (Block.getTerminator().isTemporaryDtorsBranch()) { + // This handles a special case where the code that produced the CFG includes + // a conditional operator with a branch that constructs a temporary and + // calls a destructor annotated as noreturn. The CFG models this as follows: + // + // B1 (contains the condition of the conditional operator) - succs: B2, B3 + // B2 (contains code that does not call a noreturn destructor) - succs: B4 + // B3 (contains code that calls a noreturn destructor) - succs: B4 + // B4 (has temporary destructor terminator) - succs: B5, B6 + // B5 (noreturn block that is associated with the noreturn destructor call) + // B6 (contains code that follows the conditional operator statement) + // + // The first successor (B5 above) of a basic block with a temporary + // destructor terminator (B4 above) is the block that evaluates the + // destructor. If that block has a noreturn element then the predecessor + // block that constructed the temporary object (B3 above) is effectively a + // noreturn block and its state should not be used as input for the state + // of the block that has a temporary destructor terminator (B4 above). This + // holds regardless of which branch of the ternary operator calls the + // noreturn destructor. However, it doesn't cases where a nested ternary + // operator includes a branch that contains a noreturn destructor call. + // + // See `NoreturnDestructorTest` for concrete examples. + if (Block.succ_begin()->getReachableBlock() != nullptr && + Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { + auto &StmtToBlock = AC.ACFG.getStmtToBlock(); + auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt()); + assert(StmtBlock != StmtToBlock.end()); + llvm::erase(Preds, StmtBlock->getSecond()); + } + } + + // If any of the predecessor blocks contains an expression consumed in a + // different block, we need to keep expression state. + // Note that in this case, we keep expression state for all predecessors, + // rather than only those predecessors that actually contain an expression + // consumed in a different block. While this is potentially suboptimal, it's + // actually likely, if we have control flow within a full expression, that + // all predecessors have expression state consumed in a different block. + Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState; + for (const CFGBlock *Pred : Preds) { + if (Pred && AC.ACFG.containsExprConsumedInDifferentBlock(*Pred)) { + JoinBehavior = Environment::KeepExprState; + break; + } + } + + JoinedStateBuilder Builder(AC, JoinBehavior); + for (const CFGBlock *Pred : Preds) { + // Skip if the `Block` is unreachable or control flow cannot get past it. + if (!Pred || Pred->hasNoReturnElement()) + continue; + + // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a + // loop back edge to `Block`. + const std::optional<TypeErasedDataflowAnalysisState> &MaybePredState = + AC.BlockStates[Pred->getBlockID()]; + if (!MaybePredState) + continue; + + const TypeErasedDataflowAnalysisState &PredState = *MaybePredState; + const Expr *Cond = getTerminatorCondition(Pred->getTerminatorStmt()); + if (Cond == nullptr) { + Builder.addUnowned(PredState); + continue; + } + + bool BranchVal = blockIndexInPredecessor(*Pred, Block) == 0; + + // `transferBranch` may need to mutate the environment to describe the + // dynamic effect of the terminator for a given branch. Copy now. + TypeErasedDataflowAnalysisState Copy = MaybePredState->fork(); + if (AC.Analysis.builtinOptions()) { + auto *CondVal = Copy.Env.get<BoolValue>(*Cond); + // In transferCFGBlock(), we ensure that we always have a `Value` + // for the terminator condition, so assert this. We consciously + // assert ourselves instead of asserting via `cast()` so that we get + // a more meaningful line number if the assertion fails. + assert(CondVal != nullptr); + BoolValue *AssertedVal = + BranchVal ? CondVal : &Copy.Env.makeNot(*CondVal); + Copy.Env.assume(AssertedVal->formula()); + } + AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice, + Copy.Env); + Builder.addOwned(std::move(Copy)); + } + return std::move(Builder).take(); +} + +/// Built-in transfer function for `CFGStmt`. +static void +builtinTransferStatement(unsigned CurBlockID, const CFGStmt &Elt, + TypeErasedDataflowAnalysisState &InputState, + AnalysisContext &AC) { + const Stmt *S = Elt.getStmt(); + assert(S != nullptr); + transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, CurBlockID, InputState), *S, + InputState.Env, AC.Analysis); +} + +/// Built-in transfer function for `CFGInitializer`. +static void +builtinTransferInitializer(const CFGInitializer &Elt, + TypeErasedDataflowAnalysisState &InputState) { + const CXXCtorInitializer *Init = Elt.getInitializer(); + assert(Init != nullptr); + + auto &Env = InputState.Env; + auto &ThisLoc = *Env.getThisPointeeStorageLocation(); + + if (!Init->isAnyMemberInitializer()) + // FIXME: Handle base initialization + return; + + auto *InitExpr = Init->getInit(); + assert(InitExpr != nullptr); + + const FieldDecl *Member = nullptr; + RecordStorageLocation *ParentLoc = &ThisLoc; + StorageLocation *MemberLoc = nullptr; + if (Init->isMemberInitializer()) { + Member = Init->getMember(); + MemberLoc = ThisLoc.getChild(*Member); + } else { + IndirectFieldDecl *IndirectField = Init->getIndirectMember(); + assert(IndirectField != nullptr); + MemberLoc = &ThisLoc; + for (const auto *I : IndirectField->chain()) { + Member = cast<FieldDecl>(I); + ParentLoc = cast<RecordStorageLocation>(MemberLoc); + MemberLoc = ParentLoc->getChild(*Member); + } + } + assert(Member != nullptr); + + // FIXME: Instead of these case distinctions, we would ideally want to be able + // to simply use `Environment::createObject()` here, the same way that we do + // this in `TransferVisitor::VisitInitListExpr()`. However, this would require + // us to be able to build a list of fields that we then use to initialize an + // `RecordStorageLocation` -- and the problem is that, when we get here, + // the `RecordStorageLocation` already exists. We should explore if there's + // anything that we can do to change this. + if (Member->getType()->isReferenceType()) { + auto *InitExprLoc = Env.getStorageLocation(*InitExpr); + if (InitExprLoc == nullptr) + return; + + ParentLoc->setChild(*Member, InitExprLoc); + // Record-type initializers construct themselves directly into the result + // object, so there is no need to handle them here. + } else if (!Member->getType()->isRecordType()) { + assert(MemberLoc != nullptr); + if (auto *InitExprVal = Env.getValue(*InitExpr)) + Env.setValue(*MemberLoc, *InitExprVal); + } +} + +static void builtinTransfer(unsigned CurBlockID, const CFGElement &Elt, + TypeErasedDataflowAnalysisState &State, + AnalysisContext &AC) { + switch (Elt.getKind()) { + case CFGElement::Statement: + builtinTransferStatement(CurBlockID, Elt.castAs<CFGStmt>(), State, AC); + break; + case CFGElement::Initializer: + builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State); + break; + case CFGElement::LifetimeEnds: + // Removing declarations when their lifetime ends serves two purposes: + // - Eliminate unnecessary clutter from `Environment::DeclToLoc` + // - Allow us to assert that, when joining two `Environment`s, the two + // `DeclToLoc` maps never contain entries that map the same declaration to + // different storage locations. + if (const ValueDecl *VD = Elt.castAs<CFGLifetimeEnds>().getVarDecl()) + State.Env.removeDecl(*VD); + break; + default: + // FIXME: Evaluate other kinds of `CFGElement` + break; + } +} + +/// Transfers `State` by evaluating each element in the `Block` based on the +/// `AC.Analysis` specified. +/// +/// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set +/// by the analysis) will be applied to the element before evaluation by the +/// user-specified analysis. +/// `PostVisitCFG` (if provided) will be applied to the element after evaluation +/// by the user-specified analysis. +static TypeErasedDataflowAnalysisState +transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, + const CFGEltCallbacksTypeErased &PostAnalysisCallbacks = {}) { + AC.Log.enterBlock(Block, PostAnalysisCallbacks.Before != nullptr || + PostAnalysisCallbacks.After != nullptr); + auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); + int ElementIdx = 1; + for (const auto &Element : Block) { + PrettyStackTraceCFGElement CrashInfo(Element, Block.getBlockID(), + ElementIdx++, "transferCFGBlock"); + + AC.Log.enterElement(Element); + + if (PostAnalysisCallbacks.Before) { + PostAnalysisCallbacks.Before(Element, State); + } + + // Built-in analysis + if (AC.Analysis.builtinOptions()) { + builtinTransfer(Block.getBlockID(), Element, State, AC); + } + + // User-provided analysis + AC.Analysis.transferTypeErased(Element, State.Lattice, State.Env); + + if (PostAnalysisCallbacks.After) { + PostAnalysisCallbacks.After(Element, State); + } + + AC.Log.recordState(State); + } + + // If we have a terminator, evaluate its condition. + // This `Expr` may not appear as a `CFGElement` anywhere else, and it's + // important that we evaluate it here (rather than while processing the + // terminator) so that we put the corresponding value in the right + // environment. + if (const Expr *TerminatorCond = + dyn_cast_or_null<Expr>(Block.getTerminatorCondition())) { + if (State.Env.getValue(*TerminatorCond) == nullptr) + // FIXME: This only runs the builtin transfer, not the analysis-specific + // transfer. Fixing this isn't trivial, as the analysis-specific transfer + // takes a `CFGElement` as input, but some expressions only show up as a + // terminator condition, but not as a `CFGElement`. The condition of an if + // statement is one such example. + transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, Block.getBlockID(), State), + *TerminatorCond, State.Env, AC.Analysis); + + // If the transfer function didn't produce a value, create an atom so that + // we have *some* value for the condition expression. This ensures that + // when we extend the flow condition, it actually changes. + if (State.Env.getValue(*TerminatorCond) == nullptr) + State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue()); + AC.Log.recordState(State); + } + + return State; +} + +llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>> +runTypeErasedDataflowAnalysis( + const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv, + const CFGEltCallbacksTypeErased &PostAnalysisCallbacks, + std::int32_t MaxBlockVisits) { + PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis"); + + std::optional<Environment> MaybeStartingEnv; + if (InitEnv.callStackSize() == 0) { + MaybeStartingEnv = InitEnv.fork(); + MaybeStartingEnv->initialize(); + } + const Environment &StartingEnv = + MaybeStartingEnv ? *MaybeStartingEnv : InitEnv; + + const clang::CFG &CFG = ACFG.getCFG(); + PostOrderCFGView POV(&CFG); + ForwardDataflowWorklist Worklist(CFG, &POV); + + std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates( + CFG.size()); + + // The entry basic block doesn't contain statements so it can be skipped. + const CFGBlock &Entry = CFG.getEntry(); + BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), + StartingEnv.fork()}; + Worklist.enqueueSuccessors(&Entry); + + AnalysisContext AC(ACFG, Analysis, StartingEnv, BlockStates); + std::int32_t BlockVisits = 0; + while (const CFGBlock *Block = Worklist.dequeue()) { + LLVM_DEBUG(llvm::dbgs() + << "Processing Block " << Block->getBlockID() << "\n"); + if (++BlockVisits > MaxBlockVisits) { + return llvm::createStringError(std::errc::timed_out, + "maximum number of blocks processed"); + } + + const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState = + BlockStates[Block->getBlockID()]; + TypeErasedDataflowAnalysisState NewBlockState = + transferCFGBlock(*Block, AC); + LLVM_DEBUG({ + llvm::errs() << "New Env:\n"; + NewBlockState.Env.dump(); + }); + + if (OldBlockState) { + LLVM_DEBUG({ + llvm::errs() << "Old Env:\n"; + OldBlockState->Env.dump(); + }); + if (isBackedgeNode(*Block)) { + LatticeJoinEffect Effect1 = Analysis.widenTypeErased( + NewBlockState.Lattice, OldBlockState->Lattice); + LatticeJoinEffect Effect2 = + NewBlockState.Env.widen(OldBlockState->Env, Analysis); + if (Effect1 == LatticeJoinEffect::Unchanged && + Effect2 == LatticeJoinEffect::Unchanged) { + // The state of `Block` didn't change from widening so there's no need + // to revisit its successors. + AC.Log.blockConverged(); + continue; + } + } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, + NewBlockState.Lattice) && + OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { + // The state of `Block` didn't change after transfer so there's no need + // to revisit its successors. + AC.Log.blockConverged(); + continue; + } + } + + BlockStates[Block->getBlockID()] = std::move(NewBlockState); + + // Do not add unreachable successor blocks to `Worklist`. + if (Block->hasNoReturnElement()) + continue; + + Worklist.enqueueSuccessors(Block); + } + // FIXME: Consider evaluating unreachable basic blocks (those that have a + // state set to `std::nullopt` at this point) to also analyze dead code. + + if (PostAnalysisCallbacks.Before || PostAnalysisCallbacks.After) { + for (const CFGBlock *Block : ACFG.getCFG()) { + // Skip blocks that were not evaluated. + if (!BlockStates[Block->getBlockID()]) + continue; + transferCFGBlock(*Block, AC, PostAnalysisCallbacks); + } + } + + return std::move(BlockStates); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp new file mode 100644 index 000000000000..d70e5a82ea23 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp @@ -0,0 +1,60 @@ +//===-- Value.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines support functions for the `Value` type. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "llvm/Support/Casting.h" + +namespace clang { +namespace dataflow { + +static bool areEquivalentIndirectionValues(const Value &Val1, + const Value &Val2) { + if (auto *IndVal1 = dyn_cast<PointerValue>(&Val1)) { + auto *IndVal2 = cast<PointerValue>(&Val2); + return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc(); + } + return false; +} + +bool areEquivalentValues(const Value &Val1, const Value &Val2) { + if (&Val1 == &Val2) + return true; + if (Val1.getKind() != Val2.getKind()) + return false; + // If values are distinct and have properties, we don't consider them equal, + // leaving equality up to the user model. + if (!Val1.properties().empty() || !Val2.properties().empty()) + return false; + if (isa<TopBoolValue>(&Val1)) + return true; + return areEquivalentIndirectionValues(Val1, Val2); +} + +raw_ostream &operator<<(raw_ostream &OS, const Value &Val) { + switch (Val.getKind()) { + case Value::Kind::Integer: + return OS << "Integer(@" << &Val << ")"; + case Value::Kind::Pointer: + return OS << "Pointer(" << &cast<PointerValue>(Val).getPointeeLoc() << ")"; + case Value::Kind::TopBool: + return OS << "TopBool(" << cast<TopBoolValue>(Val).getAtom() << ")"; + case Value::Kind::AtomicBool: + return OS << "AtomicBool(" << cast<AtomicBoolValue>(Val).getAtom() << ")"; + case Value::Kind::FormulaBool: + return OS << "FormulaBool(" << cast<FormulaBoolValue>(Val).formula() << ")"; + } + llvm_unreachable("Unknown clang::dataflow::Value::Kind enum"); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp new file mode 100644 index 000000000000..a39f0e0b29ad --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -0,0 +1,418 @@ +//===- WatchedLiteralsSolver.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a SAT solver implementation that can be used by dataflow +// analyses. +// +//===----------------------------------------------------------------------===// + +#include <cassert> +#include <vector> + +#include "clang/Analysis/FlowSensitive/CNFFormula.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Solver.h" +#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" + + +namespace clang { +namespace dataflow { + +namespace { + +class WatchedLiteralsSolverImpl { + /// Stores the variable identifier and Atom for atomic booleans in the + /// formula. + llvm::DenseMap<Variable, Atom> Atomics; + + /// A boolean formula in conjunctive normal form that the solver will attempt + /// to prove satisfiable. The formula will be modified in the process. + CNFFormula CNF; + + /// Maps literals (indices of the vector) to clause identifiers (elements of + /// the vector) that watch the respective literals. + /// + /// For a given clause, its watched literal is always its first literal in + /// `Clauses`. This invariant is maintained when watched literals change. + std::vector<ClauseID> WatchedHead; + + /// Maps clause identifiers (elements of the vector) to identifiers of other + /// clauses that watch the same literals, forming a set of linked lists. + /// + /// The element at index 0 stands for the identifier of the clause that + /// follows the null clause. It is set to 0 and isn't used. Identifiers of + /// clauses in the formula start from the element at index 1. + std::vector<ClauseID> NextWatched; + + /// The search for a satisfying assignment of the variables in `Formula` will + /// proceed in levels, starting from 1 and going up to `Formula.LargestVar` + /// (inclusive). The current level is stored in `Level`. At each level the + /// solver will assign a value to an unassigned variable. If this leads to a + /// consistent partial assignment, `Level` will be incremented. Otherwise, if + /// it results in a conflict, the solver will backtrack by decrementing + /// `Level` until it reaches the most recent level where a decision was made. + size_t Level = 0; + + /// Maps levels (indices of the vector) to variables (elements of the vector) + /// that are assigned values at the respective levels. + /// + /// The element at index 0 isn't used. Variables start from the element at + /// index 1. + std::vector<Variable> LevelVars; + + /// State of the solver at a particular level. + enum class State : uint8_t { + /// Indicates that the solver made a decision. + Decision = 0, + + /// Indicates that the solver made a forced move. + Forced = 1, + }; + + /// State of the solver at a particular level. It keeps track of previous + /// decisions that the solver can refer to when backtracking. + /// + /// The element at index 0 isn't used. States start from the element at index + /// 1. + std::vector<State> LevelStates; + + enum class Assignment : int8_t { + Unassigned = -1, + AssignedFalse = 0, + AssignedTrue = 1 + }; + + /// Maps variables (indices of the vector) to their assignments (elements of + /// the vector). + /// + /// The element at index 0 isn't used. Variable assignments start from the + /// element at index 1. + std::vector<Assignment> VarAssignments; + + /// A set of unassigned variables that appear in watched literals in + /// `Formula`. The vector is guaranteed to contain unique elements. + std::vector<Variable> ActiveVars; + +public: + explicit WatchedLiteralsSolverImpl( + const llvm::ArrayRef<const Formula *> &Vals) + // `Atomics` needs to be initialized first so that we can use it as an + // output argument of `buildCNF()`. + : Atomics(), CNF(buildCNF(Vals, Atomics)), + LevelVars(CNF.largestVar() + 1), LevelStates(CNF.largestVar() + 1) { + assert(!Vals.empty()); + + // Skip initialization if the formula is known to be contradictory. + if (CNF.knownContradictory()) + return; + + // Initialize `NextWatched` and `WatchedHead`. + NextWatched.push_back(0); + const size_t NumLiterals = 2 * CNF.largestVar() + 1; + WatchedHead.resize(NumLiterals + 1, 0); + for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { + // Designate the first literal as the "watched" literal of the clause. + Literal FirstLit = CNF.clauseLiterals(C).front(); + NextWatched.push_back(WatchedHead[FirstLit]); + WatchedHead[FirstLit] = C; + } + + // Initialize the state at the root level to a decision so that in + // `reverseForcedMoves` we don't have to check that `Level >= 0` on each + // iteration. + LevelStates[0] = State::Decision; + + // Initialize all variables as unassigned. + VarAssignments.resize(CNF.largestVar() + 1, Assignment::Unassigned); + + // Initialize the active variables. + for (Variable Var = CNF.largestVar(); Var != NullVar; --Var) { + if (isWatched(posLit(Var)) || isWatched(negLit(Var))) + ActiveVars.push_back(Var); + } + } + + // Returns the `Result` and the number of iterations "remaining" from + // `MaxIterations` (that is, `MaxIterations` - iterations in this call). + std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && { + if (CNF.knownContradictory()) { + // Short-cut the solving process. We already found out at CNF + // construction time that the formula is unsatisfiable. + return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations); + } + size_t I = 0; + while (I < ActiveVars.size()) { + if (MaxIterations == 0) + return std::make_pair(Solver::Result::TimedOut(), 0); + --MaxIterations; + + // Assert that the following invariants hold: + // 1. All active variables are unassigned. + // 2. All active variables form watched literals. + // 3. Unassigned variables that form watched literals are active. + // FIXME: Consider replacing these with test cases that fail if the any + // of the invariants is broken. That might not be easy due to the + // transformations performed by `buildCNF`. + assert(activeVarsAreUnassigned()); + assert(activeVarsFormWatchedLiterals()); + assert(unassignedVarsFormingWatchedLiteralsAreActive()); + + const Variable ActiveVar = ActiveVars[I]; + + // Look for unit clauses that contain the active variable. + const bool unitPosLit = watchedByUnitClause(posLit(ActiveVar)); + const bool unitNegLit = watchedByUnitClause(negLit(ActiveVar)); + if (unitPosLit && unitNegLit) { + // We found a conflict! + + // Backtrack and rewind the `Level` until the most recent non-forced + // assignment. + reverseForcedMoves(); + + // If the root level is reached, then all possible assignments lead to + // a conflict. + if (Level == 0) + return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations); + + // Otherwise, take the other branch at the most recent level where a + // decision was made. + LevelStates[Level] = State::Forced; + const Variable Var = LevelVars[Level]; + VarAssignments[Var] = VarAssignments[Var] == Assignment::AssignedTrue + ? Assignment::AssignedFalse + : Assignment::AssignedTrue; + + updateWatchedLiterals(); + } else if (unitPosLit || unitNegLit) { + // We found a unit clause! The value of its unassigned variable is + // forced. + ++Level; + + LevelVars[Level] = ActiveVar; + LevelStates[Level] = State::Forced; + VarAssignments[ActiveVar] = + unitPosLit ? Assignment::AssignedTrue : Assignment::AssignedFalse; + + // Remove the variable that was just assigned from the set of active + // variables. + if (I + 1 < ActiveVars.size()) { + // Replace the variable that was just assigned with the last active + // variable for efficient removal. + ActiveVars[I] = ActiveVars.back(); + } else { + // This was the last active variable. Repeat the process from the + // beginning. + I = 0; + } + ActiveVars.pop_back(); + + updateWatchedLiterals(); + } else if (I + 1 == ActiveVars.size()) { + // There are no remaining unit clauses in the formula! Make a decision + // for one of the active variables at the current level. + ++Level; + + LevelVars[Level] = ActiveVar; + LevelStates[Level] = State::Decision; + VarAssignments[ActiveVar] = decideAssignment(ActiveVar); + + // Remove the variable that was just assigned from the set of active + // variables. + ActiveVars.pop_back(); + + updateWatchedLiterals(); + + // This was the last active variable. Repeat the process from the + // beginning. + I = 0; + } else { + ++I; + } + } + return std::make_pair(Solver::Result::Satisfiable(buildSolution()), + MaxIterations); + } + +private: + /// Returns a satisfying truth assignment to the atoms in the boolean formula. + llvm::DenseMap<Atom, Solver::Result::Assignment> buildSolution() { + llvm::DenseMap<Atom, Solver::Result::Assignment> Solution; + for (auto &Atomic : Atomics) { + // A variable may have a definite true/false assignment, or it may be + // unassigned indicating its truth value does not affect the result of + // the formula. Unassigned variables are assigned to true as a default. + Solution[Atomic.second] = + VarAssignments[Atomic.first] == Assignment::AssignedFalse + ? Solver::Result::Assignment::AssignedFalse + : Solver::Result::Assignment::AssignedTrue; + } + return Solution; + } + + /// Reverses forced moves until the most recent level where a decision was + /// made on the assignment of a variable. + void reverseForcedMoves() { + for (; LevelStates[Level] == State::Forced; --Level) { + const Variable Var = LevelVars[Level]; + + VarAssignments[Var] = Assignment::Unassigned; + + // If the variable that we pass through is watched then we add it to the + // active variables. + if (isWatched(posLit(Var)) || isWatched(negLit(Var))) + ActiveVars.push_back(Var); + } + } + + /// Updates watched literals that are affected by a variable assignment. + void updateWatchedLiterals() { + const Variable Var = LevelVars[Level]; + + // Update the watched literals of clauses that currently watch the literal + // that falsifies `Var`. + const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue + ? negLit(Var) + : posLit(Var); + ClauseID FalseLitWatcher = WatchedHead[FalseLit]; + WatchedHead[FalseLit] = NullClause; + while (FalseLitWatcher != NullClause) { + const ClauseID NextFalseLitWatcher = NextWatched[FalseLitWatcher]; + + // Pick the first non-false literal as the new watched literal. + const CNFFormula::Iterator FalseLitWatcherStart = + CNF.startOfClause(FalseLitWatcher); + CNFFormula::Iterator NewWatchedLitIter = FalseLitWatcherStart.next(); + while (isCurrentlyFalse(*NewWatchedLitIter)) + ++NewWatchedLitIter; + const Literal NewWatchedLit = *NewWatchedLitIter; + const Variable NewWatchedLitVar = var(NewWatchedLit); + + // Swap the old watched literal for the new one in `FalseLitWatcher` to + // maintain the invariant that the watched literal is at the beginning of + // the clause. + *NewWatchedLitIter = FalseLit; + *FalseLitWatcherStart = NewWatchedLit; + + // If the new watched literal isn't watched by any other clause and its + // variable isn't assigned we need to add it to the active variables. + if (!isWatched(NewWatchedLit) && !isWatched(notLit(NewWatchedLit)) && + VarAssignments[NewWatchedLitVar] == Assignment::Unassigned) + ActiveVars.push_back(NewWatchedLitVar); + + NextWatched[FalseLitWatcher] = WatchedHead[NewWatchedLit]; + WatchedHead[NewWatchedLit] = FalseLitWatcher; + + // Go to the next clause that watches `FalseLit`. + FalseLitWatcher = NextFalseLitWatcher; + } + } + + /// Returns true if and only if one of the clauses that watch `Lit` is a unit + /// clause. + bool watchedByUnitClause(Literal Lit) const { + for (ClauseID LitWatcher = WatchedHead[Lit]; LitWatcher != NullClause; + LitWatcher = NextWatched[LitWatcher]) { + llvm::ArrayRef<Literal> Clause = CNF.clauseLiterals(LitWatcher); + + // Assert the invariant that the watched literal is always the first one + // in the clause. + // FIXME: Consider replacing this with a test case that fails if the + // invariant is broken by `updateWatchedLiterals`. That might not be easy + // due to the transformations performed by `buildCNF`. + assert(Clause.front() == Lit); + + if (isUnit(Clause)) + return true; + } + return false; + } + + /// Returns true if and only if `Clause` is a unit clause. + bool isUnit(llvm::ArrayRef<Literal> Clause) const { + return llvm::all_of(Clause.drop_front(), + [this](Literal L) { return isCurrentlyFalse(L); }); + } + + /// Returns true if and only if `Lit` evaluates to `false` in the current + /// partial assignment. + bool isCurrentlyFalse(Literal Lit) const { + return static_cast<int8_t>(VarAssignments[var(Lit)]) == + static_cast<int8_t>(Lit & 1); + } + + /// Returns true if and only if `Lit` is watched by a clause in `Formula`. + bool isWatched(Literal Lit) const { return WatchedHead[Lit] != NullClause; } + + /// Returns an assignment for an unassigned variable. + Assignment decideAssignment(Variable Var) const { + return !isWatched(posLit(Var)) || isWatched(negLit(Var)) + ? Assignment::AssignedFalse + : Assignment::AssignedTrue; + } + + /// Returns a set of all watched literals. + llvm::DenseSet<Literal> watchedLiterals() const { + llvm::DenseSet<Literal> WatchedLiterals; + for (Literal Lit = 2; Lit < WatchedHead.size(); Lit++) { + if (WatchedHead[Lit] == NullClause) + continue; + WatchedLiterals.insert(Lit); + } + return WatchedLiterals; + } + + /// Returns true if and only if all active variables are unassigned. + bool activeVarsAreUnassigned() const { + return llvm::all_of(ActiveVars, [this](Variable Var) { + return VarAssignments[Var] == Assignment::Unassigned; + }); + } + + /// Returns true if and only if all active variables form watched literals. + bool activeVarsFormWatchedLiterals() const { + const llvm::DenseSet<Literal> WatchedLiterals = watchedLiterals(); + return llvm::all_of(ActiveVars, [&WatchedLiterals](Variable Var) { + return WatchedLiterals.contains(posLit(Var)) || + WatchedLiterals.contains(negLit(Var)); + }); + } + + /// Returns true if and only if all unassigned variables that are forming + /// watched literals are active. + bool unassignedVarsFormingWatchedLiteralsAreActive() const { + const llvm::DenseSet<Variable> ActiveVarsSet(ActiveVars.begin(), + ActiveVars.end()); + for (Literal Lit : watchedLiterals()) { + const Variable Var = var(Lit); + if (VarAssignments[Var] != Assignment::Unassigned) + continue; + if (ActiveVarsSet.contains(Var)) + continue; + return false; + } + return true; + } +}; + +} // namespace + +Solver::Result +WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) { + if (Vals.empty()) + return Solver::Result::Satisfiable({{}}); + auto [Res, Iterations] = WatchedLiteralsSolverImpl(Vals).solve(MaxIterations); + MaxIterations = Iterations; + return Res; +} + +} // namespace dataflow +} // namespace clang |