diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp')
-rw-r--r-- | clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp | 842 |
1 files changed, 701 insertions, 141 deletions
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 9752a0e22832..cb6f61e86ae3 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -16,6 +16,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/Support/raw_ostream.h" @@ -23,10 +24,89 @@ using namespace clang; using namespace ento; +// This class can be extended with other tables which will help to reason +// about ranges more precisely. +class OperatorRelationsTable { + static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE && + BO_GE < BO_EQ && BO_EQ < BO_NE, + "This class relies on operators order. Rework it otherwise."); + +public: + enum TriStateKind { + False = 0, + True, + Unknown, + }; + +private: + // CmpOpTable holds states which represent the corresponding range for + // branching an exploded graph. We can reason about the branch if there is + // a previously known fact of the existence of a comparison expression with + // operands used in the current expression. + // E.g. assuming (x < y) is true that means (x != y) is surely true. + // if (x previous_operation y) // < | != | > + // if (x operation y) // != | > | < + // tristate // True | Unknown | False + // + // CmpOpTable represents next: + // __|< |> |<=|>=|==|!=|UnknownX2| + // < |1 |0 |* |0 |0 |* |1 | + // > |0 |1 |0 |* |0 |* |1 | + // <=|1 |0 |1 |* |1 |* |0 | + // >=|0 |1 |* |1 |1 |* |0 | + // ==|0 |0 |* |* |1 |0 |1 | + // !=|1 |1 |* |* |0 |1 |0 | + // + // Columns stands for a previous operator. + // Rows stands for a current operator. + // Each row has exactly two `Unknown` cases. + // UnknownX2 means that both `Unknown` previous operators are met in code, + // and there is a special column for that, for example: + // if (x >= y) + // if (x != y) + // if (x <= y) + // False only + static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1; + const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = { + // < > <= >= == != UnknownX2 + {True, False, Unknown, False, False, Unknown, True}, // < + {False, True, False, Unknown, False, Unknown, True}, // > + {True, False, True, Unknown, True, Unknown, False}, // <= + {False, True, Unknown, True, True, Unknown, False}, // >= + {False, False, Unknown, Unknown, True, False, True}, // == + {True, True, Unknown, Unknown, False, True, False}, // != + }; + + static size_t getIndexFromOp(BinaryOperatorKind OP) { + return static_cast<size_t>(OP - BO_LT); + } + +public: + constexpr size_t getCmpOpCount() const { return CmpOpCount; } + + static BinaryOperatorKind getOpFromIndex(size_t Index) { + return static_cast<BinaryOperatorKind>(Index + BO_LT); + } + + TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP, + BinaryOperatorKind QueriedOP) const { + return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)]; + } + + TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const { + return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount]; + } +}; +//===----------------------------------------------------------------------===// +// RangeSet implementation +//===----------------------------------------------------------------------===// + void RangeSet::IntersectInRange(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Lower, const llvm::APSInt &Upper, - PrimRangeSet &newRanges, PrimRangeSet::iterator &i, - PrimRangeSet::iterator &e) const { + const llvm::APSInt &Lower, + const llvm::APSInt &Upper, + PrimRangeSet &newRanges, + PrimRangeSet::iterator &i, + PrimRangeSet::iterator &e) const { // There are six cases for each range R in the set: // 1. R is entirely before the intersection range. // 2. R is entirely after the intersection range. @@ -62,10 +142,27 @@ void RangeSet::IntersectInRange(BasicValueFactory &BV, Factory &F, const llvm::APSInt &RangeSet::getMinValue() const { assert(!isEmpty()); - return ranges.begin()->From(); + return begin()->From(); +} + +const llvm::APSInt &RangeSet::getMaxValue() const { + assert(!isEmpty()); + // NOTE: It's a shame that we can't implement 'getMaxValue' without scanning + // the whole tree to get to the last element. + // llvm::ImmutableSet should support decrement for 'end' iterators + // or reverse order iteration. + auto It = begin(); + for (auto End = end(); std::next(It) != End; ++It) { + } + return It->To(); } bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { + if (isEmpty()) { + // This range is already infeasible. + return false; + } + // This function has nine cases, the cartesian product of range-testing // both the upper and lower bounds against the symbol's type. // Each case requires a different pinning operation. @@ -155,11 +252,11 @@ bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { // or, alternatively, /removing/ all integers between Upper and Lower. RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, llvm::APSInt Lower, llvm::APSInt Upper) const { - if (!pin(Lower, Upper)) - return F.getEmptySet(); - PrimRangeSet newRanges = F.getEmptySet(); + if (isEmpty() || !pin(Lower, Upper)) + return newRanges; + PrimRangeSet::iterator i = begin(), e = end(); if (Lower <= Upper) IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); @@ -190,33 +287,78 @@ RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, return newRanges; } -// Turn all [A, B] ranges to [-B, -A]. Ranges [MIN, B] are turned to range set -// [MIN, MIN] U [-B, MAX], when MIN and MAX are the minimal and the maximal -// signed values of the type. +// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus +// operation under the values of the type. +// +// We also handle MIN because applying unary minus to MIN does not change it. +// Example 1: +// char x = -128; // -128 is a MIN value in a range of 'char' +// char y = -x; // y: -128 +// Example 2: +// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' +// unsigned char y = -x; // y: 0 +// +// And it makes us to separate the range +// like [MIN, N] to [MIN, MIN] U [-N,MAX]. +// For instance, whole range is {-128..127} and subrange is [-128,-126], +// thus [-128,-127,-126,.....] negates to [-128,.....,126,127]. +// +// Negate restores disrupted ranges on bounds, +// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. RangeSet RangeSet::Negate(BasicValueFactory &BV, Factory &F) const { PrimRangeSet newRanges = F.getEmptySet(); - for (iterator i = begin(), e = end(); i != e; ++i) { - const llvm::APSInt &from = i->From(), &to = i->To(); - const llvm::APSInt &newTo = (from.isMinSignedValue() ? - BV.getMaxValue(from) : - BV.getValue(- from)); - if (to.isMaxSignedValue() && !newRanges.isEmpty() && - newRanges.begin()->From().isMinSignedValue()) { - assert(newRanges.begin()->To().isMinSignedValue() && - "Ranges should not overlap"); - assert(!from.isMinSignedValue() && "Ranges should not overlap"); - const llvm::APSInt &newFrom = newRanges.begin()->From(); - newRanges = - F.add(F.remove(newRanges, *newRanges.begin()), Range(newFrom, newTo)); - } else if (!to.isMinSignedValue()) { - const llvm::APSInt &newFrom = BV.getValue(- to); - newRanges = F.add(newRanges, Range(newFrom, newTo)); - } - if (from.isMinSignedValue()) { - newRanges = F.add(newRanges, Range(BV.getMinValue(from), - BV.getMinValue(from))); + if (isEmpty()) + return newRanges; + + const llvm::APSInt sampleValue = getMinValue(); + const llvm::APSInt &MIN = BV.getMinValue(sampleValue); + const llvm::APSInt &MAX = BV.getMaxValue(sampleValue); + + // Handle a special case for MIN value. + iterator i = begin(); + const llvm::APSInt &from = i->From(); + const llvm::APSInt &to = i->To(); + if (from == MIN) { + // If [from, to] are [MIN, MAX], then just return the same [MIN, MAX]. + if (to == MAX) { + newRanges = ranges; + } else { + // Add separate range for the lowest value. + newRanges = F.add(newRanges, Range(MIN, MIN)); + // Skip adding the second range in case when [from, to] are [MIN, MIN]. + if (to != MIN) { + newRanges = F.add(newRanges, Range(BV.getValue(-to), MAX)); + } } + // Skip the first range in the loop. + ++i; + } + + // Negate all other ranges. + for (iterator e = end(); i != e; ++i) { + // Negate int values. + const llvm::APSInt &newFrom = BV.getValue(-i->To()); + const llvm::APSInt &newTo = BV.getValue(-i->From()); + // Add a negated range. + newRanges = F.add(newRanges, Range(newFrom, newTo)); + } + + if (newRanges.isSingleton()) + return newRanges; + + // Try to find and unite next ranges: + // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. + iterator iter1 = newRanges.begin(); + iterator iter2 = std::next(iter1); + + if (iter1->To() == MIN && (iter2->From() - 1) == MIN) { + const llvm::APSInt &to = iter2->To(); + // remove adjacent ranges + newRanges = F.remove(newRanges, *iter1); + newRanges = F.remove(newRanges, *newRanges.begin()); + // add united range + newRanges = F.add(newRanges, Range(MIN, to)); } return newRanges; @@ -238,10 +380,534 @@ void RangeSet::print(raw_ostream &os) const { } namespace { + +/// A little component aggregating all of the reasoning we have about +/// the ranges of symbolic expressions. +/// +/// Even when we don't know the exact values of the operands, we still +/// can get a pretty good estimate of the result's range. +class SymbolicRangeInferrer + : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> { +public: + static RangeSet inferRange(BasicValueFactory &BV, RangeSet::Factory &F, + ProgramStateRef State, SymbolRef Sym) { + SymbolicRangeInferrer Inferrer(BV, F, State); + return Inferrer.infer(Sym); + } + + RangeSet VisitSymExpr(SymbolRef Sym) { + // If we got to this function, the actual type of the symbolic + // expression is not supported for advanced inference. + // In this case, we simply backoff to the default "let's simply + // infer the range from the expression's type". + return infer(Sym->getType()); + } + + RangeSet VisitSymIntExpr(const SymIntExpr *Sym) { + return VisitBinaryOperator(Sym); + } + + RangeSet VisitIntSymExpr(const IntSymExpr *Sym) { + return VisitBinaryOperator(Sym); + } + + RangeSet VisitSymSymExpr(const SymSymExpr *Sym) { + return VisitBinaryOperator(Sym); + } + +private: + SymbolicRangeInferrer(BasicValueFactory &BV, RangeSet::Factory &F, + ProgramStateRef S) + : ValueFactory(BV), RangeFactory(F), State(S) {} + + /// Infer range information from the given integer constant. + /// + /// It's not a real "inference", but is here for operating with + /// sub-expressions in a more polymorphic manner. + RangeSet inferAs(const llvm::APSInt &Val, QualType) { + return {RangeFactory, Val}; + } + + /// Infer range information from symbol in the context of the given type. + RangeSet inferAs(SymbolRef Sym, QualType DestType) { + QualType ActualType = Sym->getType(); + // Check that we can reason about the symbol at all. + if (ActualType->isIntegralOrEnumerationType() || + Loc::isLocType(ActualType)) { + return infer(Sym); + } + // Otherwise, let's simply infer from the destination type. + // We couldn't figure out nothing else about that expression. + return infer(DestType); + } + + RangeSet infer(SymbolRef Sym) { + const RangeSet *AssociatedRange = State->get<ConstraintRange>(Sym); + + // If Sym is a difference of symbols A - B, then maybe we have range set + // stored for B - A. + const RangeSet *RangeAssociatedWithNegatedSym = + getRangeForMinusSymbol(State, Sym); + + // If we have range set stored for both A - B and B - A then calculate the + // effective range set by intersecting the range set for A - B and the + // negated range set of B - A. + if (AssociatedRange && RangeAssociatedWithNegatedSym) + return AssociatedRange->Intersect( + ValueFactory, RangeFactory, + RangeAssociatedWithNegatedSym->Negate(ValueFactory, RangeFactory)); + + if (AssociatedRange) + return *AssociatedRange; + + if (RangeAssociatedWithNegatedSym) + return RangeAssociatedWithNegatedSym->Negate(ValueFactory, RangeFactory); + + // If Sym is a comparison expression (except <=>), + // find any other comparisons with the same operands. + // See function description. + const RangeSet CmpRangeSet = getRangeForComparisonSymbol(State, Sym); + if (!CmpRangeSet.isEmpty()) + return CmpRangeSet; + + return Visit(Sym); + } + + /// Infer range information solely from the type. + RangeSet infer(QualType T) { + // Lazily generate a new RangeSet representing all possible values for the + // given symbol type. + RangeSet Result(RangeFactory, ValueFactory.getMinValue(T), + ValueFactory.getMaxValue(T)); + + // References are known to be non-zero. + if (T->isReferenceType()) + return assumeNonZero(Result, T); + + return Result; + } + + template <class BinarySymExprTy> + RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) { + // TODO #1: VisitBinaryOperator implementation might not make a good + // use of the inferred ranges. In this case, we might be calculating + // everything for nothing. This being said, we should introduce some + // sort of laziness mechanism here. + // + // TODO #2: We didn't go into the nested expressions before, so it + // might cause us spending much more time doing the inference. + // This can be a problem for deeply nested expressions that are + // involved in conditions and get tested continuously. We definitely + // need to address this issue and introduce some sort of caching + // in here. + QualType ResultType = Sym->getType(); + return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType), + Sym->getOpcode(), + inferAs(Sym->getRHS(), ResultType), ResultType); + } + + RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op, + RangeSet RHS, QualType T) { + switch (Op) { + case BO_Or: + return VisitBinaryOperator<BO_Or>(LHS, RHS, T); + case BO_And: + return VisitBinaryOperator<BO_And>(LHS, RHS, T); + case BO_Rem: + return VisitBinaryOperator<BO_Rem>(LHS, RHS, T); + default: + return infer(T); + } + } + + //===----------------------------------------------------------------------===// + // Ranges and operators + //===----------------------------------------------------------------------===// + + /// Return a rough approximation of the given range set. + /// + /// For the range set: + /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] } + /// it will return the range [x_0, y_N]. + static Range fillGaps(RangeSet Origin) { + assert(!Origin.isEmpty()); + return {Origin.getMinValue(), Origin.getMaxValue()}; + } + + /// Try to convert given range into the given type. + /// + /// It will return llvm::None only when the trivial conversion is possible. + llvm::Optional<Range> convert(const Range &Origin, APSIntType To) { + if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within || + To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) { + return llvm::None; + } + return Range(ValueFactory.Convert(To, Origin.From()), + ValueFactory.Convert(To, Origin.To())); + } + + template <BinaryOperator::Opcode Op> + RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) { + // We should propagate information about unfeasbility of one of the + // operands to the resulting range. + if (LHS.isEmpty() || RHS.isEmpty()) { + return RangeFactory.getEmptySet(); + } + + Range CoarseLHS = fillGaps(LHS); + Range CoarseRHS = fillGaps(RHS); + + APSIntType ResultType = ValueFactory.getAPSIntType(T); + + // We need to convert ranges to the resulting type, so we can compare values + // and combine them in a meaningful (in terms of the given operation) way. + auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType); + auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType); + + // It is hard to reason about ranges when conversion changes + // borders of the ranges. + if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) { + return infer(T); + } + + return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T); + } + + template <BinaryOperator::Opcode Op> + RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) { + return infer(T); + } + + /// Return a symmetrical range for the given range and type. + /// + /// If T is signed, return the smallest range [-x..x] that covers the original + /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't + /// exist due to original range covering min(T)). + /// + /// If T is unsigned, return the smallest range [0..x] that covers the + /// original range. + Range getSymmetricalRange(Range Origin, QualType T) { + APSIntType RangeType = ValueFactory.getAPSIntType(T); + + if (RangeType.isUnsigned()) { + return Range(ValueFactory.getMinValue(RangeType), Origin.To()); + } + + if (Origin.From().isMinSignedValue()) { + // If mini is a minimal signed value, absolute value of it is greater + // than the maximal signed value. In order to avoid these + // complications, we simply return the whole range. + return {ValueFactory.getMinValue(RangeType), + ValueFactory.getMaxValue(RangeType)}; + } + + // At this point, we are sure that the type is signed and we can safely + // use unary - operator. + // + // While calculating absolute maximum, we can use the following formula + // because of these reasons: + // * If From >= 0 then To >= From and To >= -From. + // AbsMax == To == max(To, -From) + // * If To <= 0 then -From >= -To and -From >= From. + // AbsMax == -From == max(-From, To) + // * Otherwise, From <= 0, To >= 0, and + // AbsMax == max(abs(From), abs(To)) + llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To()); + + // Intersection is guaranteed to be non-empty. + return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)}; + } + + /// Return a range set subtracting zero from \p Domain. + RangeSet assumeNonZero(RangeSet Domain, QualType T) { + APSIntType IntType = ValueFactory.getAPSIntType(T); + return Domain.Intersect(ValueFactory, RangeFactory, + ++IntType.getZeroValue(), --IntType.getZeroValue()); + } + + // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to + // obtain the negated symbolic expression instead of constructing the + // symbol manually. This will allow us to support finding ranges of not + // only negated SymSymExpr-type expressions, but also of other, simpler + // expressions which we currently do not know how to negate. + const RangeSet *getRangeForMinusSymbol(ProgramStateRef State, SymbolRef Sym) { + if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) { + if (SSE->getOpcode() == BO_Sub) { + QualType T = Sym->getType(); + SymbolManager &SymMgr = State->getSymbolManager(); + SymbolRef negSym = + SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T); + + if (const RangeSet *negV = State->get<ConstraintRange>(negSym)) { + // Unsigned range set cannot be negated, unless it is [0, 0]. + if (T->isUnsignedIntegerOrEnumerationType() || + T->isSignedIntegerOrEnumerationType()) + return negV; + } + } + } + return nullptr; + } + + // Returns ranges only for binary comparison operators (except <=>) + // when left and right operands are symbolic values. + // Finds any other comparisons with the same operands. + // Then do logical calculations and refuse impossible branches. + // E.g. (x < y) and (x > y) at the same time are impossible. + // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only. + // E.g. (x == y) and (y == x) are just reversed but the same. + // It covers all possible combinations (see CmpOpTable description). + // Note that `x` and `y` can also stand for subexpressions, + // not only for actual symbols. + RangeSet getRangeForComparisonSymbol(ProgramStateRef State, SymbolRef Sym) { + const RangeSet EmptyRangeSet = RangeFactory.getEmptySet(); + + auto SSE = dyn_cast<SymSymExpr>(Sym); + if (!SSE) + return EmptyRangeSet; + + BinaryOperatorKind CurrentOP = SSE->getOpcode(); + + // We currently do not support <=> (C++20). + if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp)) + return EmptyRangeSet; + + static const OperatorRelationsTable CmpOpTable{}; + + const SymExpr *LHS = SSE->getLHS(); + const SymExpr *RHS = SSE->getRHS(); + QualType T = SSE->getType(); + + SymbolManager &SymMgr = State->getSymbolManager(); + const llvm::APSInt &Zero = ValueFactory.getValue(0, T); + const llvm::APSInt &One = ValueFactory.getValue(1, T); + const RangeSet TrueRangeSet(RangeFactory, One, One); + const RangeSet FalseRangeSet(RangeFactory, Zero, Zero); + + int UnknownStates = 0; + + // Loop goes through all of the columns exept the last one ('UnknownX2'). + // We treat `UnknownX2` column separately at the end of the loop body. + for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) { + + // Let's find an expression e.g. (x < y). + BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i); + const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T); + const RangeSet *QueriedRangeSet = State->get<ConstraintRange>(SymSym); + + // If ranges were not previously found, + // try to find a reversed expression (y > x). + if (!QueriedRangeSet) { + const BinaryOperatorKind ROP = + BinaryOperator::reverseComparisonOp(QueriedOP); + SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T); + QueriedRangeSet = State->get<ConstraintRange>(SymSym); + } + + if (!QueriedRangeSet || QueriedRangeSet->isEmpty()) + continue; + + const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue(); + const bool isInFalseBranch = + ConcreteValue ? (*ConcreteValue == 0) : false; + + // If it is a false branch, we shall be guided by opposite operator, + // because the table is made assuming we are in the true branch. + // E.g. when (x <= y) is false, then (x > y) is true. + if (isInFalseBranch) + QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP); + + OperatorRelationsTable::TriStateKind BranchState = + CmpOpTable.getCmpOpState(CurrentOP, QueriedOP); + + if (BranchState == OperatorRelationsTable::Unknown) { + if (++UnknownStates == 2) + // If we met both Unknown states. + // if (x <= y) // assume true + // if (x != y) // assume true + // if (x < y) // would be also true + // Get a state from `UnknownX2` column. + BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP); + else + continue; + } + + return (BranchState == OperatorRelationsTable::True) ? TrueRangeSet + : FalseRangeSet; + } + + return EmptyRangeSet; + } + + BasicValueFactory &ValueFactory; + RangeSet::Factory &RangeFactory; + ProgramStateRef State; +}; + +template <> +RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS, + QualType T) { + APSIntType ResultType = ValueFactory.getAPSIntType(T); + llvm::APSInt Zero = ResultType.getZeroValue(); + + bool IsLHSPositiveOrZero = LHS.From() >= Zero; + bool IsRHSPositiveOrZero = RHS.From() >= Zero; + + bool IsLHSNegative = LHS.To() < Zero; + bool IsRHSNegative = RHS.To() < Zero; + + // Check if both ranges have the same sign. + if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) || + (IsLHSNegative && IsRHSNegative)) { + // The result is definitely greater or equal than any of the operands. + const llvm::APSInt &Min = std::max(LHS.From(), RHS.From()); + + // We estimate maximal value for positives as the maximal value for the + // given type. For negatives, we estimate it with -1 (e.g. 0x11111111). + // + // TODO: We basically, limit the resulting range from below, but don't do + // anything with the upper bound. + // + // For positive operands, it can be done as follows: for the upper + // bound of LHS and RHS we calculate the most significant bit set. + // Let's call it the N-th bit. Then we can estimate the maximal + // number to be 2^(N+1)-1, i.e. the number with all the bits up to + // the N-th bit set. + const llvm::APSInt &Max = IsLHSNegative + ? ValueFactory.getValue(--Zero) + : ValueFactory.getMaxValue(ResultType); + + return {RangeFactory, ValueFactory.getValue(Min), Max}; + } + + // Otherwise, let's check if at least one of the operands is negative. + if (IsLHSNegative || IsRHSNegative) { + // This means that the result is definitely negative as well. + return {RangeFactory, ValueFactory.getMinValue(ResultType), + ValueFactory.getValue(--Zero)}; + } + + RangeSet DefaultRange = infer(T); + + // It is pretty hard to reason about operands with different signs + // (and especially with possibly different signs). We simply check if it + // can be zero. In order to conclude that the result could not be zero, + // at least one of the operands should be definitely not zero itself. + if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) { + return assumeNonZero(DefaultRange, T); + } + + // Nothing much else to do here. + return DefaultRange; +} + +template <> +RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS, + Range RHS, + QualType T) { + APSIntType ResultType = ValueFactory.getAPSIntType(T); + llvm::APSInt Zero = ResultType.getZeroValue(); + + bool IsLHSPositiveOrZero = LHS.From() >= Zero; + bool IsRHSPositiveOrZero = RHS.From() >= Zero; + + bool IsLHSNegative = LHS.To() < Zero; + bool IsRHSNegative = RHS.To() < Zero; + + // Check if both ranges have the same sign. + if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) || + (IsLHSNegative && IsRHSNegative)) { + // The result is definitely less or equal than any of the operands. + const llvm::APSInt &Max = std::min(LHS.To(), RHS.To()); + + // We conservatively estimate lower bound to be the smallest positive + // or negative value corresponding to the sign of the operands. + const llvm::APSInt &Min = IsLHSNegative + ? ValueFactory.getMinValue(ResultType) + : ValueFactory.getValue(Zero); + + return {RangeFactory, Min, Max}; + } + + // Otherwise, let's check if at least one of the operands is positive. + if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) { + // This makes result definitely positive. + // + // We can also reason about a maximal value by finding the maximal + // value of the positive operand. + const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To(); + + // The minimal value on the other hand is much harder to reason about. + // The only thing we know for sure is that the result is positive. + return {RangeFactory, ValueFactory.getValue(Zero), + ValueFactory.getValue(Max)}; + } + + // Nothing much else to do here. + return infer(T); +} + +template <> +RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS, + Range RHS, + QualType T) { + llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue(); + + Range ConservativeRange = getSymmetricalRange(RHS, T); + + llvm::APSInt Max = ConservativeRange.To(); + llvm::APSInt Min = ConservativeRange.From(); + + if (Max == Zero) { + // It's an undefined behaviour to divide by 0 and it seems like we know + // for sure that RHS is 0. Let's say that the resulting range is + // simply infeasible for that matter. + return RangeFactory.getEmptySet(); + } + + // At this point, our conservative range is closed. The result, however, + // couldn't be greater than the RHS' maximal absolute value. Because of + // this reason, we turn the range into open (or half-open in case of + // unsigned integers). + // + // While we operate on integer values, an open interval (a, b) can be easily + // represented by the closed interval [a + 1, b - 1]. And this is exactly + // what we do next. + // + // If we are dealing with unsigned case, we shouldn't move the lower bound. + if (Min.isSigned()) { + ++Min; + } + --Max; + + bool IsLHSPositiveOrZero = LHS.From() >= Zero; + bool IsRHSPositiveOrZero = RHS.From() >= Zero; + + // Remainder operator results with negative operands is implementation + // defined. Positive cases are much easier to reason about though. + if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) { + // If maximal value of LHS is less than maximal value of RHS, + // the result won't get greater than LHS.To(). + Max = std::min(LHS.To(), Max); + // We want to check if it is a situation similar to the following: + // + // <------------|---[ LHS ]--------[ RHS ]-----> + // -INF 0 +INF + // + // In this situation, we can conclude that (LHS / RHS) == 0 and + // (LHS % RHS) == LHS. + Min = LHS.To() < RHS.From() ? LHS.From() : Zero; + } + + // Nevertheless, the symmetrical range for RHS is a conservative estimate + // for any sign of either LHS, or RHS. + return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)}; +} + class RangeConstraintManager : public RangedConstraintManager { public: - RangeConstraintManager(SubEngine *SE, SValBuilder &SVB) - : RangedConstraintManager(SE, SVB) {} + RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) + : RangedConstraintManager(EE, SVB) {} //===------------------------------------------------------------------===// // Implementation for interface from ConstraintManager. @@ -305,8 +971,6 @@ private: RangeSet::Factory F; RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - const RangeSet* getRangeForMinusSymbol(ProgramStateRef State, - SymbolRef Sym); RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -323,13 +987,13 @@ private: RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, const llvm::APSInt &Adjustment); - }; } // end anonymous namespace std::unique_ptr<ConstraintManager> -ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine *Eng) { +ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, + ExprEngine *Eng) { return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder()); } @@ -429,113 +1093,9 @@ RangeConstraintManager::removeDeadBindings(ProgramStateRef State, return Changed ? State->set<ConstraintRange>(CR) : State; } -/// Return a range set subtracting zero from \p Domain. -static RangeSet assumeNonZero( - BasicValueFactory &BV, - RangeSet::Factory &F, - SymbolRef Sym, - RangeSet Domain) { - APSIntType IntType = BV.getAPSIntType(Sym->getType()); - return Domain.Intersect(BV, F, ++IntType.getZeroValue(), - --IntType.getZeroValue()); -} - -/// Apply implicit constraints for bitwise OR- and AND-. -/// For unsigned types, bitwise OR with a constant always returns -/// a value greater-or-equal than the constant, and bitwise AND -/// returns a value less-or-equal then the constant. -/// -/// Pattern matches the expression \p Sym against those rule, -/// and applies the required constraints. -/// \p Input Previously established expression range set -static RangeSet applyBitwiseConstraints( - BasicValueFactory &BV, - RangeSet::Factory &F, - RangeSet Input, - const SymIntExpr* SIE) { - QualType T = SIE->getType(); - bool IsUnsigned = T->isUnsignedIntegerType(); - const llvm::APSInt &RHS = SIE->getRHS(); - const llvm::APSInt &Zero = BV.getAPSIntType(T).getZeroValue(); - BinaryOperator::Opcode Operator = SIE->getOpcode(); - - // For unsigned types, the output of bitwise-or is bigger-or-equal than RHS. - if (Operator == BO_Or && IsUnsigned) - return Input.Intersect(BV, F, RHS, BV.getMaxValue(T)); - - // Bitwise-or with a non-zero constant is always non-zero. - if (Operator == BO_Or && RHS != Zero) - return assumeNonZero(BV, F, SIE, Input); - - // For unsigned types, or positive RHS, - // bitwise-and output is always smaller-or-equal than RHS (assuming two's - // complement representation of signed types). - if (Operator == BO_And && (IsUnsigned || RHS >= Zero)) - return Input.Intersect(BV, F, BV.getMinValue(T), RHS); - - return Input; -} - RangeSet RangeConstraintManager::getRange(ProgramStateRef State, SymbolRef Sym) { - ConstraintRangeTy::data_type *V = State->get<ConstraintRange>(Sym); - - // If Sym is a difference of symbols A - B, then maybe we have range set - // stored for B - A. - BasicValueFactory &BV = getBasicVals(); - const RangeSet *R = getRangeForMinusSymbol(State, Sym); - - // If we have range set stored for both A - B and B - A then calculate the - // effective range set by intersecting the range set for A - B and the - // negated range set of B - A. - if (V && R) - return V->Intersect(BV, F, R->Negate(BV, F)); - if (V) - return *V; - if (R) - return R->Negate(BV, F); - - // Lazily generate a new RangeSet representing all possible values for the - // given symbol type. - QualType T = Sym->getType(); - - RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T)); - - // References are known to be non-zero. - if (T->isReferenceType()) - return assumeNonZero(BV, F, Sym, Result); - - // Known constraints on ranges of bitwise expressions. - if (const SymIntExpr* SIE = dyn_cast<SymIntExpr>(Sym)) - return applyBitwiseConstraints(BV, F, Result, SIE); - - return Result; -} - -// FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to -// obtain the negated symbolic expression instead of constructing the -// symbol manually. This will allow us to support finding ranges of not -// only negated SymSymExpr-type expressions, but also of other, simpler -// expressions which we currently do not know how to negate. -const RangeSet* -RangeConstraintManager::getRangeForMinusSymbol(ProgramStateRef State, - SymbolRef Sym) { - if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) { - if (SSE->getOpcode() == BO_Sub) { - QualType T = Sym->getType(); - SymbolManager &SymMgr = State->getSymbolManager(); - SymbolRef negSym = SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, - SSE->getLHS(), T); - if (const RangeSet *negV = State->get<ConstraintRange>(negSym)) { - // Unsigned range set cannot be negated, unless it is [0, 0]. - if ((negV->getConcreteValue() && - (*negV->getConcreteValue() == 0)) || - T->isSignedIntegerOrEnumerationType()) - return negV; - } - } - } - return nullptr; + return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Sym); } //===------------------------------------------------------------------------=== |