aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
blob: 6362c82b009d7284c9e72638ac6ca0b97d02597e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <optional>

using namespace clang;
using namespace ento;
using namespace taint;

// Fully tainted symbols.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)

// Partially tainted symbols.
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
                                       TaintTagType)
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)

void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
                       const char *Sep) {
  TaintMapTy TM = State->get<TaintMap>();

  if (!TM.isEmpty())
    Out << "Tainted symbols:" << NL;

  for (const auto &I : TM)
    Out << I.first << " : " << I.second << NL;
}

void taint::dumpTaint(ProgramStateRef State) {
  printTaint(State, llvm::errs());
}

ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
                                const LocationContext *LCtx,
                                TaintTagType Kind) {
  return addTaint(State, State->getSVal(S, LCtx), Kind);
}

ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
                                TaintTagType Kind) {
  SymbolRef Sym = V.getAsSymbol();
  if (Sym)
    return addTaint(State, Sym, Kind);

  // If the SVal represents a structure, try to mass-taint all values within the
  // structure. For now it only works efficiently on lazy compound values that
  // were conjured during a conservative evaluation of a function - either as
  // return values of functions that return structures or arrays by value, or as
  // values of structures or arrays passed into the function by reference,
  // directly or through pointer aliasing. Such lazy compound values are
  // characterized by having exactly one binding in their captured store within
  // their parent region, which is a conjured symbol default-bound to the base
  // region of the parent region.
  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
    if (std::optional<SVal> binding =
            State->getStateManager().getStoreManager().getDefaultBinding(
                *LCV)) {
      if (SymbolRef Sym = binding->getAsSymbol())
        return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
    }
  }

  const MemRegion *R = V.getAsRegion();
  return addTaint(State, R, Kind);
}

ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
                                TaintTagType Kind) {
  if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
    return addTaint(State, SR->getSymbol(), Kind);
  return State;
}

ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
                                TaintTagType Kind) {
  // If this is a symbol cast, remove the cast before adding the taint. Taint
  // is cast agnostic.
  while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
    Sym = SC->getOperand();

  ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
  assert(NewState);
  return NewState;
}

ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
  SymbolRef Sym = V.getAsSymbol();
  if (Sym)
    return removeTaint(State, Sym);

  const MemRegion *R = V.getAsRegion();
  return removeTaint(State, R);
}

ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
  if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
    return removeTaint(State, SR->getSymbol());
  return State;
}

ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
  // If this is a symbol cast, remove the cast before adding the taint. Taint
  // is cast agnostic.
  while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
    Sym = SC->getOperand();

  ProgramStateRef NewState = State->remove<TaintMap>(Sym);
  assert(NewState);
  return NewState;
}

ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
                                       SymbolRef ParentSym,
                                       const SubRegion *SubRegion,
                                       TaintTagType Kind) {
  // Ignore partial taint if the entire parent symbol is already tainted.
  if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
    if (*T == Kind)
      return State;

  // Partial taint applies if only a portion of the symbol is tainted.
  if (SubRegion == SubRegion->getBaseRegion())
    return addTaint(State, ParentSym, Kind);

  const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
  TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
  TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();

  Regs = F.add(Regs, SubRegion, Kind);
  ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
  assert(NewState);
  return NewState;
}

bool taint::isTainted(ProgramStateRef State, const Stmt *S,
                      const LocationContext *LCtx, TaintTagType Kind) {
  return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true)
              .empty();
}

bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
  return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true)
              .empty();
}

bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
                      TaintTagType K) {
  return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true)
              .empty();
}

bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
  return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true)
              .empty();
}

std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
                                                const Stmt *S,
                                                const LocationContext *LCtx,
                                                TaintTagType Kind) {
  return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false);
}

std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V,
                                                TaintTagType Kind) {
  return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false);
}

std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
                                                SymbolRef Sym,
                                                TaintTagType Kind) {
  return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false);
}

std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
                                                const MemRegion *Reg,
                                                TaintTagType Kind) {
  return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false);
}

std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
                                                    const Stmt *S,
                                                    const LocationContext *LCtx,
                                                    TaintTagType Kind,
                                                    bool returnFirstOnly) {
  SVal val = State->getSVal(S, LCtx);
  return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly);
}

std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
                                                    SVal V, TaintTagType Kind,
                                                    bool returnFirstOnly) {
  if (SymbolRef Sym = V.getAsSymbol())
    return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
  if (const MemRegion *Reg = V.getAsRegion())
    return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
  return {};
}

std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
                                                    const MemRegion *Reg,
                                                    TaintTagType K,
                                                    bool returnFirstOnly) {
  std::vector<SymbolRef> TaintedSymbols;
  if (!Reg)
    return TaintedSymbols;

  // Element region (array element) is tainted if the offset is tainted.
  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
    std::vector<SymbolRef> TaintedIndex =
        getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly);
    llvm::append_range(TaintedSymbols, TaintedIndex);
    if (returnFirstOnly && !TaintedSymbols.empty())
      return TaintedSymbols; // return early if needed
  }

  // Symbolic region is tainted if the corresponding symbol is tainted.
  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
    std::vector<SymbolRef> TaintedRegions =
        getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly);
    llvm::append_range(TaintedSymbols, TaintedRegions);
    if (returnFirstOnly && !TaintedSymbols.empty())
      return TaintedSymbols; // return early if needed
  }

  // Any subregion (including Element and Symbolic regions) is tainted if its
  // super-region is tainted.
  if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) {
    std::vector<SymbolRef> TaintedSubRegions =
        getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
    llvm::append_range(TaintedSymbols, TaintedSubRegions);
    if (returnFirstOnly && !TaintedSymbols.empty())
      return TaintedSymbols; // return early if needed
  }

  return TaintedSymbols;
}

std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
                                                    SymbolRef Sym,
                                                    TaintTagType Kind,
                                                    bool returnFirstOnly) {
  std::vector<SymbolRef> TaintedSymbols;
  if (!Sym)
    return TaintedSymbols;

  // Traverse all the symbols this symbol depends on to see if any are tainted.
  for (SymbolRef SubSym : Sym->symbols()) {
    if (!isa<SymbolData>(SubSym))
      continue;

    if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) {
      if (*Tag == Kind) {
        TaintedSymbols.push_back(SubSym);
        if (returnFirstOnly)
          return TaintedSymbols; // return early if needed
      }
    }

    if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) {
      // If this is a SymbolDerived with a tainted parent, it's also tainted.
      std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl(
          State, SD->getParentSymbol(), Kind, returnFirstOnly);
      llvm::append_range(TaintedSymbols, TaintedParents);
      if (returnFirstOnly && !TaintedSymbols.empty())
        return TaintedSymbols; // return early if needed

      // If this is a SymbolDerived with the same parent symbol as another
      // tainted SymbolDerived and a region that's a sub-region of that
      // tainted symbol, it's also tainted.
      if (const TaintedSubRegions *Regs =
              State->get<DerivedSymTaint>(SD->getParentSymbol())) {
        const TypedValueRegion *R = SD->getRegion();
        for (auto I : *Regs) {
          // FIXME: The logic to identify tainted regions could be more
          // complete. For example, this would not currently identify
          // overlapping fields in a union as tainted. To identify this we can
          // check for overlapping/nested byte offsets.
          if (Kind == I.second && R->isSubRegionOf(I.first)) {
            TaintedSymbols.push_back(SD->getParentSymbol());
            if (returnFirstOnly && !TaintedSymbols.empty())
              return TaintedSymbols; // return early if needed
          }
        }
      }
    }

    // If memory region is tainted, data is also tainted.
    if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) {
      std::vector<SymbolRef> TaintedRegions =
          getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly);
      llvm::append_range(TaintedSymbols, TaintedRegions);
      if (returnFirstOnly && !TaintedSymbols.empty())
        return TaintedSymbols; // return early if needed
    }

    // If this is a SymbolCast from a tainted value, it's also tainted.
    if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) {
      std::vector<SymbolRef> TaintedCasts =
          getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly);
      llvm::append_range(TaintedSymbols, TaintedCasts);
      if (returnFirstOnly && !TaintedSymbols.empty())
        return TaintedSymbols; // return early if needed
    }
  }
  return TaintedSymbols;
}