aboutsummaryrefslogtreecommitdiff
path: root/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
blob: b3882c227eaffc40f05231df8f0a5648184713ad (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H

#include "clang/Basic/LLVM.h"
#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <mutex>

namespace clang {
namespace tooling {
namespace dependencies {

/// List of dependency directives, as stored per cached file by
/// \c CachedFileContents. Backed by a SmallVector declared with an inline
/// capacity of 20 directives.
using DependencyDirectivesTy =
    SmallVector<dependency_directives_scan::Directive, 20>;

/// Contents and directive tokens of a cached file entry. Single instance can
/// be shared between multiple entries.
struct CachedFileContents {
  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
      : Original(std::move(Contents)), DepDirectives(nullptr) {}

  /// Owning storage for the original contents.
  std::unique_ptr<llvm::MemoryBuffer> Original;

  /// The mutex that must be locked before mutating directive tokens.
  std::mutex ValueLock;
  SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
  /// Accessor to the directive tokens that's atomic to avoid data races.
  /// \p CachedFileContents has ownership of the pointer.
  std::atomic<const Optional<DependencyDirectivesTy> *> DepDirectives;

  ~CachedFileContents() { delete DepDirectives.load(); }
};

/// In-memory record of a file system entity the dependency scanning
/// filesystem cares about.
///
/// An instance is exactly one of:
/// - an opened file with contents and a stat value,
/// - an opened file with contents, directive tokens and a stat value,
/// - a directory with its stat value,
/// - a filesystem error.
///
/// The same instance may be reachable through several filenames (e.g. a
/// regular file and a symlink to it). The stored status therefore carries an
/// empty name; \c EntryRef fills in the name that was actually requested.
class CachedFileSystemEntry {
public:
  /// Builds an entry that has no contents: a filesystem error, or a directory
  /// described by its stat value.
  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
      : CachedFileSystemEntry(std::move(Stat), /*Contents=*/nullptr) {}

  /// Builds an entry for a file whose contents live in \p Contents. The
  /// pointer is stored as-is and is not owned by the entry.
  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
                        CachedFileContents *Contents)
      : MaybeStat(std::move(Stat)), Contents(Contents) {
    clearStatName();
  }

  /// \returns True if this entry records a filesystem error.
  bool isError() const { return !MaybeStat; }

  /// \returns True if this entry is a (non-error) directory.
  bool isDirectory() const {
    if (isError())
      return false;
    return MaybeStat->isDirectory();
  }

  /// \returns The original, unmodified contents of the file. Must only be
  /// called on non-error file entries with contents attached.
  StringRef getOriginalContents() const {
    assert(!isError() && "error");
    assert(!MaybeStat->isDirectory() && "not a file");
    assert(Contents && "contents not initialized");
    return Contents->Original->getBuffer();
  }

  /// \returns The scanned preprocessor directives of the file, which are used
  /// to speed up preprocessing, or \c None when they are not available.
  Optional<ArrayRef<dependency_directives_scan::Directive>>
  getDirectiveTokens() const {
    assert(!isError() && "error");
    assert(!isDirectory() && "not a file");
    assert(Contents && "contents not initialized");
    const Optional<DependencyDirectivesTy> *Directives =
        Contents->DepDirectives.load();
    // Nothing published yet, or the published optional holds no value.
    if (!Directives || !Directives->has_value())
      return None;
    return ArrayRef<dependency_directives_scan::Directive>(
        Directives->value());
  }

  /// \returns The error code held by the stat result.
  std::error_code getError() const { return MaybeStat.getError(); }

  /// \returns The status of the entry; its filename is empty.
  llvm::vfs::Status getStatus() const {
    assert(!isError() && "error");
    assert(MaybeStat->getName().empty() && "stat name must be empty");
    return *MaybeStat;
  }

  /// \returns The unique ID taken from the entry's status.
  llvm::sys::fs::UniqueID getUniqueID() const {
    assert(!isError() && "error");
    return MaybeStat->getUniqueID();
  }

  /// \returns The object that holds both the contents and the directive
  /// tokens of this file entry.
  CachedFileContents *getCachedContents() const {
    assert(!isError() && "error");
    assert(!isDirectory() && "not a file");
    return Contents;
  }

private:
  /// Replaces the stored status with a copy whose name is empty. Does nothing
  /// for error entries.
  void clearStatName() {
    if (!MaybeStat)
      return;
    MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
  }

  /// Filesystem error, or the status of the entry.
  /// The status has an empty filename; \c EntryRef materializes the name.
  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;

  /// Non-owning pointer to the file contents.
  ///
  /// A pointer is used to keep this class small: directory and error entries
  /// carry no contents at all.
  CachedFileContents *Contents;
};

/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
/// underlying real file system, and the scanned preprocessor directives of
/// files.
///
/// It is sharded based on the hash of the key to reduce the lock contention for
/// the worker threads.
class DependencyScanningFilesystemSharedCache {
public:
  /// One partition of the shared cache. Each shard carries its own lock, its
  /// own lookup maps (by filename and by unique ID) and its own backing
  /// storage for entries and contents.
  struct CacheShard {
    /// The mutex that needs to be locked before mutation of any member.
    /// Declared \c mutable so that it can also be locked from the const
    /// lookup functions below.
    mutable std::mutex CacheLock;

    /// Map from filenames to cached entries. The map stores raw pointers to
    /// the entries.
    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
        EntriesByFilename;

    /// Map from unique IDs to cached entries. The map stores raw pointers to
    /// the entries.
    llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
        EntriesByUID;

    /// The backing storage for cached entries.
    llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;

    /// The backing storage for cached contents.
    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;

    /// Returns entry associated with the filename or nullptr if none is found.
    const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;

    /// Returns entry associated with the unique ID or nullptr if none is found.
    const CachedFileSystemEntry *
    findEntryByUID(llvm::sys::fs::UniqueID UID) const;

    /// Returns entry associated with the filename if there is some. Otherwise,
    /// constructs new one with the given status, associates it with the
    /// filename and returns the result.
    ///
    /// \p Stat may hold either a status or an error code.
    const CachedFileSystemEntry &
    getOrEmplaceEntryForFilename(StringRef Filename,
                                 llvm::ErrorOr<llvm::vfs::Status> Stat);

    /// Returns entry associated with the unique ID if there is some. Otherwise,
    /// constructs new one with the given status and contents, associates it
    /// with the unique ID and returns the result.
    ///
    /// Ownership of \p Contents is passed in by value.
    const CachedFileSystemEntry &
    getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
                            std::unique_ptr<llvm::MemoryBuffer> Contents);

    /// Returns entry associated with the filename if there is some. Otherwise,
    /// associates the given entry with the filename and returns it.
    const CachedFileSystemEntry &
    getOrInsertEntryForFilename(StringRef Filename,
                                const CachedFileSystemEntry &Entry);
  };

  /// Defined out of line.
  /// NOTE(review): presumably allocates \c CacheShards and sets \c NumShards;
  /// confirm against the definition.
  DependencyScanningFilesystemSharedCache();

  /// Returns shard for the given key.
  CacheShard &getShardForFilename(StringRef Filename) const;
  /// Returns shard for the given unique ID.
  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;

private:
  /// Owning array of cache shards.
  std::unique_ptr<CacheShard[]> CacheShards;
  /// NOTE(review): presumably the number of elements in \c CacheShards;
  /// confirm against the constructor definition.
  unsigned NumShards;
};

/// Local cache for the results of 'stat' and 'open' calls made to the
/// underlying real file system, keyed by filename.
class DependencyScanningFilesystemLocalCache {
  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;

public:
  /// Looks up \p Filename and returns the associated entry, or nullptr when
  /// no entry has been recorded for it.
  const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
    auto Found = Cache.find(Filename);
    if (Found == Cache.end())
      return nullptr;
    return Found->getValue();
  }

  /// Records \p Entry under \p Filename and returns a reference to the entry
  /// stored in the cache (for convenience). The filename is expected not to be
  /// cached yet; this is checked by an assertion.
  const CachedFileSystemEntry &
  insertEntryForFilename(StringRef Filename,
                         const CachedFileSystemEntry &Entry) {
    auto InsertResult = Cache.insert({Filename, &Entry});
    // Whatever the map holds for this filename after the insertion attempt.
    const CachedFileSystemEntry *InsertedEntry = InsertResult.first->second;
    assert(InsertedEntry == &Entry && "entry already present");
    return *InsertedEntry;
  }
};

/// Handle that pairs a CachedFileSystemEntry with the filename used to reach
/// it. For an opened file it gives access to the file contents and to the
/// scanned preprocessor directives.
class EntryRef {
  /// Name under which the entry was requested.
  std::string Filename;

  /// The cached entry being referred to (not owned).
  const CachedFileSystemEntry &Entry;

public:
  EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
      : Filename(Name.str()), Entry(Entry) {}

  /// \returns The entry's status carrying the requested filename. For
  /// anything that is not a directory, the size is replaced by the size of
  /// the cached contents.
  llvm::vfs::Status getStatus() const {
    const llvm::vfs::Status Unnamed = Entry.getStatus();
    if (Unnamed.isDirectory())
      return llvm::vfs::Status::copyWithNewName(Unnamed, Filename);

    const llvm::vfs::Status Resized =
        llvm::vfs::Status::copyWithNewSize(Unnamed, getContents().size());
    return llvm::vfs::Status::copyWithNewName(Resized, Filename);
  }

  bool isError() const { return Entry.isError(); }
  bool isDirectory() const { return Entry.isDirectory(); }

  /// Turns an error entry into the error state of `ErrorOr`; any other entry
  /// is returned as a copy of this reference.
  llvm::ErrorOr<EntryRef> unwrapError() const {
    if (!isError())
      return *this;
    return Entry.getError();
  }

  /// \returns The original contents of the underlying file entry.
  StringRef getContents() const { return Entry.getOriginalContents(); }

  /// \returns The scanned directives of the underlying entry, if available.
  Optional<ArrayRef<dependency_directives_scan::Directive>>
  getDirectiveTokens() const {
    return Entry.getDirectiveTokens();
  }
};

/// A virtual file system optimized for the dependency discovery.
///
/// It is primarily designed to work with source files whose contents were
/// preprocessed to remove any tokens that are unlikely to affect the dependency
/// computation.
///
/// This is not a thread safe VFS. A single instance is meant to be used only in
/// one thread. Multiple instances are allowed to service multiple threads
/// running in parallel.
class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
public:
  /// \param SharedCache The cache shared between workers. Only a reference is
  ///        stored, so the cache must outlive this filesystem.
  /// \param FS The underlying filesystem, handed to \c ProxyFileSystem.
  DependencyScanningWorkerFilesystem(
      DependencyScanningFilesystemSharedCache &SharedCache,
      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
      : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {}

  /// Overrides of the \c ProxyFileSystem interface; defined out of line.
  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
  openFileForRead(const Twine &Path) override;

  /// Returns entry for the given filename.
  ///
  /// Attempts to use the local and shared caches first, then falls back to
  /// using the underlying filesystem.
  ///
  /// NOTE(review): \p DisableDirectivesScanning presumably suppresses the
  /// directive scan for this lookup; confirm against the definition.
  llvm::ErrorOr<EntryRef>
  getOrCreateFileSystemEntry(StringRef Filename,
                             bool DisableDirectivesScanning = false);

private:
  /// Check whether the file should be scanned for preprocessor directives.
  bool shouldScanForDirectives(StringRef Filename);

  /// For a filename that's not yet associated with any entry in the caches,
  /// uses the underlying filesystem to either look up the entry in the shared
  /// cache indexed by unique ID, or creates new entry from scratch.
  llvm::ErrorOr<const CachedFileSystemEntry &>
  computeAndStoreResult(StringRef Filename);

  /// Scan for preprocessor directives for the given entry if necessary and
  /// returns a wrapper object with reference semantics.
  EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry,
                                        StringRef Filename, bool Disable);

  /// Represents a filesystem entry that has been stat-ed (and potentially read)
  /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
  struct TentativeEntry {
    llvm::vfs::Status Status;
    /// Contents that were read, or null when nothing was read.
    std::unique_ptr<llvm::MemoryBuffer> Contents;

    TentativeEntry(llvm::vfs::Status Status,
                   std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
        : Status(std::move(Status)), Contents(std::move(Contents)) {}
  };

  /// Reads file at the given path. Enforces consistency between the file size
  /// in status and size of read contents.
  llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);

  /// Returns entry associated with the unique ID of the given tentative entry
  /// if there is some in the shared cache. Otherwise, constructs new one,
  /// associates it with the unique ID and returns the result.
  const CachedFileSystemEntry &
  getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);

  /// Returns entry associated with the filename or nullptr if none is found.
  ///
  /// Returns entry from local cache if there is some. Otherwise, if the entry
  /// is found in the shared cache, writes it through the local cache and
  /// returns it. Otherwise returns nullptr.
  const CachedFileSystemEntry *
  findEntryByFilenameWithWriteThrough(StringRef Filename);

  /// Returns entry associated with the unique ID in the shared cache or nullptr
  /// if none is found.
  ///
  /// The status is taken by const reference so that a lookup does not copy it;
  /// only its unique ID is used.
  const CachedFileSystemEntry *
  findSharedEntryByUID(const llvm::vfs::Status &Stat) const {
    // The same ID selects the shard and keys the lookup inside it.
    const llvm::sys::fs::UniqueID UID = Stat.getUniqueID();
    return SharedCache.getShardForUID(UID).findEntryByUID(UID);
  }

  /// Associates the given entry with the filename in the local cache and
  /// returns it.
  const CachedFileSystemEntry &
  insertLocalEntryForFilename(StringRef Filename,
                              const CachedFileSystemEntry &Entry) {
    return LocalCache.insertEntryForFilename(Filename, Entry);
  }

  /// Returns entry associated with the filename in the shared cache if there is
  /// some. Otherwise, constructs new one with the given error code, associates
  /// it with the filename and returns the result.
  const CachedFileSystemEntry &
  getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
    return SharedCache.getShardForFilename(Filename)
        .getOrEmplaceEntryForFilename(Filename, EC);
  }

  /// Returns entry associated with the filename in the shared cache if there is
  /// some. Otherwise, associates the given entry with the filename and returns
  /// it.
  const CachedFileSystemEntry &
  getOrInsertSharedEntryForFilename(StringRef Filename,
                                    const CachedFileSystemEntry &Entry) {
    return SharedCache.getShardForFilename(Filename)
        .getOrInsertEntryForFilename(Filename, Entry);
  }

  /// The global cache shared between worker threads.
  DependencyScanningFilesystemSharedCache &SharedCache;
  /// The local cache is used by the worker thread to cache file system queries
  /// locally instead of querying the global cache every time.
  DependencyScanningFilesystemLocalCache LocalCache;
};

} // end namespace dependencies
} // end namespace tooling
} // end namespace clang

#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H