aboutsummaryrefslogtreecommitdiff
path: root/lib/Rewrite/InclusionRewriter.cpp
blob: 3dfc3b00898725eef97a8a27f1aea72a8414b4ca (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code rewrites include invocations into their expansions.  This gives you
// a file with all included files merged into it.
//
//===----------------------------------------------------------------------===//

#include "clang/Rewrite/Rewriters.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace llvm;

namespace {

class InclusionRewriter : public PPCallbacks {
  /// Information about which #includes were actually performed,
  /// created by preprocessor callbacks.
  struct FileChange {
    SourceLocation From;
    FileID Id;
    SrcMgr::CharacteristicKind FileType;
    FileChange(SourceLocation From) : From(From) {
    }
  };
  Preprocessor &PP; ///< Used to find inclusion directives.
  SourceManager &SM; ///< Used to read and manage source files.
  raw_ostream &OS; ///< The destination stream for rewritten contents.
  bool ShowLineMarkers; ///< Show #line markers.
  bool UseLineDirective; ///< Use of line directives or line markers.
  typedef std::map<unsigned, FileChange> FileChangeMap;
  FileChangeMap FileChanges; /// Tracks which files were included where.
  /// Used transitively for building up the FileChanges mapping over the
  /// various \c PPCallbacks callbacks.
  FileChangeMap::iterator LastInsertedFileChange;
public:
  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
private:
  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                           SrcMgr::CharacteristicKind FileType,
                           FileID PrevFID);
  virtual void FileSkipped(const FileEntry &ParentFile,
                           const Token &FilenameTok,
                           SrcMgr::CharacteristicKind FileType);
  virtual void InclusionDirective(SourceLocation HashLoc,
                                  const Token &IncludeTok,
                                  StringRef FileName,
                                  bool IsAngled,
                                  const FileEntry *File,
                                  SourceLocation EndLoc,
                                  StringRef SearchPath,
                                  StringRef RelativePath);
  void WriteLineInfo(const char *Filename, int Line,
                     SrcMgr::CharacteristicKind FileType,
                     StringRef EOL, StringRef Extra = StringRef());
  void OutputContentUpTo(const MemoryBuffer &FromFile,
                         unsigned &WriteFrom, unsigned WriteTo,
                         StringRef EOL, int &lines,
                         bool EnsureNewline = false);
  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
                           const MemoryBuffer &FromFile, StringRef EOL,
                           unsigned &NextToWrite, int &Lines);
  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
};

}  // end anonymous namespace

/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
                                     bool ShowLineMarkers)
    : PP(PP), SM(PP.getSourceManager()), OS(OS),
    ShowLineMarkers(ShowLineMarkers),
    LastInsertedFileChange(FileChanges.end()) {
  // If we're in microsoft mode, use normal #line instead of line markers.
  UseLineDirective = PP.getLangOpts().MicrosoftExt;
}

/// Write appropriate line information as either #line directives or GNU line
/// markers depending on what mode we're in, including the \p Filename and
/// \p Line we are located at, using the specified \p EOL line separator, and
/// any \p Extra context specifiers in GNU line directives.
void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
                                      SrcMgr::CharacteristicKind FileType,
                                      StringRef EOL, StringRef Extra) {
  if (!ShowLineMarkers)
    return;
  if (UseLineDirective) {
    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
  } else {
    // Use GNU linemarkers as described here:
    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
    if (!Extra.empty())
      OS << Extra;
    if (FileType == SrcMgr::C_System)
      // "`3' This indicates that the following text comes from a system header
      // file, so certain warnings should be suppressed."
      OS << " 3";
    else if (FileType == SrcMgr::C_ExternCSystem)
      // as above for `3', plus "`4' This indicates that the following text
      // should be treated as being wrapped in an implicit extern "C" block."
      OS << " 3 4";
  }
  OS << EOL;
}

/// FileChanged - Whenever the preprocessor enters or exits a #include file
/// it invokes this handler.
void InclusionRewriter::FileChanged(SourceLocation Loc,
                                    FileChangeReason Reason,
                                    SrcMgr::CharacteristicKind NewFileType,
                                    FileID) {
  if (Reason != EnterFile)
    return;
  if (LastInsertedFileChange == FileChanges.end())
    // we didn't reach this file (eg: the main file) via an inclusion directive
    return;
  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
  LastInsertedFileChange->second.FileType = NewFileType;
  LastInsertedFileChange = FileChanges.end();
}

/// Called whenever an inclusion is skipped due to canonical header protection
/// macros.
void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
                                    const Token &/*FilenameTok*/,
                                    SrcMgr::CharacteristicKind /*FileType*/) {
  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
    "found via an inclusion directive, was skipped");
  FileChanges.erase(LastInsertedFileChange);
  LastInsertedFileChange = FileChanges.end();
}

/// This should be called whenever the preprocessor encounters include
/// directives. It does not say whether the file has been included, but it
/// provides more information about the directive (hash location instead
/// of location inside the included file). It is assumed that the matching
/// FileChanged() or FileSkipped() is called after this.
void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
                                           const Token &/*IncludeTok*/,
                                           StringRef /*FileName*/,
                                           bool /*IsAngled*/,
                                           const FileEntry * /*File*/,
                                           SourceLocation /*EndLoc*/,
                                           StringRef /*SearchPath*/,
                                           StringRef /*RelativePath*/) {
  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
    "directive was found before the previous one was processed");
  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
  assert(p.second && "Unexpected revisitation of the same include directive");
  LastInsertedFileChange = p.first;
}

/// Simple lookup for a SourceLocation (specifically one denoting the hash in
/// an inclusion directive) in the map of inclusion information, FileChanges.
const InclusionRewriter::FileChange *
InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
  if (I != FileChanges.end())
    return &I->second;
  return NULL;
}

/// Detect the likely line ending style of \p FromFile by examining the first
/// newline found within it.
static StringRef DetectEOL(const MemoryBuffer &FromFile) {
  // detect what line endings the file uses, so that added content does not mix
  // the style
  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
  if (Pos == NULL)
    return "\n";
  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
    return "\n\r";
  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
    return "\r\n";
  return "\n";
}

/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
/// \p WriteTo - 1.
void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
                                          unsigned &WriteFrom, unsigned WriteTo,
                                          StringRef EOL, int &Line,
                                          bool EnsureNewline) {
  if (WriteTo <= WriteFrom)
    return;
  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
  // count lines manually, it's faster than getPresumedLoc()
  Line += std::count(FromFile.getBufferStart() + WriteFrom,
                     FromFile.getBufferStart() + WriteTo, '\n');
  if (EnsureNewline) {
    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
    if (LastChar != '\n' && LastChar != '\r')
      OS << EOL;
  }
  WriteFrom = WriteTo;
}

/// Print characters from \p FromFile starting at \p NextToWrite up until the
/// inclusion directive at \p StartToken, then print out the inclusion
/// inclusion directive disabled by a #if directive, updating \p NextToWrite
/// and \p Line to track the number of source lines visited and the progress
/// through the \p FromFile buffer.
void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
                                            const Token &StartToken,
                                            const MemoryBuffer &FromFile,
                                            StringRef EOL,
                                            unsigned &NextToWrite, int &Line) {
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
  Token DirectiveToken;
  do {
    DirectiveLex.LexFromRawLexer(DirectiveToken);
  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
    EOL, Line);
  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
}

/// Find the next identifier in the pragma directive specified by \p RawToken.
StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
                                                Token &RawToken) {
  RawLex.LexFromRawLexer(RawToken);
  if (RawToken.is(tok::raw_identifier))
    PP.LookUpIdentifierInfo(RawToken);
  if (RawToken.is(tok::identifier))
    return RawToken.getIdentifierInfo()->getName();
  return StringRef();
}

/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
/// and including content of included files recursively.
bool InclusionRewriter::Process(FileID FileId,
                                SrcMgr::CharacteristicKind FileType)
{
  bool Invalid;
  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
  if (Invalid) // invalid inclusion
    return true;
  const char *FileName = FromFile.getBufferIdentifier();
  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
  RawLex.SetCommentRetentionState(false);

  StringRef EOL = DetectEOL(FromFile);

  // Per the GNU docs: "1" indicates the start of a new file.
  WriteLineInfo(FileName, 1, FileType, EOL, " 1");

  if (SM.getFileIDSize(FileId) == 0)
    return true;

  // The next byte to be copied from the source file
  unsigned NextToWrite = 0;
  int Line = 1; // The current input file line number.

  Token RawToken;
  RawLex.LexFromRawLexer(RawToken);

  // TODO: Consider adding a switch that strips possibly unimportant content,
  // such as comments, to reduce the size of repro files.
  while (RawToken.isNot(tok::eof)) {
    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
      RawLex.setParsingPreprocessorDirective(true);
      Token HashToken = RawToken;
      RawLex.LexFromRawLexer(RawToken);
      if (RawToken.is(tok::raw_identifier))
        PP.LookUpIdentifierInfo(RawToken);
      if (RawToken.is(tok::identifier)) {
        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
          case tok::pp_include:
          case tok::pp_include_next:
          case tok::pp_import: {
            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
              Line);
            if (const FileChange *Change = FindFileChangeLocation(
                HashToken.getLocation())) {
              // now include and recursively process the file
              if (Process(Change->Id, Change->FileType))
                // and set lineinfo back to this file, if the nested one was
                // actually included
                // `2' indicates returning to a file (after having included
                // another file.
                WriteLineInfo(FileName, Line, FileType, EOL, " 2");
            } else
              // fix up lineinfo (since commented out directive changed line
              // numbers) for inclusions that were skipped due to header guards
              WriteLineInfo(FileName, Line, FileType, EOL);
            break;
          }
          case tok::pp_pragma: {
            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
            if (Identifier == "clang" || Identifier == "GCC") {
              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
                // keep the directive in, commented out
                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
                  NextToWrite, Line);
                // update our own type
                FileType = SM.getFileCharacteristic(RawToken.getLocation());
                WriteLineInfo(FileName, Line, FileType, EOL);
              }
            } else if (Identifier == "once") {
              // keep the directive in, commented out
              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
                NextToWrite, Line);
              WriteLineInfo(FileName, Line, FileType, EOL);
            }
            break;
          }
          default:
            break;
        }
      }
      RawLex.setParsingPreprocessorDirective(false);
    }
    RawLex.LexFromRawLexer(RawToken);
  }
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
    /*EnsureNewline*/true);
  return true;
}

/// InclusionRewriterInInput - Implement -frewrite-includes mode.
void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
                                   const PreprocessorOutputOptions &Opts) {
  SourceManager &SM = PP.getSourceManager();
  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
                                                     Opts.ShowLineMarkers);
  PP.addPPCallbacks(Rewrite);

  // First let the preprocessor process the entire file and call callbacks.
  // Callbacks will record which #include's were actually performed.
  PP.EnterMainSourceFile();
  Token Tok;
  // Only preprocessor directives matter here, so disable macro expansion
  // everywhere else as an optimization.
  // TODO: It would be even faster if the preprocessor could be switched
  // to a mode where it would parse only preprocessor directives and comments,
  // nothing else matters for parsing or processing.
  PP.SetMacroExpansionOnlyInDirectives();
  do {
    PP.Lex(Tok);
  } while (Tok.isNot(tok::eof));
  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
  OS->flush();
}