diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp | 434 |
1 files changed, 268 insertions, 166 deletions
diff --git a/contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 24ea1ccba207..1b262d9e6f7c 100644 --- a/contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -95,14 +95,22 @@ private: bool DumpIncludeDirectives; bool UseLineDirectives; bool IsFirstFileEntered; + bool MinimizeWhitespace; + bool DirectivesOnly; + + Token PrevTok; + Token PrevPrevTok; + public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers, bool defines, bool DumpIncludeDirectives, - bool UseLineDirectives) + bool UseLineDirectives, bool MinimizeWhitespace, + bool DirectivesOnly) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), - UseLineDirectives(UseLineDirectives) { + UseLineDirectives(UseLineDirectives), + MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly) { CurLine = 0; CurFilename += "<uninit>"; EmittedTokensOnThisLine = false; @@ -110,8 +118,13 @@ public: FileType = SrcMgr::C_User; Initialized = false; IsFirstFileEntered = false; + + PrevTok.startToken(); + PrevPrevTok.startToken(); } + bool isMinimizeWhitespace() const { return MinimizeWhitespace; } + void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; } @@ -120,16 +133,21 @@ public: return EmittedDirectiveOnThisLine; } - bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true); + /// Ensure that the output stream position is at the beginning of a new line + /// and inserts one if it does not. It is intended to ensure that directives + /// inserted by the directives not from the input source (such as #line) are + /// in the first column. To insert newlines that represent the input, use + /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true). + void startNewLineIfNeeded(); void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) override; void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, - CharSourceRange FilenameRange, const FileEntry *File, - StringRef SearchPath, StringRef RelativePath, - const Module *Imported, + CharSourceRange FilenameRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) override; void Ident(SourceLocation Loc, StringRef str) override; void PragmaMessage(SourceLocation Loc, StringRef Namespace, @@ -139,7 +157,7 @@ public: void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override; void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, diag::Severity Map, StringRef Str) override; - void PragmaWarning(SourceLocation Loc, StringRef WarningSpec, + void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec, ArrayRef<int> Ids) override; void PragmaWarningPush(SourceLocation Loc, int Level) override; void PragmaWarningPop(SourceLocation Loc) override; @@ -148,18 +166,44 @@ public: void PragmaAssumeNonNullBegin(SourceLocation Loc) override; void PragmaAssumeNonNullEnd(SourceLocation Loc) override; - bool HandleFirstTokOnLine(Token &Tok); + /// Insert whitespace before emitting the next token. + /// + /// @param Tok Next token to be emitted. + /// @param RequireSpace Ensure at least one whitespace is emitted. Useful + /// if non-tokens have been emitted to the stream. + /// @param RequireSameLine Never emit newlines. Useful when semantics depend + /// on being on the same line, such as directives. + void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace, + bool RequireSameLine); /// Move to the line of the provided source location. This will - /// return true if the output stream required adjustment or if - /// the requested location is on the first line. - bool MoveToLine(SourceLocation Loc) { + /// return true if a newline was inserted or if + /// the requested location is the first token on the first line. + /// In these cases the next output will be the first column on the line and + /// make it possible to insert indention. The newline was inserted + /// implicitly when at the beginning of the file. + /// + /// @param Tok Token where to move to. + /// @param RequireStartOfLine Whether the next line depends on being in the + /// first column, such as a directive. + /// + /// @return Whether column adjustments are necessary. + bool MoveToLine(const Token &Tok, bool RequireStartOfLine) { + PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation()); + unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; + bool IsFirstInFile = + Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1; + return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile; + } + + /// Move to the line of the provided source location. Returns true if a new + /// line was inserted. + bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) { PresumedLoc PLoc = SM.getPresumedLoc(Loc); - if (PLoc.isInvalid()) - return false; - return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1); + unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; + return MoveToLine(TargetLine, RequireStartOfLine); } - bool MoveToLine(unsigned LineNo); + bool MoveToLine(unsigned LineNo, bool RequireStartOfLine); bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) { @@ -187,7 +231,7 @@ public: void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, const char *Extra, unsigned ExtraLen) { - startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); + startNewLineIfNeeded(); // Emit #line directives or GNU line markers depending on what mode we're in. if (UseLineDirectives) { @@ -214,43 +258,64 @@ void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, /// object. We can do this by emitting some number of \n's, or be emitting a /// #line directive. This returns false if already at the specified line, true /// if some newlines were emitted. -bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) { +bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo, + bool RequireStartOfLine) { + // If it is required to start a new line or finish the current, insert + // vertical whitespace now and take it into account when moving to the + // expected line. + bool StartedNewLine = false; + if ((RequireStartOfLine && EmittedTokensOnThisLine) || + EmittedDirectiveOnThisLine) { + OS << '\n'; + StartedNewLine = true; + CurLine += 1; + EmittedTokensOnThisLine = false; + EmittedDirectiveOnThisLine = false; + } + // If this line is "close enough" to the original line, just print newlines, // otherwise print a #line directive. - if (LineNo-CurLine <= 8) { - if (LineNo-CurLine == 1) - OS << '\n'; - else if (LineNo == CurLine) - return false; // Spelling line moved, but expansion line didn't. - else { + if (CurLine == LineNo) { + // Nothing to do if we are already on the correct line. + } else if (MinimizeWhitespace && DisableLineMarkers) { + // With -E -P -fminimize-whitespace, don't emit anything if not necessary. + } else if (!StartedNewLine && LineNo - CurLine == 1) { + // Printing a single line has priority over printing a #line directive, even + // when minimizing whitespace which otherwise would print #line directives + // for every single line. + OS << '\n'; + StartedNewLine = true; + } else if (!DisableLineMarkers) { + if (LineNo - CurLine <= 8) { const char *NewLines = "\n\n\n\n\n\n\n\n"; - OS.write(NewLines, LineNo-CurLine); + OS.write(NewLines, LineNo - CurLine); + } else { + // Emit a #line or line marker. + WriteLineInfo(LineNo, nullptr, 0); } - } else if (!DisableLineMarkers) { - // Emit a #line or line marker. - WriteLineInfo(LineNo, nullptr, 0); - } else { - // Okay, we're in -P mode, which turns off line markers. However, we still - // need to emit a newline between tokens on different lines. - startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); + StartedNewLine = true; + } else if (EmittedTokensOnThisLine) { + // If we are not on the correct line and don't need to be line-correct, + // at least ensure we start on a new line. + OS << '\n'; + StartedNewLine = true; + } + + if (StartedNewLine) { + EmittedTokensOnThisLine = false; + EmittedDirectiveOnThisLine = false; } CurLine = LineNo; - return true; + return StartedNewLine; } -bool -PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) { +void PrintPPOutputPPCallbacks::startNewLineIfNeeded() { if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) { OS << '\n'; EmittedTokensOnThisLine = false; EmittedDirectiveOnThisLine = false; - if (ShouldUpdateCurrentLine) - ++CurLine; - return true; } - - return false; } /// FileChanged - Whenever the preprocessor enters or exits a #include file @@ -273,7 +338,7 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, if (Reason == PPCallbacks::EnterFile) { SourceLocation IncludeLoc = UserLoc.getIncludeLoc(); if (IncludeLoc.isValid()) - MoveToLine(IncludeLoc); + MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false); } else if (Reason == PPCallbacks::SystemHeaderPragma) { // GCC emits the # directive for this directive on the line AFTER the // directive and emits a bunch of spaces that aren't needed. This is because @@ -290,7 +355,8 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, FileType = NewFileType; if (DisableLineMarkers) { - startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); + if (!MinimizeWhitespace) + startNewLineIfNeeded(); return; } @@ -323,28 +389,20 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, } void PrintPPOutputPPCallbacks::InclusionDirective( - SourceLocation HashLoc, - const Token &IncludeTok, - StringRef FileName, - bool IsAngled, - CharSourceRange FilenameRange, - const FileEntry *File, - StringRef SearchPath, - StringRef RelativePath, - const Module *Imported, + SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, + StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { // In -dI mode, dump #include directives prior to dumping their content or // interpretation. if (DumpIncludeDirectives) { - startNewLineIfNeeded(); - MoveToLine(HashLoc); + MoveToLine(HashLoc, /*RequireStartOfLine=*/true); const std::string TokenText = PP.getSpelling(IncludeTok); assert(!TokenText.empty()); OS << "#" << TokenText << " " << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') << " /* clang -E -dI */"; setEmittedDirectiveOnThisLine(); - startNewLineIfNeeded(); } // When preprocessing, turn implicit imports into module import pragmas. @@ -353,17 +411,13 @@ void PrintPPOutputPPCallbacks::InclusionDirective( case tok::pp_include: case tok::pp_import: case tok::pp_include_next: - startNewLineIfNeeded(); - MoveToLine(HashLoc); + MoveToLine(HashLoc, /*RequireStartOfLine=*/true); OS << "#pragma clang module import " << Imported->getFullModuleName(true) << " /* clang -E: implicit import for " << "#" << PP.getSpelling(IncludeTok) << " " << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') << " */"; - // Since we want a newline after the pragma, but not a #<line>, start a - // new line immediately. - EmittedTokensOnThisLine = true; - startNewLineIfNeeded(); + setEmittedDirectiveOnThisLine(); break; case tok::pp___include_macros: @@ -398,23 +452,32 @@ void PrintPPOutputPPCallbacks::EndModule(const Module *M) { /// Ident - Handle #ident directives when read by the preprocessor. /// void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) { - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS.write("#ident ", strlen("#ident ")); OS.write(S.begin(), S.size()); - EmittedTokensOnThisLine = true; + setEmittedTokensOnThisLine(); } /// MacroDefined - This hook is called whenever a macro definition is seen. void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok, const MacroDirective *MD) { const MacroInfo *MI = MD->getMacroInfo(); - // Only print out macro definitions in -dD mode. - if (!DumpDefines || + // Print out macro definitions in -dD mode and when we have -fdirectives-only + // for C++20 header units. + if ((!DumpDefines && !DirectivesOnly) || // Ignore __FILE__ etc. - MI->isBuiltinMacro()) return; + MI->isBuiltinMacro()) + return; - MoveToLine(MI->getDefinitionLoc()); + SourceLocation DefLoc = MI->getDefinitionLoc(); + if (DirectivesOnly && !MI->isUsed()) { + SourceManager &SM = PP.getSourceManager(); + if (SM.isWrittenInBuiltinFile(DefLoc) || + SM.isWrittenInCommandLineFile(DefLoc)) + return; + } + MoveToLine(DefLoc, /*RequireStartOfLine=*/true); PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS); setEmittedDirectiveOnThisLine(); } @@ -422,10 +485,12 @@ void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok, void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD, const MacroDirective *Undef) { - // Only print out macro definitions in -dD mode. - if (!DumpDefines) return; + // Print out macro definitions in -dD mode and when we have -fdirectives-only + // for C++20 header units. + if (!DumpDefines && !DirectivesOnly) + return; - MoveToLine(MacroNameTok.getLocation()); + MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true); OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName(); setEmittedDirectiveOnThisLine(); } @@ -446,8 +511,7 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, StringRef Namespace, PragmaMessageKind Kind, StringRef Str) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma "; if (!Namespace.empty()) OS << Namespace << ' '; @@ -472,8 +536,7 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, StringRef DebugType) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma clang __debug "; OS << DebugType; @@ -483,16 +546,14 @@ void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, void PrintPPOutputPPCallbacks:: PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma " << Namespace << " diagnostic push"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma " << Namespace << " diagnostic pop"; setEmittedDirectiveOnThisLine(); } @@ -501,8 +562,7 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, diag::Severity Map, StringRef Str) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma " << Namespace << " diagnostic "; switch (Map) { case diag::Severity::Remark: @@ -526,11 +586,24 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc, } void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, - StringRef WarningSpec, + PragmaWarningSpecifier WarningSpec, ArrayRef<int> Ids) { - startNewLineIfNeeded(); - MoveToLine(Loc); - OS << "#pragma warning(" << WarningSpec << ':'; + MoveToLine(Loc, /*RequireStartOfLine=*/true); + + OS << "#pragma warning("; + switch(WarningSpec) { + case PWS_Default: OS << "default"; break; + case PWS_Disable: OS << "disable"; break; + case PWS_Error: OS << "error"; break; + case PWS_Once: OS << "once"; break; + case PWS_Suppress: OS << "suppress"; break; + case PWS_Level1: OS << '1'; break; + case PWS_Level2: OS << '2'; break; + case PWS_Level3: OS << '3'; break; + case PWS_Level4: OS << '4'; break; + } + OS << ':'; + for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I) OS << ' ' << *I; OS << ')'; @@ -539,8 +612,7 @@ void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc, int Level) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma warning(push"; if (Level >= 0) OS << ", " << Level; @@ -549,16 +621,14 @@ void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc, } void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma warning(pop)"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma character_execution_set(push"; if (!Str.empty()) OS << ", " << Str; @@ -567,64 +637,83 @@ void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, } void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma character_execution_set(pop)"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaAssumeNonNullBegin(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma clang assume_nonnull begin"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaAssumeNonNullEnd(SourceLocation Loc) { - startNewLineIfNeeded(); - MoveToLine(Loc); + MoveToLine(Loc, /*RequireStartOfLine=*/true); OS << "#pragma clang assume_nonnull end"; setEmittedDirectiveOnThisLine(); } -/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this -/// is called for the first token on each new line. If this really is the start -/// of a new logical line, handle it and return true, otherwise return false. -/// This may not be the start of a logical line because the "start of line" -/// marker is set for spelling lines, not expansion ones. -bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) { - // Figure out what line we went to and insert the appropriate number of - // newline characters. - if (!MoveToLine(Tok.getLocation())) - return false; - - // Print out space characters so that the first token on a line is - // indented for easy reading. - unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); - - // The first token on a line can have a column number of 1, yet still expect - // leading white space, if a macro expansion in column 1 starts with an empty - // macro argument, or an empty nested macro expansion. In this case, move the - // token to column 2. - if (ColNo == 1 && Tok.hasLeadingSpace()) - ColNo = 2; - - // This hack prevents stuff like: - // #define HASH # - // HASH define foo bar - // From having the # character end up at column 1, which makes it so it - // is not handled as a #define next time through the preprocessor if in - // -fpreprocessed mode. - if (ColNo <= 1 && Tok.is(tok::hash)) - OS << ' '; +void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, + bool RequireSpace, + bool RequireSameLine) { + // These tokens are not expanded to anything and don't need whitespace before + // them. + if (Tok.is(tok::eof) || + (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && + !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && + !Tok.is(tok::annot_repl_input_end))) + return; - // Otherwise, indent the appropriate number of spaces. - for (; ColNo > 1; --ColNo) - OS << ' '; + // EmittedDirectiveOnThisLine takes priority over RequireSameLine. + if ((!RequireSameLine || EmittedDirectiveOnThisLine) && + MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) { + if (MinimizeWhitespace) { + // Avoid interpreting hash as a directive under -fpreprocessed. + if (Tok.is(tok::hash)) + OS << ' '; + } else { + // Print out space characters so that the first token on a line is + // indented for easy reading. + unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); + + // The first token on a line can have a column number of 1, yet still + // expect leading white space, if a macro expansion in column 1 starts + // with an empty macro argument, or an empty nested macro expansion. In + // this case, move the token to column 2. + if (ColNo == 1 && Tok.hasLeadingSpace()) + ColNo = 2; + + // This hack prevents stuff like: + // #define HASH # + // HASH define foo bar + // From having the # character end up at column 1, which makes it so it + // is not handled as a #define next time through the preprocessor if in + // -fpreprocessed mode. + if (ColNo <= 1 && Tok.is(tok::hash)) + OS << ' '; + + // Otherwise, indent the appropriate number of spaces. + for (; ColNo > 1; --ColNo) + OS << ' '; + } + } else { + // Insert whitespace between the previous and next token if either + // - The caller requires it + // - The input had whitespace between them and we are not in + // whitespace-minimization mode + // - The whitespace is necessary to keep the tokens apart and there is not + // already a newline between them + if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) || + ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) && + AvoidConcat(PrevPrevTok, PrevTok, Tok))) + OS << ' '; + } - return true; + PrevPrevTok = PrevTok; + PrevTok = Tok; } void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr, @@ -668,9 +757,9 @@ struct UnknownPragmaHandler : public PragmaHandler { Token &PragmaTok) override { // Figure out what line we went to and insert the appropriate number of // newline characters. - Callbacks->startNewLineIfNeeded(); - Callbacks->MoveToLine(PragmaTok.getLocation()); + Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true); Callbacks->OS.write(Prefix, strlen(Prefix)); + Callbacks->setEmittedTokensOnThisLine(); if (ShouldExpandTokens) { // The first token does not have expanded macros. Expand them, if @@ -682,21 +771,16 @@ struct UnknownPragmaHandler : public PragmaHandler { /*IsReinject=*/false); PP.Lex(PragmaTok); } - Token PrevToken; - Token PrevPrevToken; - PrevToken.startToken(); - PrevPrevToken.startToken(); // Read and print all of the pragma tokens. + bool IsFirst = true; while (PragmaTok.isNot(tok::eod)) { - if (PragmaTok.hasLeadingSpace() || - Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok)) - Callbacks->OS << ' '; + Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst, + /*RequireSameLine=*/true); + IsFirst = false; std::string TokSpell = PP.getSpelling(PragmaTok); Callbacks->OS.write(&TokSpell[0], TokSpell.size()); - - PrevPrevToken = PrevToken; - PrevToken = PragmaTok; + Callbacks->setEmittedTokensOnThisLine(); if (ShouldExpandTokens) PP.Lex(PragmaTok); @@ -715,44 +799,44 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, bool DropComments = PP.getLangOpts().TraditionalCPP && !PP.getCommentRetentionState(); + bool IsStartOfLine = false; char Buffer[256]; - Token PrevPrevTok, PrevTok; - PrevPrevTok.startToken(); - PrevTok.startToken(); - while (1) { - if (Callbacks->hasEmittedDirectiveOnThisLine()) { - Callbacks->startNewLineIfNeeded(); - Callbacks->MoveToLine(Tok.getLocation()); - } - - // If this token is at the start of a line, emit newlines if needed. - if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) { - // done. - } else if (Tok.hasLeadingSpace() || - // If we haven't emitted a token on this line yet, PrevTok isn't - // useful to look at and no concatenation could happen anyway. - (Callbacks->hasEmittedTokensOnThisLine() && - // Don't print "-" next to "-", it would form "--". - Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) { - OS << ' '; - } + while (true) { + // Two lines joined with line continuation ('\' as last character on the + // line) must be emitted as one line even though Tok.getLine() returns two + // different values. In this situation Tok.isAtStartOfLine() is false even + // though it may be the first token on the lexical line. When + // dropping/skipping a token that is at the start of a line, propagate the + // start-of-line-ness to the next token to not append it to the previous + // line. + IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine(); + + Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false, + /*RequireSameLine=*/!IsStartOfLine); if (DropComments && Tok.is(tok::comment)) { // Skip comments. Normally the preprocessor does not generate // tok::comment nodes at all when not keeping comments, but under // -traditional-cpp the lexer keeps /all/ whitespace, including comments. - SourceLocation StartLoc = Tok.getLocation(); - Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength())); + PP.Lex(Tok); + continue; + } else if (Tok.is(tok::annot_repl_input_end)) { + PP.Lex(Tok); + continue; } else if (Tok.is(tok::eod)) { // Don't print end of directive tokens, since they are typically newlines // that mess up our line tracking. These come from unknown pre-processor // directives or hash-prefixed comments in standalone assembly files. PP.Lex(Tok); + // FIXME: The token on the next line after #include should have + // Tok.isAtStartOfLine() set. + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_include)) { // PrintPPOutputPPCallbacks::InclusionDirective handles producing // appropriate output here. Ignore this token entirely. PP.Lex(Tok); + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_begin)) { // FIXME: We retrieve this token after the FileChanged callback, and @@ -764,11 +848,13 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, Callbacks->BeginModule( reinterpret_cast<Module *>(Tok.getAnnotationValue())); PP.Lex(Tok); + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_end)) { Callbacks->EndModule( reinterpret_cast<Module *>(Tok.getAnnotationValue())); PP.Lex(Tok); + IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_header_unit)) { // This is a header-name that has been (effectively) converted into a @@ -789,15 +875,24 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { OS.write(Tok.getLiteralData(), Tok.getLength()); - } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) { + } else if (Tok.getLength() < std::size(Buffer)) { const char *TokPtr = Buffer; unsigned Len = PP.getSpelling(Tok, TokPtr); OS.write(TokPtr, Len); // Tokens that can contain embedded newlines need to adjust our current // line number. + // FIXME: The token may end with a newline in which case + // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is + // wrong. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(TokPtr, Len); + if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' && + TokPtr[1] == '/') { + // It's a line comment; + // Ensure that we don't concatenate anything behind it. + Callbacks->setEmittedDirectiveOnThisLine(); + } } else { std::string S = PP.getSpelling(Tok); OS.write(S.data(), S.size()); @@ -806,13 +901,17 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, // line number. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(S.data(), S.size()); + if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') { + // It's a line comment; + // Ensure that we don't concatenate anything behind it. + Callbacks->setEmittedDirectiveOnThisLine(); + } } Callbacks->setEmittedTokensOnThisLine(); + IsStartOfLine = false; if (Tok.is(tok::eof)) break; - PrevPrevTok = PrevTok; - PrevTok = Tok; PP.Lex(Tok); } } @@ -870,7 +969,8 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.UseLineDirectives); + Opts.ShowIncludeDirectives, Opts.UseLineDirectives, + Opts.MinimizeWhitespace, Opts.DirectivesOnly); // Expand macros in pragmas with -fms-extensions. The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. @@ -906,6 +1006,8 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, // After we have configured the preprocessor, enter the main file. PP.EnterMainSourceFile(); + if (Opts.DirectivesOnly) + PP.SetMacroExpansionOnlyInDirectives(); // Consume all of the tokens that come from the predefines buffer. Those // should not be emitted into the output and are guaranteed to be at the |