diff options
Diffstat (limited to 'lib/Lex/DependencyDirectivesSourceMinimizer.cpp')
-rw-r--r-- | lib/Lex/DependencyDirectivesSourceMinimizer.cpp | 250 |
1 files changed, 221 insertions, 29 deletions
diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp index cfc37c5d3c62..f063ed711c44 100644 --- a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -59,6 +59,7 @@ private: LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); + LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); @@ -184,26 +185,58 @@ static void skipRawString(const char *&First, const char *const End) { } } +// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) +static unsigned isEOL(const char *First, const char *const End) { + if (First == End) + return 0; + if (End - First > 1 && isVerticalWhitespace(First[0]) && + isVerticalWhitespace(First[1]) && First[0] != First[1]) + return 2; + return !!isVerticalWhitespace(First[0]); +} + static void skipString(const char *&First, const char *const End) { - assert(*First == '\'' || *First == '"'); - const char Terminator = *First; - for (++First; First != End && *First != Terminator; ++First) - if (*First == '\\') - if (++First == End) - return; + assert(*First == '\'' || *First == '"' || *First == '<'); + const char Terminator = *First == '<' ? '>' : *First; + for (++First; First != End && *First != Terminator; ++First) { + // String and character literals don't extend past the end of the line. + if (isVerticalWhitespace(*First)) + return; + if (*First != '\\') + continue; + // Skip past backslash to the next character. This ensures that the + // character right after it is skipped as well, which matters if it's + // the terminator. + if (++First == End) + return; + if (!isWhitespace(*First)) + continue; + // Whitespace after the backslash might indicate a line continuation. + const char *FirstAfterBackslashPastSpace = First; + skipOverSpaces(FirstAfterBackslashPastSpace, End); + if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { + // Advance the character pointer to the next line for the next + // iteration. + First = FirstAfterBackslashPastSpace + NLSize - 1; + } + } if (First != End) ++First; // Finish off the string. } -static void skipNewline(const char *&First, const char *End) { - assert(isVerticalWhitespace(*First)); - ++First; +// Returns the length of the skipped newline +static unsigned skipNewline(const char *&First, const char *End) { if (First == End) - return; + return 0; + assert(isVerticalWhitespace(*First)); + unsigned Len = isEOL(First, End); + assert(Len && "expected newline"); + First += Len; + return Len; +} - // Check for "\n\r" and "\r\n". - if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) - ++First; +static bool wasLineContinuation(const char *First, unsigned EOLLen) { + return *(First - (int)EOLLen - 1) == '\\'; } static void skipToNewlineRaw(const char *&First, const char *const End) { @@ -211,27 +244,40 @@ static void skipToNewlineRaw(const char *&First, const char *const End) { if (First == End) return; - if (isVerticalWhitespace(*First)) + unsigned Len = isEOL(First, End); + if (Len) return; - while (!isVerticalWhitespace(*First)) + do { if (++First == End) return; + Len = isEOL(First, End); + } while (!Len); if (First[-1] != '\\') return; - ++First; // Keep going... + First += Len; + // Keep skipping lines... } } -static const char *reverseOverSpaces(const char *First, const char *Last) { +static const char *findLastNonSpace(const char *First, const char *Last) { assert(First <= Last); while (First != Last && isHorizontalWhitespace(Last[-1])) --Last; return Last; } +static const char *findFirstTrailingSpace(const char *First, + const char *Last) { + const char *LastNonSpace = findLastNonSpace(First, Last); + if (Last == LastNonSpace) + return Last; + assert(isHorizontalWhitespace(LastNonSpace[0])); + return LastNonSpace + 1; +} + static void skipLineComment(const char *&First, const char *const End) { assert(First[0] == '/' && First[1] == '/'); First += 2; @@ -276,7 +322,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start, } static void skipLine(const char *&First, const char *const End) { - do { + for (;;) { assert(First <= End); if (First == End) return; @@ -321,9 +367,10 @@ static void skipLine(const char *&First, const char *const End) { return; // Skip over the newline. - assert(isVerticalWhitespace(*First)); - skipNewline(First, End); - } while (First[-2] == '\\'); // Continue past line-continuations. + unsigned Len = skipNewline(First, End); + if (!wasLineContinuation(First, Len)) // Continue past line-continuations. + break; + } } static void skipDirective(StringRef Name, const char *&First, @@ -343,7 +390,8 @@ void Minimizer::printToNewline(const char *&First, const char *const End) { const char *Last = First; do { // Iterate over strings correctly to avoid comments and newlines. - if (*Last == '"' || *Last == '\'') { + if (*Last == '"' || *Last == '\'' || + (*Last == '<' && top() == pp_include)) { if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) skipRawString(Last, End); else @@ -361,7 +409,7 @@ void Minimizer::printToNewline(const char *&First, const char *const End) { } // Deal with "//..." and "/*...*/". - append(First, reverseOverSpaces(First, Last)); + append(First, findFirstTrailingSpace(First, Last)); First = Last; if (Last[1] == '/') { @@ -376,13 +424,20 @@ void Minimizer::printToNewline(const char *&First, const char *const End) { } while (Last != End && !isVerticalWhitespace(*Last)); // Print out the string. - if (Last == End || Last == First || Last[-1] != '\\') { - append(First, reverseOverSpaces(First, Last)); + const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); + if (Last == End || LastBeforeTrailingSpace == First || + LastBeforeTrailingSpace[-1] != '\\') { + append(First, LastBeforeTrailingSpace); + First = Last; + skipNewline(First, End); return; } - // Print up to the backslash, backing up over spaces. - append(First, reverseOverSpaces(First, Last - 1)); + // Print up to the backslash, backing up over spaces. Preserve at least one + // space, as the space matters when tokens are separated by a line + // continuation. + append(First, findFirstTrailingSpace( + First, LastBeforeTrailingSpace - 1)); First = Last; skipNewline(First, End); @@ -576,6 +631,59 @@ bool Minimizer::lexAt(const char *&First, const char *const End) { return false; } +bool Minimizer::lexModule(const char *&First, const char *const End) { + IdInfo Id = lexIdentifier(First, End); + First = Id.Last; + bool Export = false; + if (Id.Name == "export") { + Export = true; + skipWhitespace(First, End); + if (!isIdentifierBody(*First)) { + skipLine(First, End); + return false; + } + Id = lexIdentifier(First, End); + First = Id.Last; + } + + if (Id.Name != "module" && Id.Name != "import") { + skipLine(First, End); + return false; + } + + skipWhitespace(First, End); + + // Ignore this as a module directive if the next character can't be part of + // an import. + + switch (*First) { + case ':': + case '<': + case '"': + break; + default: + if (!isIdentifierBody(*First)) { + skipLine(First, End); + return false; + } + } + + if (Export) { + makeToken(cxx_export_decl); + append("export "); + } + + if (Id.Name == "module") + makeToken(cxx_module_decl); + else + makeToken(cxx_import_decl); + append(Id.Name); + append(" "); + printToNewline(First, End); + append("\n"); + return false; +} + bool Minimizer::lexDefine(const char *&First, const char *const End) { makeToken(pp_define); append("#define "); @@ -612,7 +720,21 @@ bool Minimizer::lexDefine(const char *&First, const char *const End) { bool Minimizer::lexPragma(const char *&First, const char *const End) { // #pragma. - if (!isNextIdentifier("clang", First, End)) { + skipWhitespace(First, End); + if (First == End || !isIdentifierHead(*First)) + return false; + + IdInfo FoundId = lexIdentifier(First, End); + First = FoundId.Last; + if (FoundId.Name == "once") { + // #pragma once + skipLine(First, End); + makeToken(pp_pragma_once); + append("#pragma once\n"); + return false; + } + + if (FoundId.Name != "clang") { skipLine(First, End); return false; } @@ -663,6 +785,18 @@ bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, return false; } +static bool isStartOfRelevantLine(char First) { + switch (First) { + case '#': + case '@': + case 'i': + case 'e': + case 'm': + return true; + } + return false; +} + bool Minimizer::lexPPLine(const char *&First, const char *const End) { assert(First != End); @@ -671,7 +805,7 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { if (First == End) return false; - if (*First != '#' && *First != '@') { + if (!isStartOfRelevantLine(*First)) { skipLine(First, End); assert(First <= End); return false; @@ -681,6 +815,9 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { if (*First == '@') return lexAt(First, End); + if (*First == 'i' || *First == 'e' || *First == 'm') + return lexModule(First, End); + // Handle preprocessing directives. ++First; // Skip over '#'. skipWhitespace(First, End); @@ -729,7 +866,14 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { return lexDefault(Kind, Id.Name, First, End); } +static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { + if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && + First[2] == '\xbf') + First += 3; +} + bool Minimizer::minimizeImpl(const char *First, const char *const End) { + skipUTF8ByteOrderMark(First, End); while (First != End) if (lexPPLine(First, End)) return true; @@ -753,6 +897,54 @@ bool Minimizer::minimize() { return Error; } +bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( + ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) { + struct Directive { + enum DirectiveKind { + If, // if/ifdef/ifndef + Else // elif,else + }; + int Offset; + DirectiveKind Kind; + }; + llvm::SmallVector<Directive, 32> Offsets; + for (const Token &T : Input) { + switch (T.K) { + case pp_if: + case pp_ifdef: + case pp_ifndef: + Offsets.push_back({T.Offset, Directive::If}); + break; + + case pp_elif: + case pp_else: { + if (Offsets.empty()) + return true; + int PreviousOffset = Offsets.back().Offset; + Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); + Offsets.push_back({T.Offset, Directive::Else}); + break; + } + + case pp_endif: { + if (Offsets.empty()) + return true; + int PreviousOffset = Offsets.back().Offset; + Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); + do { + Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; + if (Kind == Directive::If) + break; + } while (!Offsets.empty()); + break; + } + default: + break; + } + } + return false; +} + bool clang::minimizeSourceToDependencyDirectives( StringRef Input, SmallVectorImpl<char> &Output, SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags, |