about | summary | refs | log | tree | commit | diff
path: root/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/DependencyDirectivesSourceMinimizer.cpp')
-rw-r--r-- lib/Lex/DependencyDirectivesSourceMinimizer.cpp 250
1 files changed, 221 insertions, 29 deletions
diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
index cfc37c5d3c62..f063ed711c44 100644
--- a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -59,6 +59,7 @@ private:
LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
@@ -184,26 +185,58 @@ static void skipRawString(const char *&First, const char *const End) {
}
}
+// Returns the length of the EOL at First: 0 (no end-of-line, including First == End), 1 (one vertical-whitespace character, e.g. \n) or 2 (two different vertical-whitespace characters in a row, e.g. \r\n).
+static unsigned isEOL(const char *First, const char *const End) {
+ if (First == End) // Nothing left to inspect.
+ return 0;
+ if (End - First > 1 && isVerticalWhitespace(First[0]) &&
+ isVerticalWhitespace(First[1]) && First[0] != First[1])
+ return 2; // Two distinct vertical-whitespace characters count as one EOL.
+ return !!isVerticalWhitespace(First[0]); // Normalize the result to 0 or 1.
+}
+
static void skipString(const char *&First, const char *const End) { // Advances First past the literal that starts at First, stopping early at an unescaped end of line or at End.
- assert(*First == '\'' || *First == '"');
- const char Terminator = *First;
- for (++First; First != End && *First != Terminator; ++First)
- if (*First == '\\')
- if (++First == End)
- return;
+ assert(*First == '\'' || *First == '"' || *First == '<');
+ const char Terminator = *First == '<' ? '>' : *First; // An opening angle bracket is closed by its counterpart; a quote closes itself.
+ for (++First; First != End && *First != Terminator; ++First) {
+ // Quoted literals and angle-bracketed sequences don't extend past the end of the line.
+ if (isVerticalWhitespace(*First))
+ return;
+ if (*First != '\\')
+ continue;
+ // Skip past backslash to the next character. This ensures that the
+ // character right after it is skipped as well, which matters if it's
+ // the terminator.
+ if (++First == End)
+ return;
+ if (!isWhitespace(*First))
+ continue;
+ // Whitespace after the backslash might indicate a line continuation.
+ const char *FirstAfterBackslashPastSpace = First;
+ skipOverSpaces(FirstAfterBackslashPastSpace, End);
+ if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
+ // Advance the character pointer to the next line for the next
+ // iteration.
+ First = FirstAfterBackslashPastSpace + NLSize - 1; // The loop increment then steps over the last EOL character.
+ }
+ }
if (First != End)
++First; // Finish off the string.
}
-static void skipNewline(const char *&First, const char *End) {
- assert(isVerticalWhitespace(*First));
- ++First;
+// Skips the newline at First, if any, and returns its length as reported by isEOL (0 when First == End).
+static unsigned skipNewline(const char *&First, const char *End) {
if (First == End)
- return;
+ return 0;
+ assert(isVerticalWhitespace(*First));
+ unsigned Len = isEOL(First, End);
+ assert(Len && "expected newline");
+ First += Len;
+ return Len;
+}
- // Check for "\n\r" and "\r\n".
- if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
- ++First;
+static bool wasLineContinuation(const char *First, unsigned EOLLen) { // First points just past an EOL of EOLLen characters.
+ return *(First - (int)EOLLen - 1) == '\\'; // Tests the character right before that EOL. NOTE(review): no bounds check here - assumes the caller guarantees a character precedes the EOL; confirm.
}
static void skipToNewlineRaw(const char *&First, const char *const End) {
@@ -211,27 +244,40 @@ static void skipToNewlineRaw(const char *&First, const char *const End) {
if (First == End)
return;
- if (isVerticalWhitespace(*First))
+ unsigned Len = isEOL(First, End);
+ if (Len)
return;
- while (!isVerticalWhitespace(*First))
+ do {
if (++First == End)
return;
+ Len = isEOL(First, End);
+ } while (!Len);
if (First[-1] != '\\')
return;
- ++First; // Keep going...
+ First += Len;
+ // Keep skipping lines...
}
}
-static const char *reverseOverSpaces(const char *First, const char *Last) {
+static const char *findLastNonSpace(const char *First, const char *Last) { // Returns the end of [First, Last) after dropping trailing horizontal whitespace.
assert(First <= Last);
while (First != Last && isHorizontalWhitespace(Last[-1])) // Walk backwards while the preceding character is horizontal whitespace.
--Last;
return Last; // Equals First when the whole range is horizontal whitespace or empty.
}
+static const char *findFirstTrailingSpace(const char *First,
+ const char *Last) { // Like findLastNonSpace, but keeps one trailing whitespace character when there is any.
+ const char *LastNonSpace = findLastNonSpace(First, Last);
+ if (Last == LastNonSpace) // No trailing horizontal whitespace to trim.
+ return Last;
+ assert(isHorizontalWhitespace(LastNonSpace[0]));
+ return LastNonSpace + 1; // End just past the first trailing whitespace character.
+}
+
static void skipLineComment(const char *&First, const char *const End) {
assert(First[0] == '/' && First[1] == '/');
First += 2;
@@ -276,7 +322,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start,
}
static void skipLine(const char *&First, const char *const End) {
- do {
+ for (;;) {
assert(First <= End);
if (First == End)
return;
@@ -321,9 +367,10 @@ static void skipLine(const char *&First, const char *const End) {
return;
// Skip over the newline.
- assert(isVerticalWhitespace(*First));
- skipNewline(First, End);
- } while (First[-2] == '\\'); // Continue past line-continuations.
+ unsigned Len = skipNewline(First, End);
+ if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
+ break;
+ }
}
static void skipDirective(StringRef Name, const char *&First,
@@ -343,7 +390,8 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {
const char *Last = First;
do {
// Iterate over strings correctly to avoid comments and newlines.
- if (*Last == '"' || *Last == '\'') {
+ if (*Last == '"' || *Last == '\'' ||
+ (*Last == '<' && top() == pp_include)) {
if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
skipRawString(Last, End);
else
@@ -361,7 +409,7 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {
}
// Deal with "//..." and "/*...*/".
- append(First, reverseOverSpaces(First, Last));
+ append(First, findFirstTrailingSpace(First, Last));
First = Last;
if (Last[1] == '/') {
@@ -376,13 +424,20 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {
} while (Last != End && !isVerticalWhitespace(*Last));
// Print out the string.
- if (Last == End || Last == First || Last[-1] != '\\') {
- append(First, reverseOverSpaces(First, Last));
+ const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
+ if (Last == End || LastBeforeTrailingSpace == First ||
+ LastBeforeTrailingSpace[-1] != '\\') {
+ append(First, LastBeforeTrailingSpace);
+ First = Last;
+ skipNewline(First, End);
return;
}
- // Print up to the backslash, backing up over spaces.
- append(First, reverseOverSpaces(First, Last - 1));
+ // Print up to the backslash, backing up over spaces. Preserve at least one
+ // space, as the space matters when tokens are separated by a line
+ // continuation.
+ append(First, findFirstTrailingSpace(
+ First, LastBeforeTrailingSpace - 1));
First = Last;
skipNewline(First, End);
@@ -576,6 +631,59 @@ bool Minimizer::lexAt(const char *&First, const char *const End) {
return false;
}
+bool Minimizer::lexModule(const char *&First, const char *const End) { // Lexes a line as a possible export/module/import declaration; every path returns false.
+ IdInfo Id = lexIdentifier(First, End);
+ First = Id.Last;
+ bool Export = false;
+ if (Id.Name == "export") { // A leading export must be followed by another identifier to stay relevant.
+ Export = true;
+ skipWhitespace(First, End);
+ if (!isIdentifierBody(*First)) { // NOTE(review): First is dereferenced without a First == End check - confirm the buffer is terminated.
+ skipLine(First, End);
+ return false;
+ }
+ Id = lexIdentifier(First, End);
+ First = Id.Last;
+ }
+
+ if (Id.Name != "module" && Id.Name != "import") { // Any other identifier makes this an ordinary line; skip it without output.
+ skipLine(First, End);
+ return false;
+ }
+
+ skipWhitespace(First, End);
+
+ // Ignore this as a module directive if the next character can't be part of
+ // a module or import declaration.
+
+ switch (*First) { // NOTE(review): also read without a First == End check - confirm.
+ case ':':
+ case '<':
+ case '"':
+ break;
+ default:
+ if (!isIdentifierBody(*First)) {
+ skipLine(First, End);
+ return false;
+ }
+ }
+
+ if (Export) { // Record the export keyword as its own token ahead of the module/import token.
+ makeToken(cxx_export_decl);
+ append("export ");
+ }
+
+ if (Id.Name == "module")
+ makeToken(cxx_module_decl);
+ else
+ makeToken(cxx_import_decl);
+ append(Id.Name);
+ append(" ");
+ printToNewline(First, End); // Emit the remainder of the line after the keyword.
+ append("\n");
+ return false;
+}
+
bool Minimizer::lexDefine(const char *&First, const char *const End) {
makeToken(pp_define);
append("#define ");
@@ -612,7 +720,21 @@ bool Minimizer::lexDefine(const char *&First, const char *const End) {
bool Minimizer::lexPragma(const char *&First, const char *const End) {
// #pragma.
- if (!isNextIdentifier("clang", First, End)) {
+ skipWhitespace(First, End);
+ if (First == End || !isIdentifierHead(*First))
+ return false;
+
+ IdInfo FoundId = lexIdentifier(First, End);
+ First = FoundId.Last;
+ if (FoundId.Name == "once") {
+ // #pragma once
+ skipLine(First, End);
+ makeToken(pp_pragma_once);
+ append("#pragma once\n");
+ return false;
+ }
+
+ if (FoundId.Name != "clang") {
skipLine(First, End);
return false;
}
@@ -663,6 +785,18 @@ bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
return false;
}
+static bool isStartOfRelevantLine(char First) { // Returns true when a line starting with this character needs lexing rather than skipping.
+ switch (First) {
+ case '#': // Preprocessing directive.
+ case '@': // Handled by lexAt.
+ case 'i': // Possible import, handled by lexModule.
+ case 'e': // Possible export, handled by lexModule.
+ case 'm': // Possible module, handled by lexModule.
+ return true;
+ }
+ return false; // Every other leading character marks a line the caller skips.
+}
+
bool Minimizer::lexPPLine(const char *&First, const char *const End) {
assert(First != End);
@@ -671,7 +805,7 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
if (First == End)
return false;
- if (*First != '#' && *First != '@') {
+ if (!isStartOfRelevantLine(*First)) {
skipLine(First, End);
assert(First <= End);
return false;
@@ -681,6 +815,9 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
if (*First == '@')
return lexAt(First, End);
+ if (*First == 'i' || *First == 'e' || *First == 'm')
+ return lexModule(First, End);
+
// Handle preprocessing directives.
++First; // Skip over '#'.
skipWhitespace(First, End);
@@ -729,7 +866,14 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
return lexDefault(Kind, Id.Name, First, End);
}
+static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { // Advances First past a leading EF BB BF byte sequence, if present.
+ if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && // The length check keeps the three reads in bounds.
+ First[2] == '\xbf')
+ First += 3; // Step over the three mark bytes; otherwise First is left untouched.
+}
+
bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+ skipUTF8ByteOrderMark(First, End);
while (First != End)
if (lexPPLine(First, End))
return true;
@@ -753,6 +897,54 @@ bool Minimizer::minimize() {
return Error;
}
+bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
+ ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) { // Appends one range per conditional branch to Range; returns true on an else/elif/endif with no open conditional, false otherwise.
+ struct Directive {
+ enum DirectiveKind {
+ If, // if/ifdef/ifndef
+ Else // elif/else
+ };
+ int Offset;
+ DirectiveKind Kind;
+ };
+ llvm::SmallVector<Directive, 32> Offsets; // Stack of conditional directives that are still open.
+ for (const Token &T : Input) {
+ switch (T.K) {
+ case pp_if:
+ case pp_ifdef:
+ case pp_ifndef:
+ Offsets.push_back({T.Offset, Directive::If}); // Opens a new conditional.
+ break;
+
+ case pp_elif:
+ case pp_else: {
+ if (Offsets.empty()) // Branch directive without an open conditional.
+ return true;
+ int PreviousOffset = Offsets.back().Offset;
+ Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); // Start at the previous directive, with the distance to this one as the second field.
+ Offsets.push_back({T.Offset, Directive::Else});
+ break;
+ }
+
+ case pp_endif: {
+ if (Offsets.empty()) // Closing directive without an open conditional.
+ return true;
+ int PreviousOffset = Offsets.back().Offset;
+ Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
+ do { // Pop the Else entries of this conditional and then its opening If entry.
+ Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
+ if (Kind == Directive::If)
+ break;
+ } while (!Offsets.empty());
+ break;
+ }
+ default: // Tokens that are not conditional directives do not affect the ranges.
+ break;
+ }
+ }
+ return false; // Conditionals still open at the end of Input are not reported as an error here.
+}
+
bool clang::minimizeSourceToDependencyDirectives(
StringRef Input, SmallVectorImpl<char> &Output,
SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,