aboutsummaryrefslogtreecommitdiff
path: root/lib/Format/BreakableToken.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format/BreakableToken.cpp')
-rw-r--r--lib/Format/BreakableToken.cpp736
1 files changed, 595 insertions, 141 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 6363f895f95b..c97486e4e4a7 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
#include "BreakableToken.h"
-#include "Comments.h"
+#include "ContinuationIndenter.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
@@ -40,6 +40,21 @@ static bool IsBlank(char C) {
}
}
+static StringRef getLineCommentIndentPrefix(StringRef Comment) {
+ static const char *const KnownPrefixes[] = {"///", "//", "//!"};
+ StringRef LongestPrefix;
+ for (StringRef KnownPrefix : KnownPrefixes) {
+ if (Comment.startswith(KnownPrefix)) {
+ size_t PrefixLength = KnownPrefix.size();
+ while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
+ ++PrefixLength;
+ if (PrefixLength > LongestPrefix.size())
+ LongestPrefix = Comment.substr(0, PrefixLength);
+ }
+ }
+ return LongestPrefix;
+}
+
static BreakableToken::Split getCommentSplit(StringRef Text,
unsigned ContentStartColumn,
unsigned ColumnLimit,
@@ -132,37 +147,61 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
return BreakableToken::Split(StringRef::npos, 0);
}
+bool switchesFormatting(const FormatToken &Token) {
+ assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
+ "formatting regions are switched by comment tokens");
+ StringRef Content = Token.TokenText.substr(2).ltrim();
+ return Content.startswith("clang-format on") ||
+ Content.startswith("clang-format off");
+}
+
+unsigned
+BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const {
+ // Example: consider the content
+ // lala lala
+ // - RemainingTokenColumns is the original number of columns, 10;
+ // - Split is (4, 2), denoting the two spaces between the two words;
+ //
+ // We compute the number of columns when the split is compressed into a single
+ // space, like:
+ // lala lala
+ return RemainingTokenColumns + 1 - Split.second;
+}
+
unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
return StartColumn + Prefix.size() + Postfix.size() +
- encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+ encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
StartColumn + Prefix.size(),
Style.TabWidth, Encoding);
}
BreakableSingleLineToken::BreakableSingleLineToken(
- const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
- StringRef Prefix, StringRef Postfix, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style),
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableToken(Tok, InPPDirective, Encoding, Style),
StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
- assert(Tok.TokenText.endswith(Postfix));
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
Line = Tok.TokenText.substr(
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}
BreakableStringLiteral::BreakableStringLiteral(
- const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
- StringRef Prefix, StringRef Postfix, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix,
- InPPDirective, Encoding, Style) {}
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
+ Encoding, Style) {}
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
return getStringSplit(Line.substr(TailOffset),
StartColumn + Prefix.size() + Postfix.size(),
ColumnLimit, Style.TabWidth, Encoding);
@@ -171,86 +210,149 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) {
- unsigned LeadingSpaces = StartColumn;
- // The '@' of an ObjC string literal (@"Test") does not become part of the
- // string token.
- // FIXME: It might be a cleaner solution to merge the tokens as a
- // precomputation step.
- if (Prefix.startswith("@"))
- --LeadingSpaces;
Whitespaces.replaceWhitespaceInToken(
Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
- Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces);
-}
-
-BreakableLineComment::BreakableLineComment(
- const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
- bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
- getLineCommentIndentPrefix(Token.TokenText), "",
- InPPDirective, Encoding, Style) {
- OriginalPrefix = Prefix;
- if (Token.TokenText.size() > Prefix.size() &&
- isAlphanumeric(Token.TokenText[Prefix.size()])) {
- if (Prefix == "//")
- Prefix = "// ";
- else if (Prefix == "///")
- Prefix = "/// ";
- else if (Prefix == "//!")
- Prefix = "//! ";
- }
+ Prefix, InPPDirective, 1, StartColumn);
}
+BreakableComment::BreakableComment(const FormatToken &Token,
+ unsigned StartColumn,
+ bool InPPDirective,
+ encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableToken(Token, InPPDirective, Encoding, Style),
+ StartColumn(StartColumn) {}
+
+unsigned BreakableComment::getLineCount() const { return Lines.size(); }
+
BreakableToken::Split
-BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
- return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ // Don't break lines matching the comment pragmas regex.
+ if (CommentPragmasRegex.match(Content[LineIndex]))
+ return Split(StringRef::npos, 0);
+ return getCommentSplit(Content[LineIndex].substr(TailOffset),
+ getContentStartColumn(LineIndex, TailOffset),
ColumnLimit, Style.TabWidth, Encoding);
}
-void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
- Split Split,
- WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ // Text is relative to the content line, but Whitespaces operates relative to
+ // the start of the corresponding token, so compute the start of the Split
+ // that needs to be compressed into a single space relative to the start of
+ // its token.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
Whitespaces.replaceWhitespaceInToken(
- Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
- Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
+ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
}
-void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
- unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
- Whitespaces.replaceWhitespaceInToken(
- Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
- "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
- /*Spaces=*/1);
-}
-
-void BreakableLineComment::replaceWhitespaceBefore(
- unsigned LineIndex, WhitespaceManager &Whitespaces) {
- if (OriginalPrefix != Prefix) {
- Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
- /*InPPDirective=*/false,
- /*Newlines=*/0, /*IndentLevel=*/0,
- /*Spaces=*/1);
+BreakableToken::Split
+BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit) const {
+ unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
+ StringRef TrimmedText = Text.rtrim(Blanks);
+ // This is the width of the resulting line in case the full line of Text gets
+ // reflown up starting at ReflowStartColumn.
+ unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
+ TrimmedText, ReflowStartColumn,
+ Style.TabWidth, Encoding);
+ // If the full line fits up, we return a reflow split after it,
+ // otherwise we compute the largest piece of text that fits after
+ // ReflowStartColumn.
+ Split ReflowSplit =
+ FullWidth <= ColumnLimit
+ ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
+ : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
+ Style.TabWidth, Encoding);
+
+ // We need to be extra careful here, because while it's OK to keep a long line
+ // if it can't be broken into smaller pieces (like when the first word of a
+ // long line is longer than the column limit), it's not OK to reflow that long
+ // word up. So we recompute the size of the previous line after reflowing and
+ // only return the reflow split if that's under the line limit.
+ if (ReflowSplit.first != StringRef::npos &&
+ // Check if the width of the newly reflown line is under the limit.
+ PreviousEndColumn + ReflowPrefix.size() +
+ encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
+ PreviousEndColumn +
+ ReflowPrefix.size(),
+ Style.TabWidth, Encoding) <=
+ ColumnLimit) {
+ return ReflowSplit;
}
+ return Split(StringRef::npos, 0);
+}
+
+const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
+ return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
+}
+
+static bool mayReflowContent(StringRef Content) {
+ Content = Content.trim(Blanks);
+ // Lines starting with '@' commonly have special meaning.
+ static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
+ "@", "TODO", "FIXME", "XXX"};
+ bool hasSpecialMeaningPrefix = false;
+ for (StringRef Prefix : kSpecialMeaningPrefixes) {
+ if (Content.startswith(Prefix)) {
+ hasSpecialMeaningPrefix = true;
+ break;
+ }
+ }
+ // Simple heuristic for what to reflow: content should contain at least two
+ // characters and either the first or second character must be
+ // non-punctuation.
+ return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
+ !Content.endswith("\\") &&
+ // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
+ // true, then the first code point must be 1 byte long.
+ (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
}
BreakableBlockComment::BreakableBlockComment(
- const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
+ const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
- StringRef TokenText(Token.TokenText);
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ assert(Tok.is(TT_BlockComment) &&
+ "block comment section must start with a block comment");
+
+ StringRef TokenText(Tok.TokenText);
assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
int IndentDelta = StartColumn - OriginalStartColumn;
- LeadingWhitespace.resize(Lines.size());
- StartOfLineColumn.resize(Lines.size());
- StartOfLineColumn[0] = StartColumn + 2;
+ Content.resize(Lines.size());
+ Content[0] = Lines[0];
+ ContentColumn.resize(Lines.size());
+ // Account for the initial '/*'.
+ ContentColumn[0] = StartColumn + 2;
+ Tokens.resize(Lines.size());
for (size_t i = 1; i < Lines.size(); ++i)
adjustWhitespace(i, IndentDelta);
+ // Align decorations with the column of the star on the first line,
+ // that is one column after the start "/*".
+ DecorationColumn = StartColumn + 1;
+
+ // Account for comment decoration patterns like this:
+ //
+ // /*
+ // ** blah blah blah
+ // */
+ if (Lines.size() >= 2 && Content[1].startswith("**") &&
+ static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
+ DecorationColumn = StartColumn;
+ }
+
Decoration = "* ";
if (Lines.size() == 1 && !FirstInLine) {
// Comments for which FirstInLine is false can start on arbitrary column,
@@ -262,49 +364,60 @@ BreakableBlockComment::BreakableBlockComment(
}
for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
// If the last line is empty, the closing "*/" will have a star.
- if (i + 1 == e && Lines[i].empty())
+ if (i + 1 == e && Content[i].empty())
break;
- if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
+ if (!Content[i].empty() && i + 1 != e &&
+ Decoration.startswith(Content[i]))
continue;
- while (!Lines[i].startswith(Decoration))
+ while (!Content[i].startswith(Decoration))
Decoration = Decoration.substr(0, Decoration.size() - 1);
}
LastLineNeedsDecoration = true;
- IndentAtLineBreak = StartOfLineColumn[0] + 1;
- for (size_t i = 1; i < Lines.size(); ++i) {
- if (Lines[i].empty()) {
- if (i + 1 == Lines.size()) {
+ IndentAtLineBreak = ContentColumn[0] + 1;
+ for (size_t i = 1, e = Lines.size(); i < e; ++i) {
+ if (Content[i].empty()) {
+ if (i + 1 == e) {
// Empty last line means that we already have a star as a part of the
// trailing */. We also need to preserve whitespace, so that */ is
// correctly indented.
LastLineNeedsDecoration = false;
+ // Align the star in the last '*/' with the stars on the previous lines.
+ if (e >= 2 && !Decoration.empty()) {
+ ContentColumn[i] = DecorationColumn;
+ }
} else if (Decoration.empty()) {
// For all other lines, set the start column to 0 if they're empty, so
// we do not insert trailing whitespace anywhere.
- StartOfLineColumn[i] = 0;
+ ContentColumn[i] = 0;
}
continue;
}
// The first line already excludes the star.
+ // The last line excludes the star if LastLineNeedsDecoration is false.
// For all other lines, adjust the line to exclude the star and
// (optionally) the first whitespace.
- unsigned DecorationSize =
- Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
- StartOfLineColumn[i] += DecorationSize;
- Lines[i] = Lines[i].substr(DecorationSize);
- LeadingWhitespace[i] += DecorationSize;
- if (!Decoration.startswith(Lines[i]))
+ unsigned DecorationSize = Decoration.startswith(Content[i])
+ ? Content[i].size()
+ : Decoration.size();
+ if (DecorationSize) {
+ ContentColumn[i] = DecorationColumn + DecorationSize;
+ }
+ Content[i] = Content[i].substr(DecorationSize);
+ if (!Decoration.startswith(Content[i]))
IndentAtLineBreak =
- std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
+ std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
}
- IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+ IndentAtLineBreak =
+ std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+
DEBUG({
llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
for (size_t i = 0; i < Lines.size(); ++i) {
- llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
- << "\n";
+ llvm::dbgs() << i << " |" << Content[i] << "| "
+ << "CC=" << ContentColumn[i] << "| "
+ << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
}
});
}
@@ -334,78 +447,162 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
// Adjust Lines to only contain relevant text.
- Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
- Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
- // Adjust LeadingWhitespace to account all whitespace between the lines
- // to the current line.
- LeadingWhitespace[LineIndex] =
- Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+ size_t PreviousContentOffset =
+ Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
+ Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
+ PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
+ Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
// Adjust the start column uniformly across all lines.
- StartOfLineColumn[LineIndex] =
+ ContentColumn[LineIndex] =
encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
IndentDelta;
}
-unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
-
unsigned BreakableBlockComment::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
- unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
- return ContentStartColumn +
- encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
- ContentStartColumn, Style.TabWidth,
- Encoding) +
- // The last line gets a "*/" postfix.
- (LineIndex + 1 == Lines.size() ? 2 : 0);
-}
-
-BreakableToken::Split
-BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
- return getCommentSplit(Lines[LineIndex].substr(TailOffset),
- getContentStartColumn(LineIndex, TailOffset),
- ColumnLimit, Style.TabWidth, Encoding);
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+ unsigned LineLength =
+ ContentStartColumn + encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(TailOffset, Length),
+ ContentStartColumn, Style.TabWidth, Encoding);
+ // The last line gets a "*/" postfix.
+ if (LineIndex + 1 == Lines.size()) {
+ LineLength += 2;
+ // We never need a decoration when breaking just the trailing "*/" postfix.
+ // Note that checking that Length == 0 is not enough, since Length could
+ // also be StringRef::npos.
+ if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+ LineLength -= Decoration.size();
+ }
+ }
+ return LineLength;
}
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
Split Split,
WhitespaceManager &Whitespaces) {
- StringRef Text = Lines[LineIndex].substr(TailOffset);
+ StringRef Text = Content[LineIndex].substr(TailOffset);
StringRef Prefix = Decoration;
+ // We need this to account for the case when we have a decoration "* " for all
+ // the lines except for the last one, where the star in "*/" acts as a
+ // decoration.
+ unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
if (LineIndex + 1 == Lines.size() &&
Text.size() == Split.first + Split.second) {
// For the last line we need to break before "*/", but not to add "* ".
Prefix = "";
+ if (LocalIndentAtLineBreak >= 2)
+ LocalIndentAtLineBreak -= 2;
}
-
+ // The split offset is from the beginning of the line. Convert it to an offset
+ // from the beginning of the token text.
unsigned BreakOffsetInToken =
- Text.data() - Tok.TokenText.data() + Split.first;
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
unsigned CharsToRemove = Split.second;
- assert(IndentAtLineBreak >= Decoration.size());
+ assert(LocalIndentAtLineBreak >= Prefix.size());
Whitespaces.replaceWhitespaceInToken(
- Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
- IndentLevel, IndentAtLineBreak - Decoration.size());
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,
+ InPPDirective, /*Newlines=*/1,
+ /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
}
-void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
- unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
- StringRef Text = Lines[LineIndex].substr(TailOffset);
- unsigned BreakOffsetInToken =
- Text.data() - Tok.TokenText.data() + Split.first;
- unsigned CharsToRemove = Split.second;
- Whitespaces.replaceWhitespaceInToken(
- Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
- /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
+BreakableToken::Split BreakableBlockComment::getSplitBefore(
+ unsigned LineIndex,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ if (!mayReflow(LineIndex, CommentPragmasRegex))
+ return Split(StringRef::npos, 0);
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
+ ColumnLimit);
+}
+
+unsigned BreakableBlockComment::getReflownColumn(
+ StringRef Content,
+ unsigned LineIndex,
+ unsigned PreviousEndColumn) const {
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+ // If this is the last line, it will carry around its '*/' postfix.
+ unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
+ // The line is composed of previous text, reflow prefix, reflown text and
+ // postfix.
+ unsigned ReflownColumn =
+ StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
+ Style.TabWidth, Encoding) +
+ PostfixLength;
+ return ReflownColumn;
}
+unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
+ unsigned LineIndex, unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ if (SplitBefore.first == StringRef::npos ||
+ // Block comment line contents contain the trailing whitespace after the
+ // decoration, so the need of left trim. Note that this behavior is
+ // consistent with the breaking of block comments where the indentation of
+ // a broken line is uniform across all the lines of the block comment.
+ SplitBefore.first + SplitBefore.second <
+ Content[LineIndex].ltrim().size()) {
+ // A piece of line, not the whole, gets reflown.
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ } else {
+ // The whole line gets reflown, need to check if we need to insert a break
+ // for the postfix or not.
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ unsigned ReflownColumn =
+ getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+ if (ReflownColumn <= ColumnLimit) {
+ return ReflownColumn;
+ }
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ }
+}
void BreakableBlockComment::replaceWhitespaceBefore(
- unsigned LineIndex, WhitespaceManager &Whitespaces) {
- if (LineIndex == 0)
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ Split SplitBefore, WhitespaceManager &Whitespaces) {
+ if (LineIndex == 0) return;
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ if (SplitBefore.first != StringRef::npos) {
+ // Here we need to reflow.
+ assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+ "Reflowing whitespace within a token");
+ // This is the offset of the end of the last line relative to the start of
+ // the token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = TrimmedContent.data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), WhitespaceOffsetInToken,
+ /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
+ /*Spaces=*/0);
+ // Check if we need to also insert a break at the whitespace range.
+ // For this we first adapt the reflow split relative to the beginning of the
+ // content.
+ // Note that we don't need a penalty for this break, since it doesn't change
+ // the total number of lines.
+ Split BreakSplit = SplitBefore;
+ BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
+ unsigned ReflownColumn =
+ getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+ if (ReflownColumn > ColumnLimit) {
+ insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
+ }
return;
+ }
+
+ // Here no reflow with the previous line will happen.
+ // Fix the decoration of the line at LineIndex.
StringRef Prefix = Decoration;
- if (Lines[LineIndex].empty()) {
+ if (Content[LineIndex].empty()) {
if (LineIndex + 1 == Lines.size()) {
if (!LastLineNeedsDecoration) {
// If the last line was empty, we don't need a prefix, as the */ will
@@ -418,19 +615,35 @@ void BreakableBlockComment::replaceWhitespaceBefore(
Prefix = Prefix.substr(0, 1);
}
} else {
- if (StartOfLineColumn[LineIndex] == 1) {
+ if (ContentColumn[LineIndex] == 1) {
// This line starts immediately after the decorating *.
Prefix = Prefix.substr(0, 1);
}
}
-
- unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
- Tok.TokenText.data() -
- LeadingWhitespace[LineIndex];
+ // This is the offset of the end of the last line relative to the start of the
+ // token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = Content[LineIndex].data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
Whitespaces.replaceWhitespaceInToken(
- Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
- InPPDirective, 1, IndentLevel,
- StartOfLineColumn[LineIndex] - Prefix.size());
+ tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
+ InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
+}
+
+bool BreakableBlockComment::mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const {
+ // Content[LineIndex] may exclude the indent after the '*' decoration. In that
+ // case, we compute the start of the comment pragma manually.
+ StringRef IndentContent = Content[LineIndex];
+ if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
+ IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
+ }
+ return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+ mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+ !switchesFormatting(tokenAt(LineIndex));
}
unsigned
@@ -439,7 +652,248 @@ BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
// If we break, we always break at the predefined indent.
if (TailOffset != 0)
return IndentAtLineBreak;
- return std::max(0, StartOfLineColumn[LineIndex]);
+ return std::max(0, ContentColumn[LineIndex]);
+}
+
+BreakableLineCommentSection::BreakableLineCommentSection(
+ const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ assert(Tok.is(TT_LineComment) &&
+ "line comment section must start with a line comment");
+ FormatToken *LineTok = nullptr;
+ for (const FormatToken *CurrentTok = &Tok;
+ CurrentTok && CurrentTok->is(TT_LineComment);
+ CurrentTok = CurrentTok->Next) {
+ LastLineTok = LineTok;
+ StringRef TokenText(CurrentTok->TokenText);
+ assert(TokenText.startswith("//"));
+ size_t FirstLineIndex = Lines.size();
+ TokenText.split(Lines, "\n");
+ Content.resize(Lines.size());
+ ContentColumn.resize(Lines.size());
+ OriginalContentColumn.resize(Lines.size());
+ Tokens.resize(Lines.size());
+ Prefix.resize(Lines.size());
+ OriginalPrefix.resize(Lines.size());
+ for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+ // We need to trim the blanks in case this is not the first line in a
+ // multiline comment. Then the indent is included in Lines[i].
+ StringRef IndentPrefix =
+ getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
+ assert(IndentPrefix.startswith("//"));
+ OriginalPrefix[i] = Prefix[i] = IndentPrefix;
+ if (Lines[i].size() > Prefix[i].size() &&
+ isAlphanumeric(Lines[i][Prefix[i].size()])) {
+ if (Prefix[i] == "//")
+ Prefix[i] = "// ";
+ else if (Prefix[i] == "///")
+ Prefix[i] = "/// ";
+ else if (Prefix[i] == "//!")
+ Prefix[i] = "//! ";
+ }
+
+ Tokens[i] = LineTok;
+ Content[i] = Lines[i].substr(IndentPrefix.size());
+ OriginalContentColumn[i] =
+ StartColumn +
+ encoding::columnWidthWithTabs(OriginalPrefix[i],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+ ContentColumn[i] =
+ StartColumn +
+ encoding::columnWidthWithTabs(Prefix[i],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+
+ // Calculate the end of the non-whitespace text in this line.
+ size_t EndOfLine = Content[i].find_last_not_of(Blanks);
+ if (EndOfLine == StringRef::npos)
+ EndOfLine = Content[i].size();
+ else
+ ++EndOfLine;
+ Content[i] = Content[i].substr(0, EndOfLine);
+ }
+ LineTok = CurrentTok->Next;
+ if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
+ // A line comment section needs to broken by a line comment that is
+ // preceded by at least two newlines. Note that we put this break here
+ // instead of breaking at a previous stage during parsing, since that
+ // would split the contents of the enum into two unwrapped lines in this
+ // example, which is undesirable:
+ // enum A {
+ // a, // comment about a
+ //
+ // // comment about b
+ // b
+ // };
+ //
+ // FIXME: Consider putting separate line comment sections as children to
+ // the unwrapped line instead.
+ break;
+ }
+ }
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ unsigned ContentStartColumn =
+ (TailOffset == 0 ? ContentColumn[LineIndex]
+ : OriginalContentColumn[LineIndex]);
+ return ContentStartColumn + encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(TailOffset, Length),
+ ContentStartColumn, Style.TabWidth, Encoding);
+}
+
+void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ // Compute the offset of the split relative to the beginning of the token
+ // text.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ // Compute the size of the new indent, including the size of the new prefix of
+ // the newly broken line.
+ unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
+ Prefix[LineIndex].size() -
+ OriginalPrefix[LineIndex].size();
+ assert(IndentAtLineBreak >= Prefix[LineIndex].size());
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
+ Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
+ /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
+}
+
+BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ if (!mayReflow(LineIndex, CommentPragmasRegex))
+ return Split(StringRef::npos, 0);
+ return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
+ ColumnLimit);
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
+ unsigned LineIndex, unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ if (SplitBefore.first == StringRef::npos ||
+ SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+ // A piece of line, not the whole line, gets reflown.
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ } else {
+ // The whole line gets reflown.
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+ return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+ }
+}
+
+void BreakableLineCommentSection::replaceWhitespaceBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ Split SplitBefore, WhitespaceManager &Whitespaces) {
+ // If this is the first line of a token, we need to inform Whitespace Manager
+ // about it: either adapt the whitespace range preceding it, or mark it as an
+ // untouchable token.
+ // This happens for instance here:
+ // // line 1 \
+ // // line 2
+ if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
+ if (SplitBefore.first != StringRef::npos) {
+ // Reflow happens between tokens. Replace the whitespace between the
+ // tokens by the empty string.
+ Whitespaces.replaceWhitespace(
+ *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
+ /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
+ // Replace the indent and prefix of the token with the reflow prefix.
+ unsigned WhitespaceLength =
+ Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+ Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+ /*Offset=*/0,
+ /*ReplaceChars=*/WhitespaceLength,
+ /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix,
+ /*InPPDirective=*/false,
+ /*Newlines=*/0,
+ /*Spaces=*/0);
+ } else {
+ // This is the first line for the current token, but no reflow with the
+ // previous token is necessary. However, we still may need to adjust the
+ // start column. Note that ContentColumn[LineIndex] is the expected
+ // content column after a possible update to the prefix, hence the prefix
+ // length change is included.
+ unsigned LineColumn =
+ ContentColumn[LineIndex] -
+ (Content[LineIndex].data() - Lines[LineIndex].data()) +
+ (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
+
+ // We always want to create a replacement instead of adding an untouchable
+ // token, even if LineColumn is the same as the original column of the
+ // token. This is because WhitespaceManager doesn't align trailing
+ // comments if they are untouchable.
+ Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+ /*Newlines=*/1,
+ /*Spaces=*/LineColumn,
+ /*StartOfTokenColumn=*/LineColumn,
+ /*InPPDirective=*/false);
+ }
+ }
+ if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
+ // Adjust the prefix if necessary.
+
+ // Take care of the space possibly introduced after a decoration.
+ assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
+ "Expecting a line comment prefix to differ from original by at most "
+ "a space");
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
+ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
+ }
+ // Add a break after a reflow split has been introduced, if necessary.
+ // Note that this break doesn't need to be penalized, since it doesn't change
+ // the number of lines.
+ if (SplitBefore.first != StringRef::npos &&
+ SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+ insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
+ }
+}
+
+void BreakableLineCommentSection::updateNextToken(LineState& State) const {
+ if (LastLineTok) {
+ State.NextToken = LastLineTok->Next;
+ }
+}
+
+bool BreakableLineCommentSection::mayReflow(
+ unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
+ // Line comments have the indent as part of the prefix, so we need to
+ // recompute the start of the line.
+ StringRef IndentContent = Content[LineIndex];
+ if (Lines[LineIndex].startswith("//")) {
+ IndentContent = Lines[LineIndex].substr(2);
+ }
+ return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+ mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+ !switchesFormatting(tokenAt(LineIndex)) &&
+ OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
+}
+
+unsigned
+BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ if (TailOffset != 0) {
+ return OriginalContentColumn[LineIndex];
+ }
+ return ContentColumn[LineIndex];
}
} // namespace format