1 files changed, 101 insertions, 9 deletions
diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp
index 881a7d9c61be..95da9ed6d238 100644
--- a/lib/AST/RawCommentList.cpp
+++ b/lib/AST/RawCommentList.cpp
@@ -80,7 +80,7 @@ bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
 }
 } // unnamed namespace
 
-/// \brief Determines whether there is only whitespace in `Buffer` between `P`
+/// Determines whether there is only whitespace in `Buffer` between `P`
 /// and the previous line.
 /// \param Buffer The buffer to search in.
 /// \param P The offset from the beginning of `Buffer` to start from.
@@ -107,10 +107,10 @@ static bool isOrdinaryKind(RawComment::CommentKind K) {
 }
 
 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
-                       bool Merged, bool ParseAllComments) :
+                       const CommentOptions &CommentOpts, bool Merged) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
-    IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
-    ParseAllComments(ParseAllComments) {
+    IsAttached(false), IsTrailingComment(false),
+    IsAlmostTrailingComment(false) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
     Kind = RCK_Invalid;
@@ -118,10 +118,11 @@ RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
   }
 
   // Guess comment kind.
-  std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
+  std::pair<CommentKind, bool> K =
+      getCommentKind(RawText, CommentOpts.ParseAllComments);
 
   // Guess whether an ordinary comment is trailing.
-  if (ParseAllComments && isOrdinaryKind(K.first)) {
+  if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
     FileID BeginFileID;
     unsigned BeginOffset;
     std::tie(BeginFileID, BeginOffset) =
@@ -270,6 +271,7 @@ static bool onlyWhitespaceBetween(SourceManager &SM,
 }
 
 void RawCommentList::addComment(const RawComment &RC,
+                                const CommentOptions &CommentOpts,
                                 llvm::BumpPtrAllocator &Allocator) {
   if (RC.isInvalid())
     return;
@@ -284,7 +286,7 @@ void RawCommentList::addComment(const RawComment &RC,
   }
 
   // Ordinary comments are not interesting for us.
-  if (RC.isOrdinary())
+  if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
     return;
 
   // If this is the first Doxygen comment, save it (because there isn't
@@ -317,8 +319,7 @@ void RawCommentList::addComment(const RawComment &RC,
       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
                             /*MaxNewlinesAllowed=*/1)) {
     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
-    *Comments.back() = RawComment(SourceMgr, MergedRange, true,
-                                  RC.isParseAllComments());
+    *Comments.back() = RawComment(SourceMgr, MergedRange, CommentOpts, true);
   } else {
     Comments.push_back(new (Allocator) RawComment(RC));
   }
@@ -334,3 +335,94 @@ void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> Deserialized
              BeforeThanCompare<RawComment>(SourceMgr));
   std::swap(Comments, MergedComments);
 }
+
+std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
+                                         DiagnosticsEngine &Diags) const {
+  llvm::StringRef CommentText = getRawText(SourceMgr);
+  if (CommentText.empty())
+    return "";
+
+  llvm::BumpPtrAllocator Allocator;
+  // We do not parse any commands, so CommentOptions are ignored by
+  // comments::Lexer. Therefore, we just use default-constructed options.
+  CommentOptions DefOpts;
+  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
+  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
+                    CommentText.begin(), CommentText.end(),
+                    /*ParseCommands=*/false);
+
+  std::string Result;
+  // Column just past the leading whitespace of the first line's first token.
+  // Whitespace is skipped up to this column and kept after it. It is computed
+  // while lexing the first line and reused for the remaining lines.
+  unsigned IndentColumn = 0;
+
+  // Processes one line of the comment and adds it to the result.
+  // Handles skipping the indent at the start of the line.
+  // Returns false when eof is reached and true otherwise.
+  auto LexLine = [&](bool IsFirstLine) -> bool {
+    comments::Token Tok;
+    // Lex the first token on the line. We handle it separately, because we
+    // need to fix up its indentation.
+    L.lex(Tok);
+    if (Tok.is(comments::tok::eof))
+      return false;
+    if (Tok.is(comments::tok::newline)) {
+      Result += "\n";
+      return true;
+    }
+    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
+    bool LocInvalid = false;
+    unsigned TokColumn =
+        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
+    assert(!LocInvalid && "getFormattedText for invalid location");
+
+    // Amount of leading whitespace in TokText.
+    size_t WhitespaceLen = TokText.find_first_not_of(" \t");
+    if (WhitespaceLen == StringRef::npos)
+      WhitespaceLen = TokText.size();
+    // Remember the amount of whitespace we skipped in the first line to remove
+    // indent up to that column in the following lines.
+    if (IsFirstLine)
+      IndentColumn = TokColumn + WhitespaceLen;
+
+    // Amount of leading whitespace we actually want to skip.
+    // For the first line we skip all the whitespace.
+    // For the rest of the lines, we skip whitespace up to IndentColumn.
+    unsigned SkipLen =
+        IsFirstLine
+            ? WhitespaceLen
+            : std::min<size_t>(
+                  WhitespaceLen,
+                  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
+    llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
+    Result += Trimmed;
+    // Lex all tokens in the rest of the line.
+    for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
+      if (Tok.is(comments::tok::newline)) {
+        Result += "\n";
+        return true;
+      }
+      Result += L.getSpelling(Tok, SourceMgr);
+    }
+    // We've reached the end of file token.
+    return false;
+  };
+
+  // Str may be empty, or become empty once every '\n' is popped: guard back().
+  auto DropTrailingNewLines = [](std::string &Str) {
+    while (!Str.empty() && Str.back() == '\n')
+      Str.pop_back();
+  };
+
+  // Process first line separately to remember indent for the following lines.
+  if (!LexLine(/*IsFirstLine=*/true)) {
+    DropTrailingNewLines(Result);
+    return Result;
+  }
+  // Process the rest of the lines.
+  while (LexLine(/*IsFirstLine=*/false))
+    ;
+  DropTrailingNewLines(Result);
+  return Result;
+}