1 files changed, 350 insertions, 351 deletions
diff --git a/contrib/llvm-project/clang/lib/Frontend/TextDiagnostic.cpp b/contrib/llvm-project/clang/lib/Frontend/TextDiagnostic.cpp
index 8df7496c6ddd..779dead5d058 100644
--- a/contrib/llvm-project/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/contrib/llvm-project/clang/lib/Frontend/TextDiagnostic.cpp
@@ -20,11 +20,11 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <optional>
 
 using namespace clang;
 
-static const enum raw_ostream::Colors noteColor =
-  raw_ostream::BLACK;
+static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN;
 static const enum raw_ostream::Colors remarkColor =
   raw_ostream::BLUE;
 static const enum raw_ostream::Colors fixitColor =
@@ -44,7 +44,7 @@ static const enum raw_ostream::Colors savedColor =
 /// Add highlights to differences in template strings.
 static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
                                       bool &Normal, bool Bold) {
-  while (1) {
+  while (true) {
     size_t Pos = Str.find(ToggleHighlight);
     OS << Str.slice(0, Pos);
     if (Pos == StringRef::npos)
@@ -90,89 +90,108 @@ static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) {
 ///        printableTextForNextCharacter.
 ///
 /// \param SourceLine The line of source
-/// \param i Pointer to byte index,
+/// \param I Pointer to byte index,
 /// \param TabStop used to expand tabs
 /// \return pair(printable text, 'true' iff original text was printable)
 ///
 static std::pair<SmallString<16>, bool>
-printableTextForNextCharacter(StringRef SourceLine, size_t *i,
+printableTextForNextCharacter(StringRef SourceLine, size_t *I,
                               unsigned TabStop) {
-  assert(i && "i must not be null");
-  assert(*i<SourceLine.size() && "must point to a valid index");
+  assert(I && "I must not be null");
+  assert(*I < SourceLine.size() && "must point to a valid index");
 
-  if (SourceLine[*i]=='\t') {
+  if (SourceLine[*I] == '\t') {
     assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&
            "Invalid -ftabstop value");
-    unsigned col = bytesSincePreviousTabOrLineBegin(SourceLine, *i);
-    unsigned NumSpaces = TabStop - col%TabStop;
+    unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
+    unsigned NumSpaces = TabStop - (Col % TabStop);
     assert(0 < NumSpaces && NumSpaces <= TabStop
            && "Invalid computation of space amt");
-    ++(*i);
+    ++(*I);
 
-    SmallString<16> expandedTab;
-    expandedTab.assign(NumSpaces, ' ');
-    return std::make_pair(expandedTab, true);
+    SmallString<16> ExpandedTab;
+    ExpandedTab.assign(NumSpaces, ' ');
+    return std::make_pair(ExpandedTab, true);
   }
 
-  unsigned char const *begin, *end;
-  begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i));
-  end = begin + (SourceLine.size() - *i);
-
-  if (llvm::isLegalUTF8Sequence(begin, end)) {
-    llvm::UTF32 c;
-    llvm::UTF32 *cptr = &c;
-    unsigned char const *original_begin = begin;
-    unsigned char const *cp_end =
-        begin + llvm::getNumBytesForUTF8(SourceLine[*i]);
-
-    llvm::ConversionResult res = llvm::ConvertUTF8toUTF32(
-        &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion);
-    (void)res;
-    assert(llvm::conversionOK == res);
-    assert(0 < begin-original_begin
-           && "we must be further along in the string now");
-    *i += begin-original_begin;
-
-    if (!llvm::sys::locale::isPrint(c)) {
-      // If next character is valid UTF-8, but not printable
-      SmallString<16> expandedCP("<U+>");
-      while (c) {
-        expandedCP.insert(expandedCP.begin()+3, llvm::hexdigit(c%16));
-        c/=16;
-      }
-      while (expandedCP.size() < 8)
-        expandedCP.insert(expandedCP.begin()+3, llvm::hexdigit(0));
-      return std::make_pair(expandedCP, false);
-    }
-
-    // If next character is valid UTF-8, and printable
-    return std::make_pair(SmallString<16>(original_begin, cp_end), true);
+  const unsigned char *Begin = SourceLine.bytes_begin() + *I;
 
+  // Fast path for the common ASCII case.
+  if (*Begin < 0x80 && llvm::sys::locale::isPrint(*Begin)) {
+    ++(*I);
+    return std::make_pair(SmallString<16>(Begin, Begin + 1), true);
+  }
+  unsigned CharSize = llvm::getNumBytesForUTF8(*Begin);
+  const unsigned char *End = Begin + CharSize;
+
+  // Convert it to UTF32 and check if it's printable.
+  if (End <= SourceLine.bytes_end() && llvm::isLegalUTF8Sequence(Begin, End)) {
+    llvm::UTF32 C;
+    llvm::UTF32 *CPtr = &C;
+
+    // Begin and end before conversion.
+    unsigned char const *OriginalBegin = Begin;
+    llvm::ConversionResult Res = llvm::ConvertUTF8toUTF32(
+        &Begin, End, &CPtr, CPtr + 1, llvm::strictConversion);
+    (void)Res;
+    assert(Res == llvm::conversionOK);
+    assert(OriginalBegin < Begin);
+    assert((Begin - OriginalBegin) == CharSize);
+
+    (*I) += (Begin - OriginalBegin);
+
+    // Valid, multi-byte, printable UTF8 character.
+    if (llvm::sys::locale::isPrint(C))
+      return std::make_pair(SmallString<16>(OriginalBegin, End), true);
+
+    // Valid but not printable.
+    SmallString<16> Str("<U+>");
+    while (C) {
+      Str.insert(Str.begin() + 3, llvm::hexdigit(C % 16));
+      C /= 16;
+    }
+    while (Str.size() < 8)
+      Str.insert(Str.begin() + 3, llvm::hexdigit(0));
+    return std::make_pair(Str, false);
   }
 
-  // If next byte is not valid UTF-8 (and therefore not printable)
-  SmallString<16> expandedByte("<XX>");
-  unsigned char byte = SourceLine[*i];
-  expandedByte[1] = llvm::hexdigit(byte / 16);
-  expandedByte[2] = llvm::hexdigit(byte % 16);
-  ++(*i);
-  return std::make_pair(expandedByte, false);
+  // Otherwise, not printable since it's not valid UTF8.
+  SmallString<16> ExpandedByte("<XX>");
+  unsigned char Byte = SourceLine[*I];
+  ExpandedByte[1] = llvm::hexdigit(Byte / 16);
+  ExpandedByte[2] = llvm::hexdigit(Byte % 16);
+  ++(*I);
+  return std::make_pair(ExpandedByte, false);
 }
 
 static void expandTabs(std::string &SourceLine, unsigned TabStop) {
-  size_t i = SourceLine.size();
-  while (i>0) {
-    i--;
-    if (SourceLine[i]!='\t')
+  size_t I = SourceLine.size();
+  while (I > 0) {
+    I--;
+    if (SourceLine[I] != '\t')
       continue;
-    size_t tmp_i = i;
-    std::pair<SmallString<16>,bool> res
-      = printableTextForNextCharacter(SourceLine, &tmp_i, TabStop);
-    SourceLine.replace(i, 1, res.first.c_str());
+    size_t TmpI = I;
+    auto [Str, Printable] =
+        printableTextForNextCharacter(SourceLine, &TmpI, TabStop);
+    SourceLine.replace(I, 1, Str.c_str());
   }
 }
 
-/// This function takes a raw source line and produces a mapping from the bytes
+/// \p BytesOut:
+///  A mapping from columns to the byte of the source line that produced the
+///  character displaying at that column. This is the inverse of \p ColumnsOut.
+///
+/// The last element in the array is the number of bytes in the source string.
+///
+/// example: (given a tabstop of 8)
+///
+///    "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7}
+///
+///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
+///   display)
+///
+/// \p ColumnsOut:
+///  A mapping from the bytes
 ///  of the printable representation of the line to the columns those printable
 ///  characters will appear at (numbering the first column as 0).
 ///
@@ -194,60 +213,34 @@ static void expandTabs(std::string &SourceLine, unsigned TabStop) {
 ///
 ///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
 ///   display)
-static void byteToColumn(StringRef SourceLine, unsigned TabStop,
-                         SmallVectorImpl<int> &out) {
-  out.clear();
+static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop,
+                                 SmallVectorImpl<int> &BytesOut,
+                                 SmallVectorImpl<int> &ColumnsOut) {
+  assert(BytesOut.empty());
+  assert(ColumnsOut.empty());
 
   if (SourceLine.empty()) {
-    out.resize(1u,0);
+    BytesOut.resize(1u, 0);
+    ColumnsOut.resize(1u, 0);
     return;
   }
 
-  out.resize(SourceLine.size()+1, -1);
-
-  int columns = 0;
-  size_t i = 0;
-  while (i<SourceLine.size()) {
-    out[i] = columns;
-    std::pair<SmallString<16>,bool> res
-      = printableTextForNextCharacter(SourceLine, &i, TabStop);
-    columns += llvm::sys::locale::columnWidth(res.first);
-  }
-  out.back() = columns;
-}
-
-/// This function takes a raw source line and produces a mapping from columns
-///  to the byte of the source line that produced the character displaying at
-///  that column. This is the inverse of the mapping produced by byteToColumn()
-///
-/// The last element in the array is the number of bytes in the source string
-///
-/// example: (given a tabstop of 8)
-///
-///    "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7}
-///
-///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
-///   display)
-static void columnToByte(StringRef SourceLine, unsigned TabStop,
-                         SmallVectorImpl<int> &out) {
-  out.clear();
-
-  if (SourceLine.empty()) {
-    out.resize(1u, 0);
-    return;
+  ColumnsOut.resize(SourceLine.size() + 1, -1);
+
+  int Columns = 0;
+  size_t I = 0;
+  while (I < SourceLine.size()) {
+    ColumnsOut[I] = Columns;
+    BytesOut.resize(Columns + 1, -1);
+    BytesOut.back() = I;
+    auto [Str, Printable] =
+        printableTextForNextCharacter(SourceLine, &I, TabStop);
+    Columns += llvm::sys::locale::columnWidth(Str);
   }
 
-  int columns = 0;
-  size_t i = 0;
-  while (i<SourceLine.size()) {
-    out.resize(columns+1, -1);
-    out.back() = i;
-    std::pair<SmallString<16>,bool> res
-      = printableTextForNextCharacter(SourceLine, &i, TabStop);
-    columns += llvm::sys::locale::columnWidth(res.first);
-  }
-  out.resize(columns+1, -1);
-  out.back() = i;
+  ColumnsOut.back() = Columns;
+  BytesOut.resize(Columns + 1, -1);
+  BytesOut.back() = I;
 }
 
 namespace {
@@ -255,8 +248,7 @@ struct SourceColumnMap {
   SourceColumnMap(StringRef SourceLine, unsigned TabStop)
   : m_SourceLine(SourceLine) {
 
-    ::byteToColumn(SourceLine, TabStop, m_byteToColumn);
-    ::columnToByte(SourceLine, TabStop, m_columnToByte);
+    genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn);
 
     assert(m_byteToColumn.size()==SourceLine.size()+1);
     assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size());
@@ -332,8 +324,7 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
     return;
 
   // No special characters are allowed in CaretLine.
-  assert(CaretLine.end() ==
-         llvm::find_if(CaretLine, [](char c) { return c < ' ' || '~' < c; }));
+  assert(llvm::none_of(CaretLine, [](char c) { return c < ' ' || '~' < c; }));
 
   // Find the slice that we need to display the full caret line
   // correctly.
@@ -471,9 +462,7 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
   CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource;
 
   // [CaretStart, CaretEnd) is the slice we want. Update the various
-  // output lines to show only this slice, with two-space padding
-  // before the lines so that it looks nicer.
-
+  // output lines to show only this slice.
   assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 &&
          SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1);
   assert(SourceStart <= SourceEnd);
@@ -605,21 +594,13 @@ static unsigned findEndOfWord(unsigned Start, StringRef Str,
 /// Str will be printed. This will be non-zero when part of the first
 /// line has already been printed.
 /// \param Bold if the current text should be bold
-/// \param Indentation the number of spaces to indent any lines beyond
-/// the first line.
 /// \returns true if word-wrapping was required, or false if the
 /// string fit on the first line.
-static bool printWordWrapped(raw_ostream &OS, StringRef Str,
-                             unsigned Columns,
-                             unsigned Column = 0,
-                             bool Bold = false,
-                             unsigned Indentation = WordWrapIndentation) {
+static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
+                             unsigned Column, bool Bold) {
   const unsigned Length = std::min(Str.find('\n'), Str.size());
   bool TextNormal = true;
 
-  // The string used to indent each line.
-  SmallString<16> IndentStr;
-  IndentStr.assign(Indentation, ' ');
   bool Wrapped = false;
   for (unsigned WordStart = 0, WordEnd; WordStart < Length;
        WordStart = WordEnd) {
@@ -648,10 +629,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str,
     // This word does not fit on the current line, so wrap to the next
     // line.
     OS << '\n';
-    OS.write(&IndentStr[0], Indentation);
+    OS.indent(WordWrapIndentation);
     applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength),
                               TextNormal, Bold);
-    Column = Indentation + WordLength;
+    Column = WordWrapIndentation + WordLength;
     Wrapped = true;
   }
 
@@ -754,7 +735,7 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
   SmallString<4096> TmpFilename;
 #endif
   if (DiagOpts->AbsolutePath) {
-    auto File = SM.getFileManager().getFile(Filename);
+    auto File = SM.getFileManager().getOptionalFileRef(Filename);
     if (File) {
       // We want to print a simplified absolute path, i. e. without "dots".
       //
@@ -771,7 +752,7 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
       // on Windows we can just use llvm::sys::path::remove_dots(), because,
       // on that system, both aforementioned paths point to the same place.
 #ifdef _WIN32
-      TmpFilename = (*File)->getName();
+      TmpFilename = File->getName();
       llvm::sys::fs::make_absolute(TmpFilename);
       llvm::sys::path::native(TmpFilename);
       llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
@@ -787,7 +768,7 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
 
 /// Print out the file/line/column information and include trace.
 ///
-/// This method handlen the emission of the diagnostic location information.
+/// This method handles the emission of the diagnostic location information.
 /// This includes extracting as much location information as is present for
 /// the diagnostic and printing it, as well as any include stack or source
 /// ranges necessary.
@@ -796,10 +777,8 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                                        ArrayRef<CharSourceRange> Ranges) {
   if (PLoc.isInvalid()) {
     // At least print the file name if available:
-    FileID FID = Loc.getFileID();
-    if (FID.isValid()) {
-      const FileEntry *FE = Loc.getFileEntry();
-      if (FE && FE->isValid()) {
+    if (FileID FID = Loc.getFileID(); FID.isValid()) {
+      if (OptionalFileEntryRef FE = Loc.getFileEntryRef()) {
         emitFilename(FE->getName(), Loc.getManager());
         OS << ": ";
       }
@@ -816,6 +795,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
 
   emitFilename(PLoc.getFilename(), Loc.getManager());
   switch (DiagOpts->getFormat()) {
+  case DiagnosticOptions::SARIF:
   case DiagnosticOptions::Clang:
     if (DiagOpts->ShowLine)
       OS << ':' << LineNo;
@@ -838,6 +818,7 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
       OS << ColNo;
     }
   switch (DiagOpts->getFormat()) {
+  case DiagnosticOptions::SARIF:
   case DiagnosticOptions::Clang:
   case DiagnosticOptions::Vi:    OS << ':';    break;
   case DiagnosticOptions::MSVC:
@@ -854,31 +835,26 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
   if (DiagOpts->ShowSourceRanges && !Ranges.empty()) {
     FileID CaretFileID = Loc.getExpansionLoc().getFileID();
     bool PrintedRange = false;
+    const SourceManager &SM = Loc.getManager();
 
-    for (ArrayRef<CharSourceRange>::const_iterator RI = Ranges.begin(),
-         RE = Ranges.end();
-         RI != RE; ++RI) {
+    for (const auto &R : Ranges) {
       // Ignore invalid ranges.
-      if (!RI->isValid()) continue;
+      if (!R.isValid())
+        continue;
 
-      auto &SM = Loc.getManager();
-      SourceLocation B = SM.getExpansionLoc(RI->getBegin());
-      CharSourceRange ERange = SM.getExpansionRange(RI->getEnd());
+      SourceLocation B = SM.getExpansionLoc(R.getBegin());
+      CharSourceRange ERange = SM.getExpansionRange(R.getEnd());
       SourceLocation E = ERange.getEnd();
-      bool IsTokenRange = ERange.isTokenRange();
 
-      std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(B);
-      std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(E);
-
-      // If the start or end of the range is in another file, just discard
-      // it.
-      if (BInfo.first != CaretFileID || EInfo.first != CaretFileID)
+      // If the start or end of the range is in another file, just
+      // discard it.
+      if (SM.getFileID(B) != CaretFileID || SM.getFileID(E) != CaretFileID)
         continue;
 
       // Add in the length of the token, so that we cover multi-char
       // tokens.
       unsigned TokSize = 0;
-      if (IsTokenRange)
+      if (ERange.isTokenRange())
         TokSize = Lexer::MeasureTokenLength(E, SM, LangOpts);
 
       FullSourceLoc BF(B, SM), EF(E, SM);
@@ -896,10 +872,11 @@ void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
 }
 
 void TextDiagnostic::emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) {
-  if (DiagOpts->ShowLocation && PLoc.isValid())
-    OS << "In file included from " << PLoc.getFilename() << ':'
-       << PLoc.getLine() << ":\n";
-  else
+  if (DiagOpts->ShowLocation && PLoc.isValid()) {
+    OS << "In file included from ";
+    emitFilename(PLoc.getFilename(), Loc.getManager());
+    OS << ':' << PLoc.getLine() << ":\n";
+  } else
     OS << "In included file:\n";
 }
 
@@ -923,15 +900,16 @@ void TextDiagnostic::emitBuildingModuleLocation(FullSourceLoc Loc,
 }
 
 /// Find the suitable set of lines to show to include a set of ranges.
-static llvm::Optional<std::pair<unsigned, unsigned>>
+static std::optional<std::pair<unsigned, unsigned>>
 findLinesForRange(const CharSourceRange &R, FileID FID,
                   const SourceManager &SM) {
-  if (!R.isValid()) return None;
+  if (!R.isValid())
+    return std::nullopt;
 
   SourceLocation Begin = R.getBegin();
   SourceLocation End = R.getEnd();
   if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID)
-    return None;
+    return std::nullopt;
 
   return std::make_pair(SM.getExpansionLineNumber(Begin),
                         SM.getExpansionLineNumber(End));
@@ -972,87 +950,43 @@ maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
   return A;
 }
 
-/// Highlight a SourceRange (with ~'s) for any characters on LineNo.
-static void highlightRange(const CharSourceRange &R,
-                           unsigned LineNo, FileID FID,
-                           const SourceColumnMap &map,
-                           std::string &CaretLine,
-                           const SourceManager &SM,
-                           const LangOptions &LangOpts) {
-  if (!R.isValid()) return;
-
-  SourceLocation Begin = R.getBegin();
-  SourceLocation End = R.getEnd();
-
-  unsigned StartLineNo = SM.getExpansionLineNumber(Begin);
-  if (StartLineNo > LineNo || SM.getFileID(Begin) != FID)
-    return;  // No intersection.
-
-  unsigned EndLineNo = SM.getExpansionLineNumber(End);
-  if (EndLineNo < LineNo || SM.getFileID(End) != FID)
-    return;  // No intersection.
-
-  // Compute the column number of the start.
-  unsigned StartColNo = 0;
-  if (StartLineNo == LineNo) {
-    StartColNo = SM.getExpansionColumnNumber(Begin);
-    if (StartColNo) --StartColNo;  // Zero base the col #.
-  }
-
-  // Compute the column number of the end.
-  unsigned EndColNo = map.getSourceLine().size();
-  if (EndLineNo == LineNo) {
-    EndColNo = SM.getExpansionColumnNumber(End);
-    if (EndColNo) {
-      --EndColNo;  // Zero base the col #.
-
-      // Add in the length of the token, so that we cover multi-char tokens if
-      // this is a token range.
-      if (R.isTokenRange())
-        EndColNo += Lexer::MeasureTokenLength(End, SM, LangOpts);
-    } else {
-      EndColNo = CaretLine.size();
-    }
-  }
-
-  assert(StartColNo <= EndColNo && "Invalid range!");
-
-  // Check that a token range does not highlight only whitespace.
-  if (R.isTokenRange()) {
-    // Pick the first non-whitespace column.
-    while (StartColNo < map.getSourceLine().size() &&
-           (map.getSourceLine()[StartColNo] == ' ' ||
-            map.getSourceLine()[StartColNo] == '\t'))
-      StartColNo = map.startOfNextColumn(StartColNo);
-
-    // Pick the last non-whitespace column.
-    if (EndColNo > map.getSourceLine().size())
-      EndColNo = map.getSourceLine().size();
-    while (EndColNo &&
-           (map.getSourceLine()[EndColNo-1] == ' ' ||
-            map.getSourceLine()[EndColNo-1] == '\t'))
-      EndColNo = map.startOfPreviousColumn(EndColNo);
-
-    // If the start/end passed each other, then we are trying to highlight a
-    // range that just exists in whitespace. That most likely means we have
-    // a multi-line highlighting range that covers a blank line.
-    if (StartColNo > EndColNo) {
-      assert(StartLineNo != EndLineNo && "trying to highlight whitespace");
-      StartColNo = EndColNo;
-    }
-  }
+struct LineRange {
+  unsigned LineNo;
+  unsigned StartCol;
+  unsigned EndCol;
+};
 
-  assert(StartColNo <= map.getSourceLine().size() && "Invalid range!");
-  assert(EndColNo <= map.getSourceLine().size() && "Invalid range!");
+/// Highlight \p R (with ~'s) on the current source line.
+static void highlightRange(const LineRange &R, const SourceColumnMap &Map,
+                           std::string &CaretLine) {
+  // Pick the first non-whitespace column.
+  unsigned StartColNo = R.StartCol;
+  while (StartColNo < Map.getSourceLine().size() &&
+         (Map.getSourceLine()[StartColNo] == ' ' ||
+          Map.getSourceLine()[StartColNo] == '\t'))
+    StartColNo = Map.startOfNextColumn(StartColNo);
+
+  // Pick the last non-whitespace column.
+  unsigned EndColNo =
+      std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size());
+  while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' ||
+                      Map.getSourceLine()[EndColNo - 1] == '\t'))
+    EndColNo = Map.startOfPreviousColumn(EndColNo);
+
+  // If the start/end passed each other, then we are trying to highlight a
+  // range that just exists in whitespace. That most likely means we have
+  // a multi-line highlighting range that covers a blank line.
+  if (StartColNo > EndColNo)
+    return;
 
   // Fill the range with ~'s.
-  StartColNo = map.byteToContainingColumn(StartColNo);
-  EndColNo = map.byteToContainingColumn(EndColNo);
+  StartColNo = Map.byteToContainingColumn(StartColNo);
+  EndColNo = Map.byteToContainingColumn(EndColNo);
 
   assert(StartColNo <= EndColNo && "Invalid range!");
   if (CaretLine.size() < EndColNo)
-    CaretLine.resize(EndColNo,' ');
-  std::fill(CaretLine.begin()+StartColNo,CaretLine.begin()+EndColNo,'~');
+    CaretLine.resize(EndColNo, ' ');
+  std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~');
 }
 
 static std::string buildFixItInsertionLine(FileID FID,
@@ -1066,51 +1000,51 @@ static std::string buildFixItInsertionLine(FileID FID,
     return FixItInsertionLine;
   unsigned PrevHintEndCol = 0;
 
-  for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end();
-       I != E; ++I) {
-    if (!I->CodeToInsert.empty()) {
-      // We have an insertion hint. Determine whether the inserted
-      // code contains no newlines and is on the same line as the caret.
-      std::pair<FileID, unsigned> HintLocInfo
-        = SM.getDecomposedExpansionLoc(I->RemoveRange.getBegin());
-      if (FID == HintLocInfo.first &&
-          LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
-          StringRef(I->CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
-        // Insert the new code into the line just below the code
-        // that the user wrote.
-        // Note: When modifying this function, be very careful about what is a
-        // "column" (printed width, platform-dependent) and what is a
-        // "byte offset" (SourceManager "column").
-        unsigned HintByteOffset
-          = SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
-
-        // The hint must start inside the source or right at the end
-        assert(HintByteOffset < static_cast<unsigned>(map.bytes())+1);
-        unsigned HintCol = map.byteToContainingColumn(HintByteOffset);
-
-        // If we inserted a long previous hint, push this one forwards, and add
-        // an extra space to show that this is not part of the previous
-        // completion. This is sort of the best we can do when two hints appear
-        // to overlap.
-        //
-        // Note that if this hint is located immediately after the previous
-        // hint, no space will be added, since the location is more important.
-        if (HintCol < PrevHintEndCol)
-          HintCol = PrevHintEndCol + 1;
-
-        // This should NOT use HintByteOffset, because the source might have
-        // Unicode characters in earlier columns.
-        unsigned NewFixItLineSize = FixItInsertionLine.size() +
-          (HintCol - PrevHintEndCol) + I->CodeToInsert.size();
-        if (NewFixItLineSize > FixItInsertionLine.size())
-          FixItInsertionLine.resize(NewFixItLineSize, ' ');
-
-        std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(),
-                  FixItInsertionLine.end() - I->CodeToInsert.size());
-
-        PrevHintEndCol =
-          HintCol + llvm::sys::locale::columnWidth(I->CodeToInsert);
-      }
+  for (const auto &H : Hints) {
+    if (H.CodeToInsert.empty())
+      continue;
+
+    // We have an insertion hint. Determine whether the inserted
+    // code contains no newlines and is on the same line as the caret.
+    std::pair<FileID, unsigned> HintLocInfo =
+        SM.getDecomposedExpansionLoc(H.RemoveRange.getBegin());
+    if (FID == HintLocInfo.first &&
+        LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
+        StringRef(H.CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
+      // Insert the new code into the line just below the code
+      // that the user wrote.
+      // Note: When modifying this function, be very careful about what is a
+      // "column" (printed width, platform-dependent) and what is a
+      // "byte offset" (SourceManager "column").
+      unsigned HintByteOffset =
+          SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
+
+      // The hint must start inside the source or right at the end
+      assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1);
+      unsigned HintCol = map.byteToContainingColumn(HintByteOffset);
+
+      // If we inserted a long previous hint, push this one forwards, and add
+      // an extra space to show that this is not part of the previous
+      // completion. This is sort of the best we can do when two hints appear
+      // to overlap.
+      //
+      // Note that if this hint is located immediately after the previous
+      // hint, no space will be added, since the location is more important.
+      if (HintCol < PrevHintEndCol)
+        HintCol = PrevHintEndCol + 1;
+
+      // This should NOT use HintByteOffset, because the source might have
+      // Unicode characters in earlier columns.
+      unsigned NewFixItLineSize = FixItInsertionLine.size() +
+                                  (HintCol - PrevHintEndCol) +
+                                  H.CodeToInsert.size();
+      if (NewFixItLineSize > FixItInsertionLine.size())
+        FixItInsertionLine.resize(NewFixItLineSize, ' ');
+
+      std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(),
+                FixItInsertionLine.end() - H.CodeToInsert.size());
+
+      PrevHintEndCol = HintCol + llvm::sys::locale::columnWidth(H.CodeToInsert);
     }
   }
 
@@ -1119,6 +1053,65 @@ static std::string buildFixItInsertionLine(FileID FID,
   return FixItInsertionLine;
 }
 
+static unsigned getNumDisplayWidth(unsigned N) {
+  unsigned L = 1u, M = 10u;
+  while (M <= N && ++L != std::numeric_limits<unsigned>::digits10 + 1)
+    M *= 10u;
+
+  return L;
+}
+
+/// Filter out invalid ranges, ranges that don't fit into the window of
+/// source lines we will print, and ranges from other files.
+///
+/// For the remaining ranges, convert them to simple LineRange structs,
+/// which only cover one line at a time.
+static SmallVector<LineRange>
+prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
+                       const SourceManager &SM,
+                       const std::pair<unsigned, unsigned> &Lines, FileID FID,
+                       const LangOptions &LangOpts) {
+  SmallVector<LineRange> LineRanges;
+
+  for (const CharSourceRange &R : Ranges) {
+    if (R.isInvalid())
+      continue;
+    SourceLocation Begin = R.getBegin();
+    SourceLocation End = R.getEnd();
+
+    unsigned StartLineNo = SM.getExpansionLineNumber(Begin);
+    if (StartLineNo > Lines.second || SM.getFileID(Begin) != FID)
+      continue;
+
+    unsigned EndLineNo = SM.getExpansionLineNumber(End);
+    if (EndLineNo < Lines.first || SM.getFileID(End) != FID)
+      continue;
+
+    unsigned StartColumn = SM.getExpansionColumnNumber(Begin);
+    unsigned EndColumn = SM.getExpansionColumnNumber(End);
+    if (R.isTokenRange())
+      EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts);
+
+    // Only a single line.
+    if (StartLineNo == EndLineNo) {
+      LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1});
+      continue;
+    }
+
+    // Start line.
+    LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u});
+
+    // Middle lines.
+    for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S)
+      LineRanges.push_back({S, 0, ~0u});
+
+    // End line.
+    LineRanges.push_back({EndLineNo, 0, EndColumn - 1});
+  }
+
+  return LineRanges;
+}
+
 /// Emit a code snippet and caret line.
 ///
 /// This routine emits a single line's code snippet and caret line..
@@ -1144,9 +1137,7 @@ void TextDiagnostic::emitSnippetAndCaret(
       (LastLevel != DiagnosticsEngine::Note || Level == LastLevel))
     return;
 
-  // Decompose the location into a FID/Offset pair.
-  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
-  FileID FID = LocInfo.first;
+  FileID FID = Loc.getFileID();
   const SourceManager &SM = Loc.getManager();
 
   // Get information about the buffer it points into.
@@ -1154,6 +1145,8 @@ void TextDiagnostic::emitSnippetAndCaret(
   StringRef BufData = Loc.getBufferData(&Invalid);
   if (Invalid)
     return;
+  const char *BufStart = BufData.data();
+  const char *BufEnd = BufStart + BufData.size();
 
   unsigned CaretLineNo = Loc.getLineNumber();
   unsigned CaretColNo = Loc.getColumnNumber();
@@ -1166,16 +1159,33 @@ void TextDiagnostic::emitSnippetAndCaret(
   // Find the set of lines to include.
   const unsigned MaxLines = DiagOpts->SnippetLineLimit;
   std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo};
-  for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
-                                                  E = Ranges.end();
-       I != E; ++I)
-    if (auto OptionalRange = findLinesForRange(*I, FID, SM))
+  unsigned DisplayLineNo = Loc.getPresumedLoc().getLine();
+  for (const auto &I : Ranges) {
+    if (auto OptionalRange = findLinesForRange(I, FID, SM))
       Lines = maybeAddRange(Lines, *OptionalRange, MaxLines);
 
-  for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1; ++LineNo) {
-    const char *BufStart = BufData.data();
-    const char *BufEnd = BufStart + BufData.size();
+    DisplayLineNo =
+        std::min(DisplayLineNo, SM.getPresumedLineNumber(I.getBegin()));
+  }
 
+  // Our line numbers look like:
+  // " [number] | "
+  // Where [number] is MaxLineNoDisplayWidth columns
+  // and the full thing is therefore MaxLineNoDisplayWidth + 4 columns.
+  unsigned MaxLineNoDisplayWidth =
+      DiagOpts->ShowLineNumbers
+          ? std::max(4u, getNumDisplayWidth(DisplayLineNo + MaxLines))
+          : 0;
+  auto indentForLineNumbers = [&] {
+    if (MaxLineNoDisplayWidth > 0)
+      OS.indent(MaxLineNoDisplayWidth + 2) << "| ";
+  };
+
+  SmallVector<LineRange> LineRanges =
+      prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts);
+
+  for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1;
+       ++LineNo, ++DisplayLineNo) {
     // Rewind from the current position to the start of the line.
     const char *LineStart =
         BufStart +
@@ -1193,34 +1203,28 @@ void TextDiagnostic::emitSnippetAndCaret(
     if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint)
       return;
 
-    // Trim trailing null-bytes.
-    StringRef Line(LineStart, LineEnd - LineStart);
-    while (!Line.empty() && Line.back() == '\0' &&
-           (LineNo != CaretLineNo || Line.size() > CaretColNo))
-      Line = Line.drop_back();
-
     // Copy the line of code into an std::string for ease of manipulation.
-    std::string SourceLine(Line.begin(), Line.end());
+    std::string SourceLine(LineStart, LineEnd);
+    // Remove trailing null bytes.
+    while (!SourceLine.empty() && SourceLine.back() == '\0' &&
+           (LineNo != CaretLineNo || SourceLine.size() > CaretColNo))
+      SourceLine.pop_back();
 
     // Build the byte to column map.
     const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
 
-    // Create a line for the caret that is filled with spaces that is the same
-    // number of columns as the line of source code.
-    std::string CaretLine(sourceColMap.columns(), ' ');
-
+    std::string CaretLine;
     // Highlight all of the characters covered by Ranges with ~ characters.
-    for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
-                                                    E = Ranges.end();
-         I != E; ++I)
-      highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts);
+    for (const auto &LR : LineRanges) {
+      if (LR.LineNo == LineNo)
+        highlightRange(LR, sourceColMap, CaretLine);
+    }
 
     // Next, insert the caret itself.
     if (CaretLineNo == LineNo) {
-      CaretColNo = sourceColMap.byteToContainingColumn(CaretColNo - 1);
-      if (CaretLine.size() < CaretColNo + 1)
-        CaretLine.resize(CaretColNo + 1, ' ');
-      CaretLine[CaretColNo] = '^';
+      size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1);
+      CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' ');
+      CaretLine[Col] = '^';
     }
 
     std::string FixItInsertionLine = buildFixItInsertionLine(
@@ -1237,19 +1241,16 @@ void TextDiagnostic::emitSnippetAndCaret(
     // to produce easily machine parsable output.  Add a space before the
     // source line and the caret to make it trivial to tell the main diagnostic
     // line from what the user is intended to see.
-    if (DiagOpts->ShowSourceRanges) {
+    if (DiagOpts->ShowSourceRanges && !SourceLine.empty()) {
       SourceLine = ' ' + SourceLine;
       CaretLine = ' ' + CaretLine;
     }
 
-    // Finally, remove any blank spaces from the end of CaretLine.
-    while (!CaretLine.empty() && CaretLine[CaretLine.size() - 1] == ' ')
-      CaretLine.erase(CaretLine.end() - 1);
-
     // Emit what we have computed.
-    emitSnippet(SourceLine);
+    emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
 
     if (!CaretLine.empty()) {
+      indentForLineNumbers();
       if (DiagOpts->ShowColors)
         OS.changeColor(caretColor, true);
       OS << CaretLine << '\n';
@@ -1258,6 +1259,7 @@ void TextDiagnostic::emitSnippetAndCaret(
     }
 
     if (!FixItInsertionLine.empty()) {
+      indentForLineNumbers();
       if (DiagOpts->ShowColors)
         // Print fixit line in color
         OS.changeColor(fixitColor, false);
@@ -1273,37 +1275,37 @@ void TextDiagnostic::emitSnippetAndCaret(
   emitParseableFixits(Hints, SM);
 }
 
-void TextDiagnostic::emitSnippet(StringRef line) {
-  if (line.empty())
-    return;
-
-  size_t i = 0;
-
-  std::string to_print;
-  bool print_reversed = false;
-
-  while (i<line.size()) {
-    std::pair<SmallString<16>,bool> res
-        = printableTextForNextCharacter(line, &i, DiagOpts->TabStop);
-    bool was_printable = res.second;
+void TextDiagnostic::emitSnippet(StringRef SourceLine,
+                                 unsigned MaxLineNoDisplayWidth,
+                                 unsigned LineNo) {
+  // Emit line number.
+  if (MaxLineNoDisplayWidth > 0) {
+    unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
+    OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1)
+        << LineNo << " | ";
+  }
 
-    if (DiagOpts->ShowColors && was_printable == print_reversed) {
-      if (print_reversed)
-        OS.reverseColor();
-      OS << to_print;
-      to_print.clear();
-      if (DiagOpts->ShowColors)
-        OS.resetColor();
+  // Print the source line one character at a time.
+  bool PrintReversed = false;
+  size_t I = 0;
+  while (I < SourceLine.size()) {
+    auto [Str, WasPrintable] =
+        printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop);
+
+    // Toggle inverted colors on or off for this character.
+    if (DiagOpts->ShowColors) {
+      if (WasPrintable == PrintReversed) {
+        PrintReversed = !PrintReversed;
+        if (PrintReversed)
+          OS.reverseColor();
+        else
+          OS.resetColor();
+      }
     }
-
-    print_reversed = !was_printable;
-    to_print += res.first.str();
+    OS << Str;
   }
 
-  if (print_reversed && DiagOpts->ShowColors)
-    OS.reverseColor();
-  OS << to_print;
-  if (print_reversed && DiagOpts->ShowColors)
+  if (DiagOpts->ShowColors)
     OS.resetColor();
 
   OS << '\n';
@@ -1316,24 +1318,21 @@ void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints,
 
   // We follow FixItRewriter's example in not (yet) handling
   // fix-its in macros.
-  for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end();
-       I != E; ++I) {
-    if (I->RemoveRange.isInvalid() ||
-        I->RemoveRange.getBegin().isMacroID() ||
-        I->RemoveRange.getEnd().isMacroID())
+  for (const auto &H : Hints) {
+    if (H.RemoveRange.isInvalid() || H.RemoveRange.getBegin().isMacroID() ||
+        H.RemoveRange.getEnd().isMacroID())
       return;
   }
 
-  for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end();
-       I != E; ++I) {
-    SourceLocation BLoc = I->RemoveRange.getBegin();
-    SourceLocation ELoc = I->RemoveRange.getEnd();
+  for (const auto &H : Hints) {
+    SourceLocation BLoc = H.RemoveRange.getBegin();
+    SourceLocation ELoc = H.RemoveRange.getEnd();
 
     std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(BLoc);
     std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(ELoc);
 
     // Adjust for token ranges.
-    if (I->RemoveRange.isTokenRange())
+    if (H.RemoveRange.isTokenRange())
       EInfo.second += Lexer::MeasureTokenLength(ELoc, SM, LangOpts);
 
     // We specifically do not do word-wrapping or tab-expansion here,
@@ -1349,7 +1348,7 @@ void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints,
       << '-' << SM.getLineNumber(EInfo.first, EInfo.second)
       << ':' << SM.getColumnNumber(EInfo.first, EInfo.second)
       << "}:\"";
-    OS.write_escaped(I->CodeToInsert);
+    OS.write_escaped(H.CodeToInsert);
     OS << "\"\n";
   }
 }