1 files changed, 68 insertions, 8 deletions
diff --git a/contrib/llvm-project/clang/include/clang/Tooling/Syntax/Tokens.h b/contrib/llvm-project/clang/include/clang/Tooling/Syntax/Tokens.h
index a210815d49f9..a7f9369ddfff 100644
--- a/contrib/llvm-project/clang/include/clang/Tooling/Syntax/Tokens.h
+++ b/contrib/llvm-project/clang/include/clang/Tooling/Syntax/Tokens.h
@@ -171,11 +171,16 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T);
 /// To build a token buffer use the TokenCollector class. You can also compute
 /// the spelled tokens of a file using the tokenize() helper.
 ///
-/// FIXME: allow to map from spelled to expanded tokens when use-case shows up.
 /// FIXME: allow mappings into macro arguments.
 class TokenBuffer {
 public:
   TokenBuffer(const SourceManager &SourceMgr) : SourceMgr(&SourceMgr) {}
+
+  TokenBuffer(TokenBuffer &&) = default;
+  TokenBuffer(const TokenBuffer &) = delete;
+  TokenBuffer &operator=(TokenBuffer &&) = default;
+  TokenBuffer &operator=(const TokenBuffer &) = delete;
+
   /// All tokens produced by the preprocessor after all macro replacements,
   /// directives, etc. Source locations found in the clang AST will always
   /// point to one of these tokens.
@@ -191,18 +196,20 @@ public:
   /// token range R.
   llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
 
-  /// Find the subrange of spelled tokens that produced the corresponding \p
-  /// Expanded tokens.
+  /// Returns the subrange of spelled tokens corresponding to AST node spanning
+  /// \p Expanded. This is the text that should be replaced if a refactoring
+  /// were to rewrite the node. If \p Expanded is empty, the returned value is
+  /// llvm::None.
   ///
-  /// EXPECTS: \p Expanded is a subrange of expandedTokens().
-  ///
-  /// Will fail if the expanded tokens do not correspond to a
-  /// sequence of spelled tokens. E.g. for the following example:
+  /// Will fail if the expanded tokens do not correspond to a sequence of
+  /// spelled tokens. E.g. for the following example:
   ///
   ///   #define FIRST f1 f2 f3
   ///   #define SECOND s1 s2 s3
+  ///   #define ID2(X, Y) X Y
   ///
   ///   a FIRST b SECOND c // expanded tokens are: a f1 f2 f3 b s1 s2 s3 c
+  ///   d ID2(e f g, h) i  // expanded tokens are: d e f g h i
   ///
   /// the results would be:
   ///   expanded   => spelled
@@ -212,12 +219,44 @@ public:
   ///   a f1 f2 f3 => a FIRST
   ///         a f1 => can't map
   ///        s1 s2 => can't map
+  ///         e f  => e f
+  ///         g h  => can't map
   ///
-  /// If \p Expanded is empty, the returned value is llvm::None.
+  /// EXPECTS: \p Expanded is a subrange of expandedTokens().
   /// Complexity is logarithmic.
   llvm::Optional<llvm::ArrayRef<syntax::Token>>
   spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
 
+  /// Find the subranges of expanded tokens, corresponding to \p Spelled.
+  ///
+  /// Some spelled tokens may not be present in the expanded token stream, so
+  /// this function can return an empty vector, e.g. for tokens of macro
+  /// directives or disabled preprocessor branches.
+  ///
+  /// Some spelled tokens can be duplicated in the expanded token stream
+  /// multiple times and this function will return multiple results in those
+  /// cases. This happens when \p Spelled is inside a macro argument.
+  ///
+  /// FIXME: return correct results on macro arguments. For now, we return an
+  ///        empty list.
+  ///
+  /// (!) will return empty vector on tokens from #define body:
+  /// E.g. for the following example:
+  ///
+  ///   #define FIRST(A) f1 A = A f2
+  ///   #define SECOND s
+  ///
+  ///   a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s
+  /// The results would be
+  ///   spelled           => expanded
+  ///   ------------------------
+  ///   #define FIRST     => {}
+  ///   a FIRST(arg)      => {a f1 arg = arg f2}
+  ///   arg               => {arg, arg} // arg #1 is before `=` and arg #2 is
+  ///                                   // after `=` in the expanded tokens.
+  llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+  expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
+
   /// An expansion produced by the preprocessor, includes macro expansions and
   /// preprocessor directives. Preprocessor always maps a non-empty range of
   /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a
@@ -245,6 +284,10 @@ public:
   ///     "DECL", "(", "a", ")", ";"}
   llvm::ArrayRef<syntax::Token> spelledTokens(FileID FID) const;
 
+  /// Returns the spelled Token starting at Loc, if there are no such tokens
+  /// returns nullptr.
+  const syntax::Token *spelledTokenAt(SourceLocation Loc) const;
+
   /// Get all tokens that expand a macro in \p FID. For the following input
   ///     #define FOO B
   ///     #define FOO2(X) int X
@@ -303,6 +346,12 @@ private:
   std::pair<const syntax::Token *, const Mapping *>
   spelledForExpandedToken(const syntax::Token *Expanded) const;
 
+  /// Returns a mapping starting before \p Spelled token, or nullptr if no
+  /// such mapping exists.
+  static const Mapping *
+  mappingStartingBeforeSpelled(const MarkedFile &F,
+                               const syntax::Token *Spelled);
+
   /// Token stream produced after preprocessing, conceputally this captures the
   /// same stream as 'clang -E' (excluding the preprocessor directives like
   /// #file, etc.).
@@ -317,11 +366,16 @@ private:
 /// This always returns 0-2 tokens.
 llvm::ArrayRef<syntax::Token>
 spelledTokensTouching(SourceLocation Loc, const syntax::TokenBuffer &Tokens);
+llvm::ArrayRef<syntax::Token>
+spelledTokensTouching(SourceLocation Loc, llvm::ArrayRef<syntax::Token> Tokens);
 
 /// The identifier token that overlaps or touches a spelling location Loc.
 /// If there is none, returns nullptr.
 const syntax::Token *
 spelledIdentifierTouching(SourceLocation Loc,
+                          llvm::ArrayRef<syntax::Token> Tokens);
+const syntax::Token *
+spelledIdentifierTouching(SourceLocation Loc,
                           const syntax::TokenBuffer &Tokens);
 
 /// Lex the text buffer, corresponding to \p FID, in raw mode and record the
@@ -334,6 +388,12 @@ spelledIdentifierTouching(SourceLocation Loc,
 /// The result will *not* have a 'eof' token at the end.
 std::vector<syntax::Token> tokenize(FileID FID, const SourceManager &SM,
                                     const LangOptions &LO);
+/// Similar to one above, instead of whole file tokenizes a part of it. Note
+/// that, the first token might be incomplete if FR.startOffset is not at the
+/// beginning of a token, and the last token returned will start before the
+/// FR.endOffset but might end after it.
+std::vector<syntax::Token>
+tokenize(const FileRange &FR, const SourceManager &SM, const LangOptions &LO);
 
 /// Collects tokens for the main file while running the frontend action. An
 /// instance of this object should be created on