diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Format/Macros.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Format/Macros.h | 305 |
1 files changed, 276 insertions, 29 deletions
diff --git a/contrib/llvm-project/clang/lib/Format/Macros.h b/contrib/llvm-project/clang/lib/Format/Macros.h index 591ef8b5be3c..1964624e828c 100644 --- a/contrib/llvm-project/clang/lib/Format/Macros.h +++ b/contrib/llvm-project/clang/lib/Format/Macros.h @@ -1,9 +1,8 @@ -//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===// +//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -23,40 +22,38 @@ /// spelled token streams into expanded token streams when it encounters a /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines /// from the expanded token stream. -/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches -/// the spelled token stream into unwrapped lines that best resemble the -/// structure of the expanded unwrapped lines. +/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor +/// matches the spelled token stream into unwrapped lines that best resemble the +/// structure of the expanded unwrapped lines. These reconstructed unwrapped +/// lines are aliasing the tokens in the expanded token stream, so that token +/// annotations will be reused when formatting the spelled macro calls. /// -/// When formatting, clang-format formats the expanded unwrapped lines first, -/// determining the token types. Next, it formats the spelled unwrapped lines, -/// keeping the token types fixed, while allowing other formatting decisions -/// to change. +/// When formatting, clang-format annotates and formats the expanded unwrapped +/// lines first, determining the token types. Next, it formats the spelled +/// unwrapped lines, keeping the token types fixed, while allowing other +/// formatting decisions to change. /// //===----------------------------------------------------------------------===// #ifndef CLANG_LIB_FORMAT_MACROS_H #define CLANG_LIB_FORMAT_MACROS_H +#include <list> +#include <map> #include <string> -#include <unordered_map> #include <vector> -#include "Encoding.h" #include "FormatToken.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -namespace llvm { -class MemoryBuffer; -} // namespace llvm - namespace clang { -class IdentifierTable; -class SourceManager; - namespace format { -struct FormatStyle; + +struct UnwrappedLine; +struct UnwrappedLineNode; /// Takes a set of macro definitions as strings and allows expanding calls to /// those macros. @@ -109,17 +106,23 @@ public: IdentifierTable &IdentTable); ~MacroExpander(); - /// Returns whether a macro \p Name is defined. + /// Returns whether any macro \p Name is defined, regardless of overloads. bool defined(llvm::StringRef Name) const; - /// Returns whether the macro has no arguments and should not consume - /// subsequent parentheses. + /// Returns whetherh there is an object-like overload, i.e. where the macro + /// has no arguments and should not consume subsequent parentheses. bool objectLike(llvm::StringRef Name) const; + /// Returns whether macro \p Name provides an overload with the given arity. + bool hasArity(llvm::StringRef Name, unsigned Arity) const; + /// Returns the expanded stream of format tokens for \p ID, where /// each element in \p Args is a positional argument to the macro call. - llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID, - ArgsList Args) const; + /// If \p Args is not set, the object-like overload is used. + /// If \p Args is set, the overload with the arity equal to \c Args.size() is + /// used. + llvm::SmallVector<FormatToken *, 8> + expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const; private: struct Definition; @@ -131,8 +134,252 @@ private: const FormatStyle &Style; llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; IdentifierTable &IdentTable; - std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; - llvm::StringMap<Definition> Definitions; + SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; + llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike; + llvm::StringMap<Definition> ObjectLike; +}; + +/// Converts a sequence of UnwrappedLines containing expanded macros into a +/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be +/// broken into child lines, in a way that best conveys the structure of the +/// expanded code. +/// +/// In the simplest case, a spelled UnwrappedLine contains one macro, and after +/// expanding it we have one expanded UnwrappedLine. In general, macro +/// expansions can span UnwrappedLines, and multiple macros can contribute +/// tokens to the same line. We keep consuming expanded lines until: +/// * all expansions that started have finished (we're not chopping any macros +/// in half) +/// * *and* we've reached the end of a *spelled* unwrapped line. +/// +/// A single UnwrappedLine represents this chunk of code. +/// +/// After this point, the state of the spelled/expanded stream is "in sync" +/// (both at the start of an UnwrappedLine, with no macros open), so the +/// Reconstructor can be thrown away and parsing can continue. +/// +/// Given a mapping from the macro name identifier token in the macro call +/// to the tokens of the macro call, for example: +/// CLASSA -> CLASSA({public: void x();}) +/// +/// When getting the formatted lines of the expansion via the \c addLine method +/// (each '->' specifies a call to \c addLine ): +/// -> class A { +/// -> public: +/// -> void x(); +/// -> }; +/// +/// Creates the tree of unwrapped lines containing the macro call tokens so that +/// the macro call tokens fit the semantic structure of the expanded formatted +/// lines: +/// -> CLASSA({ +/// -> public: +/// -> void x(); +/// -> }) +class MacroCallReconstructor { +public: + /// Create an Reconstructor whose resulting \p UnwrappedLine will start at + /// \p Level, using the map from name identifier token to the corresponding + /// tokens of the spelled macro call. + MacroCallReconstructor( + unsigned Level, + const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> + &ActiveExpansions); + + /// For the given \p Line, match all occurences of tokens expanded from a + /// macro to unwrapped lines in the spelled macro call so that the resulting + /// tree of unwrapped lines best resembles the structure of unwrapped lines + /// passed in via \c addLine. + void addLine(const UnwrappedLine &Line); + + /// Check whether at the current state there is no open macro expansion + /// that needs to be processed to finish an macro call. + /// Only when \c finished() is true, \c takeResult() can be called to retrieve + /// the resulting \c UnwrappedLine. + /// If there are multiple subsequent macro calls within an unwrapped line in + /// the spelled token stream, the calling code may also continue to call + /// \c addLine() when \c finished() is true. + bool finished() const { return ActiveExpansions.empty(); } + + /// Retrieve the formatted \c UnwrappedLine containing the orginal + /// macro calls, formatted according to the expanded token stream received + /// via \c addLine(). + /// Generally, this line tries to have the same structure as the expanded, + /// formatted unwrapped lines handed in via \c addLine(), with the exception + /// that for multiple top-level lines, each subsequent line will be the + /// child of the last token in its predecessor. This representation is chosen + /// because it is a precondition to the formatter that we get what looks like + /// a single statement in a single \c UnwrappedLine (i.e. matching parens). + /// + /// If a token in a macro argument is a child of a token in the expansion, + /// the parent will be the corresponding token in the macro call. + /// For example: + /// #define C(a, b) class C { a b + /// C(int x;, int y;) + /// would expand to + /// class C { int x; int y; + /// where in a formatted line "int x;" and "int y;" would both be new separate + /// lines. + /// + /// In the result, "int x;" will be a child of the opening parenthesis in "C(" + /// and "int y;" will be a child of the "," token: + /// C ( + /// \- int x; + /// , + /// \- int y; + /// ) + UnwrappedLine takeResult() &&; + +private: + void add(FormatToken *Token, FormatToken *ExpandedParent, bool First); + void prepareParent(FormatToken *ExpandedParent, bool First); + FormatToken *getParentInResult(FormatToken *Parent); + void reconstruct(FormatToken *Token); + void startReconstruction(FormatToken *Token); + bool reconstructActiveCallUntil(FormatToken *Token); + void endReconstruction(FormatToken *Token); + bool processNextReconstructed(); + void finalize(); + + struct ReconstructedLine; + + void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr); + UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level); + void debug(const ReconstructedLine &Line, int Level); + ReconstructedLine &parentLine(); + ReconstructedLine *currentLine(); + void debugParentMap() const; + +#ifndef NDEBUG + enum ReconstructorState { + Start, // No macro expansion was found in the input yet. + InProgress, // During a macro reconstruction. + Finalized, // Past macro reconstruction, the result is finalized. + }; + ReconstructorState State = Start; +#endif + + // Node in which we build up the resulting unwrapped line; this type is + // analogous to UnwrappedLineNode. + struct LineNode { + LineNode() = default; + LineNode(FormatToken *Tok) : Tok(Tok) {} + FormatToken *Tok = nullptr; + llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children; + }; + + // Line in which we build up the resulting unwrapped line. + // FIXME: Investigate changing UnwrappedLine to a pointer type and using it + // instead of rolling our own type. + struct ReconstructedLine { + llvm::SmallVector<std::unique_ptr<LineNode>> Tokens; + }; + + // The line in which we collect the resulting reconstructed output. + // To reduce special cases in the algorithm, the first level of the line + // contains a single null token that has the reconstructed incoming + // lines as children. + // In the end, we stich the lines together so that each subsequent line + // is a child of the last token of the previous line. This is necessary + // in order to format the overall expression as a single logical line - + // if we created separate lines, we'd format them with their own top-level + // indent depending on the semantic structure, which is not desired. + ReconstructedLine Result; + + // Stack of currently "open" lines, where each line's predecessor's last + // token is the parent token for that line. + llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines; + + // Maps from the expanded token to the token that takes its place in the + // reconstructed token stream in terms of parent-child relationships. + // Note that it might take multiple steps to arrive at the correct + // parent in the output. + // Given: #define C(a, b) []() { a; b; } + // And a call: C(f(), g()) + // The structure in the incoming formatted unwrapped line will be: + // []() { + // |- f(); + // \- g(); + // } + // with f and g being children of the opening brace. + // In the reconstructed call: + // C(f(), g()) + // \- f() + // \- g() + // We want f to be a child of the opening parenthesis and g to be a child + // of the comma token in the macro call. + // Thus, we map + // { -> ( + // and add + // ( -> , + // once we're past the comma in the reconstruction. + llvm::DenseMap<FormatToken *, FormatToken *> + SpelledParentToReconstructedParent; + + // Keeps track of a single expansion while we're reconstructing tokens it + // generated. + struct Expansion { + // The identifier token of the macro call. + FormatToken *ID; + // Our current position in the reconstruction. + std::list<UnwrappedLineNode>::iterator SpelledI; + // The end of the reconstructed token sequence. + std::list<UnwrappedLineNode>::iterator SpelledE; + }; + + // Stack of macro calls for which we're in the middle of an expansion. + llvm::SmallVector<Expansion> ActiveExpansions; + + struct MacroCallState { + MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken, + FormatToken *MacroCallLParen); + + ReconstructedLine *Line; + + // The last token in the parent line or expansion, or nullptr if the macro + // expansion is on a top-level line. + // + // For example, in the macro call: + // auto f = []() { ID(1); }; + // The MacroCallState for ID will have '{' as ParentLastToken. + // + // In the macro call: + // ID(ID(void f())); + // The MacroCallState of the outer ID will have nullptr as ParentLastToken, + // while the MacroCallState for the inner ID will have the '(' of the outer + // ID as ParentLastToken. + // + // In the macro call: + // ID2(a, ID(b)); + // The MacroCallState of ID will have ',' as ParentLastToken. + FormatToken *ParentLastToken; + + // The l_paren of this MacroCallState's macro call. + FormatToken *MacroCallLParen; + }; + + // Keeps track of the lines into which the opening brace/parenthesis & + // argument separating commas for each level in the macro call go in order to + // put the corresponding closing brace/parenthesis into the same line in the + // output and keep track of which parents in the expanded token stream map to + // which tokens in the reconstructed stream. + // When an opening brace/parenthesis has children, we want the structure of + // the output line to be: + // |- MACRO + // |- ( + // | \- <argument> + // |- , + // | \- <argument> + // \- ) + llvm::SmallVector<MacroCallState> MacroCallStructure; + + // Level the generated UnwrappedLine will be at. + const unsigned Level; + + // Maps from identifier of the macro call to an unwrapped line containing + // all tokens of the macro call. + const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> + &IdToReconstructed; }; } // namespace format |