aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/Format/Macros.h
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/Format/Macros.h')
-rw-r--r--contrib/llvm-project/clang/lib/Format/Macros.h305
1 files changed, 276 insertions, 29 deletions
diff --git a/contrib/llvm-project/clang/lib/Format/Macros.h b/contrib/llvm-project/clang/lib/Format/Macros.h
index 591ef8b5be3c..1964624e828c 100644
--- a/contrib/llvm-project/clang/lib/Format/Macros.h
+++ b/contrib/llvm-project/clang/lib/Format/Macros.h
@@ -1,9 +1,8 @@
-//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===//
+//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -23,40 +22,38 @@
/// spelled token streams into expanded token streams when it encounters a
/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
/// from the expanded token stream.
-/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches
-/// the spelled token stream into unwrapped lines that best resemble the
-/// structure of the expanded unwrapped lines.
+/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
+/// matches the spelled token stream into unwrapped lines that best resemble the
+/// structure of the expanded unwrapped lines. These reconstructed unwrapped
+/// lines are aliasing the tokens in the expanded token stream, so that token
+/// annotations will be reused when formatting the spelled macro calls.
///
-/// When formatting, clang-format formats the expanded unwrapped lines first,
-/// determining the token types. Next, it formats the spelled unwrapped lines,
-/// keeping the token types fixed, while allowing other formatting decisions
-/// to change.
+/// When formatting, clang-format annotates and formats the expanded unwrapped
+/// lines first, determining the token types. Next, it formats the spelled
+/// unwrapped lines, keeping the token types fixed, while allowing other
+/// formatting decisions to change.
///
//===----------------------------------------------------------------------===//
#ifndef CLANG_LIB_FORMAT_MACROS_H
#define CLANG_LIB_FORMAT_MACROS_H
+#include <list>
+#include <map>
#include <string>
-#include <unordered_map>
#include <vector>
-#include "Encoding.h"
#include "FormatToken.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-namespace llvm {
-class MemoryBuffer;
-} // namespace llvm
-
namespace clang {
-class IdentifierTable;
-class SourceManager;
-
namespace format {
-struct FormatStyle;
+
+struct UnwrappedLine;
+struct UnwrappedLineNode;
/// Takes a set of macro definitions as strings and allows expanding calls to
/// those macros.
@@ -109,17 +106,23 @@ public:
IdentifierTable &IdentTable);
~MacroExpander();
- /// Returns whether a macro \p Name is defined.
+ /// Returns whether any macro \p Name is defined, regardless of overloads.
bool defined(llvm::StringRef Name) const;
- /// Returns whether the macro has no arguments and should not consume
- /// subsequent parentheses.
+ /// Returns whether there is an object-like overload, i.e. where the macro
+ /// has no arguments and should not consume subsequent parentheses.
bool objectLike(llvm::StringRef Name) const;
+ /// Returns whether macro \p Name provides an overload with the given arity.
+ bool hasArity(llvm::StringRef Name, unsigned Arity) const;
+
/// Returns the expanded stream of format tokens for \p ID, where
/// each element in \p Args is a positional argument to the macro call.
- llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
- ArgsList Args) const;
+ /// If \p Args is not set, the object-like overload is used.
+ /// If \p Args is set, the overload with the arity equal to \c Args.size() is
+ /// used.
+ llvm::SmallVector<FormatToken *, 8>
+ expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;
private:
struct Definition;
@@ -131,8 +134,252 @@ private:
const FormatStyle &Style;
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
IdentifierTable &IdentTable;
- std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
- llvm::StringMap<Definition> Definitions;
+ SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
+ llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
+ llvm::StringMap<Definition> ObjectLike;
+};
+
+/// Converts a sequence of UnwrappedLines containing expanded macros into a
+/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
+/// broken into child lines, in a way that best conveys the structure of the
+/// expanded code.
+///
+/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
+/// expanding it we have one expanded UnwrappedLine. In general, macro
+/// expansions can span UnwrappedLines, and multiple macros can contribute
+/// tokens to the same line. We keep consuming expanded lines until:
+/// * all expansions that started have finished (we're not chopping any macros
+/// in half)
+/// * *and* we've reached the end of a *spelled* unwrapped line.
+///
+/// A single UnwrappedLine represents this chunk of code.
+///
+/// After this point, the state of the spelled/expanded stream is "in sync"
+/// (both at the start of an UnwrappedLine, with no macros open), so the
+/// Reconstructor can be thrown away and parsing can continue.
+///
+/// Given a mapping from the macro name identifier token in the macro call
+/// to the tokens of the macro call, for example:
+/// CLASSA -> CLASSA({public: void x();})
+///
+/// When getting the formatted lines of the expansion via the \c addLine method
+/// (each '->' specifies a call to \c addLine ):
+/// -> class A {
+/// -> public:
+/// -> void x();
+/// -> };
+///
+/// Creates the tree of unwrapped lines containing the macro call tokens so that
+/// the macro call tokens fit the semantic structure of the expanded formatted
+/// lines:
+/// -> CLASSA({
+/// -> public:
+/// -> void x();
+/// -> })
+class MacroCallReconstructor {
+public:
+ /// Create a Reconstructor whose resulting \p UnwrappedLine will start at
+ /// \p Level, using the map from name identifier token to the corresponding
+ /// tokens of the spelled macro call.
+ MacroCallReconstructor(
+ unsigned Level,
+ const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+ &ActiveExpansions);
+
+ /// For the given \p Line, match all occurrences of tokens expanded from a
+ /// macro to unwrapped lines in the spelled macro call so that the resulting
+ /// tree of unwrapped lines best resembles the structure of unwrapped lines
+ /// passed in via \c addLine.
+ void addLine(const UnwrappedLine &Line);
+
+ /// Check whether at the current state there is no open macro expansion
+ /// that needs to be processed to finish a macro call.
+ /// Only when \c finished() is true, \c takeResult() can be called to retrieve
+ /// the resulting \c UnwrappedLine.
+ /// If there are multiple subsequent macro calls within an unwrapped line in
+ /// the spelled token stream, the calling code may also continue to call
+ /// \c addLine() when \c finished() is true.
+ bool finished() const { return ActiveExpansions.empty(); }
+
+ /// Retrieve the formatted \c UnwrappedLine containing the original
+ /// macro calls, formatted according to the expanded token stream received
+ /// via \c addLine().
+ /// Generally, this line tries to have the same structure as the expanded,
+ /// formatted unwrapped lines handed in via \c addLine(), with the exception
+ /// that for multiple top-level lines, each subsequent line will be the
+ /// child of the last token in its predecessor. This representation is chosen
+ /// because it is a precondition to the formatter that we get what looks like
+ /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
+ ///
+ /// If a token in a macro argument is a child of a token in the expansion,
+ /// the parent will be the corresponding token in the macro call.
+ /// For example:
+ /// #define C(a, b) class C { a b
+ /// C(int x;, int y;)
+ /// would expand to
+ /// class C { int x; int y;
+ /// where in a formatted line "int x;" and "int y;" would both be new separate
+ /// lines.
+ ///
+ /// In the result, "int x;" will be a child of the opening parenthesis in "C("
+ /// and "int y;" will be a child of the "," token:
+ /// C (
+ /// \- int x;
+ /// ,
+ /// \- int y;
+ /// )
+ UnwrappedLine takeResult() &&;
+
+private:
+ void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
+ void prepareParent(FormatToken *ExpandedParent, bool First);
+ FormatToken *getParentInResult(FormatToken *Parent);
+ void reconstruct(FormatToken *Token);
+ void startReconstruction(FormatToken *Token);
+ bool reconstructActiveCallUntil(FormatToken *Token);
+ void endReconstruction(FormatToken *Token);
+ bool processNextReconstructed();
+ void finalize();
+
+ struct ReconstructedLine;
+
+ void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
+ UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
+ void debug(const ReconstructedLine &Line, int Level);
+ ReconstructedLine &parentLine();
+ ReconstructedLine *currentLine();
+ void debugParentMap() const;
+
+#ifndef NDEBUG
+ enum ReconstructorState {
+ Start, // No macro expansion was found in the input yet.
+ InProgress, // During a macro reconstruction.
+ Finalized, // Past macro reconstruction, the result is finalized.
+ };
+ ReconstructorState State = Start;
+#endif
+
+ // Node in which we build up the resulting unwrapped line; this type is
+ // analogous to UnwrappedLineNode.
+ struct LineNode {
+ LineNode() = default;
+ LineNode(FormatToken *Tok) : Tok(Tok) {}
+ FormatToken *Tok = nullptr;
+ llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children;
+ };
+
+ // Line in which we build up the resulting unwrapped line.
+ // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
+ // instead of rolling our own type.
+ struct ReconstructedLine {
+ llvm::SmallVector<std::unique_ptr<LineNode>> Tokens;
+ };
+
+ // The line in which we collect the resulting reconstructed output.
+ // To reduce special cases in the algorithm, the first level of the line
+ // contains a single null token that has the reconstructed incoming
+ // lines as children.
+ // In the end, we stitch the lines together so that each subsequent line
+ // is a child of the last token of the previous line. This is necessary
+ // in order to format the overall expression as a single logical line -
+ // if we created separate lines, we'd format them with their own top-level
+ // indent depending on the semantic structure, which is not desired.
+ ReconstructedLine Result;
+
+ // Stack of currently "open" lines, where each line's predecessor's last
+ // token is the parent token for that line.
+ llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines;
+
+ // Maps from the expanded token to the token that takes its place in the
+ // reconstructed token stream in terms of parent-child relationships.
+ // Note that it might take multiple steps to arrive at the correct
+ // parent in the output.
+ // Given: #define C(a, b) []() { a; b; }
+ // And a call: C(f(), g())
+ // The structure in the incoming formatted unwrapped line will be:
+ // []() {
+ // |- f();
+ // \- g();
+ // }
+ // with f and g being children of the opening brace.
+ // In the reconstructed call:
+ // C(f(), g())
+ // \- f()
+ // \- g()
+ // We want f to be a child of the opening parenthesis and g to be a child
+ // of the comma token in the macro call.
+ // Thus, we map
+ // { -> (
+ // and add
+ // ( -> ,
+ // once we're past the comma in the reconstruction.
+ llvm::DenseMap<FormatToken *, FormatToken *>
+ SpelledParentToReconstructedParent;
+
+ // Keeps track of a single expansion while we're reconstructing tokens it
+ // generated.
+ struct Expansion {
+ // The identifier token of the macro call.
+ FormatToken *ID;
+ // Our current position in the reconstruction.
+ std::list<UnwrappedLineNode>::iterator SpelledI;
+ // The end of the reconstructed token sequence.
+ std::list<UnwrappedLineNode>::iterator SpelledE;
+ };
+
+ // Stack of macro calls for which we're in the middle of an expansion.
+ llvm::SmallVector<Expansion> ActiveExpansions;
+
+ struct MacroCallState {
+ MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
+ FormatToken *MacroCallLParen);
+
+ ReconstructedLine *Line;
+
+ // The last token in the parent line or expansion, or nullptr if the macro
+ // expansion is on a top-level line.
+ //
+ // For example, in the macro call:
+ // auto f = []() { ID(1); };
+ // The MacroCallState for ID will have '{' as ParentLastToken.
+ //
+ // In the macro call:
+ // ID(ID(void f()));
+ // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
+ // while the MacroCallState for the inner ID will have the '(' of the outer
+ // ID as ParentLastToken.
+ //
+ // In the macro call:
+ // ID2(a, ID(b));
+ // The MacroCallState of ID will have ',' as ParentLastToken.
+ FormatToken *ParentLastToken;
+
+ // The l_paren of this MacroCallState's macro call.
+ FormatToken *MacroCallLParen;
+ };
+
+ // Keeps track of the lines into which the opening brace/parenthesis &
+ // argument separating commas for each level in the macro call go in order to
+ // put the corresponding closing brace/parenthesis into the same line in the
+ // output and keep track of which parents in the expanded token stream map to
+ // which tokens in the reconstructed stream.
+ // When an opening brace/parenthesis has children, we want the structure of
+ // the output line to be:
+ // |- MACRO
+ // |- (
+ // | \- <argument>
+ // |- ,
+ // | \- <argument>
+ // \- )
+ llvm::SmallVector<MacroCallState> MacroCallStructure;
+
+ // Level the generated UnwrappedLine will be at.
+ const unsigned Level;
+
+ // Maps from identifier of the macro call to an unwrapped line containing
+ // all tokens of the macro call.
+ const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+ &IdToReconstructed;
};
} // namespace format