aboutsummaryrefslogtreecommitdiff
path: root/include/clang/Tooling/Syntax/Tree.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/clang/Tooling/Syntax/Tree.h')
-rw-r--r--include/clang/Tooling/Syntax/Tree.h142
1 files changed, 142 insertions, 0 deletions
diff --git a/include/clang/Tooling/Syntax/Tree.h b/include/clang/Tooling/Syntax/Tree.h
new file mode 100644
index 000000000000..b66777561a6a
--- /dev/null
+++ b/include/clang/Tooling/Syntax/Tree.h
@@ -0,0 +1,142 @@
+//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Defines the basic structure of the syntax tree. There are two kinds of nodes:
+// - leaf nodes correspond to a token in the expanded token stream,
+// - tree nodes correspond to language grammar constructs.
+//
+// The tree is initially built from an AST. Each node of a newly built tree
+// covers a continous subrange of expanded tokens (i.e. tokens after
+// preprocessing), the specific tokens coverered are stored in the leaf nodes of
+// a tree. A post-order traversal of a tree will visit leaf nodes in an order
+// corresponding the original order of expanded tokens.
+//
+// This is still work in progress and highly experimental, we leave room for
+// ourselves to completely change the design and/or implementation.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
+
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Allocator.h"
+#include <cstdint>
+
+namespace clang {
+namespace syntax {
+
+/// A memory arena for syntax trees. Also tracks the underlying token buffers,
+/// source manager, etc.
+class Arena {
+public:
+ Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
+ TokenBuffer Tokens);
+
+ const SourceManager &sourceManager() const { return SourceMgr; }
+ const LangOptions &langOptions() const { return LangOpts; }
+
+ const TokenBuffer &tokenBuffer() const;
+ llvm::BumpPtrAllocator &allocator() { return Allocator; }
+
+ /// Add \p Buffer to the underlying source manager, tokenize it and store the
+ /// resulting tokens. Useful when there is a need to materialize tokens that
+ /// were not written in user code.
+ std::pair<FileID, llvm::ArrayRef<syntax::Token>>
+ lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
+
+private:
+ SourceManager &SourceMgr;
+ const LangOptions &LangOpts;
+ TokenBuffer Tokens;
+ /// IDs and storage for additional tokenized files.
+ llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
+ /// Keeps all the allocated nodes and their intermediate data structures.
+ llvm::BumpPtrAllocator Allocator;
+};
+
+class Tree;
+class TreeBuilder;
+enum class NodeKind : uint16_t;
+enum class NodeRole : uint8_t;
+
+/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
+/// a Tree (representing language constructrs).
+class Node {
+public:
+ /// Newly created nodes are detached from a tree, parent and sibling links are
+ /// set when the node is added as a child to another one.
+ Node(NodeKind Kind);
+
+ NodeKind kind() const { return static_cast<NodeKind>(Kind); }
+ NodeRole role() const { return static_cast<NodeRole>(Role); }
+
+ const Tree *parent() const { return Parent; }
+ Tree *parent() { return Parent; }
+
+ const Node *nextSibling() const { return NextSibling; }
+ Node *nextSibling() { return NextSibling; }
+
+ /// Dumps the structure of a subtree. For debugging and testing purposes.
+ std::string dump(const Arena &A) const;
+ /// Dumps the tokens forming this subtree.
+ std::string dumpTokens(const Arena &A) const;
+
+private:
+ // Tree is allowed to change the Parent link and Role.
+ friend class Tree;
+
+ Tree *Parent;
+ Node *NextSibling;
+ unsigned Kind : 16;
+ unsigned Role : 8;
+};
+
+/// A leaf node points to a single token inside the expanded token stream.
+class Leaf final : public Node {
+public:
+ Leaf(const syntax::Token *T);
+ static bool classof(const Node *N);
+
+ const syntax::Token *token() const { return Tok; }
+
+private:
+ const syntax::Token *Tok;
+};
+
+/// A node that has children and represents a syntactic language construct.
+class Tree : public Node {
+public:
+ using Node::Node;
+ static bool classof(const Node *N);
+
+ Node *firstChild() { return FirstChild; }
+ const Node *firstChild() const { return FirstChild; }
+
+protected:
+ /// Find the first node with a corresponding role.
+ syntax::Node *findChild(NodeRole R);
+
+private:
+ /// Prepend \p Child to the list of children and and sets the parent pointer.
+ /// A very low-level operation that does not check any invariants, only used
+ /// by TreeBuilder.
+ /// EXPECTS: Role != NodeRoleDetached.
+ void prependChildLowLevel(Node *Child, NodeRole Role);
+ friend class TreeBuilder;
+
+ Node *FirstChild = nullptr;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif