diff options
Diffstat (limited to 'include/clang/Tooling/Syntax/Tree.h')
-rw-r--r-- | include/clang/Tooling/Syntax/Tree.h | 142 |
1 files changed, 142 insertions, 0 deletions
diff --git a/include/clang/Tooling/Syntax/Tree.h b/include/clang/Tooling/Syntax/Tree.h new file mode 100644 index 000000000000..b66777561a6a --- /dev/null +++ b/include/clang/Tooling/Syntax/Tree.h @@ -0,0 +1,142 @@ +//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines the basic structure of the syntax tree. There are two kinds of nodes: +// - leaf nodes correspond to a token in the expanded token stream, +// - tree nodes correspond to language grammar constructs. +// +// The tree is initially built from an AST. Each node of a newly built tree +// covers a continous subrange of expanded tokens (i.e. tokens after +// preprocessing), the specific tokens coverered are stored in the leaf nodes of +// a tree. A post-order traversal of a tree will visit leaf nodes in an order +// corresponding the original order of expanded tokens. +// +// This is still work in progress and highly experimental, we leave room for +// ourselves to completely change the design and/or implementation. +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H +#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H + +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Allocator.h" +#include <cstdint> + +namespace clang { +namespace syntax { + +/// A memory arena for syntax trees. Also tracks the underlying token buffers, +/// source manager, etc. +class Arena { +public: + Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, + TokenBuffer Tokens); + + const SourceManager &sourceManager() const { return SourceMgr; } + const LangOptions &langOptions() const { return LangOpts; } + + const TokenBuffer &tokenBuffer() const; + llvm::BumpPtrAllocator &allocator() { return Allocator; } + + /// Add \p Buffer to the underlying source manager, tokenize it and store the + /// resulting tokens. Useful when there is a need to materialize tokens that + /// were not written in user code. + std::pair<FileID, llvm::ArrayRef<syntax::Token>> + lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); + +private: + SourceManager &SourceMgr; + const LangOptions &LangOpts; + TokenBuffer Tokens; + /// IDs and storage for additional tokenized files. + llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens; + /// Keeps all the allocated nodes and their intermediate data structures. + llvm::BumpPtrAllocator Allocator; +}; + +class Tree; +class TreeBuilder; +enum class NodeKind : uint16_t; +enum class NodeRole : uint8_t; + +/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or +/// a Tree (representing language constructrs). +class Node { +public: + /// Newly created nodes are detached from a tree, parent and sibling links are + /// set when the node is added as a child to another one. + Node(NodeKind Kind); + + NodeKind kind() const { return static_cast<NodeKind>(Kind); } + NodeRole role() const { return static_cast<NodeRole>(Role); } + + const Tree *parent() const { return Parent; } + Tree *parent() { return Parent; } + + const Node *nextSibling() const { return NextSibling; } + Node *nextSibling() { return NextSibling; } + + /// Dumps the structure of a subtree. For debugging and testing purposes. + std::string dump(const Arena &A) const; + /// Dumps the tokens forming this subtree. + std::string dumpTokens(const Arena &A) const; + +private: + // Tree is allowed to change the Parent link and Role. + friend class Tree; + + Tree *Parent; + Node *NextSibling; + unsigned Kind : 16; + unsigned Role : 8; +}; + +/// A leaf node points to a single token inside the expanded token stream. +class Leaf final : public Node { +public: + Leaf(const syntax::Token *T); + static bool classof(const Node *N); + + const syntax::Token *token() const { return Tok; } + +private: + const syntax::Token *Tok; +}; + +/// A node that has children and represents a syntactic language construct. +class Tree : public Node { +public: + using Node::Node; + static bool classof(const Node *N); + + Node *firstChild() { return FirstChild; } + const Node *firstChild() const { return FirstChild; } + +protected: + /// Find the first node with a corresponding role. + syntax::Node *findChild(NodeRole R); + +private: + /// Prepend \p Child to the list of children and and sets the parent pointer. + /// A very low-level operation that does not check any invariants, only used + /// by TreeBuilder. + /// EXPECTS: Role != NodeRoleDetached. + void prependChildLowLevel(Node *Child, NodeRole Role); + friend class TreeBuilder; + + Node *FirstChild = nullptr; +}; + +} // namespace syntax +} // namespace clang + +#endif |