about summary refs log tree commit diff
path: root/include/clang/Lex/Lexer.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/clang/Lex/Lexer.h')
-rw-r--r-- include/clang/Lex/Lexer.h 127
1 files changed, 89 insertions, 38 deletions
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 3be733167e5c..d58849654cb8 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -1,4 +1,4 @@
-//===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===//
+//===- Lexer.h - C Language Family Lexer ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,45 +15,88 @@
#define LLVM_CLANG_LEX_LEXER_H
#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include <cassert>
+#include <cstdint>
#include <string>
+namespace llvm {
+
+class MemoryBuffer;
+
+} // namespace llvm
+
namespace clang {
-class DiagnosticsEngine;
-class SourceManager;
-class Preprocessor;
+
class DiagnosticBuilder;
+class Preprocessor;
+class SourceManager;
/// ConflictMarkerKind - The kinds of conflict marker that the lexer might be
/// recovering from.
enum ConflictMarkerKind {
/// Not within a conflict marker.
CMK_None,
+
/// A normal or diff3 conflict marker, initiated by at least 7 "<"s,
/// separated by at least 7 "="s or "|"s, and terminated by at least 7 ">"s.
CMK_Normal,
+
/// A Perforce-style conflict marker, initiated by 4 ">"s,
/// separated by 4 "="s, and terminated by 4 "<"s.
CMK_Perforce
};
+/// Describes the preamble's extent (its size; no start offset is stored) and a
+/// flag required by PreprocessorOptions::PrecompiledPreambleBytes.
+/// The preamble includes the BOM, if any.
+struct PreambleBounds {
+ /// \brief Size of the preamble in bytes.
+ unsigned Size;
+
+ /// \brief Whether the preamble ends at the start of a new line.
+ ///
+ /// Tells the lexer whether it is starting at the beginning of a line once
+ /// the preamble has been skipped.
+ bool PreambleEndsAtStartOfLine;
+
+ PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
+ : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+};
+
/// Lexer - This provides a simple interface that turns a text buffer into a
/// stream of tokens. This provides no support for file reading or buffering,
/// or buffering/seeking of tokens, only forward lexing is supported. It relies
/// on the specified Preprocessor object to handle preprocessor directives, etc.
class Lexer : public PreprocessorLexer {
+ friend class Preprocessor;
+
void anchor() override;
//===--------------------------------------------------------------------===//
// Constant configuration values for this lexer.
- const char *BufferStart; // Start of the buffer.
- const char *BufferEnd; // End of the buffer.
- SourceLocation FileLoc; // Location for start of file.
- LangOptions LangOpts; // LangOpts enabled by this language (cache).
- bool Is_PragmaLexer; // True if lexer for _Pragma handling.
-
+
+ // Start of the buffer.
+ const char *BufferStart;
+
+ // End of the buffer.
+ const char *BufferEnd;
+
+ // Location for start of file.
+ SourceLocation FileLoc;
+
+ // LangOpts enabled by this language (cache).
+ LangOptions LangOpts;
+
+ // True if lexer for _Pragma handling.
+ bool Is_PragmaLexer;
+
//===--------------------------------------------------------------------===//
// Context-specific lexing flags set by the preprocessor.
//
@@ -89,13 +132,9 @@ class Lexer : public PreprocessorLexer {
// CurrentConflictMarkerState - The kind of conflict marker we are handling.
ConflictMarkerKind CurrentConflictMarkerState;
- Lexer(const Lexer &) = delete;
- void operator=(const Lexer &) = delete;
- friend class Preprocessor;
-
void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
-public:
+public:
/// Lexer constructor - Create a new lexer object for the specified buffer
/// with the specified preprocessor managing the lexing process. This lexer
/// assumes that the associated file buffer and Preprocessor objects will
@@ -114,6 +153,9 @@ public:
Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer,
const SourceManager &SM, const LangOptions &LangOpts);
+ Lexer(const Lexer &) = delete;
+ Lexer &operator=(const Lexer &) = delete;
+
/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
/// _Pragma expansion. This has a variety of magic semantics that this method
/// sets up. It returns a new'd Lexer that must be delete'd when done.
@@ -122,7 +164,6 @@ public:
SourceLocation ExpansionLocEnd,
unsigned TokLen, Preprocessor &PP);
-
/// getLangOpts - Return the language features currently enabled.
/// NOTE: this lexer modifies features as a file is parsed!
const LangOptions &getLangOpts() const { return LangOpts; }
@@ -224,17 +265,16 @@ public:
/// \brief Return the current location in the buffer.
const char *getBufferLocation() const { return BufferPtr; }
-
- /// Stringify - Convert the specified string into a C string by escaping '\'
- /// and " characters. This does not add surrounding ""'s to the string.
+
+ /// Stringify - Convert the specified string into a C string by i) escaping
+ /// '\\' and " characters and ii) replacing newline character(s) with "\\n".
/// If Charify is true, this escapes the ' character instead of ".
static std::string Stringify(StringRef Str, bool Charify = false);
- /// Stringify - Convert the specified string into a C string by escaping '\'
- /// and " characters. This does not add surrounding ""'s to the string.
+ /// Stringify - Convert the specified string into a C string by i) escaping
+ /// '\\' and " characters and ii) replacing newline character(s) with "\\n".
static void Stringify(SmallVectorImpl<char> &Str);
-
/// getSpelling - This method is used to get the spelling of a token into a
/// preallocated buffer, instead of as an std::string. The caller is required
/// to allocate enough space for the token, which is guaranteed to be at least
@@ -245,11 +285,11 @@ public:
/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.
- static unsigned getSpelling(const Token &Tok, const char *&Buffer,
+ static unsigned getSpelling(const Token &Tok, const char *&Buffer,
const SourceManager &SourceMgr,
const LangOptions &LangOpts,
bool *Invalid = nullptr);
-
+
/// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a
/// token is the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this
@@ -257,7 +297,7 @@ public:
/// UCNs, etc.
static std::string getSpelling(const Token &Tok,
const SourceManager &SourceMgr,
- const LangOptions &LangOpts,
+ const LangOptions &LangOpts,
bool *Invalid = nullptr);
/// getSpelling - This method is used to get the spelling of the
@@ -273,7 +313,7 @@ public:
const SourceManager &SourceMgr,
const LangOptions &LangOpts,
bool *invalid = nullptr);
-
+
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
@@ -295,7 +335,7 @@ public:
static SourceLocation GetBeginningOfToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts);
-
+
/// AdvanceToTokenCharacter - If the current SourceLocation specifies a
/// location at the start of a token, return a new location that specifies a
/// character within the token. This handles trigraphs and escaped newlines.
@@ -303,7 +343,7 @@ public:
unsigned Character,
const SourceManager &SM,
const LangOptions &LangOpts);
-
+
/// \brief Computes the source location just past the end of the
/// token at this source location.
///
@@ -443,11 +483,18 @@ public:
/// to fewer than this number of lines.
///
/// \returns The offset into the file where the preamble ends and the rest
- /// of the file begins along with a boolean value indicating whether
+ /// of the file begins along with a boolean value indicating whether
/// the preamble ends at the beginning of a new line.
- static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
- const LangOptions &LangOpts,
- unsigned MaxLines = 0);
+ static PreambleBounds ComputePreamble(StringRef Buffer,
+ const LangOptions &LangOpts,
+ unsigned MaxLines = 0);
+
+ /// Finds the token that comes right after the given location.
+ ///
+ /// Returns the next token, or none if the location is inside a macro.
+ static Optional<Token> findNextToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts);
/// \brief Checks that the given token is the first token that occurs after
/// the given location (this excludes comments and whitespace). Returns the
@@ -463,6 +510,10 @@ public:
/// \brief Returns true if the given character could appear in an identifier.
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts);
+ /// \brief Checks whether the new line pointed to by Str is preceded by an
+ /// escape sequence.
+ static bool isNewLineEscaped(const char *BufferStart, const char *Str);
+
/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
/// emit a warning.
static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
@@ -483,9 +534,9 @@ public:
static StringRef getIndentationForLine(SourceLocation Loc,
const SourceManager &SM);
+private:
//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
-private:
/// LexTokenInternal - Internal interface to lex a preprocessing token. Called
/// by Lex.
@@ -614,7 +665,7 @@ private:
//===--------------------------------------------------------------------===//
// Other lexer functions.
- void SkipBytes(unsigned Bytes, bool StartOfLine);
+ void SetByteOffset(unsigned Offset, bool StartOfLine);
void PropagateLineStartLeadingSpaceInfo(Token &Result);
@@ -639,7 +690,7 @@ private:
bool SkipBlockComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SaveLineComment (Token &Result, const char *CurPtr);
-
+
bool IsStartOfConflictMarker(const char *CurPtr);
bool HandleEndOfConflictMarker(const char *CurPtr);
@@ -658,7 +709,7 @@ private:
/// valid), this parameter will be updated to point to the
/// character after the UCN.
/// \param SlashLoc The position in the source buffer of the '\'.
- /// \param Tok The token being formed. Pass \c NULL to suppress diagnostics
+ /// \param Tok The token being formed. Pass \c nullptr to suppress diagnostics
/// and handle token formation in the caller.
///
/// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
@@ -687,6 +738,6 @@ private:
bool tryConsumeIdentifierUTF8Char(const char *&CurPtr);
};
-} // end namespace clang
+} // namespace clang
-#endif
+#endif // LLVM_CLANG_LEX_LEXER_H