aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp82
1 files changed, 46 insertions, 36 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 9c2a0163acea..6025a6675125 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -14,18 +14,27 @@
#include "clang/Lex/Lexer.h"
#include "UnicodeCharSets.h"
#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/SourceManager.h"
-#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/UnicodeCharRanges.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
#include <cstring>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace clang;
//===----------------------------------------------------------------------===//
@@ -45,7 +54,6 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
}
-
//===----------------------------------------------------------------------===//
// Lexer Class Implementation
//===----------------------------------------------------------------------===//
@@ -196,7 +204,6 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
return L;
}
-
/// Stringify - Convert the specified string into a C string, with surrounding
/// ""'s, and with escaped \ and " characters.
std::string Lexer::Stringify(StringRef Str, bool Charify) {
@@ -398,7 +405,6 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
}
-
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
@@ -526,13 +532,15 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
}
namespace {
+
enum PreambleDirectiveKind {
PDK_Skipped,
PDK_StartIf,
PDK_EndIf,
PDK_Unknown
};
-}
+
+} // end anonymous namespace
std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
const LangOptions &LangOpts,
@@ -694,7 +702,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
: TheTok.isAtStartOfLine());
}
-
/// AdvanceToTokenCharacter - Given a location that specifies the start of a
/// token, return a new location that specifies a character within the token.
SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
@@ -961,7 +968,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
assert(Loc.isMacroID() && "Only reasonble to call this on macros");
// Find the location of the immediate macro expansion.
- while (1) {
+ while (true) {
FileID FID = SM.getFileID(Loc);
const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
@@ -1031,7 +1038,6 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
return isIdentifierBody(c, LangOpts.DollarIdents);
}
-
//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
//===----------------------------------------------------------------------===//
@@ -1157,7 +1163,7 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
/// them), skip over them and return the first non-escaped-newline found,
/// otherwise return P.
const char *Lexer::SkipEscapedNewLines(const char *P) {
- while (1) {
+ while (true) {
const char *AfterEscape;
if (*P == '\\') {
AfterEscape = P+1;
@@ -1310,7 +1316,6 @@ Slash:
return *Ptr;
}
-
/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
/// and that we have already incremented Ptr by Size bytes.
@@ -1480,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
const char *UnicodePtr = CurPtr;
- UTF32 CodePoint;
- ConversionResult Result =
- llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
- (const UTF8 *)BufferEnd,
+ llvm::UTF32 CodePoint;
+ llvm::ConversionResult Result =
+ llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr,
+ (const llvm::UTF8 *)BufferEnd,
&CodePoint,
- strictConversion);
- if (Result != conversionOK ||
+ llvm::strictConversion);
+ if (Result != llvm::conversionOK ||
!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
return false;
@@ -1533,14 +1538,22 @@ FinishIdentifier:
// preprocessor, which may macro expand it or something.
if (II->isHandleIdentifierCase())
return PP->HandleIdentifier(Result);
-
+
+ if (II->getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr)
+ && II->getPPKeywordID() == tok::pp_not_keyword
+ && II->getObjCKeywordID() == tok::objc_not_keyword) {
+ // Return the code-completion token.
+ Result.setKind(tok::code_completion);
+ cutOffLexing();
+ return true;
+ }
return true;
}
// Otherwise, $,\,? in identifier found. Enter slower path.
C = getCharAndSize(CurPtr, Size);
- while (1) {
+ while (true) {
if (C == '$') {
// If we hit a $ and they are not supported in identifiers, we are done.
if (!LangOpts.DollarIdents) goto FinishIdentifier;
@@ -1700,9 +1713,9 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
getLangOpts());
if (!isIdentifierBody(Next)) {
// End of suffix. Check whether this is on the whitelist.
- IsUDSuffix = (Chars == 1 && Buffer[0] == 's') ||
- NumericLiteralParser::isValidUDSuffix(
- getLangOpts(), StringRef(Buffer, Chars));
+ const StringRef CompleteSuffix(Buffer, Chars);
+ IsUDSuffix = StringLiteralParser::isValidUDSuffix(getLangOpts(),
+ CompleteSuffix);
break;
}
@@ -1829,7 +1842,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
// Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
// it's possible the '"' was intended to be part of the raw string, but
// there's not much we can do about that.
- while (1) {
+ while (true) {
char C = *CurPtr++;
if (C == '"')
@@ -1848,7 +1861,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
const char *Prefix = CurPtr;
CurPtr += PrefixLen + 1; // skip over prefix and '('
- while (1) {
+ while (true) {
char C = *CurPtr++;
if (C == ')') {
@@ -1913,7 +1926,6 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
return true;
}
-
/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L' or u8' or u' or U'.
bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
@@ -1992,7 +2004,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
unsigned char Char = *CurPtr;
// Skip consecutive spaces efficiently.
- while (1) {
+ while (true) {
// Skip horizontal whitespace very aggressively.
while (isHorizontalWhitespace(Char))
Char = *++CurPtr;
@@ -2315,7 +2327,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
if (C == '/')
C = *CurPtr++;
- while (1) {
+ while (true) {
// Skip over all non-interesting characters until we find end of buffer or a
// (probably ending) '/' character.
if (CurPtr + 24 < BufferEnd &&
@@ -2456,7 +2468,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
// CurPtr - Cache BufferPtr in an automatic variable.
const char *CurPtr = BufferPtr;
- while (1) {
+ while (true) {
char Char = getAndAdvanceChar(CurPtr, Tmp);
switch (Char) {
default:
@@ -2669,7 +2681,6 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
return false;
}
-
/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
/// is the end of a conflict marker. Handle it by ignoring up until the end of
@@ -3498,7 +3509,6 @@ LexNextToken:
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greatergreater;
}
-
} else {
Kind = tok::greater;
}
@@ -3615,17 +3625,17 @@ LexNextToken:
break;
}
- UTF32 CodePoint;
+ llvm::UTF32 CodePoint;
// We can't just reset CurPtr to BufferPtr because BufferPtr may point to
// an escaped newline.
--CurPtr;
- ConversionResult Status =
- llvm::convertUTF8Sequence((const UTF8 **)&CurPtr,
- (const UTF8 *)BufferEnd,
+ llvm::ConversionResult Status =
+ llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
+ (const llvm::UTF8 *)BufferEnd,
&CodePoint,
- strictConversion);
- if (Status == conversionOK) {
+ llvm::strictConversion);
+ if (Status == llvm::conversionOK) {
if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
return true; // KeepWhitespaceMode