diff options
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 82 |
1 files changed, 46 insertions, 36 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 9c2a0163acea..6025a6675125 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -14,18 +14,27 @@ #include "clang/Lex/Lexer.h" #include "UnicodeCharSets.h" #include "clang/Basic/CharInfo.h" +#include "clang/Basic/IdentifierTable.h" #include "clang/Basic/SourceManager.h" -#include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/UnicodeCharRanges.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> #include <cstring> +#include <string> +#include <tuple> +#include <utility> + using namespace clang; //===----------------------------------------------------------------------===// @@ -45,7 +54,6 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } - //===----------------------------------------------------------------------===// // Lexer Class Implementation //===----------------------------------------------------------------------===// @@ -196,7 +204,6 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, return L; } - /// Stringify - Convert the specified string into a C string, with surrounding /// ""'s, and with escaped \ and " characters. std::string Lexer::Stringify(StringRef Str, bool Charify) { @@ -398,7 +405,6 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer)); } - /// MeasureTokenLength - Relex the token at the specified location and return /// its length in bytes in the input file. If the token needs cleaning (e.g. /// includes a trigraph or an escaped newline) then this count includes bytes @@ -526,13 +532,15 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, } namespace { + enum PreambleDirectiveKind { PDK_Skipped, PDK_StartIf, PDK_EndIf, PDK_Unknown }; -} + +} // end anonymous namespace std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, @@ -694,7 +702,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer, : TheTok.isAtStartOfLine()); } - /// AdvanceToTokenCharacter - Given a location that specifies the start of a /// token, return a new location that specifies a character within the token. SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, @@ -961,7 +968,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc, assert(Loc.isMacroID() && "Only reasonble to call this on macros"); // Find the location of the immediate macro expansion. - while (1) { + while (true) { FileID FID = SM.getFileID(Loc); const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); @@ -1031,7 +1038,6 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { return isIdentifierBody(c, LangOpts.DollarIdents); } - //===----------------------------------------------------------------------===// // Diagnostics forwarding code. //===----------------------------------------------------------------------===// @@ -1157,7 +1163,7 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) { /// them), skip over them and return the first non-escaped-newline found, /// otherwise return P. const char *Lexer::SkipEscapedNewLines(const char *P) { - while (1) { + while (true) { const char *AfterEscape; if (*P == '\\') { AfterEscape = P+1; @@ -1310,7 +1316,6 @@ Slash: return *Ptr; } - /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the /// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size, /// and that we have already incremented Ptr by Size bytes. @@ -1480,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { const char *UnicodePtr = CurPtr; - UTF32 CodePoint; - ConversionResult Result = - llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, - (const UTF8 *)BufferEnd, + llvm::UTF32 CodePoint; + llvm::ConversionResult Result = + llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr, + (const llvm::UTF8 *)BufferEnd, &CodePoint, - strictConversion); - if (Result != conversionOK || + llvm::strictConversion); + if (Result != llvm::conversionOK || !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) return false; @@ -1533,14 +1538,22 @@ FinishIdentifier: // preprocessor, which may macro expand it or something. if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); - + + if (II->getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr) + && II->getPPKeywordID() == tok::pp_not_keyword + && II->getObjCKeywordID() == tok::objc_not_keyword) { + // Return the code-completion token. + Result.setKind(tok::code_completion); + cutOffLexing(); + return true; + } return true; } // Otherwise, $,\,? in identifier found. Enter slower path. C = getCharAndSize(CurPtr, Size); - while (1) { + while (true) { if (C == '$') { // If we hit a $ and they are not supported in identifiers, we are done. if (!LangOpts.DollarIdents) goto FinishIdentifier; @@ -1700,9 +1713,9 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, getLangOpts()); if (!isIdentifierBody(Next)) { // End of suffix. Check whether this is on the whitelist. - IsUDSuffix = (Chars == 1 && Buffer[0] == 's') || - NumericLiteralParser::isValidUDSuffix( - getLangOpts(), StringRef(Buffer, Chars)); + const StringRef CompleteSuffix(Buffer, Chars); + IsUDSuffix = StringLiteralParser::isValidUDSuffix(getLangOpts(), + CompleteSuffix); break; } @@ -1829,7 +1842,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, // Search for the next '"' in hopes of salvaging the lexer. Unfortunately, // it's possible the '"' was intended to be part of the raw string, but // there's not much we can do about that. - while (1) { + while (true) { char C = *CurPtr++; if (C == '"') @@ -1848,7 +1861,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, const char *Prefix = CurPtr; CurPtr += PrefixLen + 1; // skip over prefix and '(' - while (1) { + while (true) { char C = *CurPtr++; if (C == ')') { @@ -1913,7 +1926,6 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { return true; } - /// LexCharConstant - Lex the remainder of a character constant, after having /// lexed either ' or L' or u8' or u' or U'. bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, @@ -1992,7 +2004,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, unsigned char Char = *CurPtr; // Skip consecutive spaces efficiently. - while (1) { + while (true) { // Skip horizontal whitespace very aggressively. while (isHorizontalWhitespace(Char)) Char = *++CurPtr; @@ -2315,7 +2327,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, if (C == '/') C = *CurPtr++; - while (1) { + while (true) { // Skip over all non-interesting characters until we find end of buffer or a // (probably ending) '/' character. if (CurPtr + 24 < BufferEnd && @@ -2456,7 +2468,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { // CurPtr - Cache BufferPtr in an automatic variable. const char *CurPtr = BufferPtr; - while (1) { + while (true) { char Char = getAndAdvanceChar(CurPtr, Tmp); switch (Char) { default: @@ -2669,7 +2681,6 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { return false; } - /// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if /// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it /// is the end of a conflict marker. Handle it by ignoring up until the end of @@ -3498,7 +3509,6 @@ LexNextToken: CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); Kind = tok::greatergreater; } - } else { Kind = tok::greater; } @@ -3615,17 +3625,17 @@ LexNextToken: break; } - UTF32 CodePoint; + llvm::UTF32 CodePoint; // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurPtr; - ConversionResult Status = - llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, - (const UTF8 *)BufferEnd, + llvm::ConversionResult Status = + llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, + (const llvm::UTF8 *)BufferEnd, &CodePoint, - strictConversion); - if (Status == conversionOK) { + llvm::strictConversion); + if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) return true; // KeepWhitespaceMode |