aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp48
1 files changed, 28 insertions, 20 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 6f6b50b246d7..ca5252e1c9ce 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -540,16 +540,16 @@ namespace {
};
}
-std::pair<unsigned, bool>
-Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
- const LangOptions &LangOpts, unsigned MaxLines) {
+std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
+ const LangOptions &LangOpts,
+ unsigned MaxLines) {
// Create a lexer starting at the beginning of the file. Note that we use a
// "fake" file source location at offset 1 so that the lexer will track our
// position within the file.
const unsigned StartOffset = 1;
SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset);
- Lexer TheLexer(FileLoc, LangOpts, Buffer->getBufferStart(),
- Buffer->getBufferStart(), Buffer->getBufferEnd());
+ Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
+ Buffer.end());
TheLexer.SetCommentRetentionState(true);
// StartLoc will differ from FileLoc if there is a BOM that was skipped.
@@ -563,9 +563,9 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
unsigned MaxLineOffset = 0;
if (MaxLines) {
- const char *CurPtr = Buffer->getBufferStart();
+ const char *CurPtr = Buffer.begin();
unsigned CurLine = 0;
- while (CurPtr != Buffer->getBufferEnd()) {
+ while (CurPtr != Buffer.end()) {
char ch = *CurPtr++;
if (ch == '\n') {
++CurLine;
@@ -573,8 +573,8 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
break;
}
}
- if (CurPtr != Buffer->getBufferEnd())
- MaxLineOffset = CurPtr - Buffer->getBufferStart();
+ if (CurPtr != Buffer.end())
+ MaxLineOffset = CurPtr - Buffer.begin();
}
do {
@@ -1597,7 +1597,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
// If we have a digit separator, continue.
- if (C == '\'' && getLangOpts().CPlusPlus1y) {
+ if (C == '\'' && getLangOpts().CPlusPlus14) {
unsigned NextSize;
char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts());
if (isIdentifierBody(Next)) {
@@ -1660,7 +1660,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
bool IsUDSuffix = false;
if (C == '_')
IsUDSuffix = true;
- else if (IsStringLiteral && getLangOpts().CPlusPlus1y) {
+ else if (IsStringLiteral && getLangOpts().CPlusPlus14) {
// In C++1y, we need to look ahead a few characters to see if this is a
// valid suffix for a string literal or a numeric literal (this could be
// the 'operator""if' defining a numeric literal operator).
@@ -1889,17 +1889,20 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
/// LexCharConstant - Lex the remainder of a character constant, after having
-/// lexed either ' or L' or u' or U'.
+/// lexed either ' or L' or u8' or u' or U'.
bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
tok::TokenKind Kind) {
// Does this character contain the \0 character?
const char *NulCharacter = nullptr;
- if (!isLexingRawMode() &&
- (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))
- Diag(BufferPtr, getLangOpts().CPlusPlus
- ? diag::warn_cxx98_compat_unicode_literal
- : diag::warn_c99_compat_unicode_literal);
+ if (!isLexingRawMode()) {
+ if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
+ Diag(BufferPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx98_compat_unicode_literal
+ : diag::warn_c99_compat_unicode_literal);
+ else if (Kind == tok::utf8_char_constant)
+ Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
+ }
char C = getAndAdvanceChar(CurPtr, Result);
if (C == '\'') {
@@ -2319,7 +2322,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
'/', '/', '/', '/', '/', '/', '/', '/'
};
while (CurPtr+16 <= BufferEnd &&
- !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes))
+ !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes))
CurPtr += 16;
#else
// Scan for '/' quickly. Many block comments are very large.
@@ -2585,8 +2588,8 @@ static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
size_t Pos = RestOfBuffer.find(Terminator);
while (Pos != StringRef::npos) {
// Must occur at start of line.
- if (RestOfBuffer[Pos-1] != '\r' &&
- RestOfBuffer[Pos-1] != '\n') {
+ if (Pos == 0 ||
+ (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) {
RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
Pos = RestOfBuffer.find(Terminator);
continue;
@@ -3068,6 +3071,11 @@ LexNextToken:
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::utf8_string_literal);
+ if (Char2 == '\'' && LangOpts.CPlusPlus1z)
+ return LexCharConstant(
+ Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_char_constant);
if (Char2 == 'R' && LangOpts.CPlusPlus11) {
unsigned SizeTmp3;