aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2011-10-20 21:14:49 +0000
committerDimitry Andric <dim@FreeBSD.org>2011-10-20 21:14:49 +0000
commit36981b17ed939300f6f8fc2355a255f711fcef71 (patch)
treeee2483e98b09cac943dc93a6969d83ca737ff139 /lib/Lex
parent180abc3db9ae3b4fc63cd65b15697e6ffcc8a657 (diff)
downloadsrc-36981b17ed939300f6f8fc2355a255f711fcef71.tar.gz
src-36981b17ed939300f6f8fc2355a255f711fcef71.zip
Vendor import of clang release_30 branch r142614:vendor/clang/clang-r142614
Notes
Notes: svn path=/vendor/clang/dist/; revision=226586 svn path=/vendor/clang/clang-r142614/; revision=226587; tag=vendor/clang/clang-r142614
Diffstat (limited to 'lib/Lex')
-rw-r--r--lib/Lex/HeaderMap.cpp4
-rw-r--r--lib/Lex/HeaderSearch.cpp208
-rw-r--r--lib/Lex/Lexer.cpp650
-rw-r--r--lib/Lex/LiteralSupport.cpp394
-rw-r--r--lib/Lex/MacroArgs.cpp48
-rw-r--r--lib/Lex/MacroArgs.h13
-rw-r--r--lib/Lex/MacroInfo.cpp6
-rw-r--r--lib/Lex/PPCaching.cpp2
-rw-r--r--lib/Lex/PPDirectives.cpp140
-rw-r--r--lib/Lex/PPExpressions.cpp56
-rw-r--r--lib/Lex/PPLexerChange.cpp64
-rw-r--r--lib/Lex/PPMacroExpansion.cpp182
-rw-r--r--lib/Lex/PTHLexer.cpp19
-rw-r--r--lib/Lex/Pragma.cpp95
-rw-r--r--lib/Lex/PreprocessingRecord.cpp283
-rw-r--r--lib/Lex/Preprocessor.cpp268
-rw-r--r--lib/Lex/PreprocessorLexer.cpp8
-rw-r--r--lib/Lex/ScratchBuffer.cpp2
-rw-r--r--lib/Lex/TokenConcatenation.cpp72
-rw-r--r--lib/Lex/TokenLexer.cpp219
20 files changed, 1983 insertions, 750 deletions
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp
index e102a6da608c..0cb564c222d5 100644
--- a/lib/Lex/HeaderMap.cpp
+++ b/lib/Lex/HeaderMap.cpp
@@ -57,7 +57,7 @@ struct HMapHeader {
/// HashHMapKey - This is the 'well known' hash function required by the file
/// format, used to look up keys in the hash table. The hash table uses simple
/// linear probing based on this function.
-static inline unsigned HashHMapKey(llvm::StringRef Str) {
+static inline unsigned HashHMapKey(StringRef Str) {
unsigned Result = 0;
const char *S = Str.begin(), *End = Str.end();
@@ -200,7 +200,7 @@ void HeaderMap::dump() const {
/// LookupFile - Check to see if the specified relative filename is located in
/// this HeaderMap. If so, open it and return its FileEntry.
const FileEntry *HeaderMap::LookupFile(
- llvm::StringRef Filename, FileManager &FM) const {
+ StringRef Filename, FileManager &FM) const {
const HMapHeader &Hdr = getHeader();
unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index 86ab9564a235..931145a8d655 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Capacity.h"
#include <cstdio>
using namespace clang;
@@ -97,6 +98,60 @@ const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
return 0;
}
+const FileEntry *HeaderSearch::lookupModule(StringRef ModuleName,
+ std::string *ModuleFileName,
+ std::string *UmbrellaHeader) {
+ // If we don't have a module cache path, we can't do anything.
+ if (ModuleCachePath.empty()) {
+ if (ModuleFileName)
+ ModuleFileName->clear();
+ return 0;
+ }
+
+ // Try to find the module path.
+ llvm::SmallString<256> FileName(ModuleCachePath);
+ llvm::sys::path::append(FileName, ModuleName + ".pcm");
+ if (ModuleFileName)
+ *ModuleFileName = FileName.str();
+
+ if (const FileEntry *ModuleFile
+ = getFileMgr().getFile(FileName, /*OpenFile=*/false,
+ /*CacheFailure=*/false))
+ return ModuleFile;
+
+ // We didn't find the module. If we're not supposed to look for an
+ // umbrella header, this is the end of the road.
+ if (!UmbrellaHeader)
+ return 0;
+
+ // Look in each of the framework directories for an umbrella header with
+ // the same name as the module.
+ // FIXME: We need a way for non-frameworks to provide umbrella headers.
+ llvm::SmallString<128> UmbrellaHeaderName;
+ UmbrellaHeaderName = ModuleName;
+ UmbrellaHeaderName += '/';
+ UmbrellaHeaderName += ModuleName;
+ UmbrellaHeaderName += ".h";
+ for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
+ // Skip non-framework include paths
+ if (!SearchDirs[Idx].isFramework())
+ continue;
+
+ // Look for the umbrella header in this directory.
+ if (const FileEntry *HeaderFile
+ = SearchDirs[Idx].LookupFile(UmbrellaHeaderName, *this, 0, 0,
+ StringRef(), 0)) {
+ *UmbrellaHeader = HeaderFile->getName();
+ return 0;
+ }
+ }
+
+ // We did not find an umbrella header. Clear out the UmbrellaHeader pointee
+ // so our caller knows that we failed.
+ UmbrellaHeader->clear();
+ return 0;
+}
+
//===----------------------------------------------------------------------===//
// File lookup within a DirectoryLookup scope
//===----------------------------------------------------------------------===//
@@ -116,17 +171,19 @@ const char *DirectoryLookup::getName() const {
/// LookupFile - Lookup the specified file in this search path, returning it
/// if it exists or returning null if not.
const FileEntry *DirectoryLookup::LookupFile(
- llvm::StringRef Filename,
+ StringRef Filename,
HeaderSearch &HS,
- llvm::SmallVectorImpl<char> *SearchPath,
- llvm::SmallVectorImpl<char> *RelativePath) const {
+ SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath,
+ StringRef BuildingModule,
+ StringRef *SuggestedModule) const {
llvm::SmallString<1024> TmpDir;
if (isNormalDir()) {
// Concatenate the requested file onto the directory.
TmpDir = getDir()->getName();
llvm::sys::path::append(TmpDir, Filename);
if (SearchPath != NULL) {
- llvm::StringRef SearchPathRef(getDir()->getName());
+ StringRef SearchPathRef(getDir()->getName());
SearchPath->clear();
SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
}
@@ -138,14 +195,15 @@ const FileEntry *DirectoryLookup::LookupFile(
}
if (isFramework())
- return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath);
+ return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath,
+ BuildingModule, SuggestedModule);
assert(isHeaderMap() && "Unknown directory lookup");
const FileEntry * const Result = getHeaderMap()->LookupFile(
Filename, HS.getFileMgr());
if (Result) {
if (SearchPath != NULL) {
- llvm::StringRef SearchPathRef(getName());
+ StringRef SearchPathRef(getName());
SearchPath->clear();
SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
}
@@ -161,15 +219,18 @@ const FileEntry *DirectoryLookup::LookupFile(
/// DoFrameworkLookup - Do a lookup of the specified file in the current
/// DirectoryLookup, which is a framework directory.
const FileEntry *DirectoryLookup::DoFrameworkLookup(
- llvm::StringRef Filename,
+ StringRef Filename,
HeaderSearch &HS,
- llvm::SmallVectorImpl<char> *SearchPath,
- llvm::SmallVectorImpl<char> *RelativePath) const {
+ SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath,
+ StringRef BuildingModule,
+ StringRef *SuggestedModule) const
+{
FileManager &FileMgr = HS.getFileMgr();
// Framework names must have a '/' in the filename.
size_t SlashPos = Filename.find('/');
- if (SlashPos == llvm::StringRef::npos) return 0;
+ if (SlashPos == StringRef::npos) return 0;
// Find out if this is the home for the specified framework, by checking
// HeaderSearch. Possible answer are yes/no and unknown.
@@ -226,9 +287,16 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1);
}
+ /// Determine whether this is the module we're building or not.
+ bool AutomaticImport = SuggestedModule &&
+ (BuildingModule != StringRef(Filename.begin(), SlashPos)) &&
+ !Filename.substr(SlashPos + 1).startswith("..");
+
FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
- /*openFile=*/true)) {
+ /*openFile=*/!AutomaticImport)) {
+ if (AutomaticImport)
+ *SuggestedModule = StringRef(Filename.begin(), SlashPos);
return FE;
}
@@ -240,7 +308,11 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
SearchPath->insert(SearchPath->begin()+OrigSize, Private,
Private+strlen(Private));
- return FileMgr.getFile(FrameworkName.str(), /*openFile=*/true);
+ const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),
+ /*openFile=*/!AutomaticImport);
+ if (FE && AutomaticImport)
+ *SuggestedModule = StringRef(Filename.begin(), SlashPos);
+ return FE;
}
@@ -255,13 +327,18 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
/// non-null, indicates where the #including file is, in case a relative search
/// is needed.
const FileEntry *HeaderSearch::LookupFile(
- llvm::StringRef Filename,
+ StringRef Filename,
bool isAngled,
const DirectoryLookup *FromDir,
const DirectoryLookup *&CurDir,
const FileEntry *CurFileEnt,
- llvm::SmallVectorImpl<char> *SearchPath,
- llvm::SmallVectorImpl<char> *RelativePath) {
+ SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath,
+ StringRef *SuggestedModule)
+{
+ if (SuggestedModule)
+ *SuggestedModule = StringRef();
+
// If 'Filename' is absolute, check to see if it exists and no searching.
if (llvm::sys::path::is_absolute(Filename)) {
CurDir = 0;
@@ -279,7 +356,7 @@ const FileEntry *HeaderSearch::LookupFile(
return FileMgr.getFile(Filename, /*openFile=*/true);
}
- // Step #0, unless disabled, check to see if the file is in the #includer's
+ // Unless disabled, check to see if the file is in the #includer's
// directory. This has to be based on CurFileEnt, not CurDir, because
// CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and
// a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h".
@@ -301,7 +378,7 @@ const FileEntry *HeaderSearch::LookupFile(
unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo;
getFileInfo(FE).DirInfo = DirInfo;
if (SearchPath != NULL) {
- llvm::StringRef SearchPathRef(CurFileEnt->getDir()->getName());
+ StringRef SearchPathRef(CurFileEnt->getDir()->getName());
SearchPath->clear();
SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
}
@@ -346,19 +423,56 @@ const FileEntry *HeaderSearch::LookupFile(
// Check each directory in sequence to see if it contains this file.
for (; i != SearchDirs.size(); ++i) {
const FileEntry *FE =
- SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath);
+ SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath,
+ BuildingModule, SuggestedModule);
if (!FE) continue;
CurDir = &SearchDirs[i];
// This file is a system header or C++ unfriendly if the dir is.
- getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic();
-
+ HeaderFileInfo &HFI = getFileInfo(FE);
+ HFI.DirInfo = CurDir->getDirCharacteristic();
+
+ // If this file is found in a header map and uses the framework style of
+ // includes, then this header is part of a framework we're building.
+ if (CurDir->isIndexHeaderMap()) {
+ size_t SlashPos = Filename.find('/');
+ if (SlashPos != StringRef::npos) {
+ HFI.IndexHeaderMapHeader = 1;
+ HFI.Framework = getUniqueFrameworkName(StringRef(Filename.begin(),
+ SlashPos));
+ }
+ }
+
// Remember this location for the next lookup we do.
CacheLookup.second = i;
return FE;
}
+ // If we are including a file with a quoted include "foo.h" from inside
+ // a header in a framework that is currently being built, and we couldn't
+ // resolve "foo.h" any other way, change the include to <Foo/foo.h>, where
+ // "Foo" is the name of the framework in which the including header was found.
+ if (CurFileEnt && !isAngled && Filename.find('/') == StringRef::npos) {
+ HeaderFileInfo &IncludingHFI = getFileInfo(CurFileEnt);
+ if (IncludingHFI.IndexHeaderMapHeader) {
+ llvm::SmallString<128> ScratchFilename;
+ ScratchFilename += IncludingHFI.Framework;
+ ScratchFilename += '/';
+ ScratchFilename += Filename;
+
+ const FileEntry *Result = LookupFile(ScratchFilename, /*isAngled=*/true,
+ FromDir, CurDir, CurFileEnt,
+ SearchPath, RelativePath,
+ SuggestedModule);
+ std::pair<unsigned, unsigned> &CacheLookup
+ = LookupFileCache.GetOrCreateValue(Filename).getValue();
+ CacheLookup.second
+ = LookupFileCache.GetOrCreateValue(ScratchFilename).getValue().second;
+ return Result;
+ }
+ }
+
// Otherwise, didn't find it. Remember we didn't find this.
CacheLookup.second = SearchDirs.size();
return 0;
@@ -370,15 +484,15 @@ const FileEntry *HeaderSearch::LookupFile(
/// is a subframework within Carbon.framework. If so, return the FileEntry
/// for the designated file, otherwise return null.
const FileEntry *HeaderSearch::
-LookupSubframeworkHeader(llvm::StringRef Filename,
+LookupSubframeworkHeader(StringRef Filename,
const FileEntry *ContextFileEnt,
- llvm::SmallVectorImpl<char> *SearchPath,
- llvm::SmallVectorImpl<char> *RelativePath) {
+ SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath) {
assert(ContextFileEnt && "No context file?");
// Framework names must have a '/' in the filename. Find it.
size_t SlashPos = Filename.find('/');
- if (SlashPos == llvm::StringRef::npos) return 0;
+ if (SlashPos == StringRef::npos) return 0;
// Look up the base framework name of the ContextFileEnt.
const char *ContextName = ContextFileEnt->getName();
@@ -466,7 +580,31 @@ LookupSubframeworkHeader(llvm::StringRef Filename,
// File Info Management.
//===----------------------------------------------------------------------===//
+/// \brief Merge the header file info provided by \p OtherHFI into the current
+/// header file info (\p HFI)
+static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
+ const HeaderFileInfo &OtherHFI) {
+ HFI.isImport |= OtherHFI.isImport;
+ HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
+ HFI.NumIncludes += OtherHFI.NumIncludes;
+
+ if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
+ HFI.ControllingMacro = OtherHFI.ControllingMacro;
+ HFI.ControllingMacroID = OtherHFI.ControllingMacroID;
+ }
+
+ if (OtherHFI.External) {
+ HFI.DirInfo = OtherHFI.DirInfo;
+ HFI.External = OtherHFI.External;
+ HFI.IndexHeaderMapHeader = OtherHFI.IndexHeaderMapHeader;
+ }
+ if (HFI.Framework.empty())
+ HFI.Framework = OtherHFI.Framework;
+
+ HFI.Resolved = true;
+}
+
/// getFileInfo - Return the HeaderFileInfo structure for the specified
/// FileEntry.
HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
@@ -474,10 +612,8 @@ HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
FileInfo.resize(FE->getUID()+1);
HeaderFileInfo &HFI = FileInfo[FE->getUID()];
- if (ExternalSource && !HFI.Resolved) {
- HFI = ExternalSource->GetHeaderFileInfo(FE);
- HFI.Resolved = true;
- }
+ if (ExternalSource && !HFI.Resolved)
+ mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(FE));
return HFI;
}
@@ -488,10 +624,8 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {
// Resolve header file info from the external source, if needed.
HeaderFileInfo &HFI = FileInfo[File->getUID()];
- if (ExternalSource && !HFI.Resolved) {
- HFI = ExternalSource->GetHeaderFileInfo(File);
- HFI.Resolved = true;
- }
+ if (ExternalSource && !HFI.Resolved)
+ mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(File));
return HFI.isPragmaOnce || HFI.ControllingMacro || HFI.ControllingMacroID;
}
@@ -542,4 +676,14 @@ bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){
return true;
}
+size_t HeaderSearch::getTotalMemory() const {
+ return SearchDirs.capacity()
+ + llvm::capacity_in_bytes(FileInfo)
+ + llvm::capacity_in_bytes(HeaderMaps)
+ + LookupFileCache.getAllocator().getTotalMemory()
+ + FrameworkMap.getAllocator().getTotalMemory();
+}
+StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
+ return FrameworkNames.GetOrCreateValue(Framework).getKey();
+}
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a28b8f6e7b9f..a98d889dbc98 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -32,7 +32,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <cctype>
+#include <cstring>
using namespace clang;
static void InitCharacterInfo();
@@ -76,7 +76,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
// skip the UTF-8 BOM if it's present.
if (BufferStart == BufferPtr) {
// Determine the size of the BOM.
- llvm::StringRef Buf(BufferStart, BufferEnd - BufferStart);
+ StringRef Buf(BufferStart, BufferEnd - BufferStart);
size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
.StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
.Default(0);
@@ -86,7 +86,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
}
Is_PragmaLexer = false;
- IsInConflictMarker = false;
+ CurrentConflictMarkerState = CMK_None;
// Start of the file is a start of line.
IsAtStartOfLine = true;
@@ -187,9 +187,9 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
// Set the SourceLocation with the remapping information. This ensures that
// GetMappedTokenLoc will remap the tokens as they are lexed.
- L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),
- ExpansionLocStart,
- ExpansionLocEnd, TokLen);
+ L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
+ ExpansionLocStart,
+ ExpansionLocEnd, TokLen);
// Ensure that the lexer thinks it is inside a directive, so that end \n will
// return an EOD token.
@@ -217,7 +217,7 @@ std::string Lexer::Stringify(const std::string &Str, bool Charify) {
/// Stringify - Convert the specified string into a C string by escaping '\'
/// and " characters. This does not add surrounding ""'s to the string.
-void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
+void Lexer::Stringify(SmallVectorImpl<char> &Str) {
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
if (Str[i] == '\\' || Str[i] == '"') {
Str.insert(Str.begin()+i, '\\');
@@ -235,8 +235,8 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
-llvm::StringRef Lexer::getSpelling(SourceLocation loc,
- llvm::SmallVectorImpl<char> &buffer,
+StringRef Lexer::getSpelling(SourceLocation loc,
+ SmallVectorImpl<char> &buffer,
const SourceManager &SM,
const LangOptions &options,
bool *invalid) {
@@ -245,10 +245,10 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc,
// Try to the load the file buffer.
bool invalidTemp = false;
- llvm::StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
+ StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
if (invalidTemp) {
if (invalid) *invalid = true;
- return llvm::StringRef();
+ return StringRef();
}
const char *tokenBegin = file.data() + locInfo.second;
@@ -263,7 +263,7 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc,
// Common case: no need for cleaning.
if (!token.needsCleaning())
- return llvm::StringRef(tokenBegin, length);
+ return StringRef(tokenBegin, length);
// Hard case, we need to relex the characters into the string.
buffer.clear();
@@ -275,7 +275,7 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc,
ti += charSize;
}
- return llvm::StringRef(buffer.data(), buffer.size());
+ return StringRef(buffer.data(), buffer.size());
}
/// getSpelling() - Return the 'spelling' of this token. The spelling of a
@@ -394,10 +394,10 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
// If this comes from a macro expansion, we really do want the macro name, not
// the token this macro expanded to.
- Loc = SM.getInstantiationLoc(Loc);
+ Loc = SM.getExpansionLoc(Loc);
std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
bool Invalid = false;
- llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
if (Invalid)
return 0;
@@ -415,15 +415,16 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
return TheTok.getLength();
}
-SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
- const SourceManager &SM,
- const LangOptions &LangOpts) {
+static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ assert(Loc.isFileID());
std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
if (LocInfo.first.isInvalid())
return Loc;
bool Invalid = false;
- llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
if (Invalid)
return Loc;
@@ -448,7 +449,7 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
}
// Create a lexer starting at the beginning of this token.
- SourceLocation LexerStartLoc = Loc.getFileLocWithOffset(-LocInfo.second);
+ SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
TheLexer.SetCommentRetentionState(true);
@@ -474,6 +475,25 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
return Loc;
}
+SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ if (Loc.isFileID())
+ return getBeginningOfFileToken(Loc, SM, LangOpts);
+
+ if (!SM.isMacroArgExpansion(Loc))
+ return Loc;
+
+ SourceLocation FileLoc = SM.getSpellingLoc(Loc);
+ SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
+ std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
+ std::pair<FileID, unsigned> BeginFileLocInfo= SM.getDecomposedLoc(BeginFileLoc);
+ assert(FileLocInfo.first == BeginFileLocInfo.first &&
+ FileLocInfo.second >= BeginFileLocInfo.second);
+ return Loc.getLocWithOffset(SM.getDecomposedLoc(BeginFileLoc).second -
+ SM.getDecomposedLoc(FileLoc).second);
+}
+
namespace {
enum PreambleDirectiveKind {
PDK_Skipped,
@@ -484,21 +504,36 @@ namespace {
}
std::pair<unsigned, bool>
-Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
+Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
+ const LangOptions &Features, unsigned MaxLines) {
// Create a lexer starting at the beginning of the file. Note that we use a
// "fake" file source location at offset 1 so that the lexer will track our
// position within the file.
const unsigned StartOffset = 1;
SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
- LangOptions LangOpts;
- Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
+ Lexer TheLexer(StartLoc, Features, Buffer->getBufferStart(),
Buffer->getBufferStart(), Buffer->getBufferEnd());
bool InPreprocessorDirective = false;
Token TheTok;
Token IfStartTok;
unsigned IfCount = 0;
- unsigned Line = 0;
+
+ unsigned MaxLineOffset = 0;
+ if (MaxLines) {
+ const char *CurPtr = Buffer->getBufferStart();
+ unsigned CurLine = 0;
+ while (CurPtr != Buffer->getBufferEnd()) {
+ char ch = *CurPtr++;
+ if (ch == '\n') {
+ ++CurLine;
+ if (CurLine == MaxLines)
+ break;
+ }
+ }
+ if (CurPtr != Buffer->getBufferEnd())
+ MaxLineOffset = CurPtr - Buffer->getBufferStart();
+ }
do {
TheLexer.LexFromRawLexer(TheTok);
@@ -522,11 +557,11 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
// Keep track of the # of lines in the preamble.
if (TheTok.isAtStartOfLine()) {
- ++Line;
+ unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;
// If we were asked to limit the number of lines in the preamble,
// and we're about to exceed that limit, we're done.
- if (MaxLines && Line >= MaxLines)
+ if (MaxLineOffset && TokOffset >= MaxLineOffset)
break;
}
@@ -539,12 +574,12 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
Token HashTok = TheTok;
InPreprocessorDirective = true;
- // Figure out which direective this is. Since we're lexing raw tokens,
+ // Figure out which directive this is. Since we're lexing raw tokens,
// we don't have an identifier table available. Instead, just look at
// the raw identifier to recognize and categorize preprocessor directives.
TheLexer.LexFromRawLexer(TheTok);
if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
- llvm::StringRef Keyword(TheTok.getRawIdentifierData(),
+ StringRef Keyword(TheTok.getRawIdentifierData(),
TheTok.getLength());
PreambleDirectiveKind PDK
= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
@@ -638,7 +673,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
// chars, this method is extremely fast.
while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
if (CharNo == 0)
- return TokStart.getFileLocWithOffset(PhysOffset);
+ return TokStart.getLocWithOffset(PhysOffset);
++TokPtr, --CharNo, ++PhysOffset;
}
@@ -658,7 +693,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
- return TokStart.getFileLocWithOffset(PhysOffset);
+ return TokStart.getLocWithOffset(PhysOffset);
}
/// \brief Computes the source location just past the end of the
@@ -687,7 +722,7 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
return SourceLocation(); // Points inside the macro expansion.
// Continue and find the location just after the macro expansion.
- Loc = SM.getInstantiationRange(Loc).second;
+ Loc = SM.getExpansionRange(Loc).second;
}
unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);
@@ -696,14 +731,14 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
else
return Loc;
- return Loc.getFileLocWithOffset(Len);
+ return Loc.getLocWithOffset(Len);
}
/// \brief Returns true if the given MacroID location points at the first
/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
- const SourceManager &SM,
- const LangOptions &LangOpts) {
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc);
@@ -713,8 +748,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
return false; // Does not point at the start of token.
SourceLocation expansionLoc =
- SM.getSLocEntry(infoLoc.first)
- .getInstantiation().getInstantiationLocStart();
+ SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart();
if (expansionLoc.isFileID())
return true; // No other macro expansions, this is the first.
@@ -734,17 +768,15 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
return false;
FileID FID = SM.getFileID(loc);
- SourceLocation afterLoc = loc.getFileLocWithOffset(tokLen+1);
- if (!SM.isBeforeInSourceLocationOffset(afterLoc, SM.getNextOffset()))
- return true; // We got past the last FileID, this points to the last token.
+ SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1);
+ if (SM.isInFileID(afterLoc, FID))
+ return false; // Still in the same FileID, does not point to the last token.
// FIXME: If the token comes from the macro token paste operator ('##')
// or the stringify operator ('#') this function will always return false;
- if (FID == SM.getFileID(afterLoc))
- return false; // Still in the same FileID, does not point to the last token.
-
+
SourceLocation expansionLoc =
- SM.getSLocEntry(FID).getInstantiation().getInstantiationLocEnd();
+ SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd();
if (expansionLoc.isFileID())
return true; // No other macro expansions.
@@ -761,7 +793,8 @@ enum {
CHAR_LETTER = 0x04, // a-z,A-Z
CHAR_NUMBER = 0x08, // 0-9
CHAR_UNDER = 0x10, // _
- CHAR_PERIOD = 0x20 // .
+ CHAR_PERIOD = 0x20, // .
+ CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"'
};
// Statically initialize CharInfo table based on ASCII character set
@@ -786,20 +819,20 @@ static const unsigned char CharInfo[256] =
0 , 0 , 0 , 0 ,
//32 SP 33 ! 34 " 35 #
//36 $ 37 % 38 & 39 '
- CHAR_HORZ_WS, 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+ 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
//40 ( 41 ) 42 * 43 +
//44 , 45 - 46 . 47 /
- 0 , 0 , 0 , 0 ,
- 0 , 0 , CHAR_PERIOD , 0 ,
+ 0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL ,
+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
//48 0 49 1 50 2 51 3
//52 4 53 5 54 6 55 7
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
//56 8 57 9 58 : 59 ;
//60 < 61 = 62 > 63 ?
- CHAR_NUMBER , CHAR_NUMBER , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
//64 @ 65 A 66 B 67 C
//68 D 69 E 70 F 71 G
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
@@ -814,8 +847,8 @@ static const unsigned char CharInfo[256] =
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
//88 X 89 Y 90 Z 91 [
//92 \ 93 ] 94 ^ 95 _
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
- 0 , 0 , 0 , CHAR_UNDER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
+ 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
//96 ` 97 a 98 b 99 c
//100 d 101 e 102 f 103 g
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
@@ -829,9 +862,9 @@ static const unsigned char CharInfo[256] =
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
//120 x 121 y 122 z 123 {
-//124 | 125 } 126 ~ 127 DEL
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
- 0 , 0 , 0 , 0
+//124 | 125 } 126 ~ 127 DEL
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
};
static void InitCharacterInfo() {
@@ -869,6 +902,12 @@ static inline bool isHorizontalWhitespace(unsigned char c) {
return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
}
+/// isVerticalWhitespace - Return true if this character is vertical
+/// whitespace: '\n', '\r'. Note that this returns false for '\0'.
+static inline bool isVerticalWhitespace(unsigned char c) {
+ return (CharInfo[c] & CHAR_VERT_WS) ? true : false;
+}
+
/// isWhitespace - Return true if this character is horizontal or vertical
/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
/// for '\0'.
@@ -883,6 +922,14 @@ static inline bool isNumberBody(unsigned char c) {
true : false;
}
+/// isRawStringDelimBody - Return true if this is the body character of a
+/// raw string delimiter.
+static inline bool isRawStringDelimBody(unsigned char c) {
+ return (CharInfo[c] &
+ (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?
+ true : false;
+}
+
//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
@@ -907,14 +954,14 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
// Create a new SLoc which is expanded from Expansion(FileLoc) but whose
// characters come from spelling(FileLoc)+Offset.
SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);
- SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo);
+ SpellingLoc = SpellingLoc.getLocWithOffset(CharNo);
// Figure out the expansion loc range, which is the range covered by the
// original _Pragma(...) sequence.
std::pair<SourceLocation,SourceLocation> II =
- SM.getImmediateInstantiationRange(FileLoc);
+ SM.getImmediateExpansionRange(FileLoc);
- return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen);
+ return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
}
/// getSourceLocation - Return a source location identifier for the specified
@@ -928,7 +975,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc,
// the file id from FileLoc with the offset specified.
unsigned CharNo = Loc-BufferStart;
if (FileLoc.isFileID())
- return FileLoc.getFileLocWithOffset(CharNo);
+ return FileLoc.getLocWithOffset(CharNo);
// Otherwise, this is the _Pragma lexer case, which pretends that all of the
// tokens are lexed from where the _Pragma was defined.
@@ -978,7 +1025,7 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) {
}
if (!L->isLexingRawMode())
- L->Diag(CP-2, diag::trigraph_converted) << llvm::StringRef(&Res, 1);
+ L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
return Res;
}
@@ -1028,6 +1075,59 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
}
}
+/// \brief Checks that the given token is the first token that occurs after the
+/// given location (this excludes comments and whitespace). Returns the location
+/// immediately after the specified token. If the token is not found or the
+/// location is inside a macro, the returned source location will be invalid.
+SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
+ tok::TokenKind TKind,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ bool SkipTrailingWhitespaceAndNewLine) {
+ if (Loc.isMacroID()) {
+ if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts))
+ return SourceLocation();
+ Loc = SM.getExpansionRange(Loc).second;
+ }
+ Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
+
+ // Break down the source location.
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+
+ // Try to load the file buffer.
+ bool InvalidTemp = false;
+ llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
+ if (InvalidTemp)
+ return SourceLocation();
+
+ const char *TokenBegin = File.data() + LocInfo.second;
+
+ // Lex from the start of the given location.
+ Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
+ TokenBegin, File.end());
+ // Find the token.
+ Token Tok;
+ lexer.LexFromRawLexer(Tok);
+ if (Tok.isNot(TKind))
+ return SourceLocation();
+ SourceLocation TokenLoc = Tok.getLocation();
+
+ // Calculate how much whitespace needs to be skipped if any.
+ unsigned NumWhitespaceChars = 0;
+ if (SkipTrailingWhitespaceAndNewLine) {
+ const char *TokenEnd = SM.getCharacterData(TokenLoc) +
+ Tok.getLength();
+ unsigned char C = *TokenEnd;
+ while (isHorizontalWhitespace(C)) {
+ C = *(++TokenEnd);
+ NumWhitespaceChars++;
+ }
+ if (isVerticalWhitespace(C))
+ NumWhitespaceChars++;
+ }
+
+ return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars);
+}
/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
/// get its size, and return it. This is tricky in several cases:
@@ -1191,6 +1291,7 @@ FinishIdentifier:
// preprocessor, which may macro expand it or something.
if (II->isHandleIdentifierCase())
PP->HandleIdentifier(Result);
+
return;
}
@@ -1252,13 +1353,12 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
// If we are in Microsoft mode, don't continue if the constant is hex.
// For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
- if (!Features.Microsoft || !isHexaLiteral(BufferPtr, Features))
+ if (!Features.MicrosoftExt || !isHexaLiteral(BufferPtr, Features))
return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
}
// If we have a hex FP constant, continue.
- if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p') &&
- !Features.CPlusPlus0x)
+ if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))
return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
// Update the location of token as well as BufferPtr.
@@ -1268,10 +1368,17 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
-/// either " or L".
-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
+/// either " or L" or u8" or u" or U".
+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
const char *NulCharacter = 0; // Does this string contain the \0 character?
+ if (!isLexingRawMode() &&
+ (Kind == tok::utf8_string_literal ||
+ Kind == tok::utf16_string_literal ||
+ Kind == tok::utf32_string_literal))
+ Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);
+
char C = getAndAdvanceChar(CurPtr, Result);
while (C != '"') {
// Skip escaped characters. Escaped newlines will already be processed by
@@ -1281,16 +1388,21 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc))
- PP->CodeCompleteNaturalLanguage();
- else if (!isLexingRawMode() && !Features.AsmPreprocessor)
+ if (!isLexingRawMode() && !Features.AsmPreprocessor)
Diag(BufferPtr, diag::warn_unterminated_string);
FormTokenWithChars(Result, CurPtr-1, tok::unknown);
return;
}
- if (C == 0)
+ if (C == 0) {
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return cutOffLexing();
+ }
+
NulCharacter = CurPtr-1;
+ }
C = getAndAdvanceChar(CurPtr, Result);
}
@@ -1300,8 +1412,82 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
// Update the location of the token as well as the BufferPtr instance var.
const char *TokStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr,
- Wide ? tok::wide_string_literal : tok::string_literal);
+ FormTokenWithChars(Result, CurPtr, Kind);
+ Result.setLiteralData(TokStart);
+}
+
+/// LexRawStringLiteral - Lex the remainder of a raw string literal, after
+/// having lexed R", LR", u8R", uR", or UR".
+void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
+ // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
+ // Between the initial and final double quote characters of the raw string,
+ // any transformations performed in phases 1 and 2 (trigraphs,
+ // universal-character-names, and line splicing) are reverted.
+
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
+
+ unsigned PrefixLen = 0;
+
+ while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
+ ++PrefixLen;
+
+ // If the last character was not a '(', then we didn't lex a valid delimiter.
+ if (CurPtr[PrefixLen] != '(') {
+ if (!isLexingRawMode()) {
+ const char *PrefixEnd = &CurPtr[PrefixLen];
+ if (PrefixLen == 16) {
+ Diag(PrefixEnd, diag::err_raw_delim_too_long);
+ } else {
+ Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
+ << StringRef(PrefixEnd, 1);
+ }
+ }
+
+ // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
+ // it's possible the '"' was intended to be part of the raw string, but
+ // there's not much we can do about that.
+ while (1) {
+ char C = *CurPtr++;
+
+ if (C == '"')
+ break;
+ if (C == 0 && CurPtr-1 == BufferEnd) {
+ --CurPtr;
+ break;
+ }
+ }
+
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return;
+ }
+
+ // Save prefix and move CurPtr past it
+ const char *Prefix = CurPtr;
+ CurPtr += PrefixLen + 1; // skip over prefix and '('
+
+ while (1) {
+ char C = *CurPtr++;
+
+ if (C == ')') {
+ // Check for prefix match and closing quote.
+ if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {
+ CurPtr += PrefixLen + 1; // skip over prefix and '"'
+ break;
+ }
+ } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::err_unterminated_raw_string)
+ << StringRef(Prefix, PrefixLen);
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return;
+ }
+ }
+
+ // Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, Kind);
Result.setLiteralData(TokStart);
}
@@ -1317,7 +1503,8 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
// Skip the escaped character.
C = getAndAdvanceChar(CurPtr, Result);
} else if (C == '\n' || C == '\r' || // Newline.
- (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ (C == 0 && (CurPtr-1 == BufferEnd || // End of file.
+ isCodeCompletionPoint(CurPtr-1)))) {
// If the filename is unterminated, then it must just be a lone <
// character. Return this as such.
FormTokenWithChars(Result, AfterLessPos, tok::less);
@@ -1340,10 +1527,15 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
/// LexCharConstant - Lex the remainder of a character constant, after having
-/// lexed either ' or L'.
-void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
+/// lexed either ' or L' or u' or U'.
+void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
const char *NulCharacter = 0; // Does this character contain the \0 character?
+ if (!isLexingRawMode() &&
+ (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))
+ Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);
+
char C = getAndAdvanceChar(CurPtr, Result);
if (C == '\'') {
if (!isLexingRawMode() && !Features.AsmPreprocessor)
@@ -1360,13 +1552,17 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
C = getAndAdvanceChar(CurPtr, Result);
} else if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc))
- PP->CodeCompleteNaturalLanguage();
- else if (!isLexingRawMode() && !Features.AsmPreprocessor)
+ if (!isLexingRawMode() && !Features.AsmPreprocessor)
Diag(BufferPtr, diag::warn_unterminated_char);
FormTokenWithChars(Result, CurPtr-1, tok::unknown);
return;
} else if (C == 0) {
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return cutOffLexing();
+ }
+
NulCharacter = CurPtr-1;
}
C = getAndAdvanceChar(CurPtr, Result);
@@ -1378,7 +1574,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
// Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr, tok::char_constant);
+ FormTokenWithChars(Result, CurPtr, Kind);
Result.setLiteralData(TokStart);
}
@@ -1451,20 +1647,28 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
char C;
do {
C = *CurPtr;
- // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character.
- // If we find a \n character, scan backwards, checking to see if it's an
- // escaped newline, like we do for block comments.
-
// Skip over characters in the fast loop.
while (C != 0 && // Potentially EOF.
- C != '\\' && // Potentially escaped newline.
- C != '?' && // Potentially trigraph.
C != '\n' && C != '\r') // Newline or DOS-style newline.
C = *++CurPtr;
- // If this is a newline, we're done.
- if (C == '\n' || C == '\r')
- break; // Found the newline? Break out!
+ const char *NextLine = CurPtr;
+ if (C != 0) {
+ // We found a newline, see if it's escaped.
+ const char *EscapePtr = CurPtr-1;
+ while (isHorizontalWhitespace(*EscapePtr)) // Skip whitespace.
+ --EscapePtr;
+
+ if (*EscapePtr == '\\') // Escaped newline.
+ CurPtr = EscapePtr;
+ else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
+ EscapePtr[-2] == '?') // Trigraph-escaped newline.
+ CurPtr = EscapePtr-2;
+ else
+ break; // This is a newline, we're done.
+
+ C = *CurPtr;
+ }
// Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
// properly decode the character. Read it in raw mode to avoid emitting
@@ -1476,6 +1680,13 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
C = getAndAdvanceChar(CurPtr, Result);
LexingRawMode = OldRawMode;
+ // If we only read only one character, then no special handling is needed.
+ // We're done and can skip forward to the newline.
+ if (C != 0 && CurPtr == OldPtr+1) {
+ CurPtr = NextLine;
+ break;
+ }
+
// If the char that we finally got was a \n, then we must have had something
// like \<newline><newline>. We don't want to have consumed the second
// newline, we want CurPtr, to end up pointing to it down below.
@@ -1492,9 +1703,9 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
// Okay, we found a // comment that ends in a newline, if the next
// line is also a // comment, but has spaces, don't emit a diagnostic.
- if (isspace(C)) {
+ if (isWhitespace(C)) {
const char *ForwardPtr = CurPtr;
- while (isspace(*ForwardPtr)) // Skip whitespace.
+ while (isWhitespace(*ForwardPtr)) // Skip whitespace.
++ForwardPtr;
if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
break;
@@ -1507,12 +1718,16 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
}
if (CurPtr == BufferEnd+1) {
- if (PP && PP->isCodeCompletionFile(FileLoc))
- PP->CodeCompleteNaturalLanguage();
-
--CurPtr;
break;
}
+
+ if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ cutOffLexing();
+ return false;
+ }
+
} while (C != '\n' && C != '\r');
// Found but did not consume the newline. Notify comment handlers about the
@@ -1573,7 +1788,7 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
Result.setKind(tok::comment);
PP->CreateString(&Spelling[0], Spelling.size(), Result,
- Result.getLocation());
+ Result.getLocation(), Result.getLocation());
return true;
}
@@ -1667,8 +1882,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
unsigned char C = getCharAndSize(CurPtr, CharSize);
CurPtr += CharSize;
if (C == 0 && CurPtr == BufferEnd+1) {
- if (!isLexingRawMode() &&
- !PP->isCodeCompletionFile(FileLoc))
+ if (!isLexingRawMode())
Diag(BufferPtr, diag::err_unterminated_block_comment);
--CurPtr;
@@ -1691,7 +1905,10 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
while (1) {
// Skip over all non-interesting characters until we find end of buffer or a
// (probably ending) '/' character.
- if (CurPtr + 24 < BufferEnd) {
+ if (CurPtr + 24 < BufferEnd &&
+ // If there is a code-completion point avoid the fast scan because it
+ // doesn't check for '\0'.
+ !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) {
// While not aligned to a 16-byte boundary.
while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
C = *CurPtr++;
@@ -1751,9 +1968,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
Diag(CurPtr-1, diag::warn_nested_block_comment);
}
} else if (C == 0 && CurPtr == BufferEnd+1) {
- if (PP && PP->isCodeCompletionFile(FileLoc))
- PP->CodeCompleteNaturalLanguage();
- else if (!isLexingRawMode())
+ if (!isLexingRawMode())
Diag(BufferPtr, diag::err_unterminated_block_comment);
// Note: the user probably forgot a */. We could continue immediately
// after the /*, but this would involve lexing a lot of what really is the
@@ -1769,7 +1984,12 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
BufferPtr = CurPtr;
return false;
+ } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ cutOffLexing();
+ return false;
}
+
C = *CurPtr++;
}
@@ -1826,6 +2046,12 @@ std::string Lexer::ReadToEndOfLine() {
case 0: // Null.
// Found end of file?
if (CurPtr-1 != BufferEnd) {
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ cutOffLexing();
+ return Result;
+ }
+
// Nope, normal character, continue.
Result += Char;
break;
@@ -1840,8 +2066,8 @@ std::string Lexer::ReadToEndOfLine() {
// Next, lex the character, which should handle the EOD transition.
Lex(Tmp);
if (Tmp.is(tok::code_completion)) {
- if (PP && PP->getCodeCompletionHandler())
- PP->getCodeCompletionHandler()->CodeCompleteNaturalLanguage();
+ if (PP)
+ PP->CodeCompleteNaturalLanguage();
Lex(Tmp);
}
assert(Tmp.is(tok::eod) && "Unexpected token!");
@@ -1857,22 +2083,6 @@ std::string Lexer::ReadToEndOfLine() {
/// This returns true if Result contains a token, false if PP.Lex should be
/// called again.
bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
- // Check if we are performing code completion.
- if (PP && PP->isCodeCompletionFile(FileLoc)) {
- // We're at the end of the file, but we've been asked to consider the
- // end of the file to be a code-completion token. Return the
- // code-completion token.
- Result.startToken();
- FormTokenWithChars(Result, CurPtr, tok::code_completion);
-
- // Only do the eof -> code_completion translation once.
- PP->SetCodeCompletionPoint(0, 0, 0);
-
- // Silence any diagnostics that occur once we hit the code-completion point.
- PP->getDiagnostics().setSuppressAllDiagnostics(true);
- return true;
- }
-
// If we hit the end of the file while parsing a preprocessor directive,
// end the preprocessor directive first. The next token returned will
// then be the end of file.
@@ -1900,7 +2110,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
// If we are in a #if directive, emit an error.
while (!ConditionalStack.empty()) {
- if (!PP->isCodeCompletionFile(FileLoc))
+ if (PP->getCodeCompletionFileLoc() != FileLoc)
PP->Diag(ConditionalStack.back().IfLoc,
diag::err_pp_unterminated_conditional);
ConditionalStack.pop_back();
@@ -1951,15 +2161,18 @@ unsigned Lexer::isNextPPTokenLParen() {
}
/// FindConflictEnd - Find the end of a version control conflict marker.
-static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
- llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);
- size_t Pos = RestOfBuffer.find(">>>>>>>");
- while (Pos != llvm::StringRef::npos) {
+static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
+ ConflictMarkerKind CMK) {
+ const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>";
+ size_t TermLen = CMK == CMK_Perforce ? 5 : 7;
+ StringRef RestOfBuffer(CurPtr+TermLen, BufferEnd-CurPtr-TermLen);
+ size_t Pos = RestOfBuffer.find(Terminator);
+ while (Pos != StringRef::npos) {
// Must occur at start of line.
if (RestOfBuffer[Pos-1] != '\r' &&
RestOfBuffer[Pos-1] != '\n') {
- RestOfBuffer = RestOfBuffer.substr(Pos+7);
- Pos = RestOfBuffer.find(">>>>>>>");
+ RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
+ Pos = RestOfBuffer.find(Terminator);
continue;
}
return RestOfBuffer.data()+Pos;
@@ -1977,23 +2190,25 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;
- // Check to see if we have <<<<<<<.
- if (BufferEnd-CurPtr < 8 ||
- llvm::StringRef(CurPtr, 7) != "<<<<<<<")
+ // Check to see if we have <<<<<<< or >>>>.
+ if ((BufferEnd-CurPtr < 8 || StringRef(CurPtr, 7) != "<<<<<<<") &&
+ (BufferEnd-CurPtr < 6 || StringRef(CurPtr, 5) != ">>>> "))
return false;
// If we have a situation where we don't care about conflict markers, ignore
// it.
- if (IsInConflictMarker || isLexingRawMode())
+ if (CurrentConflictMarkerState || isLexingRawMode())
return false;
- // Check to see if there is a >>>>>>> somewhere in the buffer at the start of
- // a line to terminate this conflict marker.
- if (FindConflictEnd(CurPtr, BufferEnd)) {
+ ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;
+
+ // Check to see if there is an ending marker somewhere in the buffer at the
+ // start of a line to terminate this conflict marker.
+ if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {
// We found a match. We are really in a conflict marker.
// Diagnose this, and ignore to the end of line.
Diag(CurPtr, diag::err_conflict_marker);
- IsInConflictMarker = true;
+ CurrentConflictMarkerState = Kind;
// Skip ahead to the end of line. We know this exists because the
// end-of-conflict marker starts with \r or \n.
@@ -2010,10 +2225,10 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
}
-/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
-/// marker, then it is the end of a conflict marker. Handle it by ignoring up
-/// until the end of the line. This returns true if it is a conflict marker and
-/// false if not.
+/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
+/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
+/// is the end of a conflict marker. Handle it by ignoring up until the end of
+/// the line. This returns true if it is a conflict marker and false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&
@@ -2022,18 +2237,19 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// If we have a situation where we don't care about conflict markers, ignore
// it.
- if (!IsInConflictMarker || isLexingRawMode())
+ if (!CurrentConflictMarkerState || isLexingRawMode())
return false;
- // Check to see if we have the marker (7 characters in a row).
- for (unsigned i = 1; i != 7; ++i)
+ // Check to see if we have the marker (4 characters in a row).
+ for (unsigned i = 1; i != 4; ++i)
if (CurPtr[i] != CurPtr[0])
return false;
// If we do have it, search for the end of the conflict marker. This could
// fail if it got skipped with a '#if 0' or something. Note that CurPtr might
// be the end of conflict marker.
- if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
+ if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
+ CurrentConflictMarkerState)) {
CurPtr = End;
// Skip ahead to the end of line.
@@ -2043,13 +2259,22 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
BufferPtr = CurPtr;
// No longer in the conflict marker.
- IsInConflictMarker = false;
+ CurrentConflictMarkerState = CMK_None;
return true;
}
return false;
}
+bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
+ if (PP && PP->isCodeCompletionEnabled()) {
+ SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
+ return Loc == PP->getCodeCompletionLoc();
+ }
+
+ return false;
+}
+
/// LexTokenInternal - This implements a simple C family lexer. It is an
/// extremely performance critical piece of code. This assumes that the buffer
@@ -2102,6 +2327,14 @@ LexNextToken:
return PPCache->Lex(Result);
}
+ // Check if we are performing code completion.
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ // Return the code-completion token.
+ Result.startToken();
+ FormTokenWithChars(Result, CurPtr, tok::code_completion);
+ return;
+ }
+
if (!isLexingRawMode())
Diag(CurPtr-1, diag::null_in_file);
Result.setFlag(Token::LeadingSpace);
@@ -2112,7 +2345,7 @@ LexNextToken:
case 26: // DOS & CP/M EOF: "^Z".
// If we're in Microsoft extensions mode, treat this as end of file.
- if (Features.Microsoft) {
+ if (Features.MicrosoftExt) {
// Read the PP instance variable into an automatic variable, because
// LexEndOfFile will often delete 'this'.
Preprocessor *PPCache = PP;
@@ -2186,6 +2419,102 @@ LexNextToken:
MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);
+ case 'u': // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (Features.CPlusPlus0x) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // UTF-16 string literal
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf16_string_literal);
+
+ // UTF-16 character constant
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf16_char_constant);
+
+ // UTF-16 raw string literal
+ if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf16_string_literal);
+
+ if (Char == '8') {
+ char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
+
+ // UTF-8 string literal
+ if (Char2 == '"')
+ return LexStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_string_literal);
+
+ if (Char2 == 'R') {
+ unsigned SizeTmp3;
+ char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
+ // UTF-8 raw string literal
+ if (Char3 == '"') {
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ SizeTmp3, Result),
+ tok::utf8_string_literal);
+ }
+ }
+ }
+ }
+
+ // treat u like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
+ case 'U': // Identifier (Uber) or C++0x UTF-32 string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (Features.CPlusPlus0x) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // UTF-32 string literal
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf32_string_literal);
+
+ // UTF-32 character constant
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf32_char_constant);
+
+ // UTF-32 raw string literal
+ if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf32_string_literal);
+ }
+
+ // treat U like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
+ case 'R': // Identifier or C++0x raw string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (Features.CPlusPlus0x) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ if (Char == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::string_literal);
+ }
+
+ // treat R like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
@@ -2194,21 +2523,30 @@ LexNextToken:
// Wide string literal.
if (Char == '"')
return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
- true);
+ tok::wide_string_literal);
+
+ // Wide raw string literal.
+ if (Features.CPlusPlus0x && Char == 'R' &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::wide_string_literal);
// Wide character constant.
if (Char == '\'')
- return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::wide_char_constant);
// FALL THROUGH, treating L like the start of an identifier.
// C99 6.4.2: Identifiers.
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
- case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+ case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/
case 'V': case 'W': case 'X': case 'Y': case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
- case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
case 'v': case 'w': case 'x': case 'y': case 'z':
case '_':
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -2231,13 +2569,13 @@ LexNextToken:
case '\'':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexCharConstant(Result, CurPtr);
+ return LexCharConstant(Result, CurPtr, tok::char_constant);
// C99 6.4.5: String Literals.
case '"':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexStringLiteral(Result, CurPtr, false);
+ return LexStringLiteral(Result, CurPtr, tok::string_literal);
// C99 6.4.6: Punctuators.
case '?':
@@ -2396,7 +2734,7 @@ LexNextToken:
Kind = tok::hashhash; // '%:%:' -> '##'
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
- } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize
+ } else if (Char == '@' && Features.MicrosoftExt) {// %:@ -> #@ -> Charize
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
if (!isLexingRawMode())
Diag(BufferPtr, diag::charize_microsoft_ext);
@@ -2447,6 +2785,10 @@ LexNextToken:
// If this is actually a '<<<<<<<' version control conflict marker,
// recognize it as such and recover nicely.
goto LexNextToken;
+ } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) {
+ // If this is '<<<<' and we're in a Perforce-style conflict marker,
+ // ignore it.
+ goto LexNextToken;
} else if (Features.CUDA && After == '<') {
Kind = tok::lesslessless;
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -2470,6 +2812,8 @@ LexNextToken:
char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
if (After != ':' && After != '>') {
Kind = tok::less;
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
break;
}
}
@@ -2494,6 +2838,10 @@ LexNextToken:
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
Kind = tok::greatergreaterequal;
+ } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) {
+ // If this is actually a '>>>>' conflict marker, recognize it as such
+ // and recover nicely.
+ goto LexNextToken;
} else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
// If this is '>>>>>>>' and we're in a conflict marker, ignore it.
goto LexNextToken;
@@ -2552,7 +2900,7 @@ LexNextToken:
case '=':
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '=') {
- // If this is '=======' and we're in a conflict marker, ignore it.
+ // If this is '====' and we're in a conflict marker, ignore it.
if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
goto LexNextToken;
@@ -2570,7 +2918,7 @@ LexNextToken:
if (Char == '#') {
Kind = tok::hashhash;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize
+ } else if (Char == '@' && Features.MicrosoftExt) { // #@ -> Charize
Kind = tok::hashat;
if (!isLexingRawMode())
Diag(BufferPtr, diag::charize_microsoft_ext);
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 2c96c4d4ee24..70183fd1a0ea 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -16,8 +16,8 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace clang;
/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
@@ -29,12 +29,31 @@ static int HexDigitValue(char C) {
return -1;
}
+static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
+ switch (kind) {
+ default: llvm_unreachable("Unknown token type!");
+ case tok::char_constant:
+ case tok::string_literal:
+ case tok::utf8_string_literal:
+ return Target.getCharWidth();
+ case tok::wide_char_constant:
+ case tok::wide_string_literal:
+ return Target.getWCharWidth();
+ case tok::utf16_char_constant:
+ case tok::utf16_string_literal:
+ return Target.getChar16Width();
+ case tok::utf32_char_constant:
+ case tok::utf32_string_literal:
+ return Target.getChar32Width();
+ }
+}
+
/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
/// either a character or a string literal.
static unsigned ProcessCharEscape(const char *&ThisTokBuf,
const char *ThisTokEnd, bool &HadError,
- FullSourceLoc Loc, bool IsWide,
- Diagnostic *Diags, const TargetInfo &Target) {
+ FullSourceLoc Loc, unsigned CharWidth,
+ DiagnosticsEngine *Diags) {
// Skip the '\' char.
++ThisTokBuf;
@@ -99,9 +118,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
}
// See if any bits will be truncated when evaluated as a character.
- unsigned CharWidth =
- IsWide ? Target.getWCharWidth() : Target.getCharWidth();
-
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
Overflow = true;
ResultChar &= ~0U >> (32-CharWidth);
@@ -129,9 +145,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
// Check for overflow. Reject '\777', but not L'\777'.
- unsigned CharWidth =
- IsWide ? Target.getWCharWidth() : Target.getCharWidth();
-
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
if (Diags)
Diags->Report(Loc, diag::warn_octal_escape_too_large);
@@ -167,7 +180,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
/// return the UTF32.
static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
uint32_t &UcnVal, unsigned short &UcnLen,
- FullSourceLoc Loc, Diagnostic *Diags,
+ FullSourceLoc Loc, DiagnosticsEngine *Diags,
const LangOptions &Features) {
if (!Features.CPlusPlus && !Features.C99 && Diags)
Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89);
@@ -220,7 +233,8 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
/// we will likely rework our support for UCN's.
static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
char *&ResultBuf, bool &HadError,
- FullSourceLoc Loc, bool wide, Diagnostic *Diags,
+ FullSourceLoc Loc, unsigned CharByteWidth,
+ DiagnosticsEngine *Diags,
const LangOptions &Features) {
typedef uint32_t UTF32;
UTF32 UcnVal = 0;
@@ -231,19 +245,22 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
return;
}
- if (wide) {
- (void)UcnLen;
- assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
+ assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
+ "only character widths of 1, 2, or 4 bytes supported");
- if (!Features.ShortWChar) {
- // Note: our internal rep of wide char tokens is always little-endian.
- *ResultBuf++ = (UcnVal & 0x000000FF);
- *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
- *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
- *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
- return;
- }
+ (void)UcnLen;
+ assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
+ if (CharByteWidth == 4) {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ *ResultBuf++ = (UcnVal & 0x000000FF);
+ *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+ *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
+ *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+ return;
+ }
+
+ if (CharByteWidth == 2) {
// Convert to UTF16.
if (UcnVal < (UTF32)0xFFFF) {
*ResultBuf++ = (UcnVal & 0x000000FF);
@@ -262,6 +279,9 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
*ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
return;
}
+
+ assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
+
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
// The conversion below was inspired by:
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
@@ -371,7 +391,7 @@ NumericLiteralParser(const char *begin, const char *end,
// Done.
} else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
- diag::err_invalid_decimal_digit) << llvm::StringRef(s, 1);
+ diag::err_invalid_decimal_digit) << StringRef(s, 1);
hadError = true;
return;
} else if (*s == '.') {
@@ -434,7 +454,7 @@ NumericLiteralParser(const char *begin, const char *end,
continue; // Success.
case 'i':
case 'I':
- if (PP.getLangOptions().Microsoft) {
+ if (PP.getLangOptions().MicrosoftExt) {
if (isFPConstant || isLong || isLongLong) break;
// Allow i8, i16, i32, i64, and i128.
@@ -498,7 +518,7 @@ NumericLiteralParser(const char *begin, const char *end,
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
isFPConstant ? diag::err_invalid_suffix_float_constant :
diag::err_invalid_suffix_integer_constant)
- << llvm::StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
+ << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
hadError = true;
return;
}
@@ -528,7 +548,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
}
// A binary exponent can appear with or with a '.'. If dotted, the
// binary exponent is required.
- if ((*s == 'p' || *s == 'P') && !PP.getLangOptions().CPlusPlus0x) {
+ if (*s == 'p' || *s == 'P') {
const char *Exponent = s;
s++;
saw_exponent = true;
@@ -542,12 +562,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
}
s = first_non_digit;
- // In C++0x, we cannot support hexadecmial floating literals because
- // they conflict with user-defined literals, so we warn in previous
- // versions of C++ by default.
- if (PP.getLangOptions().CPlusPlus)
- PP.Diag(TokLoc, diag::ext_hexconstant_cplusplus);
- else if (!PP.getLangOptions().HexFloats)
+ if (!PP.getLangOptions().HexFloats)
PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
} else if (saw_period) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
@@ -569,7 +584,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// Done.
} else if (isxdigit(*s)) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
- diag::err_invalid_binary_digit) << llvm::StringRef(s, 1);
+ diag::err_invalid_binary_digit) << StringRef(s, 1);
hadError = true;
}
// Other suffixes will be diagnosed by the caller.
@@ -599,7 +614,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// the code is using an incorrect base.
if (isxdigit(*s) && *s != 'e' && *s != 'E') {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
- diag::err_invalid_octal_digit) << llvm::StringRef(s, 1);
+ diag::err_invalid_octal_digit) << StringRef(s, 1);
hadError = true;
return;
}
@@ -688,7 +703,6 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
llvm::APFloat::opStatus
NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
using llvm::APFloat;
- using llvm::StringRef;
unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
return Result.convertFromString(StringRef(ThisTokBegin, n),
@@ -696,14 +710,51 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
}
+/// character-literal: [C++0x lex.ccon]
+/// ' c-char-sequence '
+/// u' c-char-sequence '
+/// U' c-char-sequence '
+/// L' c-char-sequence '
+/// c-char-sequence:
+/// c-char
+/// c-char-sequence c-char
+/// c-char:
+/// any member of the source character set except the single-quote ',
+/// backslash \, or new-line character
+/// escape-sequence
+/// universal-character-name
+/// escape-sequence: [C++0x lex.ccon]
+/// simple-escape-sequence
+/// octal-escape-sequence
+/// hexadecimal-escape-sequence
+/// simple-escape-sequence:
+/// one of \' \" \? \\ \a \b \f \n \r \t \v
+/// octal-escape-sequence:
+/// \ octal-digit
+/// \ octal-digit octal-digit
+/// \ octal-digit octal-digit octal-digit
+/// hexadecimal-escape-sequence:
+/// \x hexadecimal-digit
+/// hexadecimal-escape-sequence hexadecimal-digit
+/// universal-character-name:
+/// \u hex-quad
+/// \U hex-quad hex-quad
+/// hex-quad:
+/// hex-digit hex-digit hex-digit hex-digit
+///
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
- SourceLocation Loc, Preprocessor &PP) {
+ SourceLocation Loc, Preprocessor &PP,
+ tok::TokenKind kind) {
// At this point we know that the character matches the regex "L?'.*'".
HadError = false;
- // Determine if this is a wide character.
- IsWide = begin[0] == 'L';
- if (IsWide) ++begin;
+ Kind = kind;
+
+ // Determine if this is a wide or UTF character.
+ if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant ||
+ Kind == tok::utf32_char_constant) {
+ ++begin;
+ }
// Skip over the entry quote.
assert(begin[0] == '\'' && "Invalid token lexed");
@@ -730,8 +781,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
// Is this a Universal Character Name escape?
if (begin[0] != '\\') // If this is a normal character, consume it.
- ResultChar = *begin++;
+ ResultChar = (unsigned char)*begin++;
else { // Otherwise, this is an escape character.
+ unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
// Check for UCN.
if (begin[1] == 'u' || begin[1] == 'U') {
uint32_t utf32 = 0;
@@ -742,19 +794,22 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
HadError = 1;
}
ResultChar = utf32;
+ if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+ PP.Diag(Loc, diag::warn_ucn_escape_too_large);
+ ResultChar &= ~0U >> (32-CharWidth);
+ }
} else {
// Otherwise, this is a non-UCN escape character. Process it.
ResultChar = ProcessCharEscape(begin, end, HadError,
FullSourceLoc(Loc,PP.getSourceManager()),
- IsWide,
- &PP.getDiagnostics(), PP.getTargetInfo());
+ CharWidth, &PP.getDiagnostics());
}
}
// If this is a multi-character constant (e.g. 'abc'), handle it. These are
// implementation defined (C99 6.4.4.4p10).
if (NumCharsSoFar) {
- if (IsWide) {
+ if (!isAscii()) {
// Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
LitVal = 0;
} else {
@@ -776,8 +831,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
if (NumCharsSoFar > 1) {
// Warn about discarding the top bits for multi-char wide-character
// constants (L'abcd').
- if (IsWide)
- PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
+ if (!isAscii())
+ PP.Diag(Loc, diag::warn_extraneous_char_constant);
else if (NumCharsSoFar != 4)
PP.Diag(Loc, diag::ext_multichar_character_literal);
else
@@ -789,47 +844,62 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
// Transfer the value from APInt to uint64_t
Value = LitVal.getZExtValue();
- if (IsWide && PP.getLangOptions().ShortWChar && Value > 0xFFFF)
- PP.Diag(Loc, diag::warn_ucn_escape_too_large);
-
// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
// character constants are not sign extended in the this implementation:
// '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
- if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&
+ if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
PP.getLangOptions().CharIsSigned)
Value = (signed char)Value;
}
-/// string-literal: [C99 6.4.5]
-/// " [s-char-sequence] "
-/// L" [s-char-sequence] "
+/// string-literal: [C++0x lex.string]
+/// encoding-prefix " [s-char-sequence] "
+/// encoding-prefix R raw-string
+/// encoding-prefix:
+/// u8
+/// u
+/// U
+/// L
/// s-char-sequence:
/// s-char
/// s-char-sequence s-char
/// s-char:
-/// any source character except the double quote ",
-/// backslash \, or newline character
-/// escape-character
-/// universal-character-name
-/// escape-character: [C99 6.4.4.4]
-/// \ escape-code
+/// any member of the source character set except the double-quote ",
+/// backslash \, or new-line character
+/// escape-sequence
/// universal-character-name
-/// escape-code:
-/// character-escape-code
-/// octal-escape-code
-/// hex-escape-code
-/// character-escape-code: one of
-/// n t b r f v a
-/// \ ' " ?
-/// octal-escape-code:
-/// octal-digit
-/// octal-digit octal-digit
-/// octal-digit octal-digit octal-digit
-/// hex-escape-code:
-/// x hex-digit
-/// hex-escape-code hex-digit
+/// raw-string:
+/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
+/// r-char-sequence:
+/// r-char
+/// r-char-sequence r-char
+/// r-char:
+/// any member of the source character set, except a right parenthesis )
+/// followed by the initial d-char-sequence (which may be empty)
+/// followed by a double quote ".
+/// d-char-sequence:
+/// d-char
+/// d-char-sequence d-char
+/// d-char:
+/// any member of the basic source character set except:
+/// space, the left parenthesis (, the right parenthesis ),
+/// the backslash \, and the control characters representing horizontal
+/// tab, vertical tab, form feed, and newline.
+/// escape-sequence: [C++0x lex.ccon]
+/// simple-escape-sequence
+/// octal-escape-sequence
+/// hexadecimal-escape-sequence
+/// simple-escape-sequence:
+/// one of \' \" \? \\ \a \b \f \n \r \t \v
+/// octal-escape-sequence:
+/// \ octal-digit
+/// \ octal-digit octal-digit
+/// \ octal-digit octal-digit octal-digit
+/// hexadecimal-escape-sequence:
+/// \x hexadecimal-digit
+/// hexadecimal-escape-sequence hexadecimal-digit
/// universal-character-name:
/// \u hex-quad
/// \U hex-quad hex-quad
@@ -841,8 +911,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
Preprocessor &PP, bool Complain)
: SM(PP.getSourceManager()), Features(PP.getLangOptions()),
Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
- MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
- ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
+ MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+ ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
init(StringToks, NumStringToks);
}
@@ -862,7 +932,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
MaxTokenLength = StringToks[0].getLength();
assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
SizeBound = StringToks[0].getLength()-2; // -2 for "".
- AnyWide = StringToks[0].is(tok::wide_string_literal);
+ Kind = StringToks[0].getKind();
hadError = false;
@@ -883,8 +953,18 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
if (StringToks[i].getLength() > MaxTokenLength)
MaxTokenLength = StringToks[i].getLength();
- // Remember if we see any wide strings.
- AnyWide |= StringToks[i].is(tok::wide_string_literal);
+ // Remember if we see any wide or utf-8/16/32 strings.
+ // Also check for illegal concatenations.
+ if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
+ if (isAscii()) {
+ Kind = StringToks[i].getKind();
+ } else {
+ if (Diags)
+ Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
+ diag::err_unsupported_string_concat);
+ hadError = true;
+ }
+ }
}
// Include space for the null terminator.
@@ -892,19 +972,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
// TODO: K&R warning: "traditional C rejects string constant concatenation"
- // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
- // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
- wchar_tByteWidth = ~0U;
- if (AnyWide) {
- wchar_tByteWidth = Target.getWCharWidth();
- assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
- wchar_tByteWidth /= 8;
- }
+ // Get the width in bytes of char/wchar_t/char16_t/char32_t
+ CharByteWidth = getCharWidth(Kind, Target);
+ assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
+ CharByteWidth /= 8;
// The output buffer size needs to be large enough to hold wide characters.
// This is a worst-case assumption which basically corresponds to L"" "long".
- if (AnyWide)
- SizeBound *= wchar_tByteWidth;
+ SizeBound *= CharByteWidth;
// Size the temporary buffer to hold the result string data.
ResultBuf.resize(SizeBound);
@@ -929,78 +1004,82 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
&StringInvalid);
if (StringInvalid) {
- hadError = 1;
+ hadError = true;
continue;
}
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
- bool wide = false;
// TODO: Input character set mapping support.
- // Skip L marker for wide strings.
- if (ThisTokBuf[0] == 'L') {
- wide = true;
+ // Skip marker for wide or unicode strings.
+ if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
++ThisTokBuf;
+ // Skip 8 of u8 marker for utf8 strings.
+ if (ThisTokBuf[0] == '8')
+ ++ThisTokBuf;
}
- assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
- ++ThisTokBuf;
-
- // Check if this is a pascal string
- if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
- ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
+ // Check for raw string
+ if (ThisTokBuf[0] == 'R') {
+ ThisTokBuf += 2; // skip R"
- // If the \p sequence is found in the first token, we have a pascal string
- // Otherwise, if we already have a pascal string, ignore the first \p
- if (i == 0) {
+ const char *Prefix = ThisTokBuf;
+ while (ThisTokBuf[0] != '(')
++ThisTokBuf;
- Pascal = true;
- } else if (Pascal)
- ThisTokBuf += 2;
- }
+ ++ThisTokBuf; // skip '('
+
+ // remove same number of characters from the end
+ if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
+ ThisTokEnd -= (ThisTokBuf - Prefix);
+
+ // Copy the string over
+ CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
+ } else {
+ assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+ ++ThisTokBuf; // skip "
+
+ // Check if this is a pascal string
+ if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
+ ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
- while (ThisTokBuf != ThisTokEnd) {
- // Is this a span of non-escape characters?
- if (ThisTokBuf[0] != '\\') {
- const char *InStart = ThisTokBuf;
- do {
+ // If the \p sequence is found in the first token, we have a pascal string
+ // Otherwise, if we already have a pascal string, ignore the first \p
+ if (i == 0) {
++ThisTokBuf;
- } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
-
- // Copy the character span over.
- unsigned Len = ThisTokBuf-InStart;
- if (!AnyWide) {
- memcpy(ResultPtr, InStart, Len);
- ResultPtr += Len;
- } else {
- // Note: our internal rep of wide char tokens is always little-endian.
- for (; Len; --Len, ++InStart) {
- *ResultPtr++ = InStart[0];
- // Add zeros at the end.
- for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
- *ResultPtr++ = 0;
- }
- }
- continue;
- }
- // Is this a Universal Character Name escape?
- if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
- EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
- hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
- wide, Diags, Features);
- continue;
+ Pascal = true;
+ } else if (Pascal)
+ ThisTokBuf += 2;
}
- // Otherwise, this is a non-UCN escape character. Process it.
- unsigned ResultChar =
- ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
- FullSourceLoc(StringToks[i].getLocation(), SM),
- AnyWide, Diags, Target);
- // Note: our internal rep of wide char tokens is always little-endian.
- *ResultPtr++ = ResultChar & 0xFF;
+ while (ThisTokBuf != ThisTokEnd) {
+ // Is this a span of non-escape characters?
+ if (ThisTokBuf[0] != '\\') {
+ const char *InStart = ThisTokBuf;
+ do {
+ ++ThisTokBuf;
+ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+ // Copy the character span over.
+ CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
+ continue;
+ }
+ // Is this a Universal Character Name escape?
+ if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
+ EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
+ hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
+ CharByteWidth, Diags, Features);
+ continue;
+ }
+ // Otherwise, this is a non-UCN escape character. Process it.
+ unsigned ResultChar =
+ ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
+ FullSourceLoc(StringToks[i].getLocation(), SM),
+ CharByteWidth*8, Diags);
+
+ // Note: our internal rep of wide char tokens is always little-endian.
+ *ResultPtr++ = ResultChar & 0xFF;
- if (AnyWide) {
- for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
*ResultPtr++ = ResultChar >> i*8;
}
}
@@ -1008,8 +1087,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
if (Pascal) {
ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
- if (AnyWide)
- ResultBuf[0] /= wchar_tByteWidth;
+ ResultBuf[0] /= CharByteWidth;
// Verify that pascal strings aren't too large.
if (GetStringLength() > 256) {
@@ -1018,7 +1096,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
diag::err_pascal_string_too_long)
<< SourceRange(StringToks[0].getLocation(),
StringToks[NumStringToks-1].getLocation());
- hadError = 1;
+ hadError = true;
return;
}
} else if (Diags) {
@@ -1036,6 +1114,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
}
+/// copyStringFragment - This function copies from Start to End into ResultPtr.
+/// Performs widening for multi-byte characters.
+void StringLiteralParser::CopyStringFragment(StringRef Fragment) {
+ // Copy the character span over.
+ if (CharByteWidth == 1) {
+ memcpy(ResultPtr, Fragment.data(), Fragment.size());
+ ResultPtr += Fragment.size();
+ } else {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
+ *ResultPtr++ = *I;
+ // Add zeros at the end.
+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
+ *ResultPtr++ = 0;
+ }
+ }
+}
+
+
/// getOffsetOfStringByte - This function returns the offset of the
/// specified byte of the string data represented by Token. This handles
/// advancing over escape sequences in the string.
@@ -1052,7 +1149,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
if (StringInvalid)
return 0;
- assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
+ assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
+ SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
const char *SpellingStart = SpellingPtr;
@@ -1077,7 +1175,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
bool HadError = false;
ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
FullSourceLoc(Tok.getLocation(), SM),
- false, Diags, Target);
+ CharByteWidth*8, Diags);
assert(!HadError && "This method isn't valid on erroneous strings");
--ByteNo;
}
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 968c15e3c27b..1846d1c05e30 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -15,13 +15,15 @@
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/LexDiagnostic.h"
+
+#include <algorithm>
+
using namespace clang;
/// MacroArgs ctor function - This destroys the vector passed in.
MacroArgs *MacroArgs::create(const MacroInfo *MI,
- const Token *UnexpArgTokens,
- unsigned NumToks, bool VarargsElided,
- Preprocessor &PP) {
+ llvm::ArrayRef<Token> UnexpArgTokens,
+ bool VarargsElided, Preprocessor &PP) {
assert(MI->isFunctionLike() &&
"Can't have args for an object-like macro!");
MacroArgs **ResultEnt = 0;
@@ -31,12 +33,12 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
// free list. If so, reuse it.
for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
Entry = &(*Entry)->ArgCache)
- if ((*Entry)->NumUnexpArgTokens >= NumToks &&
+ if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
(*Entry)->NumUnexpArgTokens < ClosestMatch) {
ResultEnt = Entry;
// If we have an exact match, use it.
- if ((*Entry)->NumUnexpArgTokens == NumToks)
+ if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
break;
// Otherwise, use the best fit.
ClosestMatch = (*Entry)->NumUnexpArgTokens;
@@ -45,21 +47,22 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
MacroArgs *Result;
if (ResultEnt == 0) {
// Allocate memory for a MacroArgs object with the lexer tokens at the end.
- Result = (MacroArgs*)malloc(sizeof(MacroArgs) + NumToks*sizeof(Token));
+ Result = (MacroArgs*)malloc(sizeof(MacroArgs) +
+ UnexpArgTokens.size() * sizeof(Token));
// Construct the MacroArgs object.
- new (Result) MacroArgs(NumToks, VarargsElided);
+ new (Result) MacroArgs(UnexpArgTokens.size(), VarargsElided);
} else {
Result = *ResultEnt;
// Unlink this node from the preprocessors singly linked list.
*ResultEnt = Result->ArgCache;
- Result->NumUnexpArgTokens = NumToks;
+ Result->NumUnexpArgTokens = UnexpArgTokens.size();
Result->VarargsElided = VarargsElided;
}
// Copy the actual unexpanded tokens to immediately after the result ptr.
- if (NumToks)
- memcpy(const_cast<Token*>(Result->getUnexpArgument(0)),
- UnexpArgTokens, NumToks*sizeof(Token));
+ if (!UnexpArgTokens.empty())
+ std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
+ const_cast<Token*>(Result->getUnexpArgument(0)));
return Result;
}
@@ -186,7 +189,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI,
///
Token MacroArgs::StringifyArgument(const Token *ArgToks,
Preprocessor &PP, bool Charify,
- SourceLocation hashInstLoc) {
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd) {
Token Tok;
Tok.startToken();
Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
@@ -208,13 +212,21 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
// by 6.10.3.2p2.
if (Tok.is(tok::string_literal) || // "foo"
Tok.is(tok::wide_string_literal) || // L"foo"
- Tok.is(tok::char_constant)) { // 'x' and L'x'.
+ Tok.is(tok::utf8_string_literal) || // u8"foo"
+ Tok.is(tok::utf16_string_literal) || // u"foo"
+ Tok.is(tok::utf32_string_literal) || // U"foo"
+ Tok.is(tok::char_constant) || // 'x'
+ Tok.is(tok::wide_char_constant) || // L'x'.
+ Tok.is(tok::utf16_char_constant) || // u'x'.
+ Tok.is(tok::utf32_char_constant)) { // U'x'.
bool Invalid = false;
std::string TokStr = PP.getSpelling(Tok, &Invalid);
if (!Invalid) {
std::string Str = Lexer::Stringify(TokStr);
Result.append(Str.begin(), Str.end());
}
+ } else if (Tok.is(tok::code_completion)) {
+ PP.CodeCompleteNaturalLanguage();
} else {
// Otherwise, just append the token. Do some gymnastics to get the token
// in place and avoid copies where possible.
@@ -274,7 +286,8 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
}
}
- PP.CreateString(&Result[0], Result.size(), Tok, hashInstLoc);
+ PP.CreateString(&Result[0], Result.size(), Tok,
+ ExpansionLocStart, ExpansionLocEnd);
return Tok;
}
@@ -282,7 +295,8 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
/// that has been 'stringified' as required by the # operator.
const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
Preprocessor &PP,
- SourceLocation hashInstLoc) {
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd) {
assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
if (StringifiedArgs.empty()) {
StringifiedArgs.resize(getNumArguments());
@@ -291,6 +305,8 @@ const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
}
if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP,
- /*Charify=*/false, hashInstLoc);
+ /*Charify=*/false,
+ ExpansionLocStart,
+ ExpansionLocEnd);
return StringifiedArgs[ArgNo];
}
diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
index a962dacf7c93..cf86d710adb7 100644
--- a/lib/Lex/MacroArgs.h
+++ b/lib/Lex/MacroArgs.h
@@ -14,6 +14,8 @@
#ifndef LLVM_CLANG_MACROARGS_H
#define LLVM_CLANG_MACROARGS_H
+#include "llvm/ADT/ArrayRef.h"
+
#include <vector>
namespace clang {
@@ -58,9 +60,8 @@ public:
/// MacroArgs ctor function - Create a new MacroArgs object with the specified
/// macro and argument info.
static MacroArgs *create(const MacroInfo *MI,
- const Token *UnexpArgTokens,
- unsigned NumArgTokens, bool VarargsElided,
- Preprocessor &PP);
+ llvm::ArrayRef<Token> UnexpArgTokens,
+ bool VarargsElided, Preprocessor &PP);
/// destroy - Destroy and deallocate the memory for this object.
///
@@ -88,7 +89,8 @@ public:
/// getStringifiedArgument - Compute, cache, and return the specified argument
/// that has been 'stringified' as required by the # operator.
const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP,
- SourceLocation hashInstLoc);
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd);
/// getNumArguments - Return the number of arguments passed into this macro
/// invocation.
@@ -109,7 +111,8 @@ public:
///
static Token StringifyArgument(const Token *ArgToks,
Preprocessor &PP, bool Charify,
- SourceLocation hashInstLoc);
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd);
/// deallocate - This should only be called by the Preprocessor when managing
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 0a16a2567219..5a7af5639830 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -21,6 +21,7 @@ MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) {
IsGNUVarargs = false;
IsBuiltinMacro = false;
IsFromAST = false;
+ ChangedAfterLoad = false;
IsDisabled = false;
IsUsed = false;
IsAllowRedefinitionsWithoutWarning = false;
@@ -40,6 +41,7 @@ MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) {
IsGNUVarargs = MI.IsGNUVarargs;
IsBuiltinMacro = MI.IsBuiltinMacro;
IsFromAST = MI.IsFromAST;
+ ChangedAfterLoad = MI.ChangedAfterLoad;
IsDisabled = MI.IsDisabled;
IsUsed = MI.IsUsed;
IsAllowRedefinitionsWithoutWarning = MI.IsAllowRedefinitionsWithoutWarning;
@@ -68,9 +70,9 @@ unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {
assert((macroEnd.isFileID() || lastToken.is(tok::comment)) &&
"Macro defined in macro?");
std::pair<FileID, unsigned>
- startInfo = SM.getDecomposedInstantiationLoc(macroStart);
+ startInfo = SM.getDecomposedExpansionLoc(macroStart);
std::pair<FileID, unsigned>
- endInfo = SM.getDecomposedInstantiationLoc(macroEnd);
+ endInfo = SM.getDecomposedExpansionLoc(macroEnd);
assert(startInfo.first == endInfo.first &&
"Macro definition spanning multiple FileIDs ?");
assert(startInfo.second <= endInfo.second);
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
index 33106591c3ba..986341b98668 100644
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -74,6 +74,8 @@ void Preprocessor::EnterCachingLexMode() {
return;
PushIncludeMacroStack();
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_CachingLexer;
}
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 4af5fabe5c80..de50c750e4d6 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -17,6 +17,7 @@
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
@@ -102,8 +103,8 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
if (MacroNameTok.is(tok::code_completion)) {
if (CodeComplete)
CodeComplete->CodeCompleteMacroName(isDefineUndef == 1);
+ setCodeCompletionReached();
LexUnexpandedToken(MacroNameTok);
- return;
}
// Missing macro name?
@@ -192,7 +193,8 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) {
/// the first valid token.
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
bool FoundNonSkipPortion,
- bool FoundElse) {
+ bool FoundElse,
+ SourceLocation ElseLoc) {
++NumSkipped;
assert(CurTokenLexer == 0 && CurPPLexer && "Lexing a macro, not a file?");
@@ -214,6 +216,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
if (Tok.is(tok::code_completion)) {
if (CodeComplete)
CodeComplete->CodeCompleteInConditionalExclusion();
+ setCodeCompletionReached();
continue;
}
@@ -222,7 +225,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
// Emit errors for each unterminated conditional on the stack, including
// the current one.
while (!CurPPLexer->ConditionalStack.empty()) {
- if (!isCodeCompletionFile(Tok.getLocation()))
+ if (CurLexer->getFileLoc() != CodeCompletionFileLoc)
Diag(CurPPLexer->ConditionalStack.back().IfLoc,
diag::err_pp_unterminated_conditional);
CurPPLexer->ConditionalStack.pop_back();
@@ -275,9 +278,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
// that we can't use Tok.getIdentifierInfo() because its lookup is disabled
// when skipping.
char DirectiveBuf[20];
- llvm::StringRef Directive;
+ StringRef Directive;
if (!Tok.needsCleaning() && Tok.getLength() < 20) {
- Directive = llvm::StringRef(RawCharData, Tok.getLength());
+ Directive = StringRef(RawCharData, Tok.getLength());
} else {
std::string DirectiveStr = getSpelling(Tok);
unsigned IdLen = DirectiveStr.size();
@@ -288,11 +291,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
continue;
}
memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
- Directive = llvm::StringRef(DirectiveBuf, IdLen);
+ Directive = StringRef(DirectiveBuf, IdLen);
}
if (Directive.startswith("if")) {
- llvm::StringRef Sub = Directive.substr(2);
+ StringRef Sub = Directive.substr(2);
if (Sub.empty() || // "if"
Sub == "def" || // "ifdef"
Sub == "ndef") { // "ifndef"
@@ -307,7 +310,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
Callbacks->Endif();
}
} else if (Directive[0] == 'e') {
- llvm::StringRef Sub = Directive.substr(1);
+ StringRef Sub = Directive.substr(1);
if (Sub == "ndif") { // "endif"
CheckEndOfDirective("endif");
PPConditionalInfo CondInfo;
@@ -387,6 +390,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
// of the file, just stop skipping and return to lexing whatever came after
// the #if block.
CurPPLexer->LexingRawMode = false;
+
+ if (Callbacks) {
+ SourceLocation BeginLoc = ElseLoc.isValid() ? ElseLoc : IfTokenLoc;
+ Callbacks->SourceRangeSkipped(SourceRange(BeginLoc, Tok.getLocation()));
+ }
}
void Preprocessor::PTHSkipExcludedConditionalBlock() {
@@ -472,12 +480,13 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() {
/// return null on failure. isAngled indicates whether the file reference is
/// for system #include's or not (i.e. using <> instead of "").
const FileEntry *Preprocessor::LookupFile(
- llvm::StringRef Filename,
+ StringRef Filename,
bool isAngled,
const DirectoryLookup *FromDir,
const DirectoryLookup *&CurDir,
- llvm::SmallVectorImpl<char> *SearchPath,
- llvm::SmallVectorImpl<char> *RelativePath) {
+ SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath,
+ StringRef *SuggestedModule) {
// If the header lookup mechanism may be relative to the current file, pass in
// info about where the current file is.
const FileEntry *CurFileEnt = 0;
@@ -501,12 +510,13 @@ const FileEntry *Preprocessor::LookupFile(
CurDir = CurDirLookup;
const FileEntry *FE = HeaderInfo.LookupFile(
Filename, isAngled, FromDir, CurDir, CurFileEnt,
- SearchPath, RelativePath);
+ SearchPath, RelativePath, SuggestedModule);
if (FE) return FE;
// Otherwise, see if this is a subframework header. If so, this is relative
// to one of the headers on the #include stack. Walk the list of the current
// headers on the #include stack and pass them to HeaderInfo.
+ // FIXME: SuggestedModule!
if (IsFileLexer()) {
if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt,
@@ -581,6 +591,7 @@ TryAgain:
if (CodeComplete)
CodeComplete->CodeCompleteDirective(
CurPPLexer->getConditionalStackDepth() > 0);
+ setCodeCompletionReached();
return;
case tok::numeric_constant: // # 7 GNU line marker directive.
if (getLangOptions().AsmPreprocessor)
@@ -652,6 +663,9 @@ TryAgain:
case tok::pp_unassert:
//isExtension = true; // FIXME: implement #unassert
break;
+
+ case tok::pp___export_macro__:
+ return HandleMacroExportDirective(Result);
}
break;
}
@@ -758,9 +772,13 @@ void Preprocessor::HandleLineDirective(Token &Tok) {
// Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
// number greater than 2147483647". C90 requires that the line # be <= 32767.
- unsigned LineLimit = Features.C99 ? 2147483648U : 32768U;
+ unsigned LineLimit = 32768U;
+ if (Features.C99 || Features.CPlusPlus0x)
+ LineLimit = 2147483648U;
if (LineNo >= LineLimit)
Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
+ else if (Features.CPlusPlus0x && LineNo >= 32768U)
+ Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
int FilenameID = -1;
Token StrTok;
@@ -777,7 +795,7 @@ void Preprocessor::HandleLineDirective(Token &Tok) {
} else {
// Parse and validate the string, converting it into a unique ID.
StringLiteralParser Literal(&StrTok, 1, *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return DiscardUntilEndOfDirective();
if (Literal.Pascal) {
@@ -825,7 +843,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
// If we are leaving the current presumed file, check to make sure the
// presumed include stack isn't empty!
FileID CurFileID =
- SM.getDecomposedInstantiationLoc(FlagTok.getLocation()).first;
+ SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
if (PLoc.isInvalid())
return true;
@@ -834,7 +852,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
// different physical file, then we aren't in a "1" line marker flag region.
SourceLocation IncLoc = PLoc.getIncludeLoc();
if (IncLoc.isInvalid() ||
- SM.getDecomposedInstantiationLoc(IncLoc).first != CurFileID) {
+ SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
PP.DiscardUntilEndOfDirective();
return true;
@@ -910,7 +928,7 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
} else {
// Parse and validate the string, converting it into a unique ID.
StringLiteralParser Literal(&StrTok, 1, *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return DiscardUntilEndOfDirective();
if (Literal.Pascal) {
@@ -1000,6 +1018,37 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
}
}
+/// \brief Handle a #__export_macro__ directive.
+void Preprocessor::HandleMacroExportDirective(Token &Tok) {
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok, 2);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eod))
+ return;
+
+ // Check to see if this is the last token on the #__export_macro__ line.
+ CheckEndOfDirective("__export_macro__");
+
+ // Okay, we finally have a valid identifier to undef.
+ MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+
+ // If the macro is not defined, this is an error.
+ if (MI == 0) {
+ Diag(MacroNameTok, diag::err_pp_export_non_macro)
+ << MacroNameTok.getIdentifierInfo();
+ return;
+ }
+
+ // Note that this macro has now been exported.
+ MI->setExportLocation(MacroNameTok.getLocation());
+
+ // If this macro definition came from a PCH file, mark it
+ // as having changed since serialization.
+ if (MI->isFromAST())
+ MI->setChangedAfterLoad();
+}
+
//===----------------------------------------------------------------------===//
// Preprocessor Include Directive Handling.
//===----------------------------------------------------------------------===//
@@ -1011,7 +1060,7 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
/// spelling of the filename, but is also expected to handle the case when
/// this method decides to use a different buffer.
bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
- llvm::StringRef &Buffer) {
+ StringRef &Buffer) {
// Get the text form of the filename.
assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
@@ -1020,27 +1069,27 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
if (Buffer[0] == '<') {
if (Buffer.back() != '>') {
Diag(Loc, diag::err_pp_expects_filename);
- Buffer = llvm::StringRef();
+ Buffer = StringRef();
return true;
}
isAngled = true;
} else if (Buffer[0] == '"') {
if (Buffer.back() != '"') {
Diag(Loc, diag::err_pp_expects_filename);
- Buffer = llvm::StringRef();
+ Buffer = StringRef();
return true;
}
isAngled = false;
} else {
Diag(Loc, diag::err_pp_expects_filename);
- Buffer = llvm::StringRef();
+ Buffer = StringRef();
return true;
}
// Diagnose #include "" as invalid.
if (Buffer.size() <= 2) {
Diag(Loc, diag::err_pp_empty_filename);
- Buffer = llvm::StringRef();
+ Buffer = StringRef();
return true;
}
@@ -1070,6 +1119,7 @@ bool Preprocessor::ConcatenateIncludeName(
// FIXME: Provide code completion for #includes.
if (CurTok.is(tok::code_completion)) {
+ setCodeCompletionReached();
Lex(CurTok);
continue;
}
@@ -1122,7 +1172,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// Reserve a buffer to get the spelling.
llvm::SmallString<128> FilenameBuffer;
- llvm::StringRef Filename;
+ StringRef Filename;
SourceLocation End;
switch (FilenameTok.getKind()) {
@@ -1171,23 +1221,44 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
return;
}
+ // Complain about attempts to #include files in an audit pragma.
+ if (PragmaARCCFCodeAuditedLoc.isValid()) {
+ Diag(HashLoc, diag::err_pp_include_in_arc_cf_code_audited);
+ Diag(PragmaARCCFCodeAuditedLoc, diag::note_pragma_entered_here);
+
+ // Immediately leave the pragma.
+ PragmaARCCFCodeAuditedLoc = SourceLocation();
+ }
+
// Search include directories.
const DirectoryLookup *CurDir;
llvm::SmallString<1024> SearchPath;
llvm::SmallString<1024> RelativePath;
// We get the raw path only if we have 'Callbacks' to which we later pass
// the path.
+ StringRef SuggestedModule;
const FileEntry *File = LookupFile(
Filename, isAngled, LookupFrom, CurDir,
- Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL);
-
+ Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL,
+ AutoModuleImport? &SuggestedModule : 0);
+
+ // If we are supposed to import a module rather than including the header,
+ // do so now.
+ if (!SuggestedModule.empty()) {
+ TheModuleLoader.loadModule(IncludeTok.getLocation(),
+ Identifiers.get(SuggestedModule),
+ FilenameTok.getLocation());
+ return;
+ }
+
// Notify the callback object that we've seen an inclusion directive.
if (Callbacks)
Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled, File,
End, SearchPath, RelativePath);
if (File == 0) {
- Diag(FilenameTok, diag::warn_pp_file_not_found) << Filename;
+ if (!SuppressIncludeNotFoundError)
+ Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
return;
}
@@ -1284,7 +1355,7 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
/// closing ), updating MI with what we learn. Return true if an error occurs
/// parsing the arg list.
bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {
- llvm::SmallVector<IdentifierInfo*, 32> Arguments;
+ SmallVector<IdentifierInfo*, 32> Arguments;
Token Tok;
while (1) {
@@ -1298,8 +1369,10 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {
Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
return true;
case tok::ellipsis: // #define X(... -> C99 varargs
- // Warn if use of C99 feature in non-C99 mode.
- if (!Features.C99) Diag(Tok, diag::ext_variadic_macro);
+ if (!Features.C99)
+ Diag(Tok, Features.CPlusPlus0x ?
+ diag::warn_cxx98_compat_variadic_macro :
+ diag::ext_variadic_macro);
// Lex the token after the identifier.
LexUnexpandedToken(Tok);
@@ -1423,7 +1496,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
// Read the first token after the arg list for down below.
LexUnexpandedToken(Tok);
- } else if (Features.C99) {
+ } else if (Features.C99 || Features.CPlusPlus0x) {
// C99 requires whitespace between the macro definition and the body. Emit
// a diagnostic for something like "#define X+".
Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
@@ -1564,7 +1637,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {
// warn-because-unused-macro set. If it gets used it will be removed from set.
if (isInPrimaryFile() && // don't warn for include'd macros.
Diags->getDiagnosticLevel(diag::pp_macro_not_used,
- MI->getDefinitionLoc()) != Diagnostic::Ignored) {
+ MI->getDefinitionLoc()) != DiagnosticsEngine::Ignored) {
MI->setIsWarnIfUnused(true);
WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
}
@@ -1765,7 +1838,7 @@ void Preprocessor::HandleElseDirective(Token &Result) {
// Finally, skip the rest of the contents of this block.
SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
- /*FoundElse*/true);
+ /*FoundElse*/true, Result.getLocation());
if (Callbacks)
Callbacks->Else();
@@ -1798,7 +1871,8 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) {
// Finally, skip the rest of the contents of this block.
SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
- /*FoundElse*/CI.FoundElse);
+ /*FoundElse*/CI.FoundElse,
+ ElifToken.getLocation());
if (Callbacks)
Callbacks->Elif(SourceRange(ConditionalBegin, ConditionalEnd));
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index 8fcfc70a7c67..20f624a0bb12 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -23,6 +23,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/LexDiagnostic.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace clang;
namespace {
@@ -83,20 +84,21 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
Result.setBegin(PeekTok.getLocation());
// Get the next token, don't expand it.
- PP.LexUnexpandedToken(PeekTok);
+ PP.LexUnexpandedNonComment(PeekTok);
// Two options, it can either be a pp-identifier or a (.
SourceLocation LParenLoc;
if (PeekTok.is(tok::l_paren)) {
// Found a paren, remember we saw it and skip it.
LParenLoc = PeekTok.getLocation();
- PP.LexUnexpandedToken(PeekTok);
+ PP.LexUnexpandedNonComment(PeekTok);
}
if (PeekTok.is(tok::code_completion)) {
if (PP.getCodeCompletionHandler())
PP.getCodeCompletionHandler()->CodeCompleteMacroName(false);
- PP.LexUnexpandedToken(PeekTok);
+ PP.setCodeCompletionReached();
+ PP.LexUnexpandedNonComment(PeekTok);
}
// If we don't have a pp-identifier now, this is an error.
@@ -115,12 +117,16 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.markMacroAsUsed(Macro);
}
- // Consume identifier.
- Result.setEnd(PeekTok.getLocation());
- PP.LexUnexpandedToken(PeekTok);
+ // Invoke the 'defined' callback.
+ if (PPCallbacks *Callbacks = PP.getPPCallbacks())
+ Callbacks->Defined(PeekTok);
// If we are in parens, ensure we have a trailing ).
if (LParenLoc.isValid()) {
+ // Consume identifier.
+ Result.setEnd(PeekTok.getLocation());
+ PP.LexUnexpandedNonComment(PeekTok);
+
if (PeekTok.isNot(tok::r_paren)) {
PP.Diag(PeekTok.getLocation(), diag::err_pp_missing_rparen) << "defined";
PP.Diag(LParenLoc, diag::note_matching) << "(";
@@ -129,6 +135,10 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
// Consume the ).
Result.setEnd(PeekTok.getLocation());
PP.LexNonComment(PeekTok);
+ } else {
+ // Consume identifier.
+ Result.setEnd(PeekTok.getLocation());
+ PP.LexNonComment(PeekTok);
}
// Success, remember that we saw defined(X).
@@ -152,7 +162,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
if (PeekTok.is(tok::code_completion)) {
if (PP.getCodeCompletionHandler())
PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();
- PP.LexUnexpandedToken(PeekTok);
+ PP.setCodeCompletionReached();
+ PP.LexNonComment(PeekTok);
}
// If this token's spelling is a pp-identifier, check to see if it is
@@ -188,7 +199,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
case tok::numeric_constant: {
llvm::SmallString<64> IntegerBuffer;
bool NumberInvalid = false;
- llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,
+ StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,
&NumberInvalid);
if (NumberInvalid)
return true; // a diagnostic was already reported
@@ -205,9 +216,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
assert(Literal.isIntegerLiteral() && "Unknown ppnumber");
// long long is a C99 feature.
- if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x
- && Literal.isLongLong)
- PP.Diag(PeekTok, diag::ext_longlong);
+ if (!PP.getLangOptions().C99 && Literal.isLongLong)
+ PP.Diag(PeekTok, PP.getLangOptions().CPlusPlus0x ?
+ diag::warn_cxx98_compat_longlong : diag::ext_longlong);
// Parse the integer literal into Result.
if (Literal.GetIntegerValue(Result.Val)) {
@@ -236,15 +247,18 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.LexNonComment(PeekTok);
return false;
}
- case tok::char_constant: { // 'x'
+ case tok::char_constant: // 'x'
+ case tok::wide_char_constant: { // L'x'
+ case tok::utf16_char_constant: // u'x'
+ case tok::utf32_char_constant: // U'x'
llvm::SmallString<32> CharBuffer;
bool CharInvalid = false;
- llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+ StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
if (CharInvalid)
return true;
CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
- PeekTok.getLocation(), PP);
+ PeekTok.getLocation(), PP, PeekTok.getKind());
if (Literal.hadError())
return true; // A diagnostic was already emitted.
@@ -255,6 +269,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
NumBits = TI.getIntWidth();
else if (Literal.isWide())
NumBits = TI.getWCharWidth();
+ else if (Literal.isUTF16())
+ NumBits = TI.getChar16Width();
+ else if (Literal.isUTF32())
+ NumBits = TI.getChar32Width();
else
NumBits = TI.getCharWidth();
@@ -262,8 +280,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
llvm::APSInt Val(NumBits);
// Set the value.
Val = Literal.getValue();
- // Set the signedness.
- Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
+ // Set the signedness. UTF-16 and UTF-32 are always unsigned
+ if (!Literal.isUTF16() && !Literal.isUTF32())
+ Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
if (Result.Val.getBitWidth() > Val.getBitWidth()) {
Result.Val = Val.extend(Result.Val.getBitWidth());
@@ -521,7 +540,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
bool Overflow = false;
switch (Operator) {
- default: assert(0 && "Unknown operator token!");
+ default: llvm_unreachable("Unknown operator token!");
case tok::percent:
if (RHS.Val != 0)
Res = LHS.Val % RHS.Val;
@@ -704,7 +723,7 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
// Peek ahead one token.
Token Tok;
- Lex(Tok);
+ LexNonComment(Tok);
// C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
unsigned BitWidth = getTargetInfo().getIntMaxTWidth();
@@ -759,4 +778,3 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
return ResVal.Val != 0;
}
-
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index bf28199b888a..25a98ae47b6e 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -89,7 +89,14 @@ void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
<< std::string(SourceMgr.getBufferName(FileStart)) << "";
return;
}
-
+
+ if (isCodeCompletionEnabled() &&
+ SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
+ CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
+ CodeCompletionLoc =
+ CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
+ }
+
EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
return;
}
@@ -106,7 +113,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
CurLexer.reset(TheLexer);
CurPPLexer = TheLexer;
CurDirLookup = CurDir;
-
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_Lexer;
+
// Notify the client, if desired, that we are in a new source file.
if (Callbacks && !CurLexer->Is_PragmaLexer) {
SrcMgr::CharacteristicKind FileType =
@@ -128,7 +137,9 @@ void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
CurDirLookup = CurDir;
CurPTHLexer.reset(PL);
CurPPLexer = CurPTHLexer.get();
-
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_PTHLexer;
+
// Notify the client, if desired, that we are in a new source file.
if (Callbacks) {
FileID FID = CurPPLexer->getFileID();
@@ -152,6 +163,8 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
CurTokenLexer->Init(Tok, ILEnd, Args);
}
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_TokenLexer;
}
/// EnterTokenStream - Add a "macro" context to the top of the include stack,
@@ -181,6 +194,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
}
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_TokenLexer;
}
/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
@@ -201,9 +216,50 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
}
}
+ // Complain about reaching an EOF within arc_cf_code_audited.
+ if (PragmaARCCFCodeAuditedLoc.isValid()) {
+ Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited);
+
+ // Recover by leaving immediately.
+ PragmaARCCFCodeAuditedLoc = SourceLocation();
+ }
+
// If this is a #include'd file, pop it off the include stack and continue
// lexing the #includer file.
if (!IncludeMacroStack.empty()) {
+
+ // If we lexed the code-completion file, act as if we reached EOF.
+ if (isCodeCompletionEnabled() && CurPPLexer &&
+ SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
+ CodeCompletionFileLoc) {
+ if (CurLexer) {
+ Result.startToken();
+ CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
+ CurLexer.reset();
+ } else {
+ assert(CurPTHLexer && "Got EOF but no current lexer set!");
+ CurPTHLexer->getEOF(Result);
+ CurPTHLexer.reset();
+ }
+
+ CurPPLexer = 0;
+ return true;
+ }
+
+ if (!isEndOfMacro && CurPPLexer &&
+ SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {
+ // Notify SourceManager to record the number of FileIDs that were created
+ // during lexing of the #include'd file.
+ unsigned NumFIDs =
+ SourceMgr.local_sloc_entry_size() -
+ CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
+ SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
+ }
+
+ FileID ExitedFID;
+ if (Callbacks && !isEndOfMacro && CurPPLexer)
+ ExitedFID = CurPPLexer->getFileID();
+
// We're done with the #included file.
RemoveTopOfLexerStack();
@@ -212,7 +268,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
- PPCallbacks::ExitFile, FileType);
+ PPCallbacks::ExitFile, FileType, ExitedFID);
}
// Client should lex another token.
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index ecd4d4cfc68b..e10c95c75f25 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -21,10 +21,12 @@
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/LiteralSupport.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <ctime>
using namespace clang;
@@ -91,9 +93,10 @@ void Preprocessor::RegisterBuiltinMacros() {
Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute");
Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");
Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
+ Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");
// Microsoft Extensions.
- if (Features.Microsoft)
+ if (Features.MicrosoftExt)
Ident__pragma = RegisterBuiltinMacro(*this, "__pragma");
else
Ident__pragma = 0;
@@ -185,7 +188,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
// If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
if (MI->isBuiltinMacro()) {
- if (Callbacks) Callbacks->MacroExpands(Identifier, MI);
+ if (Callbacks) Callbacks->MacroExpands(Identifier, MI,
+ Identifier.getLocation());
ExpandBuiltinMacro(Identifier);
return false;
}
@@ -226,13 +230,14 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
// Notice that this macro has been used.
markMacroAsUsed(MI);
- if (Callbacks) Callbacks->MacroExpands(Identifier, MI);
-
- // If we started lexing a macro, enter the macro expansion body.
-
// Remember where the token is expanded.
SourceLocation ExpandLoc = Identifier.getLocation();
+ if (Callbacks) Callbacks->MacroExpands(Identifier, MI,
+ SourceRange(ExpandLoc, ExpansionEnd));
+
+ // If we started lexing a macro, enter the macro expansion body.
+
// If this macro expands to no tokens, don't bother to push it onto the
// expansion stack, only to take it right back off.
if (MI->getNumTokens() == 0) {
@@ -255,7 +260,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
}
Identifier.setFlag(Token::LeadingEmptyMacro);
- LastEmptyMacroExpansionLoc = ExpandLoc;
++NumFastMacroExpanded;
return false;
@@ -284,8 +288,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
// Update the tokens location to include both its expansion and physical
// locations.
SourceLocation Loc =
- SourceMgr.createInstantiationLoc(Identifier.getLocation(), ExpandLoc,
- ExpansionEnd,Identifier.getLength());
+ SourceMgr.createExpansionLoc(Identifier.getLocation(), ExpandLoc,
+ ExpansionEnd,Identifier.getLength());
Identifier.setLocation(Loc);
// If this is a disabled macro or #define X X, we must mark the result as
@@ -333,7 +337,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// ArgTokens - Build up a list of tokens that make up each argument. Each
// argument is separated by an EOF token. Use a SmallVector so we can avoid
// heap allocations in the common case.
- llvm::SmallVector<Token, 64> ArgTokens;
+ SmallVector<Token, 64> ArgTokens;
unsigned NumActuals = 0;
while (Tok.isNot(tok::r_paren)) {
@@ -352,13 +356,6 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// an argument value in a macro could expand to ',' or '(' or ')'.
LexUnexpandedToken(Tok);
- if (Tok.is(tok::code_completion)) {
- if (CodeComplete)
- CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
- MI, NumActuals);
- LexUnexpandedToken(Tok);
- }
-
if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n"
Diag(MacroName, diag::err_unterm_macro_invoc);
// Do not lose the EOF/EOD. Return it to the client.
@@ -393,7 +390,15 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
if (!MI->isEnabled())
Tok.setFlag(Token::DisableExpand);
+ } else if (Tok.is(tok::code_completion)) {
+ if (CodeComplete)
+ CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
+ MI, NumActuals);
+ // Don't mark that we reached the code-completion point because the
+ // parser is going to handle the token and there will be another
+ // code-completion callback.
}
+
ArgTokens.push_back(Tok);
}
@@ -416,8 +421,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// Empty arguments are standard in C99 and C++0x, and are supported as an extension in
// other modes.
- if (ArgTokens.size() == ArgTokenStart && !Features.C99 && !Features.CPlusPlus0x)
- Diag(Tok, diag::ext_empty_fnmacro_arg);
+ if (ArgTokens.size() == ArgTokenStart && !Features.C99)
+ Diag(Tok, Features.CPlusPlus0x ?
+ diag::warn_cxx98_compat_empty_fnmacro_arg :
+ diag::ext_empty_fnmacro_arg);
// Add a marker EOF token to the end of the token list for this argument.
Token EOFTok;
@@ -487,8 +494,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
return 0;
}
- return MacroArgs::create(MI, ArgTokens.data(), ArgTokens.size(),
- isVarargsElided, *this);
+ return MacroArgs::create(MI, ArgTokens, isVarargsElided, *this);
}
/// \brief Keeps macro expanded tokens for TokenLexers.
@@ -497,7 +503,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
/// going to lex in the cache and when it finishes the tokens are removed
/// from the end of the cache.
Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer,
- llvm::ArrayRef<Token> tokens) {
+ ArrayRef<Token> tokens) {
assert(tokLexer);
if (tokens.empty())
return 0;
@@ -597,34 +603,48 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("objc_arc", LangOpts.ObjCAutoRefCount)
.Case("objc_arc_weak", LangOpts.ObjCAutoRefCount &&
LangOpts.ObjCRuntimeHasWeak)
+ .Case("objc_fixed_enum", LangOpts.ObjC2)
+ .Case("objc_instancetype", LangOpts.ObjC2)
.Case("objc_nonfragile_abi", LangOpts.ObjCNonFragileABI)
.Case("objc_weak_class", LangOpts.ObjCNonFragileABI)
.Case("ownership_holds", true)
.Case("ownership_returns", true)
.Case("ownership_takes", true)
// C1X features
+ .Case("c_alignas", LangOpts.C1X)
.Case("c_generic_selections", LangOpts.C1X)
.Case("c_static_assert", LangOpts.C1X)
// C++0x features
.Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x)
.Case("cxx_alias_templates", LangOpts.CPlusPlus0x)
+ .Case("cxx_alignas", LangOpts.CPlusPlus0x)
.Case("cxx_attributes", LangOpts.CPlusPlus0x)
.Case("cxx_auto_type", LangOpts.CPlusPlus0x)
+ //.Case("cxx_constexpr", false);
.Case("cxx_decltype", LangOpts.CPlusPlus0x)
.Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x)
.Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x)
.Case("cxx_deleted_functions", LangOpts.CPlusPlus0x)
+ .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x)
+ //.Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x)
+ .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x)
+ //.Case("cxx_inheriting_constructors", false)
.Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x)
//.Case("cxx_lambdas", false)
+ .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x)
.Case("cxx_noexcept", LangOpts.CPlusPlus0x)
.Case("cxx_nullptr", LangOpts.CPlusPlus0x)
.Case("cxx_override_control", LangOpts.CPlusPlus0x)
.Case("cxx_range_for", LangOpts.CPlusPlus0x)
+ //.Case("cxx_raw_string_literals", false)
.Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x)
.Case("cxx_rvalue_references", LangOpts.CPlusPlus0x)
.Case("cxx_strong_enums", LangOpts.CPlusPlus0x)
.Case("cxx_static_assert", LangOpts.CPlusPlus0x)
.Case("cxx_trailing_return", LangOpts.CPlusPlus0x)
+ //.Case("cxx_unicode_literals", false)
+ //.Case("cxx_unrestricted_unions", false)
+ //.Case("cxx_user_literals", false)
.Case("cxx_variadic_templates", LangOpts.CPlusPlus0x)
// Type traits
.Case("has_nothrow_assign", LangOpts.CPlusPlus)
@@ -639,16 +659,31 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("is_base_of", LangOpts.CPlusPlus)
.Case("is_class", LangOpts.CPlusPlus)
.Case("is_convertible_to", LangOpts.CPlusPlus)
- .Case("is_empty", LangOpts.CPlusPlus)
+ // __is_empty is available only if the horrible
+ // "struct __is_empty" parsing hack hasn't been needed in this
+ // translation unit. If it has, __is_empty reverts to a normal
+ // identifier and __has_feature(is_empty) evaluates false.
+ .Case("is_empty",
+ LangOpts.CPlusPlus &&
+ PP.getIdentifierInfo("__is_empty")->getTokenID()
+ != tok::identifier)
.Case("is_enum", LangOpts.CPlusPlus)
.Case("is_literal", LangOpts.CPlusPlus)
.Case("is_standard_layout", LangOpts.CPlusPlus)
- .Case("is_pod", LangOpts.CPlusPlus)
+ // __is_pod is available only if the horrible
+ // "struct __is_pod" parsing hack hasn't been needed in this
+ // translation unit. If it has, __is_pod reverts to a normal
+ // identifier and __has_feature(is_pod) evaluates false.
+ .Case("is_pod",
+ LangOpts.CPlusPlus &&
+ PP.getIdentifierInfo("__is_pod")->getTokenID()
+ != tok::identifier)
.Case("is_polymorphic", LangOpts.CPlusPlus)
.Case("is_trivial", LangOpts.CPlusPlus)
.Case("is_trivially_copyable", LangOpts.CPlusPlus)
.Case("is_union", LangOpts.CPlusPlus)
.Case("tls", PP.getTargetInfo().isTLSSupported())
+ .Case("underlying_type", LangOpts.CPlusPlus)
.Default(false);
}
@@ -661,7 +696,8 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {
// If the use of an extension results in an error diagnostic, extensions are
// effectively unavailable, so just return false here.
- if (PP.getDiagnostics().getExtensionHandlingBehavior()==Diagnostic::Ext_Error)
+ if (PP.getDiagnostics().getExtensionHandlingBehavior() ==
+ DiagnosticsEngine::Ext_Error)
return false;
const LangOptions &LangOpts = PP.getLangOptions();
@@ -670,12 +706,16 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {
// must be less restrictive than HasFeature's.
return llvm::StringSwitch<bool>(II->getName())
// C1X features supported by other languages as extensions.
+ .Case("c_alignas", true)
.Case("c_generic_selections", true)
.Case("c_static_assert", true)
// C++0x features supported by other languages as extensions.
.Case("cxx_deleted_functions", LangOpts.CPlusPlus)
+ .Case("cxx_explicit_conversions", LangOpts.CPlusPlus)
.Case("cxx_inline_namespaces", LangOpts.CPlusPlus)
+ .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus)
.Case("cxx_override_control", LangOpts.CPlusPlus)
+ .Case("cxx_range_for", LangOpts.CPlusPlus)
.Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus)
.Case("cxx_rvalue_references", LangOpts.CPlusPlus)
.Default(false);
@@ -714,7 +754,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
// Reserve a buffer to get the spelling.
llvm::SmallString<128> FilenameBuffer;
- llvm::StringRef Filename;
+ StringRef Filename;
SourceLocation EndLoc;
switch (Tok.getKind()) {
@@ -753,7 +793,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
// Search include directories.
const DirectoryLookup *CurDir;
const FileEntry *File =
- PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL);
+ PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL);
// Get the result value. Result = true means the file exists.
bool Result = File != 0;
@@ -837,7 +877,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
// can matter for a function-like macro that expands to contain __LINE__.
// Skip down through expansion points until we find a file loc for the
// end of the expansion history.
- Loc = SourceMgr.getInstantiationRange(Loc).second;
+ Loc = SourceMgr.getExpansionRange(Loc).second;
PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
// __LINE__ expands to a simple numeric value.
@@ -874,18 +914,18 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
ComputeDATE_TIME(DATELoc, TIMELoc, *this);
Tok.setKind(tok::string_literal);
Tok.setLength(strlen("\"Mmm dd yyyy\""));
- Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(),
- Tok.getLocation(),
- Tok.getLength()));
+ Tok.setLocation(SourceMgr.createExpansionLoc(DATELoc, Tok.getLocation(),
+ Tok.getLocation(),
+ Tok.getLength()));
return;
} else if (II == Ident__TIME__) {
if (!TIMELoc.isValid())
ComputeDATE_TIME(DATELoc, TIMELoc, *this);
Tok.setKind(tok::string_literal);
Tok.setLength(strlen("\"hh:mm:ss\""));
- Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(),
- Tok.getLocation(),
- Tok.getLength()));
+ Tok.setLocation(SourceMgr.createExpansionLoc(TIMELoc, Tok.getLocation(),
+ Tok.getLocation(),
+ Tok.getLength()));
return;
} else if (II == Ident__INCLUDE_LEVEL__) {
// Compute the presumed include depth of this token. This can be affected
@@ -923,7 +963,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
Result = "??? ??? ?? ??:??:?? ????\n";
}
// Surround the string with " and strip the trailing newline.
- OS << '"' << llvm::StringRef(Result, strlen(Result)-1) << '"';
+ OS << '"' << StringRef(Result, strlen(Result)-1) << '"';
Tok.setKind(tok::string_literal);
} else if (II == Ident__COUNTER__) {
// __COUNTER__ expands to a simple numeric value.
@@ -983,10 +1023,78 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
Value = EvaluateHasIncludeNext(Tok, II, *this);
OS << (int)Value;
Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__has_warning) {
+ // The argument should be a parenthesized string literal.
+ // The argument to these builtins should be a parenthesized identifier.
+ SourceLocation StartLoc = Tok.getLocation();
+ bool IsValid = false;
+ bool Value = false;
+ // Read the '('.
+ Lex(Tok);
+ do {
+ if (Tok.is(tok::l_paren)) {
+ // Read the string.
+ Lex(Tok);
+
+ // We need at least one string literal.
+ if (!Tok.is(tok::string_literal)) {
+ StartLoc = Tok.getLocation();
+ IsValid = false;
+ // Eat tokens until ')'.
+ do Lex(Tok); while (!(Tok.is(tok::r_paren) || Tok.is(tok::eod)));
+ break;
+ }
+
+ // String concatenation allows multiple strings, which can even come
+ // from macro expansion.
+ SmallVector<Token, 4> StrToks;
+ while (Tok.is(tok::string_literal)) {
+ StrToks.push_back(Tok);
+ LexUnexpandedToken(Tok);
+ }
+
+ // Is the end a ')'?
+ if (!(IsValid = Tok.is(tok::r_paren)))
+ break;
+
+ // Concatenate and parse the strings.
+ StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
+ if (Literal.hadError)
+ break;
+ if (Literal.Pascal) {
+ Diag(Tok, diag::warn_pragma_diagnostic_invalid);
+ break;
+ }
+
+ StringRef WarningName(Literal.GetString());
+
+ if (WarningName.size() < 3 || WarningName[0] != '-' ||
+ WarningName[1] != 'W') {
+ Diag(StrToks[0].getLocation(), diag::warn_has_warning_invalid_option);
+ break;
+ }
+
+ // Finally, check if the warning flags maps to a diagnostic group.
+ // We construct a SmallVector here to talk to getDiagnosticIDs().
+ // Although we don't use the result, this isn't a hot path, and not
+ // worth special casing.
+ llvm::SmallVector<diag::kind, 10> Diags;
+ Value = !getDiagnostics().getDiagnosticIDs()->
+ getDiagnosticsInGroup(WarningName.substr(2), Diags);
+ }
+ } while (false);
+
+ if (!IsValid)
+ Diag(StartLoc, diag::err_warning_check_malformed);
+
+ OS << (int)Value;
+ Tok.setKind(tok::numeric_constant);
} else {
- assert(0 && "Unknown identifier!");
+ llvm_unreachable("Unknown identifier!");
}
- CreateString(OS.str().data(), OS.str().size(), Tok, Tok.getLocation());
+ CreateString(OS.str().data(), OS.str().size(), Tok,
+ Tok.getLocation(), Tok.getLocation());
}
void Preprocessor::markMacroAsUsed(MacroInfo *MI) {
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index e5ef0fdf20eb..e0c4cf0c16c8 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -73,7 +73,7 @@ LexNextToken:
Tok.setKind(TKind);
Tok.setFlag(TFlags);
assert(!LexingRawMode);
- Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
+ Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
Tok.setLength(Len);
// Handle identifiers.
@@ -147,7 +147,7 @@ bool PTHLexer::LexEndOfFile(Token &Result) {
// If we are in a #if directive, emit an error.
while (!ConditionalStack.empty()) {
- if (!PP->isCodeCompletionFile(FileStartLoc))
+ if (PP->getCodeCompletionFileLoc() != FileStartLoc)
PP->Diag(ConditionalStack.back().IfLoc,
diag::err_pp_unterminated_conditional);
ConditionalStack.pop_back();
@@ -297,7 +297,7 @@ SourceLocation PTHLexer::getSourceLocation() {
// NOTE: This is a virtual function; hence it is defined out-of-line.
const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
uint32_t Offset = ReadLE32(OffsetPtr);
- return FileStartLoc.getFileLocWithOffset(Offset);
+ return FileStartLoc.getLocWithOffset(Offset);
}
//===----------------------------------------------------------------------===//
@@ -380,7 +380,7 @@ public:
}
static unsigned ComputeHash(const internal_key_type& a) {
- return llvm::HashString(llvm::StringRef(a.first, a.second));
+ return llvm::HashString(StringRef(a.first, a.second));
}
// This hopefully will just get inlined and removed by the optimizer.
@@ -431,11 +431,12 @@ PTHManager::~PTHManager() {
free(PerIDCache);
}
-static void InvalidPTH(Diagnostic &Diags, const char *Msg) {
- Diags.Report(Diags.getCustomDiagID(Diagnostic::Error, Msg));
+static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
+ Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, Msg));
}
-PTHManager *PTHManager::Create(const std::string &file, Diagnostic &Diags) {
+PTHManager *PTHManager::Create(const std::string &file,
+ DiagnosticsEngine &Diags) {
// Memory map the PTH file.
llvm::OwningPtr<llvm::MemoryBuffer> File;
@@ -572,10 +573,10 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
return II;
}
-IdentifierInfo* PTHManager::get(llvm::StringRef Name) {
+IdentifierInfo* PTHManager::get(StringRef Name) {
PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
// Double check our assumption that the last character isn't '\0'.
- assert(Name.empty() || Name.data()[Name.size()-1] != '\0');
+ assert(Name.empty() || Name.back() != '\0');
PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
Name.size()));
if (I == SL.end()) // No identifier found?
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index e6b28c13317b..f6532c2175a1 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -54,11 +54,11 @@ PragmaNamespace::~PragmaNamespace() {
/// specified name. If not, return the handler for the null identifier if it
/// exists, otherwise return null. If IgnoreNull is true (the default) then
/// the null handler isn't returned on failure to match.
-PragmaHandler *PragmaNamespace::FindHandler(llvm::StringRef Name,
+PragmaHandler *PragmaNamespace::FindHandler(StringRef Name,
bool IgnoreNull) const {
if (PragmaHandler *Handler = Handlers.lookup(Name))
return Handler;
- return IgnoreNull ? 0 : Handlers.lookup(llvm::StringRef());
+ return IgnoreNull ? 0 : Handlers.lookup(StringRef());
}
void PragmaNamespace::AddPragma(PragmaHandler *Handler) {
@@ -85,7 +85,7 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP,
// Get the handler for this token. If there is no handler, ignore the pragma.
PragmaHandler *Handler
= FindHandler(Tok.getIdentifierInfo() ? Tok.getIdentifierInfo()->getName()
- : llvm::StringRef(),
+ : StringRef(),
/*IgnoreNull=*/false);
if (Handler == 0) {
PP.Diag(Tok, diag::warn_pragma_ignored);
@@ -210,7 +210,7 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) {
}
// Get the tokens enclosed within the __pragma(), as well as the final ')'.
- llvm::SmallVector<Token, 32> PragmaToks;
+ SmallVector<Token, 32> PragmaToks;
int NumParens = 0;
Lex(Tok);
while (Tok.isNot(tok::eof)) {
@@ -353,7 +353,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
// Reserve a buffer to get the spelling.
llvm::SmallString<128> FilenameBuffer;
bool Invalid = false;
- llvm::StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);
+ StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);
if (Invalid)
return;
@@ -366,9 +366,11 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
// Search include directories for this file.
const DirectoryLookup *CurDir;
- const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL);
+ const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL,
+ NULL);
if (File == 0) {
- Diag(FilenameTok, diag::warn_pp_file_not_found) << Filename;
+ if (!SuppressIncludeNotFoundError)
+ Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
return;
}
@@ -436,7 +438,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
// String concatenation allows multiple strings, which can even come from
// macro expansion.
// "foo " "bar" "Baz"
- llvm::SmallVector<Token, 4> StrToks;
+ SmallVector<Token, 4> StrToks;
while (Tok.is(tok::string_literal)) {
StrToks.push_back(Tok);
Lex(Tok);
@@ -444,7 +446,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
// Concatenate and parse the strings.
StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return;
if (Literal.Pascal) {
@@ -512,7 +514,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {
// String concatenation allows multiple strings, which can even come from
// macro expansion.
// "foo " "bar" "Baz"
- llvm::SmallVector<Token, 4> StrToks;
+ SmallVector<Token, 4> StrToks;
while (Tok.is(tok::string_literal)) {
StrToks.push_back(Tok);
Lex(Tok);
@@ -520,7 +522,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {
// Concatenate and parse the strings.
StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return;
if (Literal.Pascal) {
@@ -528,7 +530,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {
return;
}
- llvm::StringRef MessageString(Literal.GetString());
+ StringRef MessageString(Literal.GetString());
if (ExpectClosingParen) {
if (Tok.isNot(tok::r_paren)) {
@@ -662,7 +664,7 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
/// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
/// If 'Namespace' is non-null, then it is a token required to exist on the
/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
-void Preprocessor::AddPragmaHandler(llvm::StringRef Namespace,
+void Preprocessor::AddPragmaHandler(StringRef Namespace,
PragmaHandler *Handler) {
PragmaNamespace *InsertNS = PragmaHandlers;
@@ -693,7 +695,7 @@ void Preprocessor::AddPragmaHandler(llvm::StringRef Namespace,
/// preprocessor. If \arg Namespace is non-null, then it should be the
/// namespace that \arg Handler was added to. It is an error to remove
/// a handler that has not been registered.
-void Preprocessor::RemovePragmaHandler(llvm::StringRef Namespace,
+void Preprocessor::RemovePragmaHandler(StringRef Namespace,
PragmaHandler *Handler) {
PragmaNamespace *NS = PragmaHandlers;
@@ -802,7 +804,7 @@ struct PragmaDebugHandler : public PragmaHandler {
IdentifierInfo *II = Tok.getIdentifierInfo();
if (II->isStr("assert")) {
- assert(0 && "This is an assertion!");
+ llvm_unreachable("This is an assertion!");
} else if (II->isStr("crash")) {
*(volatile int*) 0x11 = 0;
} else if (II->isStr("llvm_fatal_error")) {
@@ -889,7 +891,7 @@ public:
// String concatenation allows multiple strings, which can even come from
// macro expansion.
// "foo " "bar" "Baz"
- llvm::SmallVector<Token, 4> StrToks;
+ SmallVector<Token, 4> StrToks;
while (Tok.is(tok::string_literal)) {
StrToks.push_back(Tok);
PP.LexUnexpandedToken(Tok);
@@ -902,7 +904,7 @@ public:
// Concatenate and parse the strings.
StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
- assert(!Literal.AnyWide && "Didn't allow wide strings in");
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return;
if (Literal.Pascal) {
@@ -910,7 +912,7 @@ public:
return;
}
- llvm::StringRef WarningName(Literal.GetString());
+ StringRef WarningName(Literal.GetString());
if (WarningName.size() < 3 || WarningName[0] != '-' ||
WarningName[1] != 'W') {
@@ -1003,6 +1005,60 @@ struct PragmaSTDC_UnknownHandler : public PragmaHandler {
}
};
+/// PragmaARCCFCodeAuditedHandler -
+/// #pragma clang arc_cf_code_audited begin/end
+struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
+ PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {}
+ virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+ Token &NameTok) {
+ SourceLocation Loc = NameTok.getLocation();
+ bool IsBegin;
+
+ Token Tok;
+
+ // Lex the 'begin' or 'end'.
+ PP.LexUnexpandedToken(Tok);
+ const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();
+ if (BeginEnd && BeginEnd->isStr("begin")) {
+ IsBegin = true;
+ } else if (BeginEnd && BeginEnd->isStr("end")) {
+ IsBegin = false;
+ } else {
+ PP.Diag(Tok.getLocation(), diag::err_pp_arc_cf_code_audited_syntax);
+ return;
+ }
+
+ // Verify that this is followed by EOD.
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // The start location of the active audit.
+ SourceLocation BeginLoc = PP.getPragmaARCCFCodeAuditedLoc();
+
+ // The start location we want after processing this.
+ SourceLocation NewLoc;
+
+ if (IsBegin) {
+ // Complain about attempts to re-enter an audit.
+ if (BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_double_begin_of_arc_cf_code_audited);
+ PP.Diag(BeginLoc, diag::note_pragma_entered_here);
+ }
+ NewLoc = Loc;
+ } else {
+ // Complain about attempts to leave an audit that doesn't exist.
+ if (!BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_unmatched_end_of_arc_cf_code_audited);
+ return;
+ }
+ NewLoc = SourceLocation();
+ }
+
+ PP.setPragmaARCCFCodeAuditedLoc(NewLoc);
+ }
+};
+
} // end anonymous namespace
@@ -1026,13 +1082,14 @@ void Preprocessor::RegisterBuiltinPragmas() {
AddPragmaHandler("clang", new PragmaDebugHandler());
AddPragmaHandler("clang", new PragmaDependencyHandler());
AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang"));
+ AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler());
AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler());
AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler());
AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler());
// MS extensions.
- if (Features.Microsoft) {
+ if (Features.MicrosoftExt) {
AddPragmaHandler(new PragmaCommentHandler());
}
}
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index 9f93ab04502a..2816609d5f8f 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp
@@ -14,8 +14,8 @@
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Token.h"
-#include "clang/Basic/IdentifierTable.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Capacity.h"
using namespace clang;
@@ -24,7 +24,7 @@ ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() { }
InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
InclusionKind Kind,
- llvm::StringRef FileName,
+ StringRef FileName,
bool InQuotes, const FileEntry *File,
SourceRange Range)
: PreprocessingDirective(InclusionDirectiveKind, Range),
@@ -34,116 +34,254 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
= (char*)PPRec.Allocate(FileName.size() + 1, llvm::alignOf<char>());
memcpy(Memory, FileName.data(), FileName.size());
Memory[FileName.size()] = 0;
- this->FileName = llvm::StringRef(Memory, FileName.size());
+ this->FileName = StringRef(Memory, FileName.size());
}
-void PreprocessingRecord::MaybeLoadPreallocatedEntities() const {
- if (!ExternalSource || LoadedPreallocatedEntities)
- return;
-
- LoadedPreallocatedEntities = true;
- ExternalSource->ReadPreprocessedEntities();
+PreprocessingRecord::PreprocessingRecord(SourceManager &SM,
+ bool IncludeNestedMacroExpansions)
+ : SourceMgr(SM), IncludeNestedMacroExpansions(IncludeNestedMacroExpansions),
+ ExternalSource(0)
+{
}
-PreprocessingRecord::PreprocessingRecord(bool IncludeNestedMacroExpansions)
- : IncludeNestedMacroExpansions(IncludeNestedMacroExpansions),
- ExternalSource(0), NumPreallocatedEntities(0),
- LoadedPreallocatedEntities(false)
-{
+/// \brief Returns a pair of [Begin, End) iterators of preprocessed entities
+/// that source range \arg R encompasses.
+std::pair<PreprocessingRecord::iterator, PreprocessingRecord::iterator>
+PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) {
+ if (Range.isInvalid())
+ return std::make_pair(iterator(this, 0), iterator(this, 0));
+ assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
+
+ std::pair<unsigned, unsigned>
+ Local = findLocalPreprocessedEntitiesInRange(Range);
+
+ // Check if range spans local entities.
+ if (!ExternalSource || SourceMgr.isLocalSourceLocation(Range.getBegin()))
+ return std::make_pair(iterator(this, Local.first),
+ iterator(this, Local.second));
+
+ std::pair<unsigned, unsigned>
+ Loaded = ExternalSource->findPreprocessedEntitiesInRange(Range);
+
+ // Check if range spans local entities.
+ if (Loaded.first == Loaded.second)
+ return std::make_pair(iterator(this, Local.first),
+ iterator(this, Local.second));
+
+ unsigned TotalLoaded = LoadedPreprocessedEntities.size();
+
+ // Check if range spans loaded entities.
+ if (Local.first == Local.second)
+ return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded),
+ iterator(this, int(Loaded.second)-TotalLoaded));
+
+ // Range spands loaded and local entities.
+ return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded),
+ iterator(this, Local.second));
}
-PreprocessingRecord::iterator
-PreprocessingRecord::begin(bool OnlyLocalEntities) {
- if (OnlyLocalEntities)
- return PreprocessedEntities.begin() + NumPreallocatedEntities;
-
- MaybeLoadPreallocatedEntities();
- return PreprocessedEntities.begin();
+std::pair<unsigned, unsigned>
+PreprocessingRecord::findLocalPreprocessedEntitiesInRange(
+ SourceRange Range) const {
+ if (Range.isInvalid())
+ return std::make_pair(0,0);
+ assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
+
+ unsigned Begin = findBeginLocalPreprocessedEntity(Range.getBegin());
+ unsigned End = findEndLocalPreprocessedEntity(Range.getEnd());
+ return std::make_pair(Begin, End);
}
-PreprocessingRecord::iterator PreprocessingRecord::end(bool OnlyLocalEntities) {
- if (!OnlyLocalEntities)
- MaybeLoadPreallocatedEntities();
-
- return PreprocessedEntities.end();
+namespace {
+
+template <SourceLocation (SourceRange::*getRangeLoc)() const>
+struct PPEntityComp {
+ const SourceManager &SM;
+
+ explicit PPEntityComp(const SourceManager &SM) : SM(SM) { }
+
+ bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const {
+ SourceLocation LHS = getLoc(L);
+ SourceLocation RHS = getLoc(R);
+ return SM.isBeforeInTranslationUnit(LHS, RHS);
+ }
+
+ bool operator()(PreprocessedEntity *L, SourceLocation RHS) const {
+ SourceLocation LHS = getLoc(L);
+ return SM.isBeforeInTranslationUnit(LHS, RHS);
+ }
+
+ bool operator()(SourceLocation LHS, PreprocessedEntity *R) const {
+ SourceLocation RHS = getLoc(R);
+ return SM.isBeforeInTranslationUnit(LHS, RHS);
+ }
+
+ SourceLocation getLoc(PreprocessedEntity *PPE) const {
+ SourceRange Range = PPE->getSourceRange();
+ return (Range.*getRangeLoc)();
+ }
+};
+
}
-PreprocessingRecord::const_iterator
-PreprocessingRecord::begin(bool OnlyLocalEntities) const {
- if (OnlyLocalEntities)
- return PreprocessedEntities.begin() + NumPreallocatedEntities;
-
- MaybeLoadPreallocatedEntities();
- return PreprocessedEntities.begin();
+unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity(
+ SourceLocation Loc) const {
+ if (SourceMgr.isLoadedSourceLocation(Loc))
+ return 0;
+
+ size_t Count = PreprocessedEntities.size();
+ size_t Half;
+ std::vector<PreprocessedEntity *>::const_iterator
+ First = PreprocessedEntities.begin();
+ std::vector<PreprocessedEntity *>::const_iterator I;
+
+ // Do a binary search manually instead of using std::lower_bound because
+ // The end locations of entities may be unordered (when a macro expansion
+ // is inside another macro argument), but for this case it is not important
+ // whether we get the first macro expansion or its containing macro.
+ while (Count > 0) {
+ Half = Count/2;
+ I = First;
+ std::advance(I, Half);
+ if (SourceMgr.isBeforeInTranslationUnit((*I)->getSourceRange().getEnd(),
+ Loc)){
+ First = I;
+ ++First;
+ Count = Count - Half - 1;
+ } else
+ Count = Half;
+ }
+
+ return First - PreprocessedEntities.begin();
}
-PreprocessingRecord::const_iterator
-PreprocessingRecord::end(bool OnlyLocalEntities) const {
- if (!OnlyLocalEntities)
- MaybeLoadPreallocatedEntities();
-
- return PreprocessedEntities.end();
+unsigned PreprocessingRecord::findEndLocalPreprocessedEntity(
+ SourceLocation Loc) const {
+ if (SourceMgr.isLoadedSourceLocation(Loc))
+ return 0;
+
+ std::vector<PreprocessedEntity *>::const_iterator
+ I = std::upper_bound(PreprocessedEntities.begin(),
+ PreprocessedEntities.end(),
+ Loc,
+ PPEntityComp<&SourceRange::getBegin>(SourceMgr));
+ return I - PreprocessedEntities.begin();
}
void PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
- PreprocessedEntities.push_back(Entity);
+ assert(Entity);
+ SourceLocation BeginLoc = Entity->getSourceRange().getBegin();
+
+ // Check normal case, this entity begin location is after the previous one.
+ if (PreprocessedEntities.empty() ||
+ !SourceMgr.isBeforeInTranslationUnit(BeginLoc,
+ PreprocessedEntities.back()->getSourceRange().getBegin())) {
+ PreprocessedEntities.push_back(Entity);
+ return;
+ }
+
+ // The entity's location is not after the previous one; this can happen rarely
+ // e.g. with "#include MACRO".
+ // Iterate the entities vector in reverse until we find the right place to
+ // insert the new entity.
+ for (std::vector<PreprocessedEntity *>::iterator
+ RI = PreprocessedEntities.end(), Begin = PreprocessedEntities.begin();
+ RI != Begin; --RI) {
+ std::vector<PreprocessedEntity *>::iterator I = RI;
+ --I;
+ if (!SourceMgr.isBeforeInTranslationUnit(BeginLoc,
+ (*I)->getSourceRange().getBegin())) {
+ PreprocessedEntities.insert(RI, Entity);
+ return;
+ }
+ }
}
void PreprocessingRecord::SetExternalSource(
- ExternalPreprocessingRecordSource &Source,
- unsigned NumPreallocatedEntities) {
+ ExternalPreprocessingRecordSource &Source) {
assert(!ExternalSource &&
"Preprocessing record already has an external source");
ExternalSource = &Source;
- this->NumPreallocatedEntities = NumPreallocatedEntities;
- PreprocessedEntities.insert(PreprocessedEntities.begin(),
- NumPreallocatedEntities, 0);
}
-void PreprocessingRecord::SetPreallocatedEntity(unsigned Index,
- PreprocessedEntity *Entity) {
- assert(Index < NumPreallocatedEntities &&"Out-of-bounds preallocated entity");
- PreprocessedEntities[Index] = Entity;
+unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) {
+ unsigned Result = LoadedPreprocessedEntities.size();
+ LoadedPreprocessedEntities.resize(LoadedPreprocessedEntities.size()
+ + NumEntities);
+ return Result;
+}
+
+void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
+ PPEntityID PPID) {
+ MacroDefinitions[Macro] = PPID;
+}
+
+/// \brief Retrieve the preprocessed entity at the given ID.
+PreprocessedEntity *PreprocessingRecord::getPreprocessedEntity(PPEntityID PPID){
+ if (PPID < 0) {
+ assert(unsigned(-PPID-1) < LoadedPreprocessedEntities.size() &&
+ "Out-of bounds loaded preprocessed entity");
+ return getLoadedPreprocessedEntity(LoadedPreprocessedEntities.size()+PPID);
+ }
+ assert(unsigned(PPID) < PreprocessedEntities.size() &&
+ "Out-of bounds local preprocessed entity");
+ return PreprocessedEntities[PPID];
}
-void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
- MacroDefinition *MD) {
- MacroDefinitions[Macro] = MD;
+/// \brief Retrieve the loaded preprocessed entity at the given index.
+PreprocessedEntity *
+PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) {
+ assert(Index < LoadedPreprocessedEntities.size() &&
+ "Out-of bounds loaded preprocessed entity");
+ assert(ExternalSource && "No external source to load from");
+ PreprocessedEntity *&Entity = LoadedPreprocessedEntities[Index];
+ if (!Entity) {
+ Entity = ExternalSource->ReadPreprocessedEntity(Index);
+ if (!Entity) // Failed to load.
+ Entity = new (*this)
+ PreprocessedEntity(PreprocessedEntity::InvalidKind, SourceRange());
+ }
+ return Entity;
}
MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
- llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos
+ llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos
= MacroDefinitions.find(MI);
if (Pos == MacroDefinitions.end())
return 0;
- return Pos->second;
+ PreprocessedEntity *Entity = getPreprocessedEntity(Pos->second);
+ if (Entity->isInvalid())
+ return 0;
+ return cast<MacroDefinition>(Entity);
}
-void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI) {
+void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI,
+ SourceRange Range) {
if (!IncludeNestedMacroExpansions && Id.getLocation().isMacroID())
return;
- if (MacroDefinition *Def = findMacroDefinition(MI))
- PreprocessedEntities.push_back(
- new (*this) MacroExpansion(Id.getIdentifierInfo(),
- Id.getLocation(), Def));
+ if (MI->isBuiltinMacro())
+ addPreprocessedEntity(
+ new (*this) MacroExpansion(Id.getIdentifierInfo(),Range));
+ else if (MacroDefinition *Def = findMacroDefinition(MI))
+ addPreprocessedEntity(
+ new (*this) MacroExpansion(Def, Range));
}
void PreprocessingRecord::MacroDefined(const Token &Id,
const MacroInfo *MI) {
SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());
MacroDefinition *Def
- = new (*this) MacroDefinition(Id.getIdentifierInfo(),
- MI->getDefinitionLoc(),
- R);
- MacroDefinitions[MI] = Def;
- PreprocessedEntities.push_back(Def);
+ = new (*this) MacroDefinition(Id.getIdentifierInfo(), R);
+ addPreprocessedEntity(Def);
+ MacroDefinitions[MI] = getPPEntityID(PreprocessedEntities.size()-1,
+ /*isLoaded=*/false);
}
void PreprocessingRecord::MacroUndefined(const Token &Id,
const MacroInfo *MI) {
- llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos
+ llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos
= MacroDefinitions.find(MI);
if (Pos != MacroDefinitions.end())
MacroDefinitions.erase(Pos);
@@ -152,12 +290,12 @@ void PreprocessingRecord::MacroUndefined(const Token &Id,
void PreprocessingRecord::InclusionDirective(
SourceLocation HashLoc,
const clang::Token &IncludeTok,
- llvm::StringRef FileName,
+ StringRef FileName,
bool IsAngled,
const FileEntry *File,
clang::SourceLocation EndLoc,
- llvm::StringRef SearchPath,
- llvm::StringRef RelativePath) {
+ StringRef SearchPath,
+ StringRef RelativePath) {
InclusionDirective::InclusionKind Kind = InclusionDirective::Include;
switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
@@ -185,5 +323,12 @@ void PreprocessingRecord::InclusionDirective(
clang::InclusionDirective *ID
= new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled,
File, SourceRange(HashLoc, EndLoc));
- PreprocessedEntities.push_back(ID);
+ addPreprocessedEntity(ID);
+}
+
+size_t PreprocessingRecord::getTotalMemory() const {
+ return BumpAlloc.getTotalMemory()
+ + llvm::capacity_in_bytes(MacroDefinitions)
+ + llvm::capacity_in_bytes(PreprocessedEntities)
+ + llvm::capacity_in_bytes(LoadedPreprocessedEntities);
}
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index e7aa286a16bf..31662ad0c116 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -35,6 +35,7 @@
#include "clang/Lex/ScratchBuffer.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/ModuleLoader.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/TargetInfo.h"
@@ -42,27 +43,83 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Capacity.h"
using namespace clang;
//===----------------------------------------------------------------------===//
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
-Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
- const TargetInfo &target, SourceManager &SM,
- HeaderSearch &Headers,
+Preprocessor::Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,
+ const TargetInfo *target, SourceManager &SM,
+ HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
IdentifierInfoLookup* IILookup,
- bool OwnsHeaders)
+ bool OwnsHeaders,
+ bool DelayInitialization)
: Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()),
- SourceMgr(SM),
- HeaderInfo(Headers), ExternalSource(0),
- Identifiers(opts, IILookup), BuiltinInfo(Target), CodeComplete(0),
- CodeCompletionFile(0), SkipMainFilePreamble(0, true), CurPPLexer(0),
- CurDirLookup(0), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0),
- MICache(0) {
- ScratchBuf = new ScratchBuffer(SourceMgr);
- CounterValue = 0; // __COUNTER__ starts at 0.
+ SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
+ ExternalSource(0),
+ Identifiers(opts, IILookup), CodeComplete(0),
+ CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0),
+ SkipMainFilePreamble(0, true), CurPPLexer(0),
+ CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), MacroArgCache(0),
+ Record(0), MIChainHead(0), MICache(0)
+{
OwnsHeaderSearch = OwnsHeaders;
+
+ if (!DelayInitialization) {
+ assert(Target && "Must provide target information for PP initialization");
+ Initialize(*Target);
+ }
+}
+
+Preprocessor::~Preprocessor() {
+ assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
+ assert(((MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty()) ||
+ isCodeCompletionReached()) &&
+ "Preprocessor::HandleEndOfTokenLexer should have cleared those");
+
+ while (!IncludeMacroStack.empty()) {
+ delete IncludeMacroStack.back().TheLexer;
+ delete IncludeMacroStack.back().TheTokenLexer;
+ IncludeMacroStack.pop_back();
+ }
+ // Free any macro definitions.
+ for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next)
+ I->MI.Destroy();
+
+ // Free any cached macro expanders.
+ for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
+ delete TokenLexerCache[i];
+
+ // Free any cached MacroArgs.
+ for (MacroArgs *ArgList = MacroArgCache; ArgList; )
+ ArgList = ArgList->deallocate();
+
+ // Release pragma information.
+ delete PragmaHandlers;
+
+ // Delete the scratch buffer info.
+ delete ScratchBuf;
+
+ // Delete the header search info, if we own it.
+ if (OwnsHeaderSearch)
+ delete &HeaderInfo;
+
+ delete Callbacks;
+}
+
+void Preprocessor::Initialize(const TargetInfo &Target) {
+ assert((!this->Target || this->Target == &Target) &&
+ "Invalid override of target information");
+ this->Target = &Target;
+
+ // Initialize information about built-ins.
+ BuiltinInfo.InitializeTarget(Target);
+
+ ScratchBuf = new ScratchBuffer(SourceMgr);
+ CounterValue = 0; // __COUNTER__ starts at 0.
+
// Clear stats.
NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
NumIf = NumElse = NumEndif = 0;
@@ -71,33 +128,35 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
MaxIncludeStackDepth = 0;
NumSkipped = 0;
-
+
// Default to discarding comments.
KeepComments = false;
KeepMacroComments = false;
-
+ SuppressIncludeNotFoundError = false;
+ AutoModuleImport = false;
+
// Macro expansion is enabled.
DisableMacroExpansion = false;
InMacroArgs = false;
NumCachedTokenLexers = 0;
-
+
CachedLexPos = 0;
-
+
// We haven't read anything from the external source.
ReadMacrosFromExternalSource = false;
-
+
// "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
// This gets unpoisoned where it is allowed.
(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
-
+
// Initialize the pragma handlers.
- PragmaHandlers = new PragmaNamespace(llvm::StringRef());
+ PragmaHandlers = new PragmaNamespace(StringRef());
RegisterBuiltinPragmas();
-
+
// Initialize builtin macros like __LINE__ and friends.
RegisterBuiltinMacros();
-
+
if(Features.Borland) {
Ident__exception_info = getIdentifierInfo("_exception_info");
Ident___exception_info = getIdentifierInfo("__exception_info");
@@ -112,44 +171,7 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
Ident__exception_info = Ident__exception_code = Ident__abnormal_termination = 0;
Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0;
Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0;
- }
-
-}
-
-Preprocessor::~Preprocessor() {
- assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
- assert(MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty() &&
- "Preprocessor::HandleEndOfTokenLexer should have cleared those");
-
- while (!IncludeMacroStack.empty()) {
- delete IncludeMacroStack.back().TheLexer;
- delete IncludeMacroStack.back().TheTokenLexer;
- IncludeMacroStack.pop_back();
- }
-
- // Free any macro definitions.
- for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next)
- I->MI.Destroy();
-
- // Free any cached macro expanders.
- for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
- delete TokenLexerCache[i];
-
- // Free any cached MacroArgs.
- for (MacroArgs *ArgList = MacroArgCache; ArgList; )
- ArgList = ArgList->deallocate();
-
- // Release pragma information.
- delete PragmaHandlers;
-
- // Delete the scratch buffer info.
- delete ScratchBuf;
-
- // Delete the header search info, if we own it.
- if (OwnsHeaderSearch)
- delete &HeaderInfo;
-
- delete Callbacks;
+ }
}
void Preprocessor::setPTHManager(PTHManager* pm) {
@@ -172,7 +194,7 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << " [ExpandDisabled]";
if (Tok.needsCleaning()) {
const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
- llvm::errs() << " [UnClean='" << llvm::StringRef(Start, Tok.getLength())
+ llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
<< "']";
}
@@ -228,7 +250,13 @@ Preprocessor::macro_begin(bool IncludeExternalMacros) const {
}
size_t Preprocessor::getTotalMemory() const {
- return BP.getTotalMemory() + MacroExpandedTokens.capacity()*sizeof(Token);
+ return BP.getTotalMemory()
+ + llvm::capacity_in_bytes(MacroExpandedTokens)
+ + Predefines.capacity() /* Predefines buffer. */
+ + llvm::capacity_in_bytes(Macros)
+ + llvm::capacity_in_bytes(PragmaPushMacroInfo)
+ + llvm::capacity_in_bytes(PoisonReasons)
+ + llvm::capacity_in_bytes(CommentHandlers);
}
Preprocessor::macro_iterator
@@ -243,15 +271,13 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const {
}
bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
- unsigned TruncateAtLine,
- unsigned TruncateAtColumn) {
- using llvm::MemoryBuffer;
-
- CodeCompletionFile = File;
+ unsigned CompleteLine,
+ unsigned CompleteColumn) {
+ assert(File);
+ assert(CompleteLine && CompleteColumn && "Starts from 1:1");
+ assert(!CodeCompletionFile && "Already set");
- // Okay to clear out the code-completion point by passing NULL.
- if (!CodeCompletionFile)
- return false;
+ using llvm::MemoryBuffer;
// Load the actual file's contents.
bool Invalid = false;
@@ -261,7 +287,7 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
// Find the byte position of the truncation point.
const char *Position = Buffer->getBufferStart();
- for (unsigned Line = 1; Line < TruncateAtLine; ++Line) {
+ for (unsigned Line = 1; Line < CompleteLine; ++Line) {
for (; *Position; ++Position) {
if (*Position != '\r' && *Position != '\n')
continue;
@@ -275,38 +301,37 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
}
}
- Position += TruncateAtColumn - 1;
+ Position += CompleteColumn - 1;
- // Truncate the buffer.
+ // Insert '\0' at the code-completion point.
if (Position < Buffer->getBufferEnd()) {
- llvm::StringRef Data(Buffer->getBufferStart(),
- Position-Buffer->getBufferStart());
- MemoryBuffer *TruncatedBuffer
- = MemoryBuffer::getMemBufferCopy(Data, Buffer->getBufferIdentifier());
- SourceMgr.overrideFileContents(File, TruncatedBuffer);
+ CodeCompletionFile = File;
+ CodeCompletionOffset = Position - Buffer->getBufferStart();
+
+ MemoryBuffer *NewBuffer =
+ MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
+ Buffer->getBufferIdentifier());
+ char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
+ char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
+ *NewPos = '\0';
+ std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
+ SourceMgr.overrideFileContents(File, NewBuffer);
}
return false;
}
-bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {
- return CodeCompletionFile && FileLoc.isFileID() &&
- SourceMgr.getFileEntryForID(SourceMgr.getFileID(FileLoc))
- == CodeCompletionFile;
-}
-
void Preprocessor::CodeCompleteNaturalLanguage() {
- SetCodeCompletionPoint(0, 0, 0);
- getDiagnostics().setSuppressAllDiagnostics(true);
if (CodeComplete)
CodeComplete->CodeCompleteNaturalLanguage();
+ setCodeCompletionReached();
}
/// getSpelling - This method is used to get the spelling of a token into a
/// SmallVector. Note that the returned StringRef may not point to the
/// supplied buffer if a copy can be avoided.
-llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
- llvm::SmallVectorImpl<char> &Buffer,
+StringRef Preprocessor::getSpelling(const Token &Tok,
+ SmallVectorImpl<char> &Buffer,
bool *Invalid) const {
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
if (Tok.isNot(tok::raw_identifier)) {
@@ -321,22 +346,23 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
const char *Ptr = Buffer.data();
unsigned Len = getSpelling(Tok, Ptr, Invalid);
- return llvm::StringRef(Ptr, Len);
+ return StringRef(Ptr, Len);
}
/// CreateString - Plop the specified string into a scratch buffer and return a
/// location for it. If specified, the source location provides a source
/// location for the token.
void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
- SourceLocation ExpansionLoc) {
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd) {
Tok.setLength(Len);
const char *DestPtr;
SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
- if (ExpansionLoc.isValid())
- Loc = SourceMgr.createInstantiationLoc(Loc, ExpansionLoc,
- ExpansionLoc, Len);
+ if (ExpansionLocStart.isValid())
+ Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
+ ExpansionLocEnd, Len);
Tok.setLocation(Loc);
// If this is a raw identifier or a literal token, set the pointer data.
@@ -407,12 +433,12 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
IdentifierInfo *II;
if (!Identifier.needsCleaning()) {
// No cleaning needed, just use the characters from the lexed buffer.
- II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(),
+ II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(),
Identifier.getLength()));
} else {
// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
llvm::SmallString<64> IdentifierBuffer;
- llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
+ StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
II = getIdentifierInfo(CleanedStr);
}
@@ -487,6 +513,17 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
}
}
+ // If this identifier is a keyword in C++11, produce a warning. Don't warn if
+ // we're not considering macro expansion, since this identifier might be the
+ // name of a macro.
+ // FIXME: This warning is disabled in cases where it shouldn't be, like
+ // "#define constexpr constexpr", "int constexpr;"
+ if (II.isCXX11CompatKeyword() & !DisableMacroExpansion) {
+ Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
+ // Don't diagnose this keyword again in this translation unit.
+ II.setIsCXX11CompatKeyword(false);
+ }
+
// C++ 2.11p2: If this is an alternative representation of a C++ operator,
// then we act as if it is the actual operator and not the textual
// representation of it.
@@ -499,6 +536,44 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
// like "#define TY typeof", "TY(1) x".
if (II.isExtensionToken() && !DisableMacroExpansion)
Diag(Identifier, diag::ext_token_used);
+
+ // If this is the '__import_module__' keyword, note that the next token
+ // indicates a module name.
+ if (II.getTokenID() == tok::kw___import_module__ &&
+ !InMacroArgs && !DisableMacroExpansion) {
+ ModuleImportLoc = Identifier.getLocation();
+ CurLexerKind = CLK_LexAfterModuleImport;
+ }
+}
+
+/// \brief Lex a token following the __import_module__ keyword.
+void Preprocessor::LexAfterModuleImport(Token &Result) {
+ // Figure out what kind of lexer we actually have.
+ if (CurLexer)
+ CurLexerKind = CLK_Lexer;
+ else if (CurPTHLexer)
+ CurLexerKind = CLK_PTHLexer;
+ else if (CurTokenLexer)
+ CurLexerKind = CLK_TokenLexer;
+ else
+ CurLexerKind = CLK_CachingLexer;
+
+ // Lex the next token.
+ Lex(Result);
+
+ // The token sequence
+ //
+ // __import_module__ identifier
+ //
+ // indicates a module import directive. We already saw the __import_module__
+ // keyword, so now we're looking for the identifier.
+ if (Result.getKind() != tok::identifier)
+ return;
+
+ // Load the module.
+ (void)TheModuleLoader.loadModule(ModuleImportLoc,
+ *Result.getIdentifierInfo(),
+ Result.getLocation());
}
void Preprocessor::AddCommentHandler(CommentHandler *Handler) {
@@ -529,6 +604,8 @@ bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
return true;
}
+ModuleLoader::~ModuleLoader() { }
+
CommentHandler::~CommentHandler() { }
CodeCompletionHandler::~CodeCompletionHandler() { }
@@ -538,6 +615,7 @@ void Preprocessor::createPreprocessingRecord(
if (Record)
return;
- Record = new PreprocessingRecord(IncludeNestedMacroExpansions);
+ Record = new PreprocessingRecord(getSourceManager(),
+ IncludeNestedMacroExpansions);
addPPCallbacks(Record);
}
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index 808a81bd5e87..0da9ef5531e7 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -17,6 +17,14 @@
#include "clang/Basic/SourceManager.h"
using namespace clang;
+PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid)
+ : PP(pp), FID(fid), InitialNumSLocEntries(0),
+ ParsingPreprocessorDirective(false),
+ ParsingFilename(false), LexingRawMode(false) {
+ if (pp)
+ InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size();
+}
+
/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
/// (potentially) macro expand the filename.
void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index 0e98c1751985..3d363fa4b472 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -53,7 +53,7 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
// diagnostic points to one.
CurBuffer[BytesUsed-1] = '\0';
- return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len-1);
+ return BufferStartLoc.getLocWithOffset(BytesUsed-Len-1);
}
void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 3e9e8550313c..dc6d686d6cc1 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -17,42 +17,53 @@
using namespace clang;
-/// StartsWithL - Return true if the spelling of this token starts with 'L'.
-bool TokenConcatenation::StartsWithL(const Token &Tok) const {
- if (!Tok.needsCleaning()) {
- SourceManager &SM = PP.getSourceManager();
- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
- }
+/// IsStringPrefix - Return true if Str is a string prefix.
+/// 'L', 'u', 'U', or 'u8'. Including raw versions.
+static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) {
- if (Tok.getLength() < 256) {
- char Buffer[256];
- const char *TokPtr = Buffer;
- PP.getSpelling(Tok, TokPtr);
- return TokPtr[0] == 'L';
+ if (Str[0] == 'L' ||
+ (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
+
+ if (Str.size() == 1)
+ return true; // "L", "u", "U", and "R"
+
+ // Check for raw flavors. Need to make sure the first character wasn't
+ // already R. Need CPlusPlus0x check for "LR".
+ if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
+ return true; // "LR", "uR", "UR"
+
+ // Check for "u8" and "u8R"
+ if (Str[0] == 'u' && Str[1] == '8') {
+ if (Str.size() == 2) return true; // "u8"
+ if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"
+ }
}
- return PP.getSpelling(Tok)[0] == 'L';
+ return false;
}
-/// IsIdentifierL - Return true if the spelling of this token is literally
-/// 'L'.
-bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
+/// IsIdentifierStringPrefix - Return true if the spelling of the token
+/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions.
+bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
+ const LangOptions &LangOpts = PP.getLangOptions();
+
if (!Tok.needsCleaning()) {
- if (Tok.getLength() != 1)
+ if (Tok.getLength() < 1 || Tok.getLength() > 3)
return false;
SourceManager &SM = PP.getSourceManager();
- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+ const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+ return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
+ LangOpts.CPlusPlus0x);
}
if (Tok.getLength() < 256) {
char Buffer[256];
const char *TokPtr = Buffer;
- if (PP.getSpelling(Tok, TokPtr) != 1)
- return false;
- return TokPtr[0] == 'L';
+ unsigned length = PP.getSpelling(Tok, TokPtr);
+ return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
}
- return PP.getSpelling(Tok) == "L";
+ return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
}
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
@@ -132,7 +143,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
// source. If they were, it must be okay to stick them together: if there
// were an issue, the tokens would have been lexed differently.
if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() &&
- PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) ==
+ PrevTok.getLocation().getLocWithOffset(PrevTok.getLength()) ==
Tok.getLocation())
return false;
@@ -179,24 +190,19 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (Tok.is(tok::numeric_constant))
return GetFirstChar(PP, Tok) != '.';
- if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
- Tok.is(tok::wide_char_literal)*/)
+ if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
+ Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
+ Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
+ Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))
return true;
// If this isn't identifier + string, we're done.
if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
return false;
- // FIXME: need a wide_char_constant!
-
- // If the string was a wide string L"foo" or wide char L'f', it would
- // concat with the previous identifier into fooL"bar". Avoid this.
- if (StartsWithL(Tok))
- return true;
-
// Otherwise, this is a narrow character or string. If the *identifier*
- // is a literal 'L', avoid pasting L "foo" -> L"foo".
- return IsIdentifierL(PrevTok);
+ // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
+ return IsIdentifierStringPrefix(PrevTok);
case tok::numeric_constant:
return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 8ff82f160033..a58054490fcd 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -43,7 +43,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) {
MacroExpansionStart = SourceLocation();
SourceManager &SM = PP.getSourceManager();
- MacroStartSLocOffset = SM.getNextOffset();
+ MacroStartSLocOffset = SM.getNextLocalOffset();
if (NumTokens > 0) {
assert(Tokens[0].getLocation().isValid());
@@ -55,12 +55,12 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) {
// definition. Tokens that get lexed directly from the definition will
// have their locations pointing inside this chunk. This is to avoid
// creating separate source location entries for each token.
- SourceLocation macroStart = SM.getInstantiationLoc(Tokens[0].getLocation());
- MacroDefStartInfo = SM.getDecomposedLoc(macroStart);
- MacroExpansionStart = SM.createInstantiationLoc(macroStart,
- ExpandLocStart,
- ExpandLocEnd,
- Macro->getDefinitionLength(SM));
+ MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
+ MacroDefLength = Macro->getDefinitionLength(SM);
+ MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
+ ExpandLocStart,
+ ExpandLocEnd,
+ MacroDefLength);
}
// If this is a function-like macro, expand the arguments and change
@@ -121,9 +121,8 @@ void TokenLexer::destroy() {
/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.
void TokenLexer::ExpandFunctionArguments() {
- SourceManager &SM = PP.getSourceManager();
- llvm::SmallVector<Token, 128> ResultToks;
+ SmallVector<Token, 128> ResultToks;
// Loop through 'Tokens', expanding them into ResultToks. Keep
// track of whether we change anything. If not, no need to keep them. If so,
@@ -144,19 +143,22 @@ void TokenLexer::ExpandFunctionArguments() {
int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
assert(ArgNo != -1 && "Token following # is not an argument?");
- SourceLocation hashInstLoc;
- if(ExpandLocStart.isValid()) {
- hashInstLoc = getMacroExpansionLocation(CurTok.getLocation());
- assert(hashInstLoc.isValid() && "Expected '#' to come from definition");
- }
+ SourceLocation ExpansionLocStart =
+ getExpansionLocForMacroDefLoc(CurTok.getLocation());
+ SourceLocation ExpansionLocEnd =
+ getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());
Token Res;
if (CurTok.is(tok::hash)) // Stringify
- Res = ActualArgs->getStringifiedArgument(ArgNo, PP, hashInstLoc);
+ Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
+ ExpansionLocStart,
+ ExpansionLocEnd);
else {
// 'charify': don't bother caching these.
Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
- PP, true, hashInstLoc);
+ PP, true,
+ ExpansionLocStart,
+ ExpansionLocEnd);
}
// The stringified/charified string leading space flag gets set to match
@@ -225,16 +227,9 @@ void TokenLexer::ExpandFunctionArguments() {
}
if(ExpandLocStart.isValid()) {
- SourceLocation curInst =
- getMacroExpansionLocation(CurTok.getLocation());
- assert(curInst.isValid() &&
- "Expected arg identifier to come from definition");
- for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) {
- Token &Tok = ResultToks[i];
- Tok.setLocation(SM.createMacroArgInstantiationLoc(Tok.getLocation(),
- curInst,
- Tok.getLength()));
- }
+ updateLocForMacroArgTokens(CurTok.getLocation(),
+ ResultToks.begin()+FirstResult,
+ ResultToks.end());
}
// If any tokens were substituted from the argument, the whitespace
@@ -282,17 +277,8 @@ void TokenLexer::ExpandFunctionArguments() {
}
if (ExpandLocStart.isValid()) {
- SourceLocation curInst =
- getMacroExpansionLocation(CurTok.getLocation());
- assert(curInst.isValid() &&
- "Expected arg identifier to come from definition");
- for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size();
- i != e; ++i) {
- Token &Tok = ResultToks[i];
- Tok.setLocation(SM.createMacroArgInstantiationLoc(Tok.getLocation(),
- curInst,
- Tok.getLength()));
- }
+ updateLocForMacroArgTokens(CurTok.getLocation(),
+ ResultToks.end()-NumToks, ResultToks.end());
}
// If this token (the macro argument) was supposed to get leading
@@ -417,18 +403,15 @@ void TokenLexer::Lex(Token &Tok) {
// that captures all of this.
if (ExpandLocStart.isValid() && // Don't do this for token streams.
// Check that the token's location was not already set properly.
- SM.isBeforeInSourceLocationOffset(Tok.getLocation(),
- MacroStartSLocOffset)) {
+ SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
SourceLocation instLoc;
if (Tok.is(tok::comment)) {
- instLoc = SM.createInstantiationLoc(Tok.getLocation(),
- ExpandLocStart,
- ExpandLocEnd,
- Tok.getLength());
+ instLoc = SM.createExpansionLoc(Tok.getLocation(),
+ ExpandLocStart,
+ ExpandLocEnd,
+ Tok.getLength());
} else {
- instLoc = getMacroExpansionLocation(Tok.getLocation());
- assert(instLoc.isValid() &&
- "Location for token not coming from definition was not set!");
+ instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
}
Tok.setLocation(instLoc);
@@ -469,6 +452,7 @@ void TokenLexer::Lex(Token &Tok) {
bool TokenLexer::PasteTokens(Token &Tok) {
llvm::SmallString<128> Buffer;
const char *ResultTokStrPtr = 0;
+ SourceLocation StartLoc = Tok.getLocation();
SourceLocation PasteOpLoc;
do {
// Consume the ## operator.
@@ -562,7 +546,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
if (isInvalid) {
// Test for the Microsoft extension of /##/ turning into // here on the
// error path.
- if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) &&
+ if (PP.getLangOptions().MicrosoftExt && Tok.is(tok::slash) &&
RHS.is(tok::slash)) {
HandleMicrosoftCommentPaste(Tok);
return true;
@@ -574,14 +558,13 @@ bool TokenLexer::PasteTokens(Token &Tok) {
// information so that the user knows where it came from.
SourceManager &SM = PP.getSourceManager();
SourceLocation Loc =
- SM.createInstantiationLoc(PasteOpLoc, ExpandLocStart,
- ExpandLocEnd, 2);
+ SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
// If we're in microsoft extensions mode, downgrade this from a hard
// error to a warning that defaults to an error. This allows
// disabling it.
PP.Diag(Loc,
- PP.getLangOptions().Microsoft ? diag::err_pp_bad_paste_ms
- : diag::err_pp_bad_paste)
+ PP.getLangOptions().MicrosoftExt ? diag::err_pp_bad_paste_ms
+ : diag::err_pp_bad_paste)
<< Buffer.str();
}
@@ -604,23 +587,20 @@ bool TokenLexer::PasteTokens(Token &Tok) {
Tok = Result;
} while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
+ SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();
+
// The token's current location indicate where the token was lexed from. We
// need this information to compute the spelling of the token, but any
// diagnostics for the expanded token should appear as if the token was
- // expanded from the (##) operator. Pull this information together into
+ // expanded from the full ## expression. Pull this information together into
// a new SourceLocation that captures all of this.
- if (ExpandLocStart.isValid()) {
- SourceManager &SM = PP.getSourceManager();
- SourceLocation pasteLocInst =
- getMacroExpansionLocation(PasteOpLoc);
- assert(pasteLocInst.isValid() &&
- "Expected '##' to come from definition");
-
- Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),
- pasteLocInst,
- pasteLocInst,
- Tok.getLength()));
- }
+ SourceManager &SM = PP.getSourceManager();
+ if (StartLoc.isFileID())
+ StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
+ if (EndLoc.isFileID())
+ EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
+ Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
+ Tok.getLength()));
// Now that we got the result token, it will be subject to expansion. Since
// token pasting re-lexes the result token in raw mode, identifier information
@@ -666,22 +646,111 @@ void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
PP.HandleMicrosoftCommentPaste(Tok);
}
-/// \brief If \arg loc is a FileID and points inside the current macro
+/// \brief If \arg loc is a file ID and points inside the current macro
/// definition, returns the appropriate source location pointing at the
-/// macro expansion source location entry.
-SourceLocation TokenLexer::getMacroExpansionLocation(SourceLocation loc) const {
+/// macro expansion source location entry, otherwise it returns an invalid
+/// SourceLocation.
+SourceLocation
+TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
"Not appropriate for token streams");
- assert(loc.isValid());
+ assert(loc.isValid() && loc.isFileID());
SourceManager &SM = PP.getSourceManager();
- unsigned relativeOffset;
- if (loc.isFileID() &&
- SM.isInFileID(loc,
- MacroDefStartInfo.first, MacroDefStartInfo.second,
- Macro->getDefinitionLength(SM), &relativeOffset)) {
- return MacroExpansionStart.getFileLocWithOffset(relativeOffset);
+ assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
+ "Expected loc to come from the macro definition");
+
+ unsigned relativeOffset = 0;
+ SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
+ return MacroExpansionStart.getLocWithOffset(relativeOffset);
+}
+
+/// \brief Finds the tokens that are consecutive (from the same FileID)
+/// creates a single SLocEntry, and assigns SourceLocations to each token that
+/// point to that SLocEntry. e.g for
+/// assert(foo == bar);
+/// There will be a single SLocEntry for the "foo == bar" chunk and locations
+/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
+///
+/// \arg begin_tokens will be updated to a position past all the found
+/// consecutive tokens.
+static void updateConsecutiveMacroArgTokens(SourceManager &SM,
+ SourceLocation InstLoc,
+ Token *&begin_tokens,
+ Token * end_tokens) {
+ assert(begin_tokens < end_tokens);
+
+ SourceLocation FirstLoc = begin_tokens->getLocation();
+ SourceLocation CurLoc = FirstLoc;
+
+ // Compare the source location offset of tokens and group together tokens that
+ // are close, even if their locations point to different FileIDs. e.g.
+ //
+ // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
+ // ^ ^
+ // |bar foo cake| (one SLocEntry chunk for all tokens)
+ //
+ // we can perform this "merge" since the token's spelling location depends
+ // on the relative offset.
+
+ Token *NextTok = begin_tokens + 1;
+ for (; NextTok < end_tokens; ++NextTok) {
+ int RelOffs;
+ if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs))
+ break; // Token from different local/loaded location.
+ // Check that token is not before the previous token or more than 50
+ // "characters" away.
+ if (RelOffs < 0 || RelOffs > 50)
+ break;
+ CurLoc = NextTok->getLocation();
}
- return SourceLocation();
+ // For the consecutive tokens, find the length of the SLocEntry to contain
+ // all of them.
+ Token &LastConsecutiveTok = *(NextTok-1);
+ int LastRelOffs = 0;
+ SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
+ &LastRelOffs);
+ unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
+
+ // Create a macro expansion SLocEntry that will "contain" all of the tokens.
+ SourceLocation Expansion =
+ SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
+
+ // Change the location of the tokens from the spelling location to the new
+ // expanded location.
+ for (; begin_tokens < NextTok; ++begin_tokens) {
+ Token &Tok = *begin_tokens;
+ int RelOffs = 0;
+ SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
+ Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
+ }
+}
+
+/// \brief Creates SLocEntries and updates the locations of macro argument
+/// tokens to their new expanded locations.
+///
+/// \param ArgIdDefLoc the location of the macro argument id inside the macro
+/// definition.
+/// \param Tokens the macro argument tokens to update.
+void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
+ Token *begin_tokens,
+ Token *end_tokens) {
+ SourceManager &SM = PP.getSourceManager();
+
+ SourceLocation InstLoc =
+ getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
+
+ while (begin_tokens < end_tokens) {
+ // If there's only one token just create a SLocEntry for it.
+ if (end_tokens - begin_tokens == 1) {
+ Token &Tok = *begin_tokens;
+ Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
+ InstLoc,
+ Tok.getLength()));
+ return;
+ }
+
+ updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
+ }
}