src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2011-10-20 21:14:49 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2011-10-20 21:14:49 +0000
commit	36981b17ed939300f6f8fc2355a255f711fcef71 (patch)
tree	ee2483e98b09cac943dc93a6969d83ca737ff139 /lib/Lex
parent	180abc3db9ae3b4fc63cd65b15697e6ffcc8a657 (diff)
download	src-36981b17ed939300f6f8fc2355a255f711fcef71.tar.gz src-36981b17ed939300f6f8fc2355a255f711fcef71.zip

Vendor import of clang release_30 branch r142614:vendor/clang/clang-r142614

http://llvm.org/svn/llvm-project/cfe/branches/release_30@142614

Notes

Notes: svn path=/vendor/clang/dist/; revision=226586 svn path=/vendor/clang/clang-r142614/; revision=226587; tag=vendor/clang/clang-r142614

Diffstat (limited to 'lib/Lex')

-rw-r--r--

lib/Lex/HeaderMap.cpp

-rw-r--r--

lib/Lex/HeaderSearch.cpp

208

-rw-r--r--

lib/Lex/Lexer.cpp

650

-rw-r--r--

lib/Lex/LiteralSupport.cpp

394

-rw-r--r--

lib/Lex/MacroArgs.cpp

-rw-r--r--

lib/Lex/MacroArgs.h

-rw-r--r--

lib/Lex/MacroInfo.cpp

-rw-r--r--

lib/Lex/PPCaching.cpp

-rw-r--r--

lib/Lex/PPDirectives.cpp

140

-rw-r--r--

lib/Lex/PPExpressions.cpp

-rw-r--r--

lib/Lex/PPLexerChange.cpp

-rw-r--r--

lib/Lex/PPMacroExpansion.cpp

182

-rw-r--r--

lib/Lex/PTHLexer.cpp

-rw-r--r--

lib/Lex/Pragma.cpp

-rw-r--r--

lib/Lex/PreprocessingRecord.cpp

283

-rw-r--r--

lib/Lex/Preprocessor.cpp

268

-rw-r--r--

lib/Lex/PreprocessorLexer.cpp

-rw-r--r--

lib/Lex/ScratchBuffer.cpp

-rw-r--r--

lib/Lex/TokenConcatenation.cpp

-rw-r--r--

lib/Lex/TokenLexer.cpp

219

20 files changed, 1983 insertions, 750 deletions

diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp
index e102a6da608c..0cb564c222d5 100644
--- a/lib/Lex/HeaderMap.cpp
+++ b/lib/Lex/HeaderMap.cpp

@@ -57,7 +57,7 @@ struct HMapHeader {

/// HashHMapKey - This is the 'well known' hash function required by the file

/// format, used to look up keys in the hash table. The hash table uses simple

/// linear probing based on this function.

-static inline unsigned HashHMapKey(llvm::StringRef Str) {

+static inline unsigned HashHMapKey(StringRef Str) {

unsigned Result = 0;

const char *S = Str.begin(), *End = Str.end();

@@ -200,7 +200,7 @@ void HeaderMap::dump() const {

/// LookupFile - Check to see if the specified relative filename is located in

/// this HeaderMap. If so, open it and return its FileEntry.

const FileEntry *HeaderMap::LookupFile(

- llvm::StringRef Filename, FileManager &FM) const {

+ StringRef Filename, FileManager &FM) const {

const HMapHeader &Hdr = getHeader();

unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);

diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index 86ab9564a235..931145a8d655 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp

@@ -18,6 +18,7 @@

#include "llvm/Support/FileSystem.h"

#include "llvm/Support/Path.h"

#include "llvm/ADT/SmallString.h"

+#include "llvm/Support/Capacity.h"

#include <cstdio>

using namespace clang;

@@ -97,6 +98,60 @@ const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {

return 0;

}

+const FileEntry *HeaderSearch::lookupModule(StringRef ModuleName,

+ std::string *ModuleFileName,

+ std::string *UmbrellaHeader) {

+ // If we don't have a module cache path, we can't do anything.

+ if (ModuleCachePath.empty()) {

+ if (ModuleFileName)

+ ModuleFileName->clear();

+ return 0;

+ }

+ // Try to find the module path.

+ llvm::SmallString<256> FileName(ModuleCachePath);

+ llvm::sys::path::append(FileName, ModuleName + ".pcm");

+ if (ModuleFileName)

+ *ModuleFileName = FileName.str();

+ if (const FileEntry *ModuleFile

+ = getFileMgr().getFile(FileName, /*OpenFile=*/false,

+ /*CacheFailure=*/false))

+ return ModuleFile;

+ // We didn't find the module. If we're not supposed to look for an

+ // umbrella header, this is the end of the road.

+ if (!UmbrellaHeader)

+ return 0;

+ // Look in each of the framework directories for an umbrella header with

+ // the same name as the module.

+ // FIXME: We need a way for non-frameworks to provide umbrella headers.

+ llvm::SmallString<128> UmbrellaHeaderName;

+ UmbrellaHeaderName = ModuleName;

+ UmbrellaHeaderName += '/';

+ UmbrellaHeaderName += ModuleName;

+ UmbrellaHeaderName += ".h";

+ for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {

+ // Skip non-framework include paths

+ if (!SearchDirs[Idx].isFramework())

+ continue;

+ // Look for the umbrella header in this directory.

+ if (const FileEntry *HeaderFile

+ = SearchDirs[Idx].LookupFile(UmbrellaHeaderName, *this, 0, 0,

+ StringRef(), 0)) {

+ *UmbrellaHeader = HeaderFile->getName();

+ return 0;

+ }

+ // We did not find an umbrella header. Clear out the UmbrellaHeader pointee

+ // so our caller knows that we failed.

+ UmbrellaHeader->clear();

+ return 0;

//===----------------------------------------------------------------------===//

// File lookup within a DirectoryLookup scope

//===----------------------------------------------------------------------===//

@@ -116,17 +171,19 @@ const char *DirectoryLookup::getName() const {

/// LookupFile - Lookup the specified file in this search path, returning it

/// if it exists or returning null if not.

const FileEntry *DirectoryLookup::LookupFile(

- llvm::StringRef Filename,

+ StringRef Filename,

HeaderSearch &HS,

- llvm::SmallVectorImpl<char> *SearchPath,

- llvm::SmallVectorImpl<char> *RelativePath) const {

+ SmallVectorImpl<char> *SearchPath,

+ SmallVectorImpl<char> *RelativePath,

+ StringRef BuildingModule,

+ StringRef *SuggestedModule) const {

llvm::SmallString<1024> TmpDir;

if (isNormalDir()) {

// Concatenate the requested file onto the directory.

TmpDir = getDir()->getName();

llvm::sys::path::append(TmpDir, Filename);

if (SearchPath != NULL) {

- llvm::StringRef SearchPathRef(getDir()->getName());

+ StringRef SearchPathRef(getDir()->getName());

SearchPath->clear();

SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());

}

@@ -138,14 +195,15 @@ const FileEntry *DirectoryLookup::LookupFile(

}

if (isFramework())

- return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath);

+ return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath,

+ BuildingModule, SuggestedModule);

assert(isHeaderMap() && "Unknown directory lookup");

const FileEntry * const Result = getHeaderMap()->LookupFile(

Filename, HS.getFileMgr());

if (Result) {

if (SearchPath != NULL) {

- llvm::StringRef SearchPathRef(getName());

+ StringRef SearchPathRef(getName());

SearchPath->clear();

SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());

}

@@ -161,15 +219,18 @@ const FileEntry *DirectoryLookup::LookupFile(

/// DoFrameworkLookup - Do a lookup of the specified file in the current

/// DirectoryLookup, which is a framework directory.

const FileEntry *DirectoryLookup::DoFrameworkLookup(

- llvm::StringRef Filename,

+ StringRef Filename,

HeaderSearch &HS,

- llvm::SmallVectorImpl<char> *SearchPath,

- llvm::SmallVectorImpl<char> *RelativePath) const {

+ SmallVectorImpl<char> *SearchPath,

+ SmallVectorImpl<char> *RelativePath,

+ StringRef BuildingModule,

+ StringRef *SuggestedModule) const

FileManager &FileMgr = HS.getFileMgr();

// Framework names must have a '/' in the filename.

size_t SlashPos = Filename.find('/');

- if (SlashPos == llvm::StringRef::npos) return 0;

+ if (SlashPos == StringRef::npos) return 0;

// Find out if this is the home for the specified framework, by checking

// HeaderSearch. Possible answer are yes/no and unknown.

@@ -226,9 +287,16 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(

SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1);

}

+ /// Determine whether this is the module we're building or not.

+ bool AutomaticImport = SuggestedModule &&

+ (BuildingModule != StringRef(Filename.begin(), SlashPos)) &&

+ !Filename.substr(SlashPos + 1).startswith("..");

FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());

if (const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),

- /*openFile=*/true)) {

+ /*openFile=*/!AutomaticImport)) {

+ if (AutomaticImport)

+ *SuggestedModule = StringRef(Filename.begin(), SlashPos);

return FE;

}

@@ -240,7 +308,11 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(

SearchPath->insert(SearchPath->begin()+OrigSize, Private,

Private+strlen(Private));

- return FileMgr.getFile(FrameworkName.str(), /*openFile=*/true);

+ const FileEntry *FE = FileMgr.getFile(FrameworkName.str(),

+ /*openFile=*/!AutomaticImport);

+ if (FE && AutomaticImport)

+ *SuggestedModule = StringRef(Filename.begin(), SlashPos);

+ return FE;

}

@@ -255,13 +327,18 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(

/// non-null, indicates where the #including file is, in case a relative search

/// is needed.

const FileEntry *HeaderSearch::LookupFile(

- llvm::StringRef Filename,

+ StringRef Filename,

bool isAngled,

const DirectoryLookup *FromDir,

const DirectoryLookup *&CurDir,

const FileEntry *CurFileEnt,

- llvm::SmallVectorImpl<char> *SearchPath,

- llvm::SmallVectorImpl<char> *RelativePath) {

+ SmallVectorImpl<char> *SearchPath,

+ SmallVectorImpl<char> *RelativePath,

+ StringRef *SuggestedModule)

+ if (SuggestedModule)

+ *SuggestedModule = StringRef();

// If 'Filename' is absolute, check to see if it exists and no searching.

if (llvm::sys::path::is_absolute(Filename)) {

CurDir = 0;

@@ -279,7 +356,7 @@ const FileEntry *HeaderSearch::LookupFile(

return FileMgr.getFile(Filename, /*openFile=*/true);

}

- // Step #0, unless disabled, check to see if the file is in the #includer's

+ // Unless disabled, check to see if the file is in the #includer's

// directory. This has to be based on CurFileEnt, not CurDir, because

// CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and

// a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h".

@@ -301,7 +378,7 @@ const FileEntry *HeaderSearch::LookupFile(

unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo;

getFileInfo(FE).DirInfo = DirInfo;

if (SearchPath != NULL) {

- llvm::StringRef SearchPathRef(CurFileEnt->getDir()->getName());

+ StringRef SearchPathRef(CurFileEnt->getDir()->getName());

SearchPath->clear();

SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());

}

@@ -346,19 +423,56 @@ const FileEntry *HeaderSearch::LookupFile(

// Check each directory in sequence to see if it contains this file.

for (; i != SearchDirs.size(); ++i) {

const FileEntry *FE =

- SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath);

+ SearchDirs[i].LookupFile(Filename, *this, SearchPath, RelativePath,

+ BuildingModule, SuggestedModule);

if (!FE) continue;

CurDir = &SearchDirs[i];

// This file is a system header or C++ unfriendly if the dir is.

- getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic();

+ HeaderFileInfo &HFI = getFileInfo(FE);

+ HFI.DirInfo = CurDir->getDirCharacteristic();

+ // If this file is found in a header map and uses the framework style of

+ // includes, then this header is part of a framework we're building.

+ if (CurDir->isIndexHeaderMap()) {

+ size_t SlashPos = Filename.find('/');

+ if (SlashPos != StringRef::npos) {

+ HFI.IndexHeaderMapHeader = 1;

+ HFI.Framework = getUniqueFrameworkName(StringRef(Filename.begin(),

+ SlashPos));

+ }

// Remember this location for the next lookup we do.

CacheLookup.second = i;

return FE;

}

+ // If we are including a file with a quoted include "foo.h" from inside

+ // a header in a framework that is currently being built, and we couldn't

+ // resolve "foo.h" any other way, change the include to <Foo/foo.h>, where

+ // "Foo" is the name of the framework in which the including header was found.

+ if (CurFileEnt && !isAngled && Filename.find('/') == StringRef::npos) {

+ HeaderFileInfo &IncludingHFI = getFileInfo(CurFileEnt);

+ if (IncludingHFI.IndexHeaderMapHeader) {

+ llvm::SmallString<128> ScratchFilename;

+ ScratchFilename += IncludingHFI.Framework;

+ ScratchFilename += '/';

+ ScratchFilename += Filename;

+ const FileEntry *Result = LookupFile(ScratchFilename, /*isAngled=*/true,

+ FromDir, CurDir, CurFileEnt,

+ SearchPath, RelativePath,

+ SuggestedModule);

+ std::pair<unsigned, unsigned> &CacheLookup

+ = LookupFileCache.GetOrCreateValue(Filename).getValue();

+ CacheLookup.second

+ = LookupFileCache.GetOrCreateValue(ScratchFilename).getValue().second;

+ return Result;

+ }

// Otherwise, didn't find it. Remember we didn't find this.

CacheLookup.second = SearchDirs.size();

return 0;

@@ -370,15 +484,15 @@ const FileEntry *HeaderSearch::LookupFile(

/// is a subframework within Carbon.framework. If so, return the FileEntry

/// for the designated file, otherwise return null.

const FileEntry *HeaderSearch::

-LookupSubframeworkHeader(llvm::StringRef Filename,

+LookupSubframeworkHeader(StringRef Filename,

const FileEntry *ContextFileEnt,

- llvm::SmallVectorImpl<char> *SearchPath,

- llvm::SmallVectorImpl<char> *RelativePath) {

+ SmallVectorImpl<char> *SearchPath,

+ SmallVectorImpl<char> *RelativePath) {

assert(ContextFileEnt && "No context file?");

// Framework names must have a '/' in the filename. Find it.

size_t SlashPos = Filename.find('/');

- if (SlashPos == llvm::StringRef::npos) return 0;

+ if (SlashPos == StringRef::npos) return 0;

// Look up the base framework name of the ContextFileEnt.

const char *ContextName = ContextFileEnt->getName();

@@ -466,7 +580,31 @@ LookupSubframeworkHeader(llvm::StringRef Filename,

// File Info Management.

//===----------------------------------------------------------------------===//

+/// \brief Merge the header file info provided by \p OtherHFI into the current

+/// header file info (\p HFI)

+static void mergeHeaderFileInfo(HeaderFileInfo &HFI,

+ const HeaderFileInfo &OtherHFI) {

+ HFI.isImport |= OtherHFI.isImport;

+ HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;

+ HFI.NumIncludes += OtherHFI.NumIncludes;

+ if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {

+ HFI.ControllingMacro = OtherHFI.ControllingMacro;

+ HFI.ControllingMacroID = OtherHFI.ControllingMacroID;

+ }

+ if (OtherHFI.External) {

+ HFI.DirInfo = OtherHFI.DirInfo;

+ HFI.External = OtherHFI.External;

+ HFI.IndexHeaderMapHeader = OtherHFI.IndexHeaderMapHeader;

+ }

+ if (HFI.Framework.empty())

+ HFI.Framework = OtherHFI.Framework;

+ HFI.Resolved = true;

/// getFileInfo - Return the HeaderFileInfo structure for the specified

/// FileEntry.

HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {

@@ -474,10 +612,8 @@ HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {

FileInfo.resize(FE->getUID()+1);

HeaderFileInfo &HFI = FileInfo[FE->getUID()];

- if (ExternalSource && !HFI.Resolved) {

- HFI = ExternalSource->GetHeaderFileInfo(FE);

- HFI.Resolved = true;

- }

+ if (ExternalSource && !HFI.Resolved)

+ mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(FE));

return HFI;

}

@@ -488,10 +624,8 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {

// Resolve header file info from the external source, if needed.

HeaderFileInfo &HFI = FileInfo[File->getUID()];

- if (ExternalSource && !HFI.Resolved) {

- HFI = ExternalSource->GetHeaderFileInfo(File);

- HFI.Resolved = true;

- }

+ if (ExternalSource && !HFI.Resolved)

+ mergeHeaderFileInfo(HFI, ExternalSource->GetHeaderFileInfo(File));

return HFI.isPragmaOnce || HFI.ControllingMacro || HFI.ControllingMacroID;

}

@@ -542,4 +676,14 @@ bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){

return true;

}

+size_t HeaderSearch::getTotalMemory() const {

+ return SearchDirs.capacity()

+ + llvm::capacity_in_bytes(FileInfo)

+ + llvm::capacity_in_bytes(HeaderMaps)

+ + LookupFileCache.getAllocator().getTotalMemory()

+ + FrameworkMap.getAllocator().getTotalMemory();

+StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {

+ return FrameworkNames.GetOrCreateValue(Framework).getKey();

diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a28b8f6e7b9f..a98d889dbc98 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp

@@ -32,7 +32,7 @@

#include "llvm/ADT/StringSwitch.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/MemoryBuffer.h"

-#include <cctype>

+#include <cstring>

using namespace clang;

static void InitCharacterInfo();

@@ -76,7 +76,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,

// skip the UTF-8 BOM if it's present.

if (BufferStart == BufferPtr) {

// Determine the size of the BOM.

- llvm::StringRef Buf(BufferStart, BufferEnd - BufferStart);

+ StringRef Buf(BufferStart, BufferEnd - BufferStart);

size_t BOMLength = llvm::StringSwitch<size_t>(Buf)

.StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM

.Default(0);

@@ -86,7 +86,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,

}

Is_PragmaLexer = false;

- IsInConflictMarker = false;

+ CurrentConflictMarkerState = CMK_None;

// Start of the file is a start of line.

IsAtStartOfLine = true;

@@ -187,9 +187,9 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,

// Set the SourceLocation with the remapping information. This ensures that

// GetMappedTokenLoc will remap the tokens as they are lexed.

- L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),

- ExpansionLocStart,

- ExpansionLocEnd, TokLen);

+ L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),

+ ExpansionLocStart,

+ ExpansionLocEnd, TokLen);

// Ensure that the lexer thinks it is inside a directive, so that end \n will

// return an EOD token.

@@ -217,7 +217,7 @@ std::string Lexer::Stringify(const std::string &Str, bool Charify) {

/// Stringify - Convert the specified string into a C string by escaping '\'

/// and " characters. This does not add surrounding ""'s to the string.

-void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {

+void Lexer::Stringify(SmallVectorImpl<char> &Str) {

for (unsigned i = 0, e = Str.size(); i != e; ++i) {

if (Str[i] == '\\' || Str[i] == '"') {

Str.insert(Str.begin()+i, '\\');

@@ -235,8 +235,8 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {

/// after trigraph expansion and escaped-newline folding. In particular, this

/// wants to get the true, uncanonicalized, spelling of things like digraphs

/// UCNs, etc.

-llvm::StringRef Lexer::getSpelling(SourceLocation loc,

- llvm::SmallVectorImpl<char> &buffer,

+StringRef Lexer::getSpelling(SourceLocation loc,

+ SmallVectorImpl<char> &buffer,

const SourceManager &SM,

const LangOptions &options,

bool *invalid) {

@@ -245,10 +245,10 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc,

// Try to the load the file buffer.

bool invalidTemp = false;

- llvm::StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);

+ StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);

if (invalidTemp) {

if (invalid) *invalid = true;

- return llvm::StringRef();

+ return StringRef();

}

const char *tokenBegin = file.data() + locInfo.second;

@@ -263,7 +263,7 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc,

// Common case: no need for cleaning.

if (!token.needsCleaning())

- return llvm::StringRef(tokenBegin, length);

+ return StringRef(tokenBegin, length);

// Hard case, we need to relex the characters into the string.

buffer.clear();

@@ -275,7 +275,7 @@ llvm::StringRef Lexer::getSpelling(SourceLocation loc,

ti += charSize;

}

- return llvm::StringRef(buffer.data(), buffer.size());

+ return StringRef(buffer.data(), buffer.size());

}

/// getSpelling() - Return the 'spelling' of this token. The spelling of a

@@ -394,10 +394,10 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,

// If this comes from a macro expansion, we really do want the macro name, not

// the token this macro expanded to.

- Loc = SM.getInstantiationLoc(Loc);

+ Loc = SM.getExpansionLoc(Loc);

std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);

bool Invalid = false;

- llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);

+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);

if (Invalid)

return 0;

@@ -415,15 +415,16 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,

return TheTok.getLength();

}

-SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,

- const SourceManager &SM,

- const LangOptions &LangOpts) {

+static SourceLocation getBeginningOfFileToken(SourceLocation Loc,

+ const SourceManager &SM,

+ const LangOptions &LangOpts) {

+ assert(Loc.isFileID());

std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);

if (LocInfo.first.isInvalid())

return Loc;

bool Invalid = false;

- llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);

+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);

if (Invalid)

return Loc;

@@ -448,7 +449,7 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,

}

// Create a lexer starting at the beginning of this token.

- SourceLocation LexerStartLoc = Loc.getFileLocWithOffset(-LocInfo.second);

+ SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);

Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());

TheLexer.SetCommentRetentionState(true);

@@ -474,6 +475,25 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,

return Loc;

}

+SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,

+ const SourceManager &SM,

+ const LangOptions &LangOpts) {

+ if (Loc.isFileID())

+ return getBeginningOfFileToken(Loc, SM, LangOpts);

+ if (!SM.isMacroArgExpansion(Loc))

+ return Loc;

+ SourceLocation FileLoc = SM.getSpellingLoc(Loc);

+ SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);

+ std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);

+ std::pair<FileID, unsigned> BeginFileLocInfo= SM.getDecomposedLoc(BeginFileLoc);

+ assert(FileLocInfo.first == BeginFileLocInfo.first &&

+ FileLocInfo.second >= BeginFileLocInfo.second);

+ return Loc.getLocWithOffset(SM.getDecomposedLoc(BeginFileLoc).second -

+ SM.getDecomposedLoc(FileLoc).second);

namespace {

enum PreambleDirectiveKind {

PDK_Skipped,

@@ -484,21 +504,36 @@ namespace {

}

std::pair<unsigned, bool>

-Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {

+Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,

+ const LangOptions &Features, unsigned MaxLines) {

// Create a lexer starting at the beginning of the file. Note that we use a

// "fake" file source location at offset 1 so that the lexer will track our

// position within the file.

const unsigned StartOffset = 1;

SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);

- LangOptions LangOpts;

- Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),

+ Lexer TheLexer(StartLoc, Features, Buffer->getBufferStart(),

Buffer->getBufferStart(), Buffer->getBufferEnd());

bool InPreprocessorDirective = false;

Token TheTok;

Token IfStartTok;

unsigned IfCount = 0;

- unsigned Line = 0;

+ unsigned MaxLineOffset = 0;

+ if (MaxLines) {

+ const char *CurPtr = Buffer->getBufferStart();

+ unsigned CurLine = 0;

+ while (CurPtr != Buffer->getBufferEnd()) {

+ char ch = *CurPtr++;

+ if (ch == '\n') {

+ ++CurLine;

+ if (CurLine == MaxLines)

+ break;

+ }

+ if (CurPtr != Buffer->getBufferEnd())

+ MaxLineOffset = CurPtr - Buffer->getBufferStart();

+ }

do {

TheLexer.LexFromRawLexer(TheTok);

@@ -522,11 +557,11 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {

// Keep track of the # of lines in the preamble.

if (TheTok.isAtStartOfLine()) {

- ++Line;

+ unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;

// If we were asked to limit the number of lines in the preamble,

// and we're about to exceed that limit, we're done.

- if (MaxLines && Line >= MaxLines)

+ if (MaxLineOffset && TokOffset >= MaxLineOffset)

break;

}

@@ -539,12 +574,12 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {

Token HashTok = TheTok;

InPreprocessorDirective = true;

- // Figure out which direective this is. Since we're lexing raw tokens,

+ // Figure out which directive this is. Since we're lexing raw tokens,

// we don't have an identifier table available. Instead, just look at

// the raw identifier to recognize and categorize preprocessor directives.

TheLexer.LexFromRawLexer(TheTok);

if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {

- llvm::StringRef Keyword(TheTok.getRawIdentifierData(),

+ StringRef Keyword(TheTok.getRawIdentifierData(),

TheTok.getLength());

PreambleDirectiveKind PDK

= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)

@@ -638,7 +673,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,

// chars, this method is extremely fast.

while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {

if (CharNo == 0)

- return TokStart.getFileLocWithOffset(PhysOffset);

+ return TokStart.getLocWithOffset(PhysOffset);

++TokPtr, --CharNo, ++PhysOffset;

}

@@ -658,7 +693,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,

if (!Lexer::isObviouslySimpleCharacter(*TokPtr))

PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;

- return TokStart.getFileLocWithOffset(PhysOffset);

+ return TokStart.getLocWithOffset(PhysOffset);

}

/// \brief Computes the source location just past the end of the

@@ -687,7 +722,7 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,

return SourceLocation(); // Points inside the macro expansion.

// Continue and find the location just after the macro expansion.

- Loc = SM.getInstantiationRange(Loc).second;

+ Loc = SM.getExpansionRange(Loc).second;

}

unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);

@@ -696,14 +731,14 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,

else

return Loc;

- return Loc.getFileLocWithOffset(Len);

+ return Loc.getLocWithOffset(Len);

}

/// \brief Returns true if the given MacroID location points at the first

/// token of the macro expansion.

bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,

- const SourceManager &SM,

- const LangOptions &LangOpts) {

+ const SourceManager &SM,

+ const LangOptions &LangOpts) {

assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");

std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc);

@@ -713,8 +748,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,

return false; // Does not point at the start of token.

SourceLocation expansionLoc =

- SM.getSLocEntry(infoLoc.first)

- .getInstantiation().getInstantiationLocStart();

+ SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart();

if (expansionLoc.isFileID())

return true; // No other macro expansions, this is the first.

@@ -734,17 +768,15 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,

return false;

FileID FID = SM.getFileID(loc);

- SourceLocation afterLoc = loc.getFileLocWithOffset(tokLen+1);

- if (!SM.isBeforeInSourceLocationOffset(afterLoc, SM.getNextOffset()))

- return true; // We got past the last FileID, this points to the last token.

+ SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1);

+ if (SM.isInFileID(afterLoc, FID))

+ return false; // Still in the same FileID, does not point to the last token.

// FIXME: If the token comes from the macro token paste operator ('##')

// or the stringify operator ('#') this function will always return false;

- if (FID == SM.getFileID(afterLoc))

- return false; // Still in the same FileID, does not point to the last token.

SourceLocation expansionLoc =

- SM.getSLocEntry(FID).getInstantiation().getInstantiationLocEnd();

+ SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd();

if (expansionLoc.isFileID())

return true; // No other macro expansions.

@@ -761,7 +793,8 @@ enum {

CHAR_LETTER = 0x04, // a-z,A-Z

CHAR_NUMBER = 0x08, // 0-9

CHAR_UNDER = 0x10, // _

- CHAR_PERIOD = 0x20 // .

+ CHAR_PERIOD = 0x20, // .

+ CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"'

};

// Statically initialize CharInfo table based on ASCII character set

@@ -786,20 +819,20 @@ static const unsigned char CharInfo[256] =

0 , 0 , 0 , 0 ,

//32 SP 33 ! 34 " 35 #

//36 $ 37 % 38 & 39 '

- CHAR_HORZ_WS, 0 , 0 , 0 ,

- 0 , 0 , 0 , 0 ,

+ CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,

+ 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,

//40 ( 41 ) 42 * 43 +

//44 , 45 - 46 . 47 /

- 0 , 0 , 0 , 0 ,

- 0 , 0 , CHAR_PERIOD , 0 ,

+ 0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL ,

+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,

//48 0 49 1 50 2 51 3

//52 4 53 5 54 6 55 7

CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,

//56 8 57 9 58 : 59 ;

//60 < 61 = 62 > 63 ?

- CHAR_NUMBER , CHAR_NUMBER , 0 , 0 ,

- 0 , 0 , 0 , 0 ,

+ CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,

+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,

//64 @ 65 A 66 B 67 C

//68 D 69 E 70 F 71 G

0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,

@@ -814,8 +847,8 @@ static const unsigned char CharInfo[256] =

CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,

//88 X 89 Y 90 Z 91 [

//92 \ 93 ] 94 ^ 95 _

- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,

- 0 , 0 , 0 , CHAR_UNDER ,

+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,

+ 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,

//96 ` 97 a 98 b 99 c

//100 d 101 e 102 f 103 g

0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,

@@ -829,9 +862,9 @@ static const unsigned char CharInfo[256] =

CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,

//120 x 121 y 122 z 123 {

-//124 | 125 } 126 ~ 127 DEL

- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,

- 0 , 0 , 0 , 0

+//124 | 125 } 126 ~ 127 DEL

+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,

+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0

};

static void InitCharacterInfo() {

@@ -869,6 +902,12 @@ static inline bool isHorizontalWhitespace(unsigned char c) {

return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;

}

+/// isVerticalWhitespace - Return true if this character is vertical

+/// whitespace: '\n', '\r'. Note that this returns false for '\0'.

+static inline bool isVerticalWhitespace(unsigned char c) {

+ return (CharInfo[c] & CHAR_VERT_WS) ? true : false;

/// isWhitespace - Return true if this character is horizontal or vertical

/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false

/// for '\0'.

@@ -883,6 +922,14 @@ static inline bool isNumberBody(unsigned char c) {

true : false;

}

+/// isRawStringDelimBody - Return true if this is the body character of a

+/// raw string delimiter.

+static inline bool isRawStringDelimBody(unsigned char c) {

+ return (CharInfo[c] &

+ (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?

+ true : false;

//===----------------------------------------------------------------------===//

// Diagnostics forwarding code.

@@ -907,14 +954,14 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP,

// Create a new SLoc which is expanded from Expansion(FileLoc) but whose

// characters come from spelling(FileLoc)+Offset.

SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);

- SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo);

+ SpellingLoc = SpellingLoc.getLocWithOffset(CharNo);

// Figure out the expansion loc range, which is the range covered by the

// original _Pragma(...) sequence.

std::pair<SourceLocation,SourceLocation> II =

- SM.getImmediateInstantiationRange(FileLoc);

+ SM.getImmediateExpansionRange(FileLoc);

- return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen);

+ return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);

}

/// getSourceLocation - Return a source location identifier for the specified

@@ -928,7 +975,7 @@ SourceLocation Lexer::getSourceLocation(const char *Loc,

// the file id from FileLoc with the offset specified.

unsigned CharNo = Loc-BufferStart;

if (FileLoc.isFileID())

- return FileLoc.getFileLocWithOffset(CharNo);

+ return FileLoc.getLocWithOffset(CharNo);

// Otherwise, this is the _Pragma lexer case, which pretends that all of the

// tokens are lexed from where the _Pragma was defined.

@@ -978,7 +1025,7 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) {

}

if (!L->isLexingRawMode())

- L->Diag(CP-2, diag::trigraph_converted) << llvm::StringRef(&Res, 1);

+ L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);

return Res;

}

@@ -1028,6 +1075,59 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {

}

+/// \brief Checks that the given token is the first token that occurs after the

+/// given location (this excludes comments and whitespace). Returns the location

+/// immediately after the specified token. If the token is not found or the

+/// location is inside a macro, the returned source location will be invalid.

+SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,

+ tok::TokenKind TKind,

+ const SourceManager &SM,

+ const LangOptions &LangOpts,

+ bool SkipTrailingWhitespaceAndNewLine) {

+ if (Loc.isMacroID()) {

+ if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts))

+ return SourceLocation();

+ Loc = SM.getExpansionRange(Loc).second;

+ }

+ Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);

+ // Break down the source location.

+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);

+ // Try to load the file buffer.

+ bool InvalidTemp = false;

+ llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);

+ if (InvalidTemp)

+ return SourceLocation();

+ const char *TokenBegin = File.data() + LocInfo.second;

+ // Lex from the start of the given location.

+ Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),

+ TokenBegin, File.end());

+ // Find the token.

+ Token Tok;

+ lexer.LexFromRawLexer(Tok);

+ if (Tok.isNot(TKind))

+ return SourceLocation();

+ SourceLocation TokenLoc = Tok.getLocation();

+ // Calculate how much whitespace needs to be skipped if any.

+ unsigned NumWhitespaceChars = 0;

+ if (SkipTrailingWhitespaceAndNewLine) {

+ const char *TokenEnd = SM.getCharacterData(TokenLoc) +

+ Tok.getLength();

+ unsigned char C = *TokenEnd;

+ while (isHorizontalWhitespace(C)) {

+ C = *(++TokenEnd);

+ NumWhitespaceChars++;

+ }

+ if (isVerticalWhitespace(C))

+ NumWhitespaceChars++;

+ }

+ return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars);

/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,

/// get its size, and return it. This is tricky in several cases:

@@ -1191,6 +1291,7 @@ FinishIdentifier:

// preprocessor, which may macro expand it or something.

if (II->isHandleIdentifierCase())

PP->HandleIdentifier(Result);

return;

}

@@ -1252,13 +1353,12 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {

if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {

// If we are in Microsoft mode, don't continue if the constant is hex.

// For example, MSVC will accept the following as 3 tokens: 0x1234567e+1

- if (!Features.Microsoft || !isHexaLiteral(BufferPtr, Features))

+ if (!Features.MicrosoftExt || !isHexaLiteral(BufferPtr, Features))

return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));

}

// If we have a hex FP constant, continue.

- if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p') &&

- !Features.CPlusPlus0x)

+ if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))

return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));

// Update the location of token as well as BufferPtr.

@@ -1268,10 +1368,17 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {

}

/// LexStringLiteral - Lex the remainder of a string literal, after having lexed

-/// either " or L".

-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {

+/// either " or L" or u8" or u" or U".

+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,

+ tok::TokenKind Kind) {

const char *NulCharacter = 0; // Does this string contain the \0 character?

+ if (!isLexingRawMode() &&

+ (Kind == tok::utf8_string_literal ||

+ Kind == tok::utf16_string_literal ||

+ Kind == tok::utf32_string_literal))

+ Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);

char C = getAndAdvanceChar(CurPtr, Result);

while (C != '"') {

// Skip escaped characters. Escaped newlines will already be processed by

@@ -1281,16 +1388,21 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {

if (C == '\n' || C == '\r' || // Newline.

(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.

- if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc))

- PP->CodeCompleteNaturalLanguage();

- else if (!isLexingRawMode() && !Features.AsmPreprocessor)

+ if (!isLexingRawMode() && !Features.AsmPreprocessor)

Diag(BufferPtr, diag::warn_unterminated_string);

FormTokenWithChars(Result, CurPtr-1, tok::unknown);

return;

}

- if (C == 0)

+ if (C == 0) {

+ if (isCodeCompletionPoint(CurPtr-1)) {

+ PP->CodeCompleteNaturalLanguage();

+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);

+ return cutOffLexing();

+ }

NulCharacter = CurPtr-1;

+ }

C = getAndAdvanceChar(CurPtr, Result);

}

@@ -1300,8 +1412,82 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {

// Update the location of the token as well as the BufferPtr instance var.

const char *TokStart = BufferPtr;

- FormTokenWithChars(Result, CurPtr,

- Wide ? tok::wide_string_literal : tok::string_literal);

+ FormTokenWithChars(Result, CurPtr, Kind);

+ Result.setLiteralData(TokStart);

+/// LexRawStringLiteral - Lex the remainder of a raw string literal, after

+/// having lexed R", LR", u8R", uR", or UR".

+void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,

+ tok::TokenKind Kind) {

+ // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:

+ // Between the initial and final double quote characters of the raw string,

+ // any transformations performed in phases 1 and 2 (trigraphs,

+ // universal-character-names, and line splicing) are reverted.

+ if (!isLexingRawMode())

+ Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);

+ unsigned PrefixLen = 0;

+ while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))

+ ++PrefixLen;

+ // If the last character was not a '(', then we didn't lex a valid delimiter.

+ if (CurPtr[PrefixLen] != '(') {

+ if (!isLexingRawMode()) {

+ const char *PrefixEnd = &CurPtr[PrefixLen];

+ if (PrefixLen == 16) {

+ Diag(PrefixEnd, diag::err_raw_delim_too_long);

+ } else {

+ Diag(PrefixEnd, diag::err_invalid_char_raw_delim)

+ << StringRef(PrefixEnd, 1);

+ }

+ // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,

+ // it's possible the '"' was intended to be part of the raw string, but

+ // there's not much we can do about that.

+ while (1) {

+ char C = *CurPtr++;

+ if (C == '"')

+ break;

+ if (C == 0 && CurPtr-1 == BufferEnd) {

+ --CurPtr;

+ break;

+ }

+ FormTokenWithChars(Result, CurPtr, tok::unknown);

+ return;

+ }

+ // Save prefix and move CurPtr past it

+ const char *Prefix = CurPtr;

+ CurPtr += PrefixLen + 1; // skip over prefix and '('

+ while (1) {

+ char C = *CurPtr++;

+ if (C == ')') {

+ // Check for prefix match and closing quote.

+ if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {

+ CurPtr += PrefixLen + 1; // skip over prefix and '"'

+ break;

+ }

+ } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.

+ if (!isLexingRawMode())

+ Diag(BufferPtr, diag::err_unterminated_raw_string)

+ << StringRef(Prefix, PrefixLen);

+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);

+ return;

+ }

+ // Update the location of token as well as BufferPtr.

+ const char *TokStart = BufferPtr;

+ FormTokenWithChars(Result, CurPtr, Kind);

Result.setLiteralData(TokStart);

}

@@ -1317,7 +1503,8 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {

// Skip the escaped character.

C = getAndAdvanceChar(CurPtr, Result);

} else if (C == '\n' || C == '\r' || // Newline.

- (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.

+ (C == 0 && (CurPtr-1 == BufferEnd || // End of file.

+ isCodeCompletionPoint(CurPtr-1)))) {

// If the filename is unterminated, then it must just be a lone <

// character. Return this as such.

FormTokenWithChars(Result, AfterLessPos, tok::less);

@@ -1340,10 +1527,15 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {

/// LexCharConstant - Lex the remainder of a character constant, after having

-/// lexed either ' or L'.

-void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {

+/// lexed either ' or L' or u' or U'.

+void Lexer::LexCharConstant(Token &Result, const char *CurPtr,

+ tok::TokenKind Kind) {

const char *NulCharacter = 0; // Does this character contain the \0 character?

+ if (!isLexingRawMode() &&

+ (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant))

+ Diag(BufferPtr, diag::warn_cxx98_compat_unicode_literal);

char C = getAndAdvanceChar(CurPtr, Result);

if (C == '\'') {

if (!isLexingRawMode() && !Features.AsmPreprocessor)

@@ -1360,13 +1552,17 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {

C = getAndAdvanceChar(CurPtr, Result);

} else if (C == '\n' || C == '\r' || // Newline.

(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.

- if (C == 0 && PP && PP->isCodeCompletionFile(FileLoc))

- PP->CodeCompleteNaturalLanguage();

- else if (!isLexingRawMode() && !Features.AsmPreprocessor)

+ if (!isLexingRawMode() && !Features.AsmPreprocessor)

Diag(BufferPtr, diag::warn_unterminated_char);

FormTokenWithChars(Result, CurPtr-1, tok::unknown);

return;

} else if (C == 0) {

+ if (isCodeCompletionPoint(CurPtr-1)) {

+ PP->CodeCompleteNaturalLanguage();

+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);

+ return cutOffLexing();

+ }

NulCharacter = CurPtr-1;

}

C = getAndAdvanceChar(CurPtr, Result);

@@ -1378,7 +1574,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {

// Update the location of token as well as BufferPtr.

const char *TokStart = BufferPtr;

- FormTokenWithChars(Result, CurPtr, tok::char_constant);

+ FormTokenWithChars(Result, CurPtr, Kind);

Result.setLiteralData(TokStart);

}

@@ -1451,20 +1647,28 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {

char C;

do {

C = *CurPtr;

- // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character.

- // If we find a \n character, scan backwards, checking to see if it's an

- // escaped newline, like we do for block comments.

// Skip over characters in the fast loop.

while (C != 0 && // Potentially EOF.

- C != '\\' && // Potentially escaped newline.

- C != '?' && // Potentially trigraph.

C != '\n' && C != '\r') // Newline or DOS-style newline.

C = *++CurPtr;

- // If this is a newline, we're done.

- if (C == '\n' || C == '\r')

- break; // Found the newline? Break out!

+ const char *NextLine = CurPtr;

+ if (C != 0) {

+ // We found a newline, see if it's escaped.

+ const char *EscapePtr = CurPtr-1;

+ while (isHorizontalWhitespace(*EscapePtr)) // Skip whitespace.

+ --EscapePtr;

+ if (*EscapePtr == '\\') // Escaped newline.

+ CurPtr = EscapePtr;

+ else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&

+ EscapePtr[-2] == '?') // Trigraph-escaped newline.

+ CurPtr = EscapePtr-2;

+ else

+ break; // This is a newline, we're done.

+ C = *CurPtr;

+ }

// Otherwise, this is a hard case. Fall back on getAndAdvanceChar to

// properly decode the character. Read it in raw mode to avoid emitting

@@ -1476,6 +1680,13 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {

C = getAndAdvanceChar(CurPtr, Result);

LexingRawMode = OldRawMode;

+ // If we read only one character, then no special handling is needed.

+ // We're done and can skip forward to the newline.

+ if (C != 0 && CurPtr == OldPtr+1) {

+ CurPtr = NextLine;

+ break;

+ }

// If the char that we finally got was a \n, then we must have had something

// like \<newline><newline>. We don't want to have consumed the second

// newline, we want CurPtr, to end up pointing to it down below.

@@ -1492,9 +1703,9 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {

if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {

// Okay, we found a // comment that ends in a newline, if the next

// line is also a // comment, but has spaces, don't emit a diagnostic.

- if (isspace(C)) {

+ if (isWhitespace(C)) {

const char *ForwardPtr = CurPtr;

- while (isspace(*ForwardPtr)) // Skip whitespace.

+ while (isWhitespace(*ForwardPtr)) // Skip whitespace.

++ForwardPtr;

if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')

break;

@@ -1507,12 +1718,16 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {

}

if (CurPtr == BufferEnd+1) {

- if (PP && PP->isCodeCompletionFile(FileLoc))

- PP->CodeCompleteNaturalLanguage();

--CurPtr;

break;

}

+ if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {

+ PP->CodeCompleteNaturalLanguage();

+ cutOffLexing();

+ return false;

+ }

} while (C != '\n' && C != '\r');

// Found but did not consume the newline. Notify comment handlers about the

@@ -1573,7 +1788,7 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {

Result.setKind(tok::comment);

PP->CreateString(&Spelling[0], Spelling.size(), Result,

- Result.getLocation());

+ Result.getLocation(), Result.getLocation());

return true;

}

@@ -1667,8 +1882,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {

unsigned char C = getCharAndSize(CurPtr, CharSize);

CurPtr += CharSize;

if (C == 0 && CurPtr == BufferEnd+1) {

- if (!isLexingRawMode() &&

- !PP->isCodeCompletionFile(FileLoc))

+ if (!isLexingRawMode())

Diag(BufferPtr, diag::err_unterminated_block_comment);

--CurPtr;

@@ -1691,7 +1905,10 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {

while (1) {

// Skip over all non-interesting characters until we find end of buffer or a

// (probably ending) '/' character.

- if (CurPtr + 24 < BufferEnd) {

+ if (CurPtr + 24 < BufferEnd &&

+ // If there is a code-completion point avoid the fast scan because it

+ // doesn't check for '\0'.

+ !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) {

// While not aligned to a 16-byte boundary.

while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)

C = *CurPtr++;

@@ -1751,9 +1968,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {

Diag(CurPtr-1, diag::warn_nested_block_comment);

}

} else if (C == 0 && CurPtr == BufferEnd+1) {

- if (PP && PP->isCodeCompletionFile(FileLoc))

- PP->CodeCompleteNaturalLanguage();

- else if (!isLexingRawMode())

+ if (!isLexingRawMode())

Diag(BufferPtr, diag::err_unterminated_block_comment);

// Note: the user probably forgot a */. We could continue immediately

// after the /*, but this would involve lexing a lot of what really is the

@@ -1769,7 +1984,12 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {

BufferPtr = CurPtr;

return false;

+ } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {

+ PP->CodeCompleteNaturalLanguage();

+ cutOffLexing();

+ return false;

}

C = *CurPtr++;

}

@@ -1826,6 +2046,12 @@ std::string Lexer::ReadToEndOfLine() {

case 0: // Null.

// Found end of file?

if (CurPtr-1 != BufferEnd) {

+ if (isCodeCompletionPoint(CurPtr-1)) {

+ PP->CodeCompleteNaturalLanguage();

+ cutOffLexing();

+ return Result;

+ }

// Nope, normal character, continue.

Result += Char;

break;

@@ -1840,8 +2066,8 @@ std::string Lexer::ReadToEndOfLine() {

// Next, lex the character, which should handle the EOD transition.

Lex(Tmp);

if (Tmp.is(tok::code_completion)) {

- if (PP && PP->getCodeCompletionHandler())

- PP->getCodeCompletionHandler()->CodeCompleteNaturalLanguage();

+ if (PP)

+ PP->CodeCompleteNaturalLanguage();

Lex(Tmp);

}

assert(Tmp.is(tok::eod) && "Unexpected token!");

@@ -1857,22 +2083,6 @@ std::string Lexer::ReadToEndOfLine() {

/// This returns true if Result contains a token, false if PP.Lex should be

/// called again.

bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {

- // Check if we are performing code completion.

- if (PP && PP->isCodeCompletionFile(FileLoc)) {

- // We're at the end of the file, but we've been asked to consider the

- // end of the file to be a code-completion token. Return the

- // code-completion token.

- Result.startToken();

- FormTokenWithChars(Result, CurPtr, tok::code_completion);

- // Only do the eof -> code_completion translation once.

- PP->SetCodeCompletionPoint(0, 0, 0);

- // Silence any diagnostics that occur once we hit the code-completion point.

- PP->getDiagnostics().setSuppressAllDiagnostics(true);

- return true;

- }

// If we hit the end of the file while parsing a preprocessor directive,

// end the preprocessor directive first. The next token returned will

// then be the end of file.

@@ -1900,7 +2110,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {

// If we are in a #if directive, emit an error.

while (!ConditionalStack.empty()) {

- if (!PP->isCodeCompletionFile(FileLoc))

+ if (PP->getCodeCompletionFileLoc() != FileLoc)

PP->Diag(ConditionalStack.back().IfLoc,

diag::err_pp_unterminated_conditional);

ConditionalStack.pop_back();

@@ -1951,15 +2161,18 @@ unsigned Lexer::isNextPPTokenLParen() {

}

/// FindConflictEnd - Find the end of a version control conflict marker.

-static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {

- llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);

- size_t Pos = RestOfBuffer.find(">>>>>>>");

- while (Pos != llvm::StringRef::npos) {

+static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,

+ ConflictMarkerKind CMK) {

+ const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>";

+ size_t TermLen = CMK == CMK_Perforce ? 5 : 7;

+ StringRef RestOfBuffer(CurPtr+TermLen, BufferEnd-CurPtr-TermLen);

+ size_t Pos = RestOfBuffer.find(Terminator);

+ while (Pos != StringRef::npos) {

// Must occur at start of line.

if (RestOfBuffer[Pos-1] != '\r' &&

RestOfBuffer[Pos-1] != '\n') {

- RestOfBuffer = RestOfBuffer.substr(Pos+7);

- Pos = RestOfBuffer.find(">>>>>>>");

+ RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);

+ Pos = RestOfBuffer.find(Terminator);

continue;

}

return RestOfBuffer.data()+Pos;

@@ -1977,23 +2190,25 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {

CurPtr[-1] != '\n' && CurPtr[-1] != '\r')

return false;

- // Check to see if we have <<<<<<<.

- if (BufferEnd-CurPtr < 8 ||

- llvm::StringRef(CurPtr, 7) != "<<<<<<<")

+ // Check to see if we have <<<<<<< or the Perforce-style marker '>>>> '.

+ if ((BufferEnd-CurPtr < 8 || StringRef(CurPtr, 7) != "<<<<<<<") &&

+ (BufferEnd-CurPtr < 6 || StringRef(CurPtr, 5) != ">>>> "))

return false;

// If we have a situation where we don't care about conflict markers, ignore

// it.

- if (IsInConflictMarker || isLexingRawMode())

+ if (CurrentConflictMarkerState || isLexingRawMode())

return false;

- // Check to see if there is a >>>>>>> somewhere in the buffer at the start of

- // a line to terminate this conflict marker.

- if (FindConflictEnd(CurPtr, BufferEnd)) {

+ ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;

+ // Check to see if there is an ending marker somewhere in the buffer at the

+ // start of a line to terminate this conflict marker.

+ if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {

// We found a match. We are really in a conflict marker.

// Diagnose this, and ignore to the end of line.

Diag(CurPtr, diag::err_conflict_marker);

- IsInConflictMarker = true;

+ CurrentConflictMarkerState = Kind;

// Skip ahead to the end of line. We know this exists because the

// end-of-conflict marker starts with \r or \n.

@@ -2010,10 +2225,10 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {

}

-/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'

-/// marker, then it is the end of a conflict marker. Handle it by ignoring up

-/// until the end of the line. This returns true if it is a conflict marker and

-/// false if not.

+/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if

+/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it

+/// is the end of a conflict marker. Handle it by ignoring up until the end of

+/// the line. This returns true if it is a conflict marker and false if not.

bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {

// Only a conflict marker if it starts at the beginning of a line.

if (CurPtr != BufferStart &&

@@ -2022,18 +2237,19 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {

// If we have a situation where we don't care about conflict markers, ignore

// it.

- if (!IsInConflictMarker || isLexingRawMode())

+ if (!CurrentConflictMarkerState || isLexingRawMode())

return false;

- // Check to see if we have the marker (7 characters in a row).

- for (unsigned i = 1; i != 7; ++i)

+ // Check to see if we have the marker (4 characters in a row).

+ for (unsigned i = 1; i != 4; ++i)

if (CurPtr[i] != CurPtr[0])

return false;

// If we do have it, search for the end of the conflict marker. This could

// fail if it got skipped with a '#if 0' or something. Note that CurPtr might

// be the end of conflict marker.

- if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {

+ if (const char *End = FindConflictEnd(CurPtr, BufferEnd,

+ CurrentConflictMarkerState)) {

CurPtr = End;

// Skip ahead to the end of line.

@@ -2043,13 +2259,22 @@ bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {

BufferPtr = CurPtr;

// No longer in the conflict marker.

- IsInConflictMarker = false;

+ CurrentConflictMarkerState = CMK_None;

return true;

}

return false;

}

+bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {

+ if (PP && PP->isCodeCompletionEnabled()) {

+ SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);

+ return Loc == PP->getCodeCompletionLoc();

+ }

+ return false;

/// LexTokenInternal - This implements a simple C family lexer. It is an

/// extremely performance critical piece of code. This assumes that the buffer

@@ -2102,6 +2327,14 @@ LexNextToken:

return PPCache->Lex(Result);

}

+ // Check if we are performing code completion.

+ if (isCodeCompletionPoint(CurPtr-1)) {

+ // Return the code-completion token.

+ Result.startToken();

+ FormTokenWithChars(Result, CurPtr, tok::code_completion);

+ return;

+ }

if (!isLexingRawMode())

Diag(CurPtr-1, diag::null_in_file);

Result.setFlag(Token::LeadingSpace);

@@ -2112,7 +2345,7 @@ LexNextToken:

case 26: // DOS & CP/M EOF: "^Z".

// If we're in Microsoft extensions mode, treat this as end of file.

- if (Features.Microsoft) {

+ if (Features.MicrosoftExt) {

// Read the PP instance variable into an automatic variable, because

// LexEndOfFile will often delete 'this'.

Preprocessor *PPCache = PP;

@@ -2186,6 +2419,102 @@ LexNextToken:

MIOpt.ReadToken();

return LexNumericConstant(Result, CurPtr);

+ case 'u': // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal

+ // Notify MIOpt that we read a non-whitespace/non-comment token.

+ MIOpt.ReadToken();

+ if (Features.CPlusPlus0x) {

+ Char = getCharAndSize(CurPtr, SizeTmp);

+ // UTF-16 string literal

+ if (Char == '"')

+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),

+ tok::utf16_string_literal);

+ // UTF-16 character constant

+ if (Char == '\'')

+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),

+ tok::utf16_char_constant);

+ // UTF-16 raw string literal

+ if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')

+ return LexRawStringLiteral(Result,

+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

+ SizeTmp2, Result),

+ tok::utf16_string_literal);

+ if (Char == '8') {

+ char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);

+ // UTF-8 string literal

+ if (Char2 == '"')

+ return LexStringLiteral(Result,

+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

+ SizeTmp2, Result),

+ tok::utf8_string_literal);

+ if (Char2 == 'R') {

+ unsigned SizeTmp3;

+ char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);

+ // UTF-8 raw string literal

+ if (Char3 == '"') {

+ return LexRawStringLiteral(Result,

+ ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

+ SizeTmp2, Result),

+ SizeTmp3, Result),

+ tok::utf8_string_literal);

+ }

+ // treat u like the start of an identifier.

+ return LexIdentifier(Result, CurPtr);

+ case 'U': // Identifier (Uber) or C++0x UTF-32 string literal

+ // Notify MIOpt that we read a non-whitespace/non-comment token.

+ MIOpt.ReadToken();

+ if (Features.CPlusPlus0x) {

+ Char = getCharAndSize(CurPtr, SizeTmp);

+ // UTF-32 string literal

+ if (Char == '"')

+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),

+ tok::utf32_string_literal);

+ // UTF-32 character constant

+ if (Char == '\'')

+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),

+ tok::utf32_char_constant);

+ // UTF-32 raw string literal

+ if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')

+ return LexRawStringLiteral(Result,

+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

+ SizeTmp2, Result),

+ tok::utf32_string_literal);

+ }

+ // treat U like the start of an identifier.

+ return LexIdentifier(Result, CurPtr);

+ case 'R': // Identifier or C++0x raw string literal

+ // Notify MIOpt that we read a non-whitespace/non-comment token.

+ MIOpt.ReadToken();

+ if (Features.CPlusPlus0x) {

+ Char = getCharAndSize(CurPtr, SizeTmp);

+ if (Char == '"')

+ return LexRawStringLiteral(Result,

+ ConsumeChar(CurPtr, SizeTmp, Result),

+ tok::string_literal);

+ }

+ // treat R like the start of an identifier.

+ return LexIdentifier(Result, CurPtr);

case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").

// Notify MIOpt that we read a non-whitespace/non-comment token.

MIOpt.ReadToken();

@@ -2194,21 +2523,30 @@ LexNextToken:

// Wide string literal.

if (Char == '"')

return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),

- true);

+ tok::wide_string_literal);

+ // Wide raw string literal.

+ if (Features.CPlusPlus0x && Char == 'R' &&

+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')

+ return LexRawStringLiteral(Result,

+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

+ SizeTmp2, Result),

+ tok::wide_string_literal);

// Wide character constant.

if (Char == '\'')

- return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));

+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),

+ tok::wide_char_constant);

// FALL THROUGH, treating L like the start of an identifier.

// C99 6.4.2: Identifiers.

case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':

case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':

- case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':

+ case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/

case 'V': case 'W': case 'X': case 'Y': case 'Z':

case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':

case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':

- case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':

+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/

case 'v': case 'w': case 'x': case 'y': case 'z':

case '_':

// Notify MIOpt that we read a non-whitespace/non-comment token.

@@ -2231,13 +2569,13 @@ LexNextToken:

case '\'':

// Notify MIOpt that we read a non-whitespace/non-comment token.

MIOpt.ReadToken();

- return LexCharConstant(Result, CurPtr);

+ return LexCharConstant(Result, CurPtr, tok::char_constant);

// C99 6.4.5: String Literals.

case '"':

// Notify MIOpt that we read a non-whitespace/non-comment token.

MIOpt.ReadToken();

- return LexStringLiteral(Result, CurPtr, false);

+ return LexStringLiteral(Result, CurPtr, tok::string_literal);

// C99 6.4.6: Punctuators.

case '?':

@@ -2396,7 +2734,7 @@ LexNextToken:

Kind = tok::hashhash; // '%:%:' -> '##'

CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

SizeTmp2, Result);

- } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize

+ } else if (Char == '@' && Features.MicrosoftExt) {// %:@ -> #@ -> Charize

CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);

if (!isLexingRawMode())

Diag(BufferPtr, diag::charize_microsoft_ext);

@@ -2447,6 +2785,10 @@ LexNextToken:

// If this is actually a '<<<<<<<' version control conflict marker,

// recognize it as such and recover nicely.

goto LexNextToken;

+ } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) {

+ // If this is '<<<<' and we're in a Perforce-style conflict marker,

+ // ignore it.

+ goto LexNextToken;

} else if (Features.CUDA && After == '<') {

Kind = tok::lesslessless;

CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

@@ -2470,6 +2812,8 @@ LexNextToken:

char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);

if (After != ':' && After != '>') {

Kind = tok::less;

+ if (!isLexingRawMode())

+ Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);

break;

}

@@ -2494,6 +2838,10 @@ LexNextToken:

CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

SizeTmp2, Result);

Kind = tok::greatergreaterequal;

+ } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) {

+ // If this is actually a '>>>>' conflict marker, recognize it as such

+ // and recover nicely.

+ goto LexNextToken;

} else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {

// If this is '>>>>>>>' and we're in a conflict marker, ignore it.

goto LexNextToken;

@@ -2552,7 +2900,7 @@ LexNextToken:

case '=':

Char = getCharAndSize(CurPtr, SizeTmp);

if (Char == '=') {

- // If this is '=======' and we're in a conflict marker, ignore it.

+ // If this is '====' and we're in a conflict marker, ignore it.

if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))

goto LexNextToken;

@@ -2570,7 +2918,7 @@ LexNextToken:

if (Char == '#') {

Kind = tok::hashhash;

CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);

- } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize

+ } else if (Char == '@' && Features.MicrosoftExt) { // #@ -> Charize

Kind = tok::hashat;

if (!isLexingRawMode())

Diag(BufferPtr, diag::charize_microsoft_ext);

diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 2c96c4d4ee24..70183fd1a0ea 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp

@@ -16,8 +16,8 @@

#include "clang/Lex/Preprocessor.h"

#include "clang/Lex/LexDiagnostic.h"

#include "clang/Basic/TargetInfo.h"

-#include "llvm/ADT/StringRef.h"

#include "llvm/ADT/StringExtras.h"

+#include "llvm/Support/ErrorHandling.h"

using namespace clang;

/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's

@@ -29,12 +29,31 @@ static int HexDigitValue(char C) {

return -1;

}

+static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {

+ switch (kind) {

+ default: llvm_unreachable("Unknown token type!");

+ case tok::char_constant:

+ case tok::string_literal:

+ case tok::utf8_string_literal:

+ return Target.getCharWidth();

+ case tok::wide_char_constant:

+ case tok::wide_string_literal:

+ return Target.getWCharWidth();

+ case tok::utf16_char_constant:

+ case tok::utf16_string_literal:

+ return Target.getChar16Width();

+ case tok::utf32_char_constant:

+ case tok::utf32_string_literal:

+ return Target.getChar32Width();

+ }

/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in

/// either a character or a string literal.

static unsigned ProcessCharEscape(const char *&ThisTokBuf,

const char *ThisTokEnd, bool &HadError,

- FullSourceLoc Loc, bool IsWide,

- Diagnostic *Diags, const TargetInfo &Target) {

+ FullSourceLoc Loc, unsigned CharWidth,

+ DiagnosticsEngine *Diags) {

// Skip the '\' char.

++ThisTokBuf;

@@ -99,9 +118,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,

}

// See if any bits will be truncated when evaluated as a character.

- unsigned CharWidth =

- IsWide ? Target.getWCharWidth() : Target.getCharWidth();

if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {

Overflow = true;

ResultChar &= ~0U >> (32-CharWidth);

@@ -129,9 +145,6 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,

ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');

// Check for overflow. Reject '\777', but not L'\777'.

- unsigned CharWidth =

- IsWide ? Target.getWCharWidth() : Target.getCharWidth();

if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {

if (Diags)

Diags->Report(Loc, diag::warn_octal_escape_too_large);

@@ -167,7 +180,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,

/// return the UTF32.

static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,

uint32_t &UcnVal, unsigned short &UcnLen,

- FullSourceLoc Loc, Diagnostic *Diags,

+ FullSourceLoc Loc, DiagnosticsEngine *Diags,

const LangOptions &Features) {

if (!Features.CPlusPlus && !Features.C99 && Diags)

Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89);

@@ -220,7 +233,8 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,

/// we will likely rework our support for UCN's.

static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,

char *&ResultBuf, bool &HadError,

- FullSourceLoc Loc, bool wide, Diagnostic *Diags,

+ FullSourceLoc Loc, unsigned CharByteWidth,

+ DiagnosticsEngine *Diags,

const LangOptions &Features) {

typedef uint32_t UTF32;

UTF32 UcnVal = 0;

@@ -231,19 +245,22 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,

return;

}

- if (wide) {

- (void)UcnLen;

- assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");

+ assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&

+ "only character widths of 1, 2, or 4 bytes supported");

- if (!Features.ShortWChar) {

- // Note: our internal rep of wide char tokens is always little-endian.

- *ResultBuf++ = (UcnVal & 0x000000FF);

- *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;

- *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;

- *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;

- return;

- }

+ (void)UcnLen;

+ assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");

+ if (CharByteWidth == 4) {

+ // Note: our internal rep of wide char tokens is always little-endian.

+ *ResultBuf++ = (UcnVal & 0x000000FF);

+ *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;

+ *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;

+ *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;

+ return;

+ }

+ if (CharByteWidth == 2) {

// Convert to UTF16.

if (UcnVal < (UTF32)0xFFFF) {

*ResultBuf++ = (UcnVal & 0x000000FF);

@@ -262,6 +279,9 @@ static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,

*ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;

return;

}

+ assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");

// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.

// The conversion below was inspired by:

// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c

@@ -371,7 +391,7 @@ NumericLiteralParser(const char *begin, const char *end,

// Done.

} else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {

PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),

- diag::err_invalid_decimal_digit) << llvm::StringRef(s, 1);

+ diag::err_invalid_decimal_digit) << StringRef(s, 1);

hadError = true;

return;

} else if (*s == '.') {

@@ -434,7 +454,7 @@ NumericLiteralParser(const char *begin, const char *end,

continue; // Success.

case 'i':

case 'I':

- if (PP.getLangOptions().Microsoft) {

+ if (PP.getLangOptions().MicrosoftExt) {

if (isFPConstant || isLong || isLongLong) break;

// Allow i8, i16, i32, i64, and i128.

@@ -498,7 +518,7 @@ NumericLiteralParser(const char *begin, const char *end,

PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),

isFPConstant ? diag::err_invalid_suffix_float_constant :

diag::err_invalid_suffix_integer_constant)

- << llvm::StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);

+ << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);

hadError = true;

return;

}

@@ -528,7 +548,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {

}

// A binary exponent can appear with or without a '.'. If dotted, the

// binary exponent is required.

- if ((*s == 'p' || *s == 'P') && !PP.getLangOptions().CPlusPlus0x) {

+ if (*s == 'p' || *s == 'P') {

const char *Exponent = s;

s++;

saw_exponent = true;

@@ -542,12 +562,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {

}

s = first_non_digit;

- // In C++0x, we cannot support hexadecmial floating literals because

- // they conflict with user-defined literals, so we warn in previous

- // versions of C++ by default.

- if (PP.getLangOptions().CPlusPlus)

- PP.Diag(TokLoc, diag::ext_hexconstant_cplusplus);

- else if (!PP.getLangOptions().HexFloats)

+ if (!PP.getLangOptions().HexFloats)

PP.Diag(TokLoc, diag::ext_hexconstant_invalid);

} else if (saw_period) {

PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),

@@ -569,7 +584,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {

// Done.

} else if (isxdigit(*s)) {

PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),

- diag::err_invalid_binary_digit) << llvm::StringRef(s, 1);

+ diag::err_invalid_binary_digit) << StringRef(s, 1);

hadError = true;

}

// Other suffixes will be diagnosed by the caller.

@@ -599,7 +614,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {

// the code is using an incorrect base.

if (isxdigit(*s) && *s != 'e' && *s != 'E') {

PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),

- diag::err_invalid_octal_digit) << llvm::StringRef(s, 1);

+ diag::err_invalid_octal_digit) << StringRef(s, 1);

hadError = true;

return;

}

@@ -688,7 +703,6 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {

llvm::APFloat::opStatus

NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {

using llvm::APFloat;

- using llvm::StringRef;

unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);

return Result.convertFromString(StringRef(ThisTokBegin, n),

@@ -696,14 +710,51 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {

}

+/// character-literal: [C++0x lex.ccon]

+/// ' c-char-sequence '

+/// u' c-char-sequence '

+/// U' c-char-sequence '

+/// L' c-char-sequence '

+/// c-char-sequence:

+/// c-char

+/// c-char-sequence c-char

+/// c-char:

+/// any member of the source character set except the single-quote ',

+/// backslash \, or new-line character

+/// escape-sequence

+/// universal-character-name

+/// escape-sequence: [C++0x lex.ccon]

+/// simple-escape-sequence

+/// octal-escape-sequence

+/// hexadecimal-escape-sequence

+/// simple-escape-sequence:

+/// one of \' \" \? \\ \a \b \f \n \r \t \v

+/// octal-escape-sequence:

+/// \ octal-digit

+/// \ octal-digit octal-digit

+/// \ octal-digit octal-digit octal-digit

+/// hexadecimal-escape-sequence:

+/// \x hexadecimal-digit

+/// hexadecimal-escape-sequence hexadecimal-digit

+/// universal-character-name:

+/// \u hex-quad

+/// \U hex-quad hex-quad

+/// hex-quad:

+/// hex-digit hex-digit hex-digit hex-digit

+///

CharLiteralParser::CharLiteralParser(const char *begin, const char *end,

- SourceLocation Loc, Preprocessor &PP) {

+ SourceLocation Loc, Preprocessor &PP,

+ tok::TokenKind kind) {

// At this point we know that the character matches the regex "[LuU]?'.*'".

HadError = false;

- // Determine if this is a wide character.

- IsWide = begin[0] == 'L';

- if (IsWide) ++begin;

+ Kind = kind;

+ // Determine if this is a wide or UTF character.

+ if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant ||

+ Kind == tok::utf32_char_constant) {

+ ++begin;

+ }

// Skip over the entry quote.

assert(begin[0] == '\'' && "Invalid token lexed");

@@ -730,8 +781,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,

// Is this a Universal Character Name escape?

if (begin[0] != '\\') // If this is a normal character, consume it.

- ResultChar = *begin++;

+ ResultChar = (unsigned char)*begin++;

else { // Otherwise, this is an escape character.

+ unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());

// Check for UCN.

if (begin[1] == 'u' || begin[1] == 'U') {

uint32_t utf32 = 0;

@@ -742,19 +794,22 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,

HadError = 1;

}

ResultChar = utf32;

+ if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {

+ PP.Diag(Loc, diag::warn_ucn_escape_too_large);

+ ResultChar &= ~0U >> (32-CharWidth);

+ }

} else {

// Otherwise, this is a non-UCN escape character. Process it.

ResultChar = ProcessCharEscape(begin, end, HadError,

FullSourceLoc(Loc,PP.getSourceManager()),

- IsWide,

- &PP.getDiagnostics(), PP.getTargetInfo());

+ CharWidth, &PP.getDiagnostics());

}

// If this is a multi-character constant (e.g. 'abc'), handle it. These are

// implementation defined (C99 6.4.4.4p10).

if (NumCharsSoFar) {

- if (IsWide) {

+ if (!isAscii()) {

// Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.

LitVal = 0;

} else {

@@ -776,8 +831,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,

if (NumCharsSoFar > 1) {

// Warn about discarding the top bits for multi-char wide-character

// constants (L'abcd').

- if (IsWide)

- PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);

+ if (!isAscii())

+ PP.Diag(Loc, diag::warn_extraneous_char_constant);

else if (NumCharsSoFar != 4)

PP.Diag(Loc, diag::ext_multichar_character_literal);

else

@@ -789,47 +844,62 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,

// Transfer the value from APInt to uint64_t

Value = LitVal.getZExtValue();

- if (IsWide && PP.getLangOptions().ShortWChar && Value > 0xFFFF)

- PP.Diag(Loc, diag::warn_ucn_escape_too_large);

// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")

// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple

// character constants are not sign extended in this implementation:

// '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.

- if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&

+ if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&

PP.getLangOptions().CharIsSigned)

Value = (signed char)Value;

}

-/// string-literal: [C99 6.4.5]

-/// " [s-char-sequence] "

-/// L" [s-char-sequence] "

+/// string-literal: [C++0x lex.string]

+/// encoding-prefix " [s-char-sequence] "

+/// encoding-prefix R raw-string

+/// encoding-prefix:

+/// u8

+/// u

+/// U

+/// L

/// s-char-sequence:

/// s-char

/// s-char-sequence s-char

/// s-char:

-/// any source character except the double quote ",

-/// backslash \, or newline character

-/// escape-character

-/// universal-character-name

-/// escape-character: [C99 6.4.4.4]

-/// \ escape-code

+/// any member of the source character set except the double-quote ",

+/// backslash \, or new-line character

+/// escape-sequence

/// universal-character-name

-/// escape-code:

-/// character-escape-code

-/// octal-escape-code

-/// hex-escape-code

-/// character-escape-code: one of

-/// n t b r f v a

-/// \ ' " ?

-/// octal-escape-code:

-/// octal-digit

-/// octal-digit octal-digit

-/// octal-digit octal-digit octal-digit

-/// hex-escape-code:

-/// x hex-digit

-/// hex-escape-code hex-digit

+/// raw-string:

+/// " d-char-sequence ( r-char-sequence ) d-char-sequence "

+/// r-char-sequence:

+/// r-char

+/// r-char-sequence r-char

+/// r-char:

+/// any member of the source character set, except a right parenthesis )

+/// followed by the initial d-char-sequence (which may be empty)

+/// followed by a double quote ".

+/// d-char-sequence:

+/// d-char

+/// d-char-sequence d-char

+/// d-char:

+/// any member of the basic source character set except:

+/// space, the left parenthesis (, the right parenthesis ),

+/// the backslash \, and the control characters representing horizontal

+/// tab, vertical tab, form feed, and newline.

+/// escape-sequence: [C++0x lex.ccon]

+/// simple-escape-sequence

+/// octal-escape-sequence

+/// hexadecimal-escape-sequence

+/// simple-escape-sequence:

+/// one of \' \" \? \\ \a \b \f \n \r \t \v

+/// octal-escape-sequence:

+/// \ octal-digit

+/// \ octal-digit octal-digit

+/// \ octal-digit octal-digit octal-digit

+/// hexadecimal-escape-sequence:

+/// \x hexadecimal-digit

+/// hexadecimal-escape-sequence hexadecimal-digit

/// universal-character-name:

/// \u hex-quad

/// \U hex-quad hex-quad

@@ -841,8 +911,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,

Preprocessor &PP, bool Complain)

: SM(PP.getSourceManager()), Features(PP.getLangOptions()),

Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),

- MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),

- ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {

+ MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),

+ ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {

init(StringToks, NumStringToks);

}

@@ -862,7 +932,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

MaxTokenLength = StringToks[0].getLength();

assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");

SizeBound = StringToks[0].getLength()-2; // -2 for "".

- AnyWide = StringToks[0].is(tok::wide_string_literal);

+ Kind = StringToks[0].getKind();

hadError = false;

@@ -883,8 +953,18 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

if (StringToks[i].getLength() > MaxTokenLength)

MaxTokenLength = StringToks[i].getLength();

- // Remember if we see any wide strings.

- AnyWide |= StringToks[i].is(tok::wide_string_literal);

+ // Remember if we see any wide or utf-8/16/32 strings.

+ // Also check for illegal concatenations.

+ if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {

+ if (isAscii()) {

+ Kind = StringToks[i].getKind();

+ } else {

+ if (Diags)

+ Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),

+ diag::err_unsupported_string_concat);

+ hadError = true;

+ }

}

// Include space for the null terminator.

@@ -892,19 +972,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

// TODO: K&R warning: "traditional C rejects string constant concatenation"

- // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not

- // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.

- wchar_tByteWidth = ~0U;

- if (AnyWide) {

- wchar_tByteWidth = Target.getWCharWidth();

- assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");

- wchar_tByteWidth /= 8;

- }

+ // Get the width in bytes of char/wchar_t/char16_t/char32_t

+ CharByteWidth = getCharWidth(Kind, Target);

+ assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");

+ CharByteWidth /= 8;

// The output buffer size needs to be large enough to hold wide characters.

// This is a worst-case assumption which basically corresponds to L"" "long".

- if (AnyWide)

- SizeBound *= wchar_tByteWidth;

+ SizeBound *= CharByteWidth;

// Size the temporary buffer to hold the result string data.

ResultBuf.resize(SizeBound);

@@ -929,78 +1004,82 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,

&StringInvalid);

if (StringInvalid) {

- hadError = 1;

+ hadError = true;

continue;

}

const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.

- bool wide = false;

// TODO: Input character set mapping support.

- // Skip L marker for wide strings.

- if (ThisTokBuf[0] == 'L') {

- wide = true;

+ // Skip marker for wide or unicode strings.

+ if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {

++ThisTokBuf;

+ // Skip 8 of u8 marker for utf8 strings.

+ if (ThisTokBuf[0] == '8')

+ ++ThisTokBuf;

}

- assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");

- ++ThisTokBuf;

- // Check if this is a pascal string

- if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&

- ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {

+ // Check for raw string

+ if (ThisTokBuf[0] == 'R') {

+ ThisTokBuf += 2; // skip R"

- // If the \p sequence is found in the first token, we have a pascal string

- // Otherwise, if we already have a pascal string, ignore the first \p

- if (i == 0) {

+ const char *Prefix = ThisTokBuf;

+ while (ThisTokBuf[0] != '(')

++ThisTokBuf;

- Pascal = true;

- } else if (Pascal)

- ThisTokBuf += 2;

- }

+ ++ThisTokBuf; // skip '('

+ // remove same number of characters from the end

+ if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))

+ ThisTokEnd -= (ThisTokBuf - Prefix);

+ // Copy the string over

+ CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));

+ } else {

+ assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");

+ ++ThisTokBuf; // skip "

+ // Check if this is a pascal string

+ if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&

+ ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {

- while (ThisTokBuf != ThisTokEnd) {

- // Is this a span of non-escape characters?

- if (ThisTokBuf[0] != '\\') {

- const char *InStart = ThisTokBuf;

- do {

+ // If the \p sequence is found in the first token, we have a pascal string

+ // Otherwise, if we already have a pascal string, ignore the first \p

+ if (i == 0) {

++ThisTokBuf;

- } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

- // Copy the character span over.

- unsigned Len = ThisTokBuf-InStart;

- if (!AnyWide) {

- memcpy(ResultPtr, InStart, Len);

- ResultPtr += Len;

- } else {

- // Note: our internal rep of wide char tokens is always little-endian.

- for (; Len; --Len, ++InStart) {

- *ResultPtr++ = InStart[0];

- // Add zeros at the end.

- for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)

- *ResultPtr++ = 0;

- }

- continue;

- }

- // Is this a Universal Character Name escape?

- if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {

- EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,

- hadError, FullSourceLoc(StringToks[i].getLocation(),SM),

- wide, Diags, Features);

- continue;

+ Pascal = true;

+ } else if (Pascal)

+ ThisTokBuf += 2;

}

- // Otherwise, this is a non-UCN escape character. Process it.

- unsigned ResultChar =

- ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,

- FullSourceLoc(StringToks[i].getLocation(), SM),

- AnyWide, Diags, Target);

- // Note: our internal rep of wide char tokens is always little-endian.

- *ResultPtr++ = ResultChar & 0xFF;

+ while (ThisTokBuf != ThisTokEnd) {

+ // Is this a span of non-escape characters?

+ if (ThisTokBuf[0] != '\\') {

+ const char *InStart = ThisTokBuf;

+ do {

+ ++ThisTokBuf;

+ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

+ // Copy the character span over.

+ CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));

+ continue;

+ }

+ // Is this a Universal Character Name escape?

+ if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {

+ EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,

+ hadError, FullSourceLoc(StringToks[i].getLocation(),SM),

+ CharByteWidth, Diags, Features);

+ continue;

+ }

+ // Otherwise, this is a non-UCN escape character. Process it.

+ unsigned ResultChar =

+ ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,

+ FullSourceLoc(StringToks[i].getLocation(), SM),

+ CharByteWidth*8, Diags);

+ // Note: our internal rep of wide char tokens is always little-endian.

+ *ResultPtr++ = ResultChar & 0xFF;

- if (AnyWide) {

- for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)

+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)

*ResultPtr++ = ResultChar >> i*8;

}

@@ -1008,8 +1087,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

if (Pascal) {

ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;

- if (AnyWide)

- ResultBuf[0] /= wchar_tByteWidth;

+ ResultBuf[0] /= CharByteWidth;

// Verify that pascal strings aren't too large.

if (GetStringLength() > 256) {

@@ -1018,7 +1096,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

diag::err_pascal_string_too_long)

<< SourceRange(StringToks[0].getLocation(),

StringToks[NumStringToks-1].getLocation());

- hadError = 1;

+ hadError = true;

return;

}

} else if (Diags) {

@@ -1036,6 +1114,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){

}

+/// CopyStringFragment - This function copies the bytes of Fragment into ResultPtr.

+/// Performs widening for multi-byte characters.

+void StringLiteralParser::CopyStringFragment(StringRef Fragment) {

+ // Copy the character span over.

+ if (CharByteWidth == 1) {

+ memcpy(ResultPtr, Fragment.data(), Fragment.size());

+ ResultPtr += Fragment.size();

+ } else {

+ // Note: our internal rep of wide char tokens is always little-endian.

+ for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {

+ *ResultPtr++ = *I;

+ // Add zeros at the end.

+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)

+ *ResultPtr++ = 0;

+ }

/// getOffsetOfStringByte - This function returns the offset of the

/// specified byte of the string data represented by Token. This handles

/// advancing over escape sequences in the string.

@@ -1052,7 +1149,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,

if (StringInvalid)

return 0;

- assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");

+ assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&

+ SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");

const char *SpellingStart = SpellingPtr;

@@ -1077,7 +1175,7 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,

bool HadError = false;

ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,

FullSourceLoc(Tok.getLocation(), SM),

- false, Diags, Target);

+ CharByteWidth*8, Diags);

assert(!HadError && "This method isn't valid on erroneous strings");

--ByteNo;

}

diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 968c15e3c27b..1846d1c05e30 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp

@@ -15,13 +15,15 @@

#include "clang/Lex/MacroInfo.h"

#include "clang/Lex/Preprocessor.h"

#include "clang/Lex/LexDiagnostic.h"

+#include <algorithm>

using namespace clang;

/// MacroArgs ctor function - This destroys the vector passed in.

MacroArgs *MacroArgs::create(const MacroInfo *MI,

- const Token *UnexpArgTokens,

- unsigned NumToks, bool VarargsElided,

- Preprocessor &PP) {

+ llvm::ArrayRef<Token> UnexpArgTokens,

+ bool VarargsElided, Preprocessor &PP) {

assert(MI->isFunctionLike() &&

"Can't have args for an object-like macro!");

MacroArgs **ResultEnt = 0;

@@ -31,12 +33,12 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,

// free list. If so, reuse it.

for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;

Entry = &(*Entry)->ArgCache)

- if ((*Entry)->NumUnexpArgTokens >= NumToks &&

+ if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&

(*Entry)->NumUnexpArgTokens < ClosestMatch) {

ResultEnt = Entry;

// If we have an exact match, use it.

- if ((*Entry)->NumUnexpArgTokens == NumToks)

+ if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())

break;

// Otherwise, use the best fit.

ClosestMatch = (*Entry)->NumUnexpArgTokens;

@@ -45,21 +47,22 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,

MacroArgs *Result;

if (ResultEnt == 0) {

// Allocate memory for a MacroArgs object with the lexer tokens at the end.

- Result = (MacroArgs*)malloc(sizeof(MacroArgs) + NumToks*sizeof(Token));

+ Result = (MacroArgs*)malloc(sizeof(MacroArgs) +

+ UnexpArgTokens.size() * sizeof(Token));

// Construct the MacroArgs object.

- new (Result) MacroArgs(NumToks, VarargsElided);

+ new (Result) MacroArgs(UnexpArgTokens.size(), VarargsElided);

} else {

Result = *ResultEnt;

// Unlink this node from the preprocessors singly linked list.

*ResultEnt = Result->ArgCache;

- Result->NumUnexpArgTokens = NumToks;

+ Result->NumUnexpArgTokens = UnexpArgTokens.size();

Result->VarargsElided = VarargsElided;

}

// Copy the actual unexpanded tokens to immediately after the result ptr.

- if (NumToks)

- memcpy(const_cast<Token*>(Result->getUnexpArgument(0)),

- UnexpArgTokens, NumToks*sizeof(Token));

+ if (!UnexpArgTokens.empty())

+ std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),

+ const_cast<Token*>(Result->getUnexpArgument(0)));

return Result;

}

@@ -186,7 +189,8 @@ MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI,

///

Token MacroArgs::StringifyArgument(const Token *ArgToks,

Preprocessor &PP, bool Charify,

- SourceLocation hashInstLoc) {

+ SourceLocation ExpansionLocStart,

+ SourceLocation ExpansionLocEnd) {

Token Tok;

Tok.startToken();

Tok.setKind(Charify ? tok::char_constant : tok::string_literal);

@@ -208,13 +212,21 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,

// by 6.10.3.2p2.

if (Tok.is(tok::string_literal) || // "foo"

Tok.is(tok::wide_string_literal) || // L"foo"

- Tok.is(tok::char_constant)) { // 'x' and L'x'.

+ Tok.is(tok::utf8_string_literal) || // u8"foo"

+ Tok.is(tok::utf16_string_literal) || // u"foo"

+ Tok.is(tok::utf32_string_literal) || // U"foo"

+ Tok.is(tok::char_constant) || // 'x'

+ Tok.is(tok::wide_char_constant) || // L'x'.

+ Tok.is(tok::utf16_char_constant) || // u'x'.

+ Tok.is(tok::utf32_char_constant)) { // U'x'.

bool Invalid = false;

std::string TokStr = PP.getSpelling(Tok, &Invalid);

if (!Invalid) {

std::string Str = Lexer::Stringify(TokStr);

Result.append(Str.begin(), Str.end());

}

+ } else if (Tok.is(tok::code_completion)) {

+ PP.CodeCompleteNaturalLanguage();

} else {

// Otherwise, just append the token. Do some gymnastics to get the token

// in place and avoid copies where possible.

@@ -274,7 +286,8 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,

}

- PP.CreateString(&Result[0], Result.size(), Tok, hashInstLoc);

+ PP.CreateString(&Result[0], Result.size(), Tok,

+ ExpansionLocStart, ExpansionLocEnd);

return Tok;

}

@@ -282,7 +295,8 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,

/// that has been 'stringified' as required by the # operator.

const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,

Preprocessor &PP,

- SourceLocation hashInstLoc) {

+ SourceLocation ExpansionLocStart,

+ SourceLocation ExpansionLocEnd) {

assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");

if (StringifiedArgs.empty()) {

StringifiedArgs.resize(getNumArguments());

@@ -291,6 +305,8 @@ const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,

}

if (StringifiedArgs[ArgNo].isNot(tok::string_literal))

StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP,

- /*Charify=*/false, hashInstLoc);

+ /*Charify=*/false,

+ ExpansionLocStart,

+ ExpansionLocEnd);

return StringifiedArgs[ArgNo];

}

diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
index a962dacf7c93..cf86d710adb7 100644
--- a/lib/Lex/MacroArgs.h
+++ b/lib/Lex/MacroArgs.h

@@ -14,6 +14,8 @@

#ifndef LLVM_CLANG_MACROARGS_H

#define LLVM_CLANG_MACROARGS_H

+#include "llvm/ADT/ArrayRef.h"

#include <vector>

namespace clang {

@@ -58,9 +60,8 @@ public:

/// MacroArgs ctor function - Create a new MacroArgs object with the specified

/// macro and argument info.

static MacroArgs *create(const MacroInfo *MI,

- const Token *UnexpArgTokens,

- unsigned NumArgTokens, bool VarargsElided,

- Preprocessor &PP);

+ llvm::ArrayRef<Token> UnexpArgTokens,

+ bool VarargsElided, Preprocessor &PP);

/// destroy - Destroy and deallocate the memory for this object.

///

@@ -88,7 +89,8 @@ public:

/// getStringifiedArgument - Compute, cache, and return the specified argument

/// that has been 'stringified' as required by the # operator.

const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP,

- SourceLocation hashInstLoc);

+ SourceLocation ExpansionLocStart,

+ SourceLocation ExpansionLocEnd);

/// getNumArguments - Return the number of arguments passed into this macro

/// invocation.

@@ -109,7 +111,8 @@ public:

///

static Token StringifyArgument(const Token *ArgToks,

Preprocessor &PP, bool Charify,

- SourceLocation hashInstLoc);

+ SourceLocation ExpansionLocStart,

+ SourceLocation ExpansionLocEnd);

/// deallocate - This should only be called by the Preprocessor when managing

diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 0a16a2567219..5a7af5639830 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp

@@ -21,6 +21,7 @@ MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) {

IsGNUVarargs = false;

IsBuiltinMacro = false;

IsFromAST = false;

+ ChangedAfterLoad = false;

IsDisabled = false;

IsUsed = false;

IsAllowRedefinitionsWithoutWarning = false;

@@ -40,6 +41,7 @@ MacroInfo::MacroInfo(const MacroInfo &MI, llvm::BumpPtrAllocator &PPAllocator) {

IsGNUVarargs = MI.IsGNUVarargs;

IsBuiltinMacro = MI.IsBuiltinMacro;

IsFromAST = MI.IsFromAST;

+ ChangedAfterLoad = MI.ChangedAfterLoad;

IsDisabled = MI.IsDisabled;

IsUsed = MI.IsUsed;

IsAllowRedefinitionsWithoutWarning = MI.IsAllowRedefinitionsWithoutWarning;

@@ -68,9 +70,9 @@ unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {

assert((macroEnd.isFileID() || lastToken.is(tok::comment)) &&

"Macro defined in macro?");

std::pair<FileID, unsigned>

- startInfo = SM.getDecomposedInstantiationLoc(macroStart);

+ startInfo = SM.getDecomposedExpansionLoc(macroStart);

std::pair<FileID, unsigned>

- endInfo = SM.getDecomposedInstantiationLoc(macroEnd);

+ endInfo = SM.getDecomposedExpansionLoc(macroEnd);

assert(startInfo.first == endInfo.first &&

"Macro definition spanning multiple FileIDs ?");

assert(startInfo.second <= endInfo.second);

diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
index 33106591c3ba..986341b98668 100644
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp

@@ -74,6 +74,8 @@ void Preprocessor::EnterCachingLexMode() {

return;

PushIncludeMacroStack();

+ if (CurLexerKind != CLK_LexAfterModuleImport)

+ CurLexerKind = CLK_CachingLexer;

}

diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 4af5fabe5c80..de50c750e4d6 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp

@@ -17,6 +17,7 @@

#include "clang/Lex/MacroInfo.h"

#include "clang/Lex/LexDiagnostic.h"

#include "clang/Lex/CodeCompletionHandler.h"

+#include "clang/Lex/ModuleLoader.h"

#include "clang/Lex/Pragma.h"

#include "clang/Basic/FileManager.h"

#include "clang/Basic/SourceManager.h"

@@ -102,8 +103,8 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {

if (MacroNameTok.is(tok::code_completion)) {

if (CodeComplete)

CodeComplete->CodeCompleteMacroName(isDefineUndef == 1);

+ setCodeCompletionReached();

LexUnexpandedToken(MacroNameTok);

- return;

}

// Missing macro name?

@@ -192,7 +193,8 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) {

/// the first valid token.

void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

bool FoundNonSkipPortion,

- bool FoundElse) {

+ bool FoundElse,

+ SourceLocation ElseLoc) {

++NumSkipped;

assert(CurTokenLexer == 0 && CurPPLexer && "Lexing a macro, not a file?");

@@ -214,6 +216,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

if (Tok.is(tok::code_completion)) {

if (CodeComplete)

CodeComplete->CodeCompleteInConditionalExclusion();

+ setCodeCompletionReached();

continue;

}

@@ -222,7 +225,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

// Emit errors for each unterminated conditional on the stack, including

// the current one.

while (!CurPPLexer->ConditionalStack.empty()) {

- if (!isCodeCompletionFile(Tok.getLocation()))

+ if (CurLexer->getFileLoc() != CodeCompletionFileLoc)

Diag(CurPPLexer->ConditionalStack.back().IfLoc,

diag::err_pp_unterminated_conditional);

CurPPLexer->ConditionalStack.pop_back();

@@ -275,9 +278,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

// that we can't use Tok.getIdentifierInfo() because its lookup is disabled

// when skipping.

char DirectiveBuf[20];

- llvm::StringRef Directive;

+ StringRef Directive;

if (!Tok.needsCleaning() && Tok.getLength() < 20) {

- Directive = llvm::StringRef(RawCharData, Tok.getLength());

+ Directive = StringRef(RawCharData, Tok.getLength());

} else {

std::string DirectiveStr = getSpelling(Tok);

unsigned IdLen = DirectiveStr.size();

@@ -288,11 +291,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

continue;

}

memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);

- Directive = llvm::StringRef(DirectiveBuf, IdLen);

+ Directive = StringRef(DirectiveBuf, IdLen);

}

if (Directive.startswith("if")) {

- llvm::StringRef Sub = Directive.substr(2);

+ StringRef Sub = Directive.substr(2);

if (Sub.empty() || // "if"

Sub == "def" || // "ifdef"

Sub == "ndef") { // "ifndef"

@@ -307,7 +310,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

Callbacks->Endif();

}

} else if (Directive[0] == 'e') {

- llvm::StringRef Sub = Directive.substr(1);

+ StringRef Sub = Directive.substr(1);

if (Sub == "ndif") { // "endif"

CheckEndOfDirective("endif");

PPConditionalInfo CondInfo;

@@ -387,6 +390,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,

// of the file, just stop skipping and return to lexing whatever came after

// the #if block.

CurPPLexer->LexingRawMode = false;

+ if (Callbacks) {

+ SourceLocation BeginLoc = ElseLoc.isValid() ? ElseLoc : IfTokenLoc;

+ Callbacks->SourceRangeSkipped(SourceRange(BeginLoc, Tok.getLocation()));

+ }

}

void Preprocessor::PTHSkipExcludedConditionalBlock() {

@@ -472,12 +480,13 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() {

/// return null on failure. isAngled indicates whether the file reference is

/// for system #include's or not (i.e. using <> instead of "").

const FileEntry *Preprocessor::LookupFile(

- llvm::StringRef Filename,

+ StringRef Filename,

bool isAngled,

const DirectoryLookup *FromDir,

const DirectoryLookup *&CurDir,

- llvm::SmallVectorImpl<char> *SearchPath,

- llvm::SmallVectorImpl<char> *RelativePath) {

+ SmallVectorImpl<char> *SearchPath,

+ SmallVectorImpl<char> *RelativePath,

+ StringRef *SuggestedModule) {

// If the header lookup mechanism may be relative to the current file, pass in

// info about where the current file is.

const FileEntry *CurFileEnt = 0;

@@ -501,12 +510,13 @@ const FileEntry *Preprocessor::LookupFile(

CurDir = CurDirLookup;

const FileEntry *FE = HeaderInfo.LookupFile(

Filename, isAngled, FromDir, CurDir, CurFileEnt,

- SearchPath, RelativePath);

+ SearchPath, RelativePath, SuggestedModule);

if (FE) return FE;

// Otherwise, see if this is a subframework header. If so, this is relative

// to one of the headers on the #include stack. Walk the list of the current

// headers on the #include stack and pass them to HeaderInfo.

+ // FIXME: SuggestedModule is not filled in by the subframework lookup below.

if (IsFileLexer()) {

if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))

if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt,

@@ -581,6 +591,7 @@ TryAgain:

if (CodeComplete)

CodeComplete->CodeCompleteDirective(

CurPPLexer->getConditionalStackDepth() > 0);

+ setCodeCompletionReached();

return;

case tok::numeric_constant: // # 7 GNU line marker directive.

if (getLangOptions().AsmPreprocessor)

@@ -652,6 +663,9 @@ TryAgain:

case tok::pp_unassert:

//isExtension = true; // FIXME: implement #unassert

break;

+ case tok::pp___export_macro__:

+ return HandleMacroExportDirective(Result);

}

break;

}

@@ -758,9 +772,13 @@ void Preprocessor::HandleLineDirective(Token &Tok) {

// Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a

// number greater than 2147483647". C90 requires that the line # be <= 32767.

- unsigned LineLimit = Features.C99 ? 2147483648U : 32768U;

+ unsigned LineLimit = 32768U;

+ if (Features.C99 || Features.CPlusPlus0x)

+ LineLimit = 2147483648U;

if (LineNo >= LineLimit)

Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;

+ else if (Features.CPlusPlus0x && LineNo >= 32768U)

+ Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);

int FilenameID = -1;

Token StrTok;

@@ -777,7 +795,7 @@ void Preprocessor::HandleLineDirective(Token &Tok) {

} else {

// Parse and validate the string, converting it into a unique ID.

StringLiteralParser Literal(&StrTok, 1, *this);

- assert(!Literal.AnyWide && "Didn't allow wide strings in");

+ assert(Literal.isAscii() && "Didn't allow wide strings in");

if (Literal.hadError)

return DiscardUntilEndOfDirective();

if (Literal.Pascal) {

@@ -825,7 +843,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,

// If we are leaving the current presumed file, check to make sure the

// presumed include stack isn't empty!

FileID CurFileID =

- SM.getDecomposedInstantiationLoc(FlagTok.getLocation()).first;

+ SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;

PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());

if (PLoc.isInvalid())

return true;

@@ -834,7 +852,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,

// different physical file, then we aren't in a "1" line marker flag region.

SourceLocation IncLoc = PLoc.getIncludeLoc();

if (IncLoc.isInvalid() ||

- SM.getDecomposedInstantiationLoc(IncLoc).first != CurFileID) {

+ SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {

PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);

PP.DiscardUntilEndOfDirective();

return true;

@@ -910,7 +928,7 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {

} else {

// Parse and validate the string, converting it into a unique ID.

StringLiteralParser Literal(&StrTok, 1, *this);

- assert(!Literal.AnyWide && "Didn't allow wide strings in");

+ assert(Literal.isAscii() && "Didn't allow wide strings in");

if (Literal.hadError)

return DiscardUntilEndOfDirective();

if (Literal.Pascal) {

@@ -1000,6 +1018,37 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {

}

+/// \brief Handle a #__export_macro__ directive.

+void Preprocessor::HandleMacroExportDirective(Token &Tok) {

+ Token MacroNameTok;

+ ReadMacroName(MacroNameTok, 2);

+ // Error reading macro name? If so, diagnostic already issued.

+ if (MacroNameTok.is(tok::eod))

+ return;

+ // Check to see if this is the last token on the #__export_macro__ line.

+ CheckEndOfDirective("__export_macro__");

+ // Okay, we finally have a valid identifier to export.

+ MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());

+ // If the macro is not defined, this is an error.

+ if (MI == 0) {

+ Diag(MacroNameTok, diag::err_pp_export_non_macro)

+ << MacroNameTok.getIdentifierInfo();

+ return;

+ }

+ // Note that this macro has now been exported.

+ MI->setExportLocation(MacroNameTok.getLocation());

+ // If this macro definition came from a PCH file, mark it

+ // as having changed since serialization.

+ if (MI->isFromAST())

+ MI->setChangedAfterLoad();

//===----------------------------------------------------------------------===//

// Preprocessor Include Directive Handling.

//===----------------------------------------------------------------------===//

@@ -1011,7 +1060,7 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {

/// spelling of the filename, but is also expected to handle the case when

/// this method decides to use a different buffer.

bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,

- llvm::StringRef &Buffer) {

+ StringRef &Buffer) {

// Get the text form of the filename.

assert(!Buffer.empty() && "Can't have tokens with empty spellings!");

@@ -1020,27 +1069,27 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,

if (Buffer[0] == '<') {

if (Buffer.back() != '>') {

Diag(Loc, diag::err_pp_expects_filename);

- Buffer = llvm::StringRef();

+ Buffer = StringRef();

return true;

}

isAngled = true;

} else if (Buffer[0] == '"') {

if (Buffer.back() != '"') {

Diag(Loc, diag::err_pp_expects_filename);

- Buffer = llvm::StringRef();

+ Buffer = StringRef();

return true;

}

isAngled = false;

} else {

Diag(Loc, diag::err_pp_expects_filename);

- Buffer = llvm::StringRef();

+ Buffer = StringRef();

return true;

}

// Diagnose #include "" as invalid.

if (Buffer.size() <= 2) {

Diag(Loc, diag::err_pp_empty_filename);

- Buffer = llvm::StringRef();

+ Buffer = StringRef();

return true;

}

@@ -1070,6 +1119,7 @@ bool Preprocessor::ConcatenateIncludeName(

// FIXME: Provide code completion for #includes.

if (CurTok.is(tok::code_completion)) {

+ setCodeCompletionReached();

Lex(CurTok);

continue;

}

@@ -1122,7 +1172,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

// Reserve a buffer to get the spelling.

llvm::SmallString<128> FilenameBuffer;

- llvm::StringRef Filename;

+ StringRef Filename;

SourceLocation End;

switch (FilenameTok.getKind()) {

@@ -1171,23 +1221,44 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

return;

}

+ // Complain about attempts to #include files in an audit pragma.

+ if (PragmaARCCFCodeAuditedLoc.isValid()) {

+ Diag(HashLoc, diag::err_pp_include_in_arc_cf_code_audited);

+ Diag(PragmaARCCFCodeAuditedLoc, diag::note_pragma_entered_here);

+ // Immediately leave the pragma.

+ PragmaARCCFCodeAuditedLoc = SourceLocation();

+ }

// Search include directories.

const DirectoryLookup *CurDir;

llvm::SmallString<1024> SearchPath;

llvm::SmallString<1024> RelativePath;

// We get the raw path only if we have 'Callbacks' to which we later pass

// the path.

+ StringRef SuggestedModule;

const FileEntry *File = LookupFile(

Filename, isAngled, LookupFrom, CurDir,

- Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL);

+ Callbacks ? &SearchPath : NULL, Callbacks ? &RelativePath : NULL,

+ AutoModuleImport? &SuggestedModule : 0);

+ // If we are supposed to import a module rather than including the header,

+ // do so now.

+ if (!SuggestedModule.empty()) {

+ TheModuleLoader.loadModule(IncludeTok.getLocation(),

+ Identifiers.get(SuggestedModule),

+ FilenameTok.getLocation());

+ return;

+ }

// Notify the callback object that we've seen an inclusion directive.

if (Callbacks)

Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled, File,

End, SearchPath, RelativePath);

if (File == 0) {

- Diag(FilenameTok, diag::warn_pp_file_not_found) << Filename;

+ if (!SuppressIncludeNotFoundError)

+ Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;

return;

}

@@ -1284,7 +1355,7 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,

/// closing ), updating MI with what we learn. Return true if an error occurs

/// parsing the arg list.

bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {

- llvm::SmallVector<IdentifierInfo*, 32> Arguments;

+ SmallVector<IdentifierInfo*, 32> Arguments;

Token Tok;

while (1) {

@@ -1298,8 +1369,10 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {

Diag(Tok, diag::err_pp_expected_ident_in_arg_list);

return true;

case tok::ellipsis: // #define X(... -> C99 varargs

- // Warn if use of C99 feature in non-C99 mode.

- if (!Features.C99) Diag(Tok, diag::ext_variadic_macro);

+ if (!Features.C99)

+ Diag(Tok, Features.CPlusPlus0x ?

+ diag::warn_cxx98_compat_variadic_macro :

+ diag::ext_variadic_macro);

// Lex the token after the identifier.

LexUnexpandedToken(Tok);

@@ -1423,7 +1496,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {

// Read the first token after the arg list for down below.

LexUnexpandedToken(Tok);

- } else if (Features.C99) {

+ } else if (Features.C99 || Features.CPlusPlus0x) {

// C99 requires whitespace between the macro definition and the body. Emit

// a diagnostic for something like "#define X+".

Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);

@@ -1564,7 +1637,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) {

// warn-because-unused-macro set. If it gets used it will be removed from set.

if (isInPrimaryFile() && // don't warn for include'd macros.

Diags->getDiagnosticLevel(diag::pp_macro_not_used,

- MI->getDefinitionLoc()) != Diagnostic::Ignored) {

+ MI->getDefinitionLoc()) != DiagnosticsEngine::Ignored) {

MI->setIsWarnIfUnused(true);

WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());

}

@@ -1765,7 +1838,7 @@ void Preprocessor::HandleElseDirective(Token &Result) {

// Finally, skip the rest of the contents of this block.

SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,

- /*FoundElse*/true);

+ /*FoundElse*/true, Result.getLocation());

if (Callbacks)

Callbacks->Else();

@@ -1798,7 +1871,8 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) {

// Finally, skip the rest of the contents of this block.

SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,

- /*FoundElse*/CI.FoundElse);

+ /*FoundElse*/CI.FoundElse,

+ ElifToken.getLocation());

if (Callbacks)

Callbacks->Elif(SourceRange(ConditionalBegin, ConditionalEnd));

diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index 8fcfc70a7c67..20f624a0bb12 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp

@@ -23,6 +23,7 @@

#include "clang/Basic/TargetInfo.h"

#include "clang/Lex/LexDiagnostic.h"

#include "llvm/ADT/APSInt.h"

+#include "llvm/Support/ErrorHandling.h"

using namespace clang;

namespace {

@@ -83,20 +84,21 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

Result.setBegin(PeekTok.getLocation());

// Get the next token, don't expand it.

- PP.LexUnexpandedToken(PeekTok);

+ PP.LexUnexpandedNonComment(PeekTok);

// Two options, it can either be a pp-identifier or a (.

SourceLocation LParenLoc;

if (PeekTok.is(tok::l_paren)) {

// Found a paren, remember we saw it and skip it.

LParenLoc = PeekTok.getLocation();

- PP.LexUnexpandedToken(PeekTok);

+ PP.LexUnexpandedNonComment(PeekTok);

}

if (PeekTok.is(tok::code_completion)) {

if (PP.getCodeCompletionHandler())

PP.getCodeCompletionHandler()->CodeCompleteMacroName(false);

- PP.LexUnexpandedToken(PeekTok);

+ PP.setCodeCompletionReached();

+ PP.LexUnexpandedNonComment(PeekTok);

}

// If we don't have a pp-identifier now, this is an error.

@@ -115,12 +117,16 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

PP.markMacroAsUsed(Macro);

}

- // Consume identifier.

- Result.setEnd(PeekTok.getLocation());

- PP.LexUnexpandedToken(PeekTok);

+ // Invoke the 'defined' callback.

+ if (PPCallbacks *Callbacks = PP.getPPCallbacks())

+ Callbacks->Defined(PeekTok);

// If we are in parens, ensure we have a trailing ).

if (LParenLoc.isValid()) {

+ // Consume identifier.

+ Result.setEnd(PeekTok.getLocation());

+ PP.LexUnexpandedNonComment(PeekTok);

if (PeekTok.isNot(tok::r_paren)) {

PP.Diag(PeekTok.getLocation(), diag::err_pp_missing_rparen) << "defined";

PP.Diag(LParenLoc, diag::note_matching) << "(";

@@ -129,6 +135,10 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

// Consume the ).

Result.setEnd(PeekTok.getLocation());

PP.LexNonComment(PeekTok);

+ } else {

+ // Consume identifier.

+ Result.setEnd(PeekTok.getLocation());

+ PP.LexNonComment(PeekTok);

}

// Success, remember that we saw defined(X).

@@ -152,7 +162,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

if (PeekTok.is(tok::code_completion)) {

if (PP.getCodeCompletionHandler())

PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();

- PP.LexUnexpandedToken(PeekTok);

+ PP.setCodeCompletionReached();

+ PP.LexNonComment(PeekTok);

}

// If this token's spelling is a pp-identifier, check to see if it is

@@ -188,7 +199,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

case tok::numeric_constant: {

llvm::SmallString<64> IntegerBuffer;

bool NumberInvalid = false;

- llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,

+ StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,

&NumberInvalid);

if (NumberInvalid)

return true; // a diagnostic was already reported

@@ -205,9 +216,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

assert(Literal.isIntegerLiteral() && "Unknown ppnumber");

// long long is a C99 feature.

- if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x

- && Literal.isLongLong)

- PP.Diag(PeekTok, diag::ext_longlong);

+ if (!PP.getLangOptions().C99 && Literal.isLongLong)

+ PP.Diag(PeekTok, PP.getLangOptions().CPlusPlus0x ?

+ diag::warn_cxx98_compat_longlong : diag::ext_longlong);

// Parse the integer literal into Result.

if (Literal.GetIntegerValue(Result.Val)) {

@@ -236,15 +247,18 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

PP.LexNonComment(PeekTok);

return false;

}

- case tok::char_constant: { // 'x'

+ case tok::char_constant: // 'x'

+ case tok::wide_char_constant: { // L'x'

+ case tok::utf16_char_constant: // u'x'

+ case tok::utf32_char_constant: // U'x'

llvm::SmallString<32> CharBuffer;

bool CharInvalid = false;

- llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);

+ StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);

if (CharInvalid)

return true;

CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),

- PeekTok.getLocation(), PP);

+ PeekTok.getLocation(), PP, PeekTok.getKind());

if (Literal.hadError())

return true; // A diagnostic was already emitted.

@@ -255,6 +269,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

NumBits = TI.getIntWidth();

else if (Literal.isWide())

NumBits = TI.getWCharWidth();

+ else if (Literal.isUTF16())

+ NumBits = TI.getChar16Width();

+ else if (Literal.isUTF32())

+ NumBits = TI.getChar32Width();

else

NumBits = TI.getCharWidth();

@@ -262,8 +280,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,

llvm::APSInt Val(NumBits);

// Set the value.

Val = Literal.getValue();

- // Set the signedness.

- Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);

+ // Set the signedness. UTF-16 and UTF-32 are always unsigned.

+ if (!Literal.isUTF16() && !Literal.isUTF32())

+ Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);

if (Result.Val.getBitWidth() > Val.getBitWidth()) {

Result.Val = Val.extend(Result.Val.getBitWidth());

@@ -521,7 +540,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,

bool Overflow = false;

switch (Operator) {

- default: assert(0 && "Unknown operator token!");

+ default: llvm_unreachable("Unknown operator token!");

case tok::percent:

if (RHS.Val != 0)

Res = LHS.Val % RHS.Val;

@@ -704,7 +723,7 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {

// Peek ahead one token.

Token Tok;

- Lex(Tok);

+ LexNonComment(Tok);

// C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.

unsigned BitWidth = getTargetInfo().getIntMaxTWidth();

@@ -759,4 +778,3 @@ EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {

DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;

return ResVal.Val != 0;

}

diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index bf28199b888a..25a98ae47b6e 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp

@@ -89,7 +89,14 @@ void Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,

<< std::string(SourceMgr.getBufferName(FileStart)) << "";

return;

}

+ if (isCodeCompletionEnabled() &&

+ SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {

+ CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);

+ CodeCompletionLoc =

+ CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);

+ }

EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);

return;

}

@@ -106,7 +113,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,

CurLexer.reset(TheLexer);

CurPPLexer = TheLexer;

CurDirLookup = CurDir;

+ if (CurLexerKind != CLK_LexAfterModuleImport)

+ CurLexerKind = CLK_Lexer;

// Notify the client, if desired, that we are in a new source file.

if (Callbacks && !CurLexer->Is_PragmaLexer) {

SrcMgr::CharacteristicKind FileType =

@@ -128,7 +137,9 @@ void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,

CurDirLookup = CurDir;

CurPTHLexer.reset(PL);

CurPPLexer = CurPTHLexer.get();

+ if (CurLexerKind != CLK_LexAfterModuleImport)

+ CurLexerKind = CLK_PTHLexer;

// Notify the client, if desired, that we are in a new source file.

if (Callbacks) {

FileID FID = CurPPLexer->getFileID();

@@ -152,6 +163,8 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,

CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);

CurTokenLexer->Init(Tok, ILEnd, Args);

}

+ if (CurLexerKind != CLK_LexAfterModuleImport)

+ CurLexerKind = CLK_TokenLexer;

}

/// EnterTokenStream - Add a "macro" context to the top of the include stack,

@@ -181,6 +194,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,

CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);

CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);

}

+ if (CurLexerKind != CLK_LexAfterModuleImport)

+ CurLexerKind = CLK_TokenLexer;

}

/// HandleEndOfFile - This callback is invoked when the lexer hits the end of

@@ -201,9 +216,50 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {

}

+ // Complain about reaching an EOF within arc_cf_code_audited.

+ if (PragmaARCCFCodeAuditedLoc.isValid()) {

+ Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited);

+ // Recover by leaving immediately.

+ PragmaARCCFCodeAuditedLoc = SourceLocation();

+ }

// If this is a #include'd file, pop it off the include stack and continue

// lexing the #includer file.

if (!IncludeMacroStack.empty()) {

+ // If we lexed the code-completion file, act as if we reached EOF.

+ if (isCodeCompletionEnabled() && CurPPLexer &&

+ SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==

+ CodeCompletionFileLoc) {

+ if (CurLexer) {

+ Result.startToken();

+ CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);

+ CurLexer.reset();

+ } else {

+ assert(CurPTHLexer && "Got EOF but no current lexer set!");

+ CurPTHLexer->getEOF(Result);

+ CurPTHLexer.reset();

+ }

+ CurPPLexer = 0;

+ return true;

+ }

+ if (!isEndOfMacro && CurPPLexer &&

+ SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {

+ // Notify SourceManager to record the number of FileIDs that were created

+ // during lexing of the #include'd file.

+ unsigned NumFIDs =

+ SourceMgr.local_sloc_entry_size() -

+ CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;

+ SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);

+ }

+ FileID ExitedFID;

+ if (Callbacks && !isEndOfMacro && CurPPLexer)

+ ExitedFID = CurPPLexer->getFileID();

// We're done with the #included file.

RemoveTopOfLexerStack();

@@ -212,7 +268,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {

SrcMgr::CharacteristicKind FileType =

SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());

Callbacks->FileChanged(CurPPLexer->getSourceLocation(),

- PPCallbacks::ExitFile, FileType);

+ PPCallbacks::ExitFile, FileType, ExitedFID);

}

// Client should lex another token.

diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index ecd4d4cfc68b..e10c95c75f25 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp

@@ -21,10 +21,12 @@

#include "clang/Lex/LexDiagnostic.h"

#include "clang/Lex/CodeCompletionHandler.h"

#include "clang/Lex/ExternalPreprocessorSource.h"

+#include "clang/Lex/LiteralSupport.h"

#include "llvm/ADT/StringSwitch.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/Config/config.h"

#include "llvm/Support/raw_ostream.h"

+#include "llvm/Support/ErrorHandling.h"

#include <cstdio>

#include <ctime>

using namespace clang;

@@ -91,9 +93,10 @@ void Preprocessor::RegisterBuiltinMacros() {

Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute");

Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");

Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");

+ Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");

// Microsoft Extensions.

- if (Features.Microsoft)

+ if (Features.MicrosoftExt)

Ident__pragma = RegisterBuiltinMacro(*this, "__pragma");

else

Ident__pragma = 0;

@@ -185,7 +188,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,

// If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.

if (MI->isBuiltinMacro()) {

- if (Callbacks) Callbacks->MacroExpands(Identifier, MI);

+ if (Callbacks) Callbacks->MacroExpands(Identifier, MI,

+ Identifier.getLocation());

ExpandBuiltinMacro(Identifier);

return false;

}

@@ -226,13 +230,14 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,

// Notice that this macro has been used.

markMacroAsUsed(MI);

- if (Callbacks) Callbacks->MacroExpands(Identifier, MI);

- // If we started lexing a macro, enter the macro expansion body.

// Remember where the token is expanded.

SourceLocation ExpandLoc = Identifier.getLocation();

+ if (Callbacks) Callbacks->MacroExpands(Identifier, MI,

+ SourceRange(ExpandLoc, ExpansionEnd));

+ // If we started lexing a macro, enter the macro expansion body.

// If this macro expands to no tokens, don't bother to push it onto the

// expansion stack, only to take it right back off.

if (MI->getNumTokens() == 0) {

@@ -255,7 +260,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,

if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);

}

Identifier.setFlag(Token::LeadingEmptyMacro);

- LastEmptyMacroExpansionLoc = ExpandLoc;

++NumFastMacroExpanded;

return false;

@@ -284,8 +288,8 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,

// Update the tokens location to include both its expansion and physical

// locations.

SourceLocation Loc =

- SourceMgr.createInstantiationLoc(Identifier.getLocation(), ExpandLoc,

- ExpansionEnd,Identifier.getLength());

+ SourceMgr.createExpansionLoc(Identifier.getLocation(), ExpandLoc,

+ ExpansionEnd,Identifier.getLength());

Identifier.setLocation(Loc);

// If this is a disabled macro or #define X X, we must mark the result as

@@ -333,7 +337,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,

// ArgTokens - Build up a list of tokens that make up each argument. Each

// argument is separated by an EOF token. Use a SmallVector so we can avoid

// heap allocations in the common case.

- llvm::SmallVector<Token, 64> ArgTokens;

+ SmallVector<Token, 64> ArgTokens;

unsigned NumActuals = 0;

while (Tok.isNot(tok::r_paren)) {

@@ -352,13 +356,6 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,

// an argument value in a macro could expand to ',' or '(' or ')'.

LexUnexpandedToken(Tok);

- if (Tok.is(tok::code_completion)) {

- if (CodeComplete)

- CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),

- MI, NumActuals);

- LexUnexpandedToken(Tok);

- }

if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n"

Diag(MacroName, diag::err_unterm_macro_invoc);

// Do not lose the EOF/EOD. Return it to the client.

@@ -393,7 +390,15 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,

if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))

if (!MI->isEnabled())

Tok.setFlag(Token::DisableExpand);

+ } else if (Tok.is(tok::code_completion)) {

+ if (CodeComplete)

+ CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),

+ MI, NumActuals);

+ // Don't mark that we reached the code-completion point because the

+ // parser is going to handle the token and there will be another

+ // code-completion callback.

}

ArgTokens.push_back(Tok);

}

@@ -416,8 +421,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,

// Empty arguments are standard in C99 and C++0x, and are supported as an extension in

// other modes.

- if (ArgTokens.size() == ArgTokenStart && !Features.C99 && !Features.CPlusPlus0x)

- Diag(Tok, diag::ext_empty_fnmacro_arg);

+ if (ArgTokens.size() == ArgTokenStart && !Features.C99)

+ Diag(Tok, Features.CPlusPlus0x ?

+ diag::warn_cxx98_compat_empty_fnmacro_arg :

+ diag::ext_empty_fnmacro_arg);

// Add a marker EOF token to the end of the token list for this argument.

Token EOFTok;

@@ -487,8 +494,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,

return 0;

}

- return MacroArgs::create(MI, ArgTokens.data(), ArgTokens.size(),

- isVarargsElided, *this);

+ return MacroArgs::create(MI, ArgTokens, isVarargsElided, *this);

}

/// \brief Keeps macro expanded tokens for TokenLexers.

@@ -497,7 +503,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,

/// going to lex in the cache and when it finishes the tokens are removed

/// from the end of the cache.

Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer,

- llvm::ArrayRef<Token> tokens) {

+ ArrayRef<Token> tokens) {

assert(tokLexer);

if (tokens.empty())

return 0;

@@ -597,34 +603,48 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {

.Case("objc_arc", LangOpts.ObjCAutoRefCount)

.Case("objc_arc_weak", LangOpts.ObjCAutoRefCount &&

LangOpts.ObjCRuntimeHasWeak)

+ .Case("objc_fixed_enum", LangOpts.ObjC2)

+ .Case("objc_instancetype", LangOpts.ObjC2)

.Case("objc_nonfragile_abi", LangOpts.ObjCNonFragileABI)

.Case("objc_weak_class", LangOpts.ObjCNonFragileABI)

.Case("ownership_holds", true)

.Case("ownership_returns", true)

.Case("ownership_takes", true)

// C1X features

+ .Case("c_alignas", LangOpts.C1X)

.Case("c_generic_selections", LangOpts.C1X)

.Case("c_static_assert", LangOpts.C1X)

// C++0x features

.Case("cxx_access_control_sfinae", LangOpts.CPlusPlus0x)

.Case("cxx_alias_templates", LangOpts.CPlusPlus0x)

+ .Case("cxx_alignas", LangOpts.CPlusPlus0x)

.Case("cxx_attributes", LangOpts.CPlusPlus0x)

.Case("cxx_auto_type", LangOpts.CPlusPlus0x)

+ //.Case("cxx_constexpr", false);

.Case("cxx_decltype", LangOpts.CPlusPlus0x)

.Case("cxx_default_function_template_args", LangOpts.CPlusPlus0x)

.Case("cxx_delegating_constructors", LangOpts.CPlusPlus0x)

.Case("cxx_deleted_functions", LangOpts.CPlusPlus0x)

+ .Case("cxx_explicit_conversions", LangOpts.CPlusPlus0x)

+ //.Case("cxx_generalized_initializers", LangOpts.CPlusPlus0x)

+ .Case("cxx_implicit_moves", LangOpts.CPlusPlus0x)

+ //.Case("cxx_inheriting_constructors", false)

.Case("cxx_inline_namespaces", LangOpts.CPlusPlus0x)

//.Case("cxx_lambdas", false)

+ .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus0x)

.Case("cxx_noexcept", LangOpts.CPlusPlus0x)

.Case("cxx_nullptr", LangOpts.CPlusPlus0x)

.Case("cxx_override_control", LangOpts.CPlusPlus0x)

.Case("cxx_range_for", LangOpts.CPlusPlus0x)

+ //.Case("cxx_raw_string_literals", false)

.Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus0x)

.Case("cxx_rvalue_references", LangOpts.CPlusPlus0x)

.Case("cxx_strong_enums", LangOpts.CPlusPlus0x)

.Case("cxx_static_assert", LangOpts.CPlusPlus0x)

.Case("cxx_trailing_return", LangOpts.CPlusPlus0x)

+ //.Case("cxx_unicode_literals", false)

+ //.Case("cxx_unrestricted_unions", false)

+ //.Case("cxx_user_literals", false)

.Case("cxx_variadic_templates", LangOpts.CPlusPlus0x)

// Type traits

.Case("has_nothrow_assign", LangOpts.CPlusPlus)

@@ -639,16 +659,31 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {

.Case("is_base_of", LangOpts.CPlusPlus)

.Case("is_class", LangOpts.CPlusPlus)

.Case("is_convertible_to", LangOpts.CPlusPlus)

- .Case("is_empty", LangOpts.CPlusPlus)

+ // __is_empty is available only if the horrible

+ // "struct __is_empty" parsing hack hasn't been needed in this

+ // translation unit. If it has, __is_empty reverts to a normal

+ // identifier and __has_feature(is_empty) evaluates false.

+ .Case("is_empty",

+ LangOpts.CPlusPlus &&

+ PP.getIdentifierInfo("__is_empty")->getTokenID()

+ != tok::identifier)

.Case("is_enum", LangOpts.CPlusPlus)

.Case("is_literal", LangOpts.CPlusPlus)

.Case("is_standard_layout", LangOpts.CPlusPlus)

- .Case("is_pod", LangOpts.CPlusPlus)

+ // __is_pod is available only if the horrible

+ // "struct __is_pod" parsing hack hasn't been needed in this

+ // translation unit. If it has, __is_pod reverts to a normal

+ // identifier and __has_feature(is_pod) evaluates false.

+ .Case("is_pod",

+ LangOpts.CPlusPlus &&

+ PP.getIdentifierInfo("__is_pod")->getTokenID()

+ != tok::identifier)

.Case("is_polymorphic", LangOpts.CPlusPlus)

.Case("is_trivial", LangOpts.CPlusPlus)

.Case("is_trivially_copyable", LangOpts.CPlusPlus)

.Case("is_union", LangOpts.CPlusPlus)

.Case("tls", PP.getTargetInfo().isTLSSupported())

+ .Case("underlying_type", LangOpts.CPlusPlus)

.Default(false);

}

@@ -661,7 +696,8 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {

// If the use of an extension results in an error diagnostic, extensions are

// effectively unavailable, so just return false here.

- if (PP.getDiagnostics().getExtensionHandlingBehavior()==Diagnostic::Ext_Error)

+ if (PP.getDiagnostics().getExtensionHandlingBehavior() ==

+ DiagnosticsEngine::Ext_Error)

return false;

const LangOptions &LangOpts = PP.getLangOptions();

@@ -670,12 +706,16 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {

// must be less restrictive than HasFeature's.

return llvm::StringSwitch<bool>(II->getName())

// C1X features supported by other languages as extensions.

+ .Case("c_alignas", true)

.Case("c_generic_selections", true)

.Case("c_static_assert", true)

// C++0x features supported by other languages as extensions.

.Case("cxx_deleted_functions", LangOpts.CPlusPlus)

+ .Case("cxx_explicit_conversions", LangOpts.CPlusPlus)

.Case("cxx_inline_namespaces", LangOpts.CPlusPlus)

+ .Case("cxx_nonstatic_member_init", LangOpts.CPlusPlus)

.Case("cxx_override_control", LangOpts.CPlusPlus)

+ .Case("cxx_range_for", LangOpts.CPlusPlus)

.Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus)

.Case("cxx_rvalue_references", LangOpts.CPlusPlus)

.Default(false);

@@ -714,7 +754,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok,

// Reserve a buffer to get the spelling.

llvm::SmallString<128> FilenameBuffer;

- llvm::StringRef Filename;

+ StringRef Filename;

SourceLocation EndLoc;

switch (Tok.getKind()) {

@@ -753,7 +793,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok,

// Search include directories.

const DirectoryLookup *CurDir;

const FileEntry *File =

- PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL);

+ PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL);

// Get the result value. Result = true means the file exists.

bool Result = File != 0;

@@ -837,7 +877,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

// can matter for a function-like macro that expands to contain __LINE__.

// Skip down through expansion points until we find a file loc for the

// end of the expansion history.

- Loc = SourceMgr.getInstantiationRange(Loc).second;

+ Loc = SourceMgr.getExpansionRange(Loc).second;

PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);

// __LINE__ expands to a simple numeric value.

@@ -874,18 +914,18 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

ComputeDATE_TIME(DATELoc, TIMELoc, *this);

Tok.setKind(tok::string_literal);

Tok.setLength(strlen("\"Mmm dd yyyy\""));

- Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(),

- Tok.getLocation(),

- Tok.getLength()));

+ Tok.setLocation(SourceMgr.createExpansionLoc(DATELoc, Tok.getLocation(),

+ Tok.getLocation(),

+ Tok.getLength()));

return;

} else if (II == Ident__TIME__) {

if (!TIMELoc.isValid())

ComputeDATE_TIME(DATELoc, TIMELoc, *this);

Tok.setKind(tok::string_literal);

Tok.setLength(strlen("\"hh:mm:ss\""));

- Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(),

- Tok.getLocation(),

- Tok.getLength()));

+ Tok.setLocation(SourceMgr.createExpansionLoc(TIMELoc, Tok.getLocation(),

+ Tok.getLocation(),

+ Tok.getLength()));

return;

} else if (II == Ident__INCLUDE_LEVEL__) {

// Compute the presumed include depth of this token. This can be affected

@@ -923,7 +963,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

Result = "??? ??? ?? ??:??:?? ????\n";

}

// Surround the string with " and strip the trailing newline.

- OS << '"' << llvm::StringRef(Result, strlen(Result)-1) << '"';

+ OS << '"' << StringRef(Result, strlen(Result)-1) << '"';

Tok.setKind(tok::string_literal);

} else if (II == Ident__COUNTER__) {

// __COUNTER__ expands to a simple numeric value.

@@ -983,10 +1023,78 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

Value = EvaluateHasIncludeNext(Tok, II, *this);

OS << (int)Value;

Tok.setKind(tok::numeric_constant);

+ } else if (II == Ident__has_warning) {

+ // The argument should be a parenthesized string literal.

+ // (Unlike the feature-check builtins, it is not a parenthesized identifier.)

+ SourceLocation StartLoc = Tok.getLocation();

+ bool IsValid = false;

+ bool Value = false;

+ // Read the '('.

+ Lex(Tok);

+ do {

+ if (Tok.is(tok::l_paren)) {

+ // Read the string.

+ Lex(Tok);

+ // We need at least one string literal.

+ if (!Tok.is(tok::string_literal)) {

+ StartLoc = Tok.getLocation();

+ IsValid = false;

+ // Eat tokens until ')'.

+ do Lex(Tok); while (!(Tok.is(tok::r_paren) || Tok.is(tok::eod)));

+ break;

+ }

+ // String concatenation allows multiple strings, which can even come

+ // from macro expansion.

+ SmallVector<Token, 4> StrToks;

+ while (Tok.is(tok::string_literal)) {

+ StrToks.push_back(Tok);

+ LexUnexpandedToken(Tok);

+ }

+ // Is the end a ')'?

+ if (!(IsValid = Tok.is(tok::r_paren)))

+ break;

+ // Concatenate and parse the strings.

+ StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);

+ assert(Literal.isAscii() && "Didn't allow wide strings in");

+ if (Literal.hadError)

+ break;

+ if (Literal.Pascal) {

+ Diag(Tok, diag::warn_pragma_diagnostic_invalid);

+ break;

+ }

+ StringRef WarningName(Literal.GetString());

+ if (WarningName.size() < 3 || WarningName[0] != '-' ||

+ WarningName[1] != 'W') {

+ Diag(StrToks[0].getLocation(), diag::warn_has_warning_invalid_option);

+ break;

+ }

+ // Finally, check if the warning flag maps to a diagnostic group.

+ // We construct a SmallVector here to talk to getDiagnosticIDs().

+ // Although we don't use the result, this isn't a hot path, and not

+ // worth special casing.

+ llvm::SmallVector<diag::kind, 10> Diags;

+ Value = !getDiagnostics().getDiagnosticIDs()->

+ getDiagnosticsInGroup(WarningName.substr(2), Diags);

+ }

+ } while (false);

+ if (!IsValid)

+ Diag(StartLoc, diag::err_warning_check_malformed);

+ OS << (int)Value;

+ Tok.setKind(tok::numeric_constant);

} else {

- assert(0 && "Unknown identifier!");

+ llvm_unreachable("Unknown identifier!");

}

- CreateString(OS.str().data(), OS.str().size(), Tok, Tok.getLocation());

+ CreateString(OS.str().data(), OS.str().size(), Tok,

+ Tok.getLocation(), Tok.getLocation());

}

void Preprocessor::markMacroAsUsed(MacroInfo *MI) {

diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index e5ef0fdf20eb..e0c4cf0c16c8 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp

@@ -73,7 +73,7 @@ LexNextToken:

Tok.setKind(TKind);

Tok.setFlag(TFlags);

assert(!LexingRawMode);

- Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));

+ Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));

Tok.setLength(Len);

// Handle identifiers.

@@ -147,7 +147,7 @@ bool PTHLexer::LexEndOfFile(Token &Result) {

// If we are in a #if directive, emit an error.

while (!ConditionalStack.empty()) {

- if (!PP->isCodeCompletionFile(FileStartLoc))

+ if (PP->getCodeCompletionFileLoc() != FileStartLoc)

PP->Diag(ConditionalStack.back().IfLoc,

diag::err_pp_unterminated_conditional);

ConditionalStack.pop_back();

@@ -297,7 +297,7 @@ SourceLocation PTHLexer::getSourceLocation() {

// NOTE: This is a virtual function; hence it is defined out-of-line.

const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);

uint32_t Offset = ReadLE32(OffsetPtr);

- return FileStartLoc.getFileLocWithOffset(Offset);

+ return FileStartLoc.getLocWithOffset(Offset);

}

//===----------------------------------------------------------------------===//

@@ -380,7 +380,7 @@ public:

}

static unsigned ComputeHash(const internal_key_type& a) {

- return llvm::HashString(llvm::StringRef(a.first, a.second));

+ return llvm::HashString(StringRef(a.first, a.second));

}

// This hopefully will just get inlined and removed by the optimizer.

@@ -431,11 +431,12 @@ PTHManager::~PTHManager() {

free(PerIDCache);

}

-static void InvalidPTH(Diagnostic &Diags, const char *Msg) {

- Diags.Report(Diags.getCustomDiagID(Diagnostic::Error, Msg));

+static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {

+ Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, Msg));

}

-PTHManager *PTHManager::Create(const std::string &file, Diagnostic &Diags) {

+PTHManager *PTHManager::Create(const std::string &file,

+ DiagnosticsEngine &Diags) {

// Memory map the PTH file.

llvm::OwningPtr<llvm::MemoryBuffer> File;

@@ -572,10 +573,10 @@ IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {

return II;

}

-IdentifierInfo* PTHManager::get(llvm::StringRef Name) {

+IdentifierInfo* PTHManager::get(StringRef Name) {

PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);

// Double check our assumption that the last character isn't '\0'.

- assert(Name.empty() || Name.data()[Name.size()-1] != '\0');

+ assert(Name.empty() || Name.back() != '\0');

PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),

Name.size()));

if (I == SL.end()) // No identifier found?

diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index e6b28c13317b..f6532c2175a1 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp

@@ -54,11 +54,11 @@ PragmaNamespace::~PragmaNamespace() {

/// specified name. If not, return the handler for the null identifier if it

/// exists, otherwise return null. If IgnoreNull is true (the default) then

/// the null handler isn't returned on failure to match.

-PragmaHandler *PragmaNamespace::FindHandler(llvm::StringRef Name,

+PragmaHandler *PragmaNamespace::FindHandler(StringRef Name,

bool IgnoreNull) const {

if (PragmaHandler *Handler = Handlers.lookup(Name))

return Handler;

- return IgnoreNull ? 0 : Handlers.lookup(llvm::StringRef());

+ return IgnoreNull ? 0 : Handlers.lookup(StringRef());

}

void PragmaNamespace::AddPragma(PragmaHandler *Handler) {

@@ -85,7 +85,7 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP,

// Get the handler for this token. If there is no handler, ignore the pragma.

PragmaHandler *Handler

= FindHandler(Tok.getIdentifierInfo() ? Tok.getIdentifierInfo()->getName()

- : llvm::StringRef(),

+ : StringRef(),

/*IgnoreNull=*/false);

if (Handler == 0) {

PP.Diag(Tok, diag::warn_pragma_ignored);

@@ -210,7 +210,7 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) {

}

// Get the tokens enclosed within the __pragma(), as well as the final ')'.

- llvm::SmallVector<Token, 32> PragmaToks;

+ SmallVector<Token, 32> PragmaToks;

int NumParens = 0;

Lex(Tok);

while (Tok.isNot(tok::eof)) {

@@ -353,7 +353,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {

// Reserve a buffer to get the spelling.

llvm::SmallString<128> FilenameBuffer;

bool Invalid = false;

- llvm::StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);

+ StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);

if (Invalid)

return;

@@ -366,9 +366,11 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {

// Search include directories for this file.

const DirectoryLookup *CurDir;

- const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL);

+ const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir, NULL, NULL,

+ NULL);

if (File == 0) {

- Diag(FilenameTok, diag::warn_pp_file_not_found) << Filename;

+ if (!SuppressIncludeNotFoundError)

+ Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;

return;

}

@@ -436,7 +438,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {

// String concatenation allows multiple strings, which can even come from

// macro expansion.

// "foo " "bar" "Baz"

- llvm::SmallVector<Token, 4> StrToks;

+ SmallVector<Token, 4> StrToks;

while (Tok.is(tok::string_literal)) {

StrToks.push_back(Tok);

Lex(Tok);

@@ -444,7 +446,7 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {

// Concatenate and parse the strings.

StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);

- assert(!Literal.AnyWide && "Didn't allow wide strings in");

+ assert(Literal.isAscii() && "Didn't allow wide strings in");

if (Literal.hadError)

return;

if (Literal.Pascal) {

@@ -512,7 +514,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {

// String concatenation allows multiple strings, which can even come from

// macro expansion.

// "foo " "bar" "Baz"

- llvm::SmallVector<Token, 4> StrToks;

+ SmallVector<Token, 4> StrToks;

while (Tok.is(tok::string_literal)) {

StrToks.push_back(Tok);

Lex(Tok);

@@ -520,7 +522,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {

// Concatenate and parse the strings.

StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);

- assert(!Literal.AnyWide && "Didn't allow wide strings in");

+ assert(Literal.isAscii() && "Didn't allow wide strings in");

if (Literal.hadError)

return;

if (Literal.Pascal) {

@@ -528,7 +530,7 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) {

return;

}

- llvm::StringRef MessageString(Literal.GetString());

+ StringRef MessageString(Literal.GetString());

if (ExpectClosingParen) {

if (Tok.isNot(tok::r_paren)) {

@@ -662,7 +664,7 @@ void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {

/// AddPragmaHandler - Add the specified pragma handler to the preprocessor.

/// If 'Namespace' is non-null, then it is a token required to exist on the

/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".

-void Preprocessor::AddPragmaHandler(llvm::StringRef Namespace,

+void Preprocessor::AddPragmaHandler(StringRef Namespace,

PragmaHandler *Handler) {

PragmaNamespace *InsertNS = PragmaHandlers;

@@ -693,7 +695,7 @@ void Preprocessor::AddPragmaHandler(llvm::StringRef Namespace,

/// preprocessor. If \arg Namespace is non-null, then it should be the

/// namespace that \arg Handler was added to. It is an error to remove

/// a handler that has not been registered.

-void Preprocessor::RemovePragmaHandler(llvm::StringRef Namespace,

+void Preprocessor::RemovePragmaHandler(StringRef Namespace,

PragmaHandler *Handler) {

PragmaNamespace *NS = PragmaHandlers;

@@ -802,7 +804,7 @@ struct PragmaDebugHandler : public PragmaHandler {

IdentifierInfo *II = Tok.getIdentifierInfo();

if (II->isStr("assert")) {

- assert(0 && "This is an assertion!");

+ llvm_unreachable("This is an assertion!");

} else if (II->isStr("crash")) {

*(volatile int*) 0x11 = 0;

} else if (II->isStr("llvm_fatal_error")) {

@@ -889,7 +891,7 @@ public:

// String concatenation allows multiple strings, which can even come from

// macro expansion.

// "foo " "bar" "Baz"

- llvm::SmallVector<Token, 4> StrToks;

+ SmallVector<Token, 4> StrToks;

while (Tok.is(tok::string_literal)) {

StrToks.push_back(Tok);

PP.LexUnexpandedToken(Tok);

@@ -902,7 +904,7 @@ public:

// Concatenate and parse the strings.

StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);

- assert(!Literal.AnyWide && "Didn't allow wide strings in");

+ assert(Literal.isAscii() && "Didn't allow wide strings in");

if (Literal.hadError)

return;

if (Literal.Pascal) {

@@ -910,7 +912,7 @@ public:

return;

}

- llvm::StringRef WarningName(Literal.GetString());

+ StringRef WarningName(Literal.GetString());

if (WarningName.size() < 3 || WarningName[0] != '-' ||

WarningName[1] != 'W') {

@@ -1003,6 +1005,60 @@ struct PragmaSTDC_UnknownHandler : public PragmaHandler {

}

};

+/// PragmaARCCFCodeAuditedHandler -

+/// #pragma clang arc_cf_code_audited begin/end

+struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {

+ PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {}

+ virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,

+ Token &NameTok) {

+ SourceLocation Loc = NameTok.getLocation();

+ bool IsBegin;

+ Token Tok;

+ // Lex the 'begin' or 'end'.

+ PP.LexUnexpandedToken(Tok);

+ const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();

+ if (BeginEnd && BeginEnd->isStr("begin")) {

+ IsBegin = true;

+ } else if (BeginEnd && BeginEnd->isStr("end")) {

+ IsBegin = false;

+ } else {

+ PP.Diag(Tok.getLocation(), diag::err_pp_arc_cf_code_audited_syntax);

+ return;

+ }

+ // Verify that this is followed by EOD.

+ PP.LexUnexpandedToken(Tok);

+ if (Tok.isNot(tok::eod))

+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";

+ // The start location of the active audit.

+ SourceLocation BeginLoc = PP.getPragmaARCCFCodeAuditedLoc();

+ // The start location we want after processing this.

+ SourceLocation NewLoc;

+ if (IsBegin) {

+ // Complain about attempts to re-enter an audit.

+ if (BeginLoc.isValid()) {

+ PP.Diag(Loc, diag::err_pp_double_begin_of_arc_cf_code_audited);

+ PP.Diag(BeginLoc, diag::note_pragma_entered_here);

+ }

+ NewLoc = Loc;

+ } else {

+ // Complain about attempts to leave an audit that doesn't exist.

+ if (!BeginLoc.isValid()) {

+ PP.Diag(Loc, diag::err_pp_unmatched_end_of_arc_cf_code_audited);

+ return;

+ }

+ NewLoc = SourceLocation();

+ }

+ PP.setPragmaARCCFCodeAuditedLoc(NewLoc);

+ }

+};

} // end anonymous namespace

@@ -1026,13 +1082,14 @@ void Preprocessor::RegisterBuiltinPragmas() {

AddPragmaHandler("clang", new PragmaDebugHandler());

AddPragmaHandler("clang", new PragmaDependencyHandler());

AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang"));

+ AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler());

AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler());

AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler());

AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler());

// MS extensions.

- if (Features.Microsoft) {

+ if (Features.MicrosoftExt) {

AddPragmaHandler(new PragmaCommentHandler());

}

diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index 9f93ab04502a..2816609d5f8f 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp

@@ -14,8 +14,8 @@

#include "clang/Lex/PreprocessingRecord.h"

#include "clang/Lex/MacroInfo.h"

#include "clang/Lex/Token.h"

-#include "clang/Basic/IdentifierTable.h"

#include "llvm/Support/ErrorHandling.h"

+#include "llvm/Support/Capacity.h"

using namespace clang;

@@ -24,7 +24,7 @@ ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() { }

InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,

InclusionKind Kind,

- llvm::StringRef FileName,

+ StringRef FileName,

bool InQuotes, const FileEntry *File,

SourceRange Range)

: PreprocessingDirective(InclusionDirectiveKind, Range),

@@ -34,116 +34,254 @@ InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,

= (char*)PPRec.Allocate(FileName.size() + 1, llvm::alignOf<char>());

memcpy(Memory, FileName.data(), FileName.size());

Memory[FileName.size()] = 0;

- this->FileName = llvm::StringRef(Memory, FileName.size());

+ this->FileName = StringRef(Memory, FileName.size());

}

-void PreprocessingRecord::MaybeLoadPreallocatedEntities() const {

- if (!ExternalSource || LoadedPreallocatedEntities)

- return;

- LoadedPreallocatedEntities = true;

- ExternalSource->ReadPreprocessedEntities();

+PreprocessingRecord::PreprocessingRecord(SourceManager &SM,

+ bool IncludeNestedMacroExpansions)

+ : SourceMgr(SM), IncludeNestedMacroExpansions(IncludeNestedMacroExpansions),

+ ExternalSource(0)

}

-PreprocessingRecord::PreprocessingRecord(bool IncludeNestedMacroExpansions)

- : IncludeNestedMacroExpansions(IncludeNestedMacroExpansions),

- ExternalSource(0), NumPreallocatedEntities(0),

- LoadedPreallocatedEntities(false)

+/// \brief Returns a pair of [Begin, End) iterators of preprocessed entities

+/// that source range \arg R encompasses.

+std::pair<PreprocessingRecord::iterator, PreprocessingRecord::iterator>

+PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) {

+ if (Range.isInvalid())

+ return std::make_pair(iterator(this, 0), iterator(this, 0));

+ assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));

+ std::pair<unsigned, unsigned>

+ Local = findLocalPreprocessedEntitiesInRange(Range);

+ // Check if range spans local entities.

+ if (!ExternalSource || SourceMgr.isLocalSourceLocation(Range.getBegin()))

+ return std::make_pair(iterator(this, Local.first),

+ iterator(this, Local.second));

+ std::pair<unsigned, unsigned>

+ Loaded = ExternalSource->findPreprocessedEntitiesInRange(Range);

+ // Check if range spans local entities.

+ if (Loaded.first == Loaded.second)

+ return std::make_pair(iterator(this, Local.first),

+ iterator(this, Local.second));

+ unsigned TotalLoaded = LoadedPreprocessedEntities.size();

+ // Check if range spans loaded entities.

+ if (Local.first == Local.second)

+ return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded),

+ iterator(this, int(Loaded.second)-TotalLoaded));

+ // Range spans loaded and local entities.

+ return std::make_pair(iterator(this, int(Loaded.first)-TotalLoaded),

+ iterator(this, Local.second));

}

-PreprocessingRecord::iterator

-PreprocessingRecord::begin(bool OnlyLocalEntities) {

- if (OnlyLocalEntities)

- return PreprocessedEntities.begin() + NumPreallocatedEntities;

- MaybeLoadPreallocatedEntities();

- return PreprocessedEntities.begin();

+std::pair<unsigned, unsigned>

+PreprocessingRecord::findLocalPreprocessedEntitiesInRange(

+ SourceRange Range) const {

+ if (Range.isInvalid())

+ return std::make_pair(0,0);

+ assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));

+ unsigned Begin = findBeginLocalPreprocessedEntity(Range.getBegin());

+ unsigned End = findEndLocalPreprocessedEntity(Range.getEnd());

+ return std::make_pair(Begin, End);

}

-PreprocessingRecord::iterator PreprocessingRecord::end(bool OnlyLocalEntities) {

- if (!OnlyLocalEntities)

- MaybeLoadPreallocatedEntities();

- return PreprocessedEntities.end();

+namespace {

+template <SourceLocation (SourceRange::*getRangeLoc)() const>

+struct PPEntityComp {

+ const SourceManager &SM;

+ explicit PPEntityComp(const SourceManager &SM) : SM(SM) { }

+ bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const {

+ SourceLocation LHS = getLoc(L);

+ SourceLocation RHS = getLoc(R);

+ return SM.isBeforeInTranslationUnit(LHS, RHS);

+ }

+ bool operator()(PreprocessedEntity *L, SourceLocation RHS) const {

+ SourceLocation LHS = getLoc(L);

+ return SM.isBeforeInTranslationUnit(LHS, RHS);

+ }

+ bool operator()(SourceLocation LHS, PreprocessedEntity *R) const {

+ SourceLocation RHS = getLoc(R);

+ return SM.isBeforeInTranslationUnit(LHS, RHS);

+ }

+ SourceLocation getLoc(PreprocessedEntity *PPE) const {

+ SourceRange Range = PPE->getSourceRange();

+ return (Range.*getRangeLoc)();

+ }

+};

}

-PreprocessingRecord::const_iterator

-PreprocessingRecord::begin(bool OnlyLocalEntities) const {

- if (OnlyLocalEntities)

- return PreprocessedEntities.begin() + NumPreallocatedEntities;

- MaybeLoadPreallocatedEntities();

- return PreprocessedEntities.begin();

+unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity(

+ SourceLocation Loc) const {

+ if (SourceMgr.isLoadedSourceLocation(Loc))

+ return 0;

+ size_t Count = PreprocessedEntities.size();

+ size_t Half;

+ std::vector<PreprocessedEntity *>::const_iterator

+ First = PreprocessedEntities.begin();

+ std::vector<PreprocessedEntity *>::const_iterator I;

+ // Do a binary search manually instead of using std::lower_bound because

+ // The end locations of entities may be unordered (when a macro expansion

+ // is inside another macro argument), but for this case it is not important

+ // whether we get the first macro expansion or its containing macro.

+ while (Count > 0) {

+ Half = Count/2;

+ I = First;

+ std::advance(I, Half);

+ if (SourceMgr.isBeforeInTranslationUnit((*I)->getSourceRange().getEnd(),

+ Loc)){

+ First = I;

+ ++First;

+ Count = Count - Half - 1;

+ } else

+ Count = Half;

+ }

+ return First - PreprocessedEntities.begin();

}

-PreprocessingRecord::const_iterator

-PreprocessingRecord::end(bool OnlyLocalEntities) const {

- if (!OnlyLocalEntities)

- MaybeLoadPreallocatedEntities();

- return PreprocessedEntities.end();

+unsigned PreprocessingRecord::findEndLocalPreprocessedEntity(

+ SourceLocation Loc) const {

+ if (SourceMgr.isLoadedSourceLocation(Loc))

+ return 0;

+ std::vector<PreprocessedEntity *>::const_iterator

+ I = std::upper_bound(PreprocessedEntities.begin(),

+ PreprocessedEntities.end(),

+ Loc,

+ PPEntityComp<&SourceRange::getBegin>(SourceMgr));

+ return I - PreprocessedEntities.begin();

}

void PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {

- PreprocessedEntities.push_back(Entity);

+ assert(Entity);

+ SourceLocation BeginLoc = Entity->getSourceRange().getBegin();

+ // Check normal case, this entity begin location is after the previous one.

+ if (PreprocessedEntities.empty() ||

+ !SourceMgr.isBeforeInTranslationUnit(BeginLoc,

+ PreprocessedEntities.back()->getSourceRange().getBegin())) {

+ PreprocessedEntities.push_back(Entity);

+ return;

+ }

+ // The entity's location is not after the previous one; this can happen rarely

+ // e.g. with "#include MACRO".

+ // Iterate the entities vector in reverse until we find the right place to

+ // insert the new entity.

+ for (std::vector<PreprocessedEntity *>::iterator

+ RI = PreprocessedEntities.end(), Begin = PreprocessedEntities.begin();

+ RI != Begin; --RI) {

+ std::vector<PreprocessedEntity *>::iterator I = RI;

+ --I;

+ if (!SourceMgr.isBeforeInTranslationUnit(BeginLoc,

+ (*I)->getSourceRange().getBegin())) {

+ PreprocessedEntities.insert(RI, Entity);

+ return;

+ }

}

void PreprocessingRecord::SetExternalSource(

- ExternalPreprocessingRecordSource &Source,

- unsigned NumPreallocatedEntities) {

+ ExternalPreprocessingRecordSource &Source) {

assert(!ExternalSource &&

"Preprocessing record already has an external source");

ExternalSource = &Source;

- this->NumPreallocatedEntities = NumPreallocatedEntities;

- PreprocessedEntities.insert(PreprocessedEntities.begin(),

- NumPreallocatedEntities, 0);

}

-void PreprocessingRecord::SetPreallocatedEntity(unsigned Index,

- PreprocessedEntity *Entity) {

- assert(Index < NumPreallocatedEntities &&"Out-of-bounds preallocated entity");

- PreprocessedEntities[Index] = Entity;

+unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) {

+ unsigned Result = LoadedPreprocessedEntities.size();

+ LoadedPreprocessedEntities.resize(LoadedPreprocessedEntities.size()

+ + NumEntities);

+ return Result;

+void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,

+ PPEntityID PPID) {

+ MacroDefinitions[Macro] = PPID;

+/// \brief Retrieve the preprocessed entity at the given ID.

+PreprocessedEntity *PreprocessingRecord::getPreprocessedEntity(PPEntityID PPID){

+ if (PPID < 0) {

+ assert(unsigned(-PPID-1) < LoadedPreprocessedEntities.size() &&

+ "Out-of bounds loaded preprocessed entity");

+ return getLoadedPreprocessedEntity(LoadedPreprocessedEntities.size()+PPID);

+ }

+ assert(unsigned(PPID) < PreprocessedEntities.size() &&

+ "Out-of bounds local preprocessed entity");

+ return PreprocessedEntities[PPID];

}

-void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,

- MacroDefinition *MD) {

- MacroDefinitions[Macro] = MD;

+/// \brief Retrieve the loaded preprocessed entity at the given index.

+PreprocessedEntity *

+PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) {

+ assert(Index < LoadedPreprocessedEntities.size() &&

+ "Out-of bounds loaded preprocessed entity");

+ assert(ExternalSource && "No external source to load from");

+ PreprocessedEntity *&Entity = LoadedPreprocessedEntities[Index];

+ if (!Entity) {

+ Entity = ExternalSource->ReadPreprocessedEntity(Index);

+ if (!Entity) // Failed to load.

+ Entity = new (*this)

+ PreprocessedEntity(PreprocessedEntity::InvalidKind, SourceRange());

+ }

+ return Entity;

}

MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {

- llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos

+ llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos

= MacroDefinitions.find(MI);

if (Pos == MacroDefinitions.end())

return 0;

- return Pos->second;

+ PreprocessedEntity *Entity = getPreprocessedEntity(Pos->second);

+ if (Entity->isInvalid())

+ return 0;

+ return cast<MacroDefinition>(Entity);

}

-void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI) {

+void PreprocessingRecord::MacroExpands(const Token &Id, const MacroInfo* MI,

+ SourceRange Range) {

if (!IncludeNestedMacroExpansions && Id.getLocation().isMacroID())

return;

- if (MacroDefinition *Def = findMacroDefinition(MI))

- PreprocessedEntities.push_back(

- new (*this) MacroExpansion(Id.getIdentifierInfo(),

- Id.getLocation(), Def));

+ if (MI->isBuiltinMacro())

+ addPreprocessedEntity(

+ new (*this) MacroExpansion(Id.getIdentifierInfo(),Range));

+ else if (MacroDefinition *Def = findMacroDefinition(MI))

+ addPreprocessedEntity(

+ new (*this) MacroExpansion(Def, Range));

}

void PreprocessingRecord::MacroDefined(const Token &Id,

const MacroInfo *MI) {

SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());

MacroDefinition *Def

- = new (*this) MacroDefinition(Id.getIdentifierInfo(),

- MI->getDefinitionLoc(),

- R);

- MacroDefinitions[MI] = Def;

- PreprocessedEntities.push_back(Def);

+ = new (*this) MacroDefinition(Id.getIdentifierInfo(), R);

+ addPreprocessedEntity(Def);

+ MacroDefinitions[MI] = getPPEntityID(PreprocessedEntities.size()-1,

+ /*isLoaded=*/false);

}

void PreprocessingRecord::MacroUndefined(const Token &Id,

const MacroInfo *MI) {

- llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos

+ llvm::DenseMap<const MacroInfo *, PPEntityID>::iterator Pos

= MacroDefinitions.find(MI);

if (Pos != MacroDefinitions.end())

MacroDefinitions.erase(Pos);

@@ -152,12 +290,12 @@ void PreprocessingRecord::MacroUndefined(const Token &Id,

void PreprocessingRecord::InclusionDirective(

SourceLocation HashLoc,

const clang::Token &IncludeTok,

- llvm::StringRef FileName,

+ StringRef FileName,

bool IsAngled,

const FileEntry *File,

clang::SourceLocation EndLoc,

- llvm::StringRef SearchPath,

- llvm::StringRef RelativePath) {

+ StringRef SearchPath,

+ StringRef RelativePath) {

InclusionDirective::InclusionKind Kind = InclusionDirective::Include;

switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {

@@ -185,5 +323,12 @@ void PreprocessingRecord::InclusionDirective(

clang::InclusionDirective *ID

= new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled,

File, SourceRange(HashLoc, EndLoc));

- PreprocessedEntities.push_back(ID);

+ addPreprocessedEntity(ID);

+size_t PreprocessingRecord::getTotalMemory() const {

+ return BumpAlloc.getTotalMemory()

+ + llvm::capacity_in_bytes(MacroDefinitions)

+ + llvm::capacity_in_bytes(PreprocessedEntities)

+ + llvm::capacity_in_bytes(LoadedPreprocessedEntities);

}

diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index e7aa286a16bf..31662ad0c116 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp

@@ -35,6 +35,7 @@

#include "clang/Lex/ScratchBuffer.h"

#include "clang/Lex/LexDiagnostic.h"

#include "clang/Lex/CodeCompletionHandler.h"

+#include "clang/Lex/ModuleLoader.h"

#include "clang/Basic/SourceManager.h"

#include "clang/Basic/FileManager.h"

#include "clang/Basic/TargetInfo.h"

@@ -42,27 +43,83 @@

#include "llvm/ADT/SmallVector.h"

#include "llvm/Support/MemoryBuffer.h"

#include "llvm/Support/raw_ostream.h"

+#include "llvm/Support/Capacity.h"

using namespace clang;

//===----------------------------------------------------------------------===//

ExternalPreprocessorSource::~ExternalPreprocessorSource() { }

-Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,

- const TargetInfo &target, SourceManager &SM,

- HeaderSearch &Headers,

+Preprocessor::Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,

+ const TargetInfo *target, SourceManager &SM,

+ HeaderSearch &Headers, ModuleLoader &TheModuleLoader,

IdentifierInfoLookup* IILookup,

- bool OwnsHeaders)

+ bool OwnsHeaders,

+ bool DelayInitialization)

: Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()),

- SourceMgr(SM),

- HeaderInfo(Headers), ExternalSource(0),

- Identifiers(opts, IILookup), BuiltinInfo(Target), CodeComplete(0),

- CodeCompletionFile(0), SkipMainFilePreamble(0, true), CurPPLexer(0),

- CurDirLookup(0), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0),

- MICache(0) {

- ScratchBuf = new ScratchBuffer(SourceMgr);

- CounterValue = 0; // __COUNTER__ starts at 0.

+ SourceMgr(SM), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),

+ ExternalSource(0),

+ Identifiers(opts, IILookup), CodeComplete(0),

+ CodeCompletionFile(0), CodeCompletionOffset(0), CodeCompletionReached(0),

+ SkipMainFilePreamble(0, true), CurPPLexer(0),

+ CurDirLookup(0), CurLexerKind(CLK_Lexer), Callbacks(0), MacroArgCache(0),

+ Record(0), MIChainHead(0), MICache(0)

OwnsHeaderSearch = OwnsHeaders;

+ if (!DelayInitialization) {

+ assert(Target && "Must provide target information for PP initialization");

+ Initialize(*Target);

+ }

+Preprocessor::~Preprocessor() {

+ assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

+ assert(((MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty()) ||

+ isCodeCompletionReached()) &&

+ "Preprocessor::HandleEndOfTokenLexer should have cleared those");

+ while (!IncludeMacroStack.empty()) {

+ delete IncludeMacroStack.back().TheLexer;

+ delete IncludeMacroStack.back().TheTokenLexer;

+ IncludeMacroStack.pop_back();

+ }

+ // Free any macro definitions.

+ for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next)

+ I->MI.Destroy();

+ // Free any cached macro expanders.

+ for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)

+ delete TokenLexerCache[i];

+ // Free any cached MacroArgs.

+ for (MacroArgs *ArgList = MacroArgCache; ArgList; )

+ ArgList = ArgList->deallocate();

+ // Release pragma information.

+ delete PragmaHandlers;

+ // Delete the scratch buffer info.

+ delete ScratchBuf;

+ // Delete the header search info, if we own it.

+ if (OwnsHeaderSearch)

+ delete &HeaderInfo;

+ delete Callbacks;

+void Preprocessor::Initialize(const TargetInfo &Target) {

+ assert((!this->Target || this->Target == &Target) &&

+ "Invalid override of target information");

+ this->Target = &Target;

+ // Initialize information about built-ins.

+ BuiltinInfo.InitializeTarget(Target);

+ ScratchBuf = new ScratchBuffer(SourceMgr);

+ CounterValue = 0; // __COUNTER__ starts at 0.

// Clear stats.

NumDirectives = NumDefined = NumUndefined = NumPragma = 0;

NumIf = NumElse = NumEndif = 0;

@@ -71,33 +128,35 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,

NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;

MaxIncludeStackDepth = 0;

NumSkipped = 0;

// Default to discarding comments.

KeepComments = false;

KeepMacroComments = false;

+ SuppressIncludeNotFoundError = false;

+ AutoModuleImport = false;

// Macro expansion is enabled.

DisableMacroExpansion = false;

InMacroArgs = false;

NumCachedTokenLexers = 0;

CachedLexPos = 0;

// We haven't read anything from the external source.

ReadMacrosFromExternalSource = false;

// "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.

// This gets unpoisoned where it is allowed.

(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();

SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);

// Initialize the pragma handlers.

- PragmaHandlers = new PragmaNamespace(llvm::StringRef());

+ PragmaHandlers = new PragmaNamespace(StringRef());

RegisterBuiltinPragmas();

// Initialize builtin macros like __LINE__ and friends.

RegisterBuiltinMacros();

if(Features.Borland) {

Ident__exception_info = getIdentifierInfo("_exception_info");

Ident___exception_info = getIdentifierInfo("__exception_info");

@@ -112,44 +171,7 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,

Ident__exception_info = Ident__exception_code = Ident__abnormal_termination = 0;

Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0;

Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0;

- }

-Preprocessor::~Preprocessor() {

- assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

- assert(MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty() &&

- "Preprocessor::HandleEndOfTokenLexer should have cleared those");

- while (!IncludeMacroStack.empty()) {

- delete IncludeMacroStack.back().TheLexer;

- delete IncludeMacroStack.back().TheTokenLexer;

- IncludeMacroStack.pop_back();

- }

- // Free any macro definitions.

- for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next)

- I->MI.Destroy();

- // Free any cached macro expanders.

- for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)

- delete TokenLexerCache[i];

- // Free any cached MacroArgs.

- for (MacroArgs *ArgList = MacroArgCache; ArgList; )

- ArgList = ArgList->deallocate();

- // Release pragma information.

- delete PragmaHandlers;

- // Delete the scratch buffer info.

- delete ScratchBuf;

- // Delete the header search info, if we own it.

- if (OwnsHeaderSearch)

- delete &HeaderInfo;

- delete Callbacks;

+ }

}

void Preprocessor::setPTHManager(PTHManager* pm) {

@@ -172,7 +194,7 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {

llvm::errs() << " [ExpandDisabled]";

if (Tok.needsCleaning()) {

const char *Start = SourceMgr.getCharacterData(Tok.getLocation());

- llvm::errs() << " [UnClean='" << llvm::StringRef(Start, Tok.getLength())

+ llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())

<< "']";

}

@@ -228,7 +250,13 @@ Preprocessor::macro_begin(bool IncludeExternalMacros) const {

}

size_t Preprocessor::getTotalMemory() const {

- return BP.getTotalMemory() + MacroExpandedTokens.capacity()*sizeof(Token);

+ return BP.getTotalMemory()

+ + llvm::capacity_in_bytes(MacroExpandedTokens)

+ + Predefines.capacity() /* Predefines buffer. */

+ + llvm::capacity_in_bytes(Macros)

+ + llvm::capacity_in_bytes(PragmaPushMacroInfo)

+ + llvm::capacity_in_bytes(PoisonReasons)

+ + llvm::capacity_in_bytes(CommentHandlers);

}

Preprocessor::macro_iterator

@@ -243,15 +271,13 @@ Preprocessor::macro_end(bool IncludeExternalMacros) const {

}

bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,

- unsigned TruncateAtLine,

- unsigned TruncateAtColumn) {

- using llvm::MemoryBuffer;

- CodeCompletionFile = File;

+ unsigned CompleteLine,

+ unsigned CompleteColumn) {

+ assert(File);

+ assert(CompleteLine && CompleteColumn && "Starts from 1:1");

+ assert(!CodeCompletionFile && "Already set");

- // Okay to clear out the code-completion point by passing NULL.

- if (!CodeCompletionFile)

- return false;

+ using llvm::MemoryBuffer;

// Load the actual file's contents.

bool Invalid = false;

@@ -261,7 +287,7 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,

// Find the byte position of the truncation point.

const char *Position = Buffer->getBufferStart();

- for (unsigned Line = 1; Line < TruncateAtLine; ++Line) {

+ for (unsigned Line = 1; Line < CompleteLine; ++Line) {

for (; *Position; ++Position) {

if (*Position != '\r' && *Position != '\n')

continue;

@@ -275,38 +301,37 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,

}

- Position += TruncateAtColumn - 1;

+ Position += CompleteColumn - 1;

- // Truncate the buffer.

+ // Insert '\0' at the code-completion point.

if (Position < Buffer->getBufferEnd()) {

- llvm::StringRef Data(Buffer->getBufferStart(),

- Position-Buffer->getBufferStart());

- MemoryBuffer *TruncatedBuffer

- = MemoryBuffer::getMemBufferCopy(Data, Buffer->getBufferIdentifier());

- SourceMgr.overrideFileContents(File, TruncatedBuffer);

+ CodeCompletionFile = File;

+ CodeCompletionOffset = Position - Buffer->getBufferStart();

+ MemoryBuffer *NewBuffer =

+ MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,

+ Buffer->getBufferIdentifier());

+ char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());

+ char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);

+ *NewPos = '\0';

+ std::copy(Position, Buffer->getBufferEnd(), NewPos+1);

+ SourceMgr.overrideFileContents(File, NewBuffer);

}

return false;

}

-bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {

- return CodeCompletionFile && FileLoc.isFileID() &&

- SourceMgr.getFileEntryForID(SourceMgr.getFileID(FileLoc))

- == CodeCompletionFile;

void Preprocessor::CodeCompleteNaturalLanguage() {

- SetCodeCompletionPoint(0, 0, 0);

- getDiagnostics().setSuppressAllDiagnostics(true);

if (CodeComplete)

CodeComplete->CodeCompleteNaturalLanguage();

+ setCodeCompletionReached();

}

/// getSpelling - This method is used to get the spelling of a token into a

/// SmallVector. Note that the returned StringRef may not point to the

/// supplied buffer if a copy can be avoided.

-llvm::StringRef Preprocessor::getSpelling(const Token &Tok,

- llvm::SmallVectorImpl<char> &Buffer,

+StringRef Preprocessor::getSpelling(const Token &Tok,

+ SmallVectorImpl<char> &Buffer,

bool *Invalid) const {

// NOTE: this has to be checked *before* testing for an IdentifierInfo.

if (Tok.isNot(tok::raw_identifier)) {

@@ -321,22 +346,23 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok,

const char *Ptr = Buffer.data();

unsigned Len = getSpelling(Tok, Ptr, Invalid);

- return llvm::StringRef(Ptr, Len);

+ return StringRef(Ptr, Len);

}

/// CreateString - Plop the specified string into a scratch buffer and return a

/// location for it. If specified, the source location provides a source

/// location for the token.

void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,

- SourceLocation ExpansionLoc) {

+ SourceLocation ExpansionLocStart,

+ SourceLocation ExpansionLocEnd) {

Tok.setLength(Len);

const char *DestPtr;

SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);

- if (ExpansionLoc.isValid())

- Loc = SourceMgr.createInstantiationLoc(Loc, ExpansionLoc,

- ExpansionLoc, Len);

+ if (ExpansionLocStart.isValid())

+ Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,

+ ExpansionLocEnd, Len);

Tok.setLocation(Loc);

// If this is a raw identifier or a literal token, set the pointer data.

@@ -407,12 +433,12 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {

IdentifierInfo *II;

if (!Identifier.needsCleaning()) {

// No cleaning needed, just use the characters from the lexed buffer.

- II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(),

+ II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(),

Identifier.getLength()));

} else {

// Cleaning needed, alloca a buffer, clean into it, then use the buffer.

llvm::SmallString<64> IdentifierBuffer;

- llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);

+ StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);

II = getIdentifierInfo(CleanedStr);

}

@@ -487,6 +513,17 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {

}

+ // If this identifier is a keyword in C++11, produce a warning. Don't warn if

+ // we're not considering macro expansion, since this identifier might be the

+ // name of a macro.

+ // FIXME: This warning is disabled in cases where it shouldn't be, like

+ // "#define constexpr constexpr", "int constexpr;"

+ if (II.isCXX11CompatKeyword() & !DisableMacroExpansion) {

+ Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();

+ // Don't diagnose this keyword again in this translation unit.

+ II.setIsCXX11CompatKeyword(false);

+ }

// C++ 2.11p2: If this is an alternative representation of a C++ operator,

// then we act as if it is the actual operator and not the textual

// representation of it.

@@ -499,6 +536,44 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {

// like "#define TY typeof", "TY(1) x".

if (II.isExtensionToken() && !DisableMacroExpansion)

Diag(Identifier, diag::ext_token_used);

+ // If this is the '__import_module__' keyword, note that the next token

+ // indicates a module name.

+ if (II.getTokenID() == tok::kw___import_module__ &&

+ !InMacroArgs && !DisableMacroExpansion) {

+ ModuleImportLoc = Identifier.getLocation();

+ CurLexerKind = CLK_LexAfterModuleImport;

+ }

+/// \brief Lex a token following the __import_module__ keyword.

+void Preprocessor::LexAfterModuleImport(Token &Result) {

+ // Figure out what kind of lexer we actually have.

+ if (CurLexer)

+ CurLexerKind = CLK_Lexer;

+ else if (CurPTHLexer)

+ CurLexerKind = CLK_PTHLexer;

+ else if (CurTokenLexer)

+ CurLexerKind = CLK_TokenLexer;

+ else

+ CurLexerKind = CLK_CachingLexer;

+ // Lex the next token.

+ Lex(Result);

+ // The token sequence

+ //

+ // __import_module__ identifier

+ //

+ // indicates a module import directive. We already saw the __import_module__

+ // keyword, so now we're looking for the identifier.

+ if (Result.getKind() != tok::identifier)

+ return;

+ // Load the module.

+ (void)TheModuleLoader.loadModule(ModuleImportLoc,

+ *Result.getIdentifierInfo(),

+ Result.getLocation());

}

void Preprocessor::AddCommentHandler(CommentHandler *Handler) {

@@ -529,6 +604,8 @@ bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {

return true;

}

+ModuleLoader::~ModuleLoader() { }

CommentHandler::~CommentHandler() { }

CodeCompletionHandler::~CodeCompletionHandler() { }

@@ -538,6 +615,7 @@ void Preprocessor::createPreprocessingRecord(

if (Record)

return;

- Record = new PreprocessingRecord(IncludeNestedMacroExpansions);

+ Record = new PreprocessingRecord(getSourceManager(),

+ IncludeNestedMacroExpansions);

addPPCallbacks(Record);

}

diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index 808a81bd5e87..0da9ef5531e7 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp

@@ -17,6 +17,14 @@

#include "clang/Basic/SourceManager.h"

using namespace clang;

+PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid)

+ : PP(pp), FID(fid), InitialNumSLocEntries(0),

+ ParsingPreprocessorDirective(false),

+ ParsingFilename(false), LexingRawMode(false) {

+ if (pp)

+ InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size();

/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and

/// (potentially) macro expand the filename.

void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {

diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index 0e98c1751985..3d363fa4b472 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp

@@ -53,7 +53,7 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,

// diagnostic points to one.

CurBuffer[BytesUsed-1] = '\0';

- return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len-1);

+ return BufferStartLoc.getLocWithOffset(BytesUsed-Len-1);

}

void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {

diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 3e9e8550313c..dc6d686d6cc1 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp

@@ -17,42 +17,53 @@

using namespace clang;

-/// StartsWithL - Return true if the spelling of this token starts with 'L'.

-bool TokenConcatenation::StartsWithL(const Token &Tok) const {

- if (!Tok.needsCleaning()) {

- SourceManager &SM = PP.getSourceManager();

- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';

- }

+/// IsStringPrefix - Return true if Str is a string-literal prefix: 'L', or,

+/// with CPlusPlus0x, 'u', 'U', 'u8', 'R' or the raw 'LR', 'uR', 'UR', 'u8R'.

+static bool IsStringPrefix(StringRef Str, bool CPlusPlus0x) {

- if (Tok.getLength() < 256) {

- char Buffer[256];

- const char *TokPtr = Buffer;

- PP.getSpelling(Tok, TokPtr);

- return TokPtr[0] == 'L';

+ if (Str[0] == 'L' ||

+ (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {

+ if (Str.size() == 1)

+ return true; // "L", "u", "U", and "R"

+ // Check for raw flavors. Need to make sure the first character wasn't

+ // already R. Need CPlusPlus0x check for "LR".

+ if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)

+ return true; // "LR", "uR", "UR"

+ // Check for "u8" and "u8R"

+ if (Str[0] == 'u' && Str[1] == '8') {

+ if (Str.size() == 2) return true; // "u8"

+ if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"

+ }

}

- return PP.getSpelling(Tok)[0] == 'L';

+ return false;

}

-/// IsIdentifierL - Return true if the spelling of this token is literally

-/// 'L'.

-bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {

+/// IsIdentifierStringPrefix - Return true if the spelling of the token is

+/// literally 'L' or, in C++0x mode, 'u', 'U', 'u8', 'R' or a raw form like 'uR'.

+bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {

+ const LangOptions &LangOpts = PP.getLangOptions();

if (!Tok.needsCleaning()) {

- if (Tok.getLength() != 1)

+ if (Tok.getLength() < 1 || Tok.getLength() > 3)

return false;

SourceManager &SM = PP.getSourceManager();

- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';

+ const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));

+ return IsStringPrefix(StringRef(Ptr, Tok.getLength()),

+ LangOpts.CPlusPlus0x);

}

if (Tok.getLength() < 256) {

char Buffer[256];

const char *TokPtr = Buffer;

- if (PP.getSpelling(Tok, TokPtr) != 1)

- return false;

- return TokPtr[0] == 'L';

+ unsigned length = PP.getSpelling(Tok, TokPtr);

+ return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);

}

- return PP.getSpelling(Tok) == "L";

+ return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);

}

TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {

@@ -132,7 +143,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,

// source. If they were, it must be okay to stick them together: if there

// were an issue, the tokens would have been lexed differently.

if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() &&

- PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) ==

+ PrevTok.getLocation().getLocWithOffset(PrevTok.getLength()) ==

Tok.getLocation())

return false;

@@ -179,24 +190,19 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,

if (Tok.is(tok::numeric_constant))

return GetFirstChar(PP, Tok) != '.';

- if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||

- Tok.is(tok::wide_char_literal)*/)

+ if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||

+ Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||

+ Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||

+ Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))

return true;

// If this isn't identifier + string, we're done.

if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))

return false;

- // FIXME: need a wide_char_constant!

- // If the string was a wide string L"foo" or wide char L'f', it would

- // concat with the previous identifier into fooL"bar". Avoid this.

- if (StartsWithL(Tok))

- return true;

// Otherwise, this is a narrow character or string. If the *identifier*

- // is a literal 'L', avoid pasting L "foo" -> L"foo".

- return IsIdentifierL(PrevTok);

+ // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".

+ return IsIdentifierStringPrefix(PrevTok);

case tok::numeric_constant:

return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||

FirstChar == '+' || FirstChar == '-' || FirstChar == '.';

diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 8ff82f160033..a58054490fcd 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp

@@ -43,7 +43,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) {

MacroExpansionStart = SourceLocation();

SourceManager &SM = PP.getSourceManager();

- MacroStartSLocOffset = SM.getNextOffset();

+ MacroStartSLocOffset = SM.getNextLocalOffset();

if (NumTokens > 0) {

assert(Tokens[0].getLocation().isValid());

@@ -55,12 +55,12 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroArgs *Actuals) {

// definition. Tokens that get lexed directly from the definition will

// have their locations pointing inside this chunk. This is to avoid

// creating separate source location entries for each token.

- SourceLocation macroStart = SM.getInstantiationLoc(Tokens[0].getLocation());

- MacroDefStartInfo = SM.getDecomposedLoc(macroStart);

- MacroExpansionStart = SM.createInstantiationLoc(macroStart,

- ExpandLocStart,

- ExpandLocEnd,

- Macro->getDefinitionLength(SM));

+ MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());

+ MacroDefLength = Macro->getDefinitionLength(SM);

+ MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,

+ ExpandLocStart,

+ ExpandLocEnd,

+ MacroDefLength);

}

// If this is a function-like macro, expand the arguments and change

@@ -121,9 +121,8 @@ void TokenLexer::destroy() {

/// Expand the arguments of a function-like macro so that we can quickly

/// return preexpanded tokens from Tokens.

void TokenLexer::ExpandFunctionArguments() {

- SourceManager &SM = PP.getSourceManager();

- llvm::SmallVector<Token, 128> ResultToks;

+ SmallVector<Token, 128> ResultToks;

// Loop through 'Tokens', expanding them into ResultToks. Keep

// track of whether we change anything. If not, no need to keep them. If so,

@@ -144,19 +143,22 @@ void TokenLexer::ExpandFunctionArguments() {

int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());

assert(ArgNo != -1 && "Token following # is not an argument?");

- SourceLocation hashInstLoc;

- if(ExpandLocStart.isValid()) {

- hashInstLoc = getMacroExpansionLocation(CurTok.getLocation());

- assert(hashInstLoc.isValid() && "Expected '#' to come from definition");

- }

+ SourceLocation ExpansionLocStart =

+ getExpansionLocForMacroDefLoc(CurTok.getLocation());

+ SourceLocation ExpansionLocEnd =

+ getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());

Token Res;

if (CurTok.is(tok::hash)) // Stringify

- Res = ActualArgs->getStringifiedArgument(ArgNo, PP, hashInstLoc);

+ Res = ActualArgs->getStringifiedArgument(ArgNo, PP,

+ ExpansionLocStart,

+ ExpansionLocEnd);

else {

// 'charify': don't bother caching these.

Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),

- PP, true, hashInstLoc);

+ PP, true,

+ ExpansionLocStart,

+ ExpansionLocEnd);

}

// The stringified/charified string leading space flag gets set to match

@@ -225,16 +227,9 @@ void TokenLexer::ExpandFunctionArguments() {

}

if(ExpandLocStart.isValid()) {

- SourceLocation curInst =

- getMacroExpansionLocation(CurTok.getLocation());

- assert(curInst.isValid() &&

- "Expected arg identifier to come from definition");

- for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) {

- Token &Tok = ResultToks[i];

- Tok.setLocation(SM.createMacroArgInstantiationLoc(Tok.getLocation(),

- curInst,

- Tok.getLength()));

- }

+ updateLocForMacroArgTokens(CurTok.getLocation(),

+ ResultToks.begin()+FirstResult,

+ ResultToks.end());

}

// If any tokens were substituted from the argument, the whitespace

@@ -282,17 +277,8 @@ void TokenLexer::ExpandFunctionArguments() {

}

if (ExpandLocStart.isValid()) {

- SourceLocation curInst =

- getMacroExpansionLocation(CurTok.getLocation());

- assert(curInst.isValid() &&

- "Expected arg identifier to come from definition");

- for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size();

- i != e; ++i) {

- Token &Tok = ResultToks[i];

- Tok.setLocation(SM.createMacroArgInstantiationLoc(Tok.getLocation(),

- curInst,

- Tok.getLength()));

- }

+ updateLocForMacroArgTokens(CurTok.getLocation(),

+ ResultToks.end()-NumToks, ResultToks.end());

}

// If this token (the macro argument) was supposed to get leading

@@ -417,18 +403,15 @@ void TokenLexer::Lex(Token &Tok) {

// that captures all of this.

if (ExpandLocStart.isValid() && // Don't do this for token streams.

// Check that the token's location was not already set properly.

- SM.isBeforeInSourceLocationOffset(Tok.getLocation(),

- MacroStartSLocOffset)) {

+ SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {

SourceLocation instLoc;

if (Tok.is(tok::comment)) {

- instLoc = SM.createInstantiationLoc(Tok.getLocation(),

- ExpandLocStart,

- ExpandLocEnd,

- Tok.getLength());

+ instLoc = SM.createExpansionLoc(Tok.getLocation(),

+ ExpandLocStart,

+ ExpandLocEnd,

+ Tok.getLength());

} else {

- instLoc = getMacroExpansionLocation(Tok.getLocation());

- assert(instLoc.isValid() &&

- "Location for token not coming from definition was not set!");

+ instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());

}

Tok.setLocation(instLoc);

@@ -469,6 +452,7 @@ void TokenLexer::Lex(Token &Tok) {

bool TokenLexer::PasteTokens(Token &Tok) {

llvm::SmallString<128> Buffer;

const char *ResultTokStrPtr = 0;

+ SourceLocation StartLoc = Tok.getLocation();

SourceLocation PasteOpLoc;

do {

// Consume the ## operator.

@@ -562,7 +546,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {

if (isInvalid) {

// Test for the Microsoft extension of /##/ turning into // here on the

// error path.

- if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) &&

+ if (PP.getLangOptions().MicrosoftExt && Tok.is(tok::slash) &&

RHS.is(tok::slash)) {

HandleMicrosoftCommentPaste(Tok);

return true;

@@ -574,14 +558,13 @@ bool TokenLexer::PasteTokens(Token &Tok) {

// information so that the user knows where it came from.

SourceManager &SM = PP.getSourceManager();

SourceLocation Loc =

- SM.createInstantiationLoc(PasteOpLoc, ExpandLocStart,

- ExpandLocEnd, 2);

+ SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);

// If we're in microsoft extensions mode, downgrade this from a hard

// error to a warning that defaults to an error. This allows

// disabling it.

PP.Diag(Loc,

- PP.getLangOptions().Microsoft ? diag::err_pp_bad_paste_ms

- : diag::err_pp_bad_paste)

+ PP.getLangOptions().MicrosoftExt ? diag::err_pp_bad_paste_ms

+ : diag::err_pp_bad_paste)

<< Buffer.str();

}

@@ -604,23 +587,20 @@ bool TokenLexer::PasteTokens(Token &Tok) {

Tok = Result;

} while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));

+ SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();

// The token's current location indicates where the token was lexed from. We

// need this information to compute the spelling of the token, but any

// diagnostics for the expanded token should appear as if the token was

- // expanded from the (##) operator. Pull this information together into

+ // expanded from the full ## expression. Pull this information together into

// a new SourceLocation that captures all of this.

- if (ExpandLocStart.isValid()) {

- SourceManager &SM = PP.getSourceManager();

- SourceLocation pasteLocInst =

- getMacroExpansionLocation(PasteOpLoc);

- assert(pasteLocInst.isValid() &&

- "Expected '##' to come from definition");

- Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),

- pasteLocInst,

- Tok.getLength()));

- }

+ SourceManager &SM = PP.getSourceManager();

+ if (StartLoc.isFileID())

+ StartLoc = getExpansionLocForMacroDefLoc(StartLoc);

+ if (EndLoc.isFileID())

+ EndLoc = getExpansionLocForMacroDefLoc(EndLoc);

+ Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,

+ Tok.getLength()));

// Now that we got the result token, it will be subject to expansion. Since

// token pasting re-lexes the result token in raw mode, identifier information

@@ -666,22 +646,111 @@ void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {

PP.HandleMicrosoftCommentPaste(Tok);

}

-/// \brief If \arg loc is a FileID and points inside the current macro

+/// \brief If \arg loc is a file ID and points inside the current macro

/// definition, returns the appropriate source location pointing at the

-/// macro expansion source location entry.

-SourceLocation TokenLexer::getMacroExpansionLocation(SourceLocation loc) const {

+/// macro expansion source location entry. \arg loc must be a valid file ID

+/// location inside the macro definition; this is asserted.

+SourceLocation

+TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {

assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&

"Not appropriate for token streams");

- assert(loc.isValid());

+ assert(loc.isValid() && loc.isFileID());

SourceManager &SM = PP.getSourceManager();

- unsigned relativeOffset;

- if (loc.isFileID() &&

- SM.isInFileID(loc,

- MacroDefStartInfo.first, MacroDefStartInfo.second,

- Macro->getDefinitionLength(SM), &relativeOffset)) {

- return MacroExpansionStart.getFileLocWithOffset(relativeOffset);

+ assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&

+ "Expected loc to come from the macro definition");

+ unsigned relativeOffset = 0;

+ SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);

+ return MacroExpansionStart.getLocWithOffset(relativeOffset);

+/// \brief Finds the tokens that are consecutive (from the same FileID),

+/// creates a single SLocEntry, and assigns SourceLocations to each token that

+/// point to that SLocEntry. e.g. for

+/// assert(foo == bar);

+/// There will be a single SLocEntry for the "foo == bar" chunk and locations

+/// for the 'foo', '==', 'bar' tokens will point inside that chunk.

+///

+/// \arg begin_tokens will be updated to a position past all the found

+/// consecutive tokens.

+static void updateConsecutiveMacroArgTokens(SourceManager &SM,

+ SourceLocation InstLoc,

+ Token *&begin_tokens,

+ Token * end_tokens) {

+ assert(begin_tokens < end_tokens);

+ SourceLocation FirstLoc = begin_tokens->getLocation();

+ SourceLocation CurLoc = FirstLoc;

+ // Compare the source location offset of tokens and group together tokens that

+ // are close, even if their locations point to different FileIDs. e.g.

+ //

+ // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)

+ // ^ ^

+ // |bar foo cake| (one SLocEntry chunk for all tokens)

+ //

+ // we can perform this "merge" since the token's spelling location depends

+ // on the relative offset.

+ Token *NextTok = begin_tokens + 1;

+ for (; NextTok < end_tokens; ++NextTok) {

+ int RelOffs;

+ if (!SM.isInSameSLocAddrSpace(CurLoc, NextTok->getLocation(), &RelOffs))

+ break; // Token from different local/loaded location.

+ // Check that token is not before the previous token or more than 50

+ // "characters" away.

+ if (RelOffs < 0 || RelOffs > 50)

+ break;

+ CurLoc = NextTok->getLocation();

}

- return SourceLocation();

+ // For the consecutive tokens, find the length of the SLocEntry to contain

+ // all of them.

+ Token &LastConsecutiveTok = *(NextTok-1);

+ int LastRelOffs = 0;

+ SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),

+ &LastRelOffs);

+ unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();

+ // Create a macro expansion SLocEntry that will "contain" all of the tokens.

+ SourceLocation Expansion =

+ SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);

+ // Change the location of the tokens from the spelling location to the new

+ // expanded location.

+ for (; begin_tokens < NextTok; ++begin_tokens) {

+ Token &Tok = *begin_tokens;

+ int RelOffs = 0;

+ SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);

+ Tok.setLocation(Expansion.getLocWithOffset(RelOffs));

+ }

+/// \brief Creates SLocEntries and updates the locations of macro argument

+/// tokens to their new expanded locations.

+///

+/// \param ArgIdSpellLoc the location of the macro argument id inside the macro

+/// definition.

+/// \param begin_tokens,end_tokens the macro argument tokens to update.

+void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,

+ Token *begin_tokens,

+ Token *end_tokens) {

+ SourceManager &SM = PP.getSourceManager();

+ SourceLocation InstLoc =

+ getExpansionLocForMacroDefLoc(ArgIdSpellLoc);

+ while (begin_tokens < end_tokens) {

+ // If there's only one token just create a SLocEntry for it.

+ if (end_tokens - begin_tokens == 1) {

+ Token &Tok = *begin_tokens;

+ Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),

+ InstLoc,

+ Tok.getLength()));

+ return;

+ }

+ updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);

+ }

}