9 files changed, 192 insertions, 53 deletions
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index be7b7d6e17b2..567ca81f6ac2 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -550,7 +550,7 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
 StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
   Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
   assert(Id && "expected identifier token");
-  return Id.getValue();
+  return Id.value();
 }
 
 bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 6820057642be..b3aac9df6546 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2392,13 +2392,37 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
   //
   // This loop terminates with CurPtr pointing at the newline (or end of buffer)
   // character that ends the line comment.
+
+  // C++23 [lex.phases] p1
+  // Diagnose invalid UTF-8 if the corresponding warning is enabled, emitting a
+  // diagnostic only once per entire ill-formed subsequence to avoid
+  // emitting too many diagnostics (see http://unicode.org/review/pr-121.html).
+  bool UnicodeDecodingAlreadyDiagnosed = false;
+
   char C;
   while (true) {
     C = *CurPtr;
     // Skip over characters in the fast loop.
-    while (C != 0 &&                // Potentially EOF.
-           C != '\n' && C != '\r')  // Newline or DOS-style newline.
+    while (isASCII(C) && C != 0 &&   // Potentially EOF.
+           C != '\n' && C != '\r') { // Newline or DOS-style newline.
       C = *++CurPtr;
+      UnicodeDecodingAlreadyDiagnosed = false;
+    }
+
+    if (!isASCII(C)) {
+      unsigned Length = llvm::getUTF8SequenceSize(
+          (const llvm::UTF8 *)CurPtr, (const llvm::UTF8 *)BufferEnd);
+      if (Length == 0) {
+        if (!UnicodeDecodingAlreadyDiagnosed && !isLexingRawMode())
+          Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
+        UnicodeDecodingAlreadyDiagnosed = true;
+        ++CurPtr;
+      } else {
+        UnicodeDecodingAlreadyDiagnosed = false;
+        CurPtr += Length;
+      }
+      continue;
+    }
 
     const char *NextLine = CurPtr;
     if (C != 0) {
@@ -2665,6 +2689,12 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
   if (C == '/')
     C = *CurPtr++;
 
+  // C++23 [lex.phases] p1
+  // Diagnose invalid UTF-8 if the corresponding warning is enabled, emitting a
+  // diagnostic only once per entire ill-formed subsequence to avoid
+  // emitting too many diagnostics (see http://unicode.org/review/pr-121.html).
+  bool UnicodeDecodingAlreadyDiagnosed = false;
+
   while (true) {
     // Skip over all non-interesting characters until we find end of buffer or a
     // (probably ending) '/' character.
@@ -2673,14 +2703,21 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
         // doesn't check for '\0'.
         !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) {
       // While not aligned to a 16-byte boundary.
-      while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
+      while (C != '/' && (intptr_t)CurPtr % 16 != 0) {
+        if (!isASCII(C))
+          goto MultiByteUTF8;
         C = *CurPtr++;
-
+      }
       if (C == '/') goto FoundSlash;
 
 #ifdef __SSE2__
       __m128i Slashes = _mm_set1_epi8('/');
-      while (CurPtr+16 <= BufferEnd) {
+      while (CurPtr + 16 < BufferEnd) {
+        int Mask = _mm_movemask_epi8(*(const __m128i *)CurPtr);
+        if (LLVM_UNLIKELY(Mask != 0)) {
+          goto MultiByteUTF8;
+        }
+        // Look for slashes.
         int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
                                     Slashes));
         if (cmp != 0) {
@@ -2693,21 +2730,38 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
         CurPtr += 16;
       }
 #elif __ALTIVEC__
+      __vector unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+                                        0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+                                        0x80, 0x80, 0x80, 0x80};
       __vector unsigned char Slashes = {
         '/', '/', '/', '/',  '/', '/', '/', '/',
         '/', '/', '/', '/',  '/', '/', '/', '/'
       };
-      while (CurPtr + 16 <= BufferEnd &&
-             !vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes))
+      while (CurPtr + 16 < BufferEnd) {
+        if (LLVM_UNLIKELY(
+                vec_any_ge(*(const __vector unsigned char *)CurPtr, LongUTF)))
+          goto MultiByteUTF8;
+        if (vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes)) {
+          break;
+        }
         CurPtr += 16;
+      }
+
 #else
-      // Scan for '/' quickly.  Many block comments are very large.
-      while (CurPtr[0] != '/' &&
-             CurPtr[1] != '/' &&
-             CurPtr[2] != '/' &&
-             CurPtr[3] != '/' &&
-             CurPtr+4 < BufferEnd) {
-        CurPtr += 4;
+      while (CurPtr + 16 < BufferEnd) {
+        bool HasNonASCII = false;
+        for (unsigned I = 0; I < 16; ++I)
+          HasNonASCII |= !isASCII(CurPtr[I]);
+
+        if (LLVM_UNLIKELY(HasNonASCII))
+          goto MultiByteUTF8;
+
+        bool HasSlash = false;
+        for (unsigned I = 0; I < 16; ++I)
+          HasSlash |= CurPtr[I] == '/';
+        if (HasSlash)
+          break;
+        CurPtr += 16;
       }
 #endif
 
@@ -2715,9 +2769,30 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
       C = *CurPtr++;
     }
 
-    // Loop to scan the remainder.
-    while (C != '/' && C != '\0')
+    // Loop to scan the remainder, warning on invalid UTF-8
+    // if the corresponding warning is enabled, emitting a diagnostic only once
+    // per sequence that cannot be decoded.
+    while (C != '/' && C != '\0') {
+      if (isASCII(C)) {
+        UnicodeDecodingAlreadyDiagnosed = false;
+        C = *CurPtr++;
+        continue;
+      }
+    MultiByteUTF8:
+      // CurPtr is 1 code unit past C, so to decode
+      // the codepoint, we need to read from the previous position.
+      unsigned Length = llvm::getUTF8SequenceSize(
+          (const llvm::UTF8 *)CurPtr - 1, (const llvm::UTF8 *)BufferEnd);
+      if (Length == 0) {
+        if (!UnicodeDecodingAlreadyDiagnosed && !isLexingRawMode())
+          Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
+        UnicodeDecodingAlreadyDiagnosed = true;
+      } else {
+        UnicodeDecodingAlreadyDiagnosed = false;
+        CurPtr += Length - 1;
+      }
       C = *CurPtr++;
+    }
 
     if (C == '/') {
   FoundSlash:
@@ -3212,7 +3287,10 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,
   }
 
   if (Delimited && PP) {
-    Diag(BufferPtr, diag::ext_delimited_escape_sequence) << /*delimited*/ 0;
+    Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b
+                        ? diag::warn_cxx2b_delimited_escape_sequence
+                        : diag::ext_delimited_escape_sequence)
+        << /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0);
   }
 
   if (Result) {
@@ -3296,7 +3374,10 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,
   }
 
   if (Diagnose && PP && !LooseMatch)
-    Diag(BufferPtr, diag::ext_delimited_escape_sequence) << /*named*/ 1;
+    Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b
+                        ? diag::warn_cxx2b_delimited_escape_sequence
+                        : diag::ext_delimited_escape_sequence)
+        << /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0);
 
   if (LooseMatch)
     Res = LooseMatch->CodePoint;
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index ebf30c9f01a9..53635a7385ec 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -311,8 +311,9 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
           << tok::r_brace;
     else if (!HadError) {
       Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
-           diag::ext_delimited_escape_sequence)
-          << /*delimited*/ 0;
+           Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence
+                                : diag::ext_delimited_escape_sequence)
+          << /*delimited*/ 0 << (Features.CPlusPlus ? 1 : 0);
     }
   }
 
@@ -641,8 +642,9 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
 
   if ((IsDelimitedEscapeSequence || IsNamedEscapeSequence) && Diags)
     Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
-         diag::ext_delimited_escape_sequence)
-        << (IsNamedEscapeSequence ? 1 : 0);
+         Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence
+                              : diag::ext_delimited_escape_sequence)
+        << (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0);
 
   return true;
 }
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
index 310b95f36771..eae12beb6244 100644
--- a/clang/lib/Lex/MacroInfo.cpp
+++ b/clang/lib/Lex/MacroInfo.cpp
@@ -213,7 +213,7 @@ MacroDirective::DefInfo MacroDirective::getDefinition() {
       isPublic = VisMD->isPublic();
   }
 
-  return DefInfo(nullptr, UndefLoc, !isPublic || isPublic.getValue());
+  return DefInfo(nullptr, UndefLoc, !isPublic || isPublic.value());
 }
 
 const MacroDirective::DefInfo
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index c791e3e4e5ca..57e344622f25 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -1219,8 +1219,8 @@ void ModuleMap::resolveHeaderDirectives(
     Module *Mod, llvm::Optional<const FileEntry *> File) const {
   bool NeedsFramework = false;
   SmallVector<Module::UnresolvedHeaderDirective, 1> NewHeaders;
-  const auto Size = File ? File.getValue()->getSize() : 0;
-  const auto ModTime = File ? File.getValue()->getModificationTime() : 0;
+  const auto Size = File ? File.value()->getSize() : 0;
+  const auto ModTime = File ? File.value()->getModificationTime() : 0;
 
   for (auto &Header : Mod->UnresolvedHeaders) {
     if (File && ((Header.ModTime && Header.ModTime != ModTime) ||
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 4dcef01e3e4c..352e1f217819 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1983,6 +1983,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                          tok::annot_module_begin, Action.ModuleForHeader);
     break;
+  case ImportAction::HeaderUnitImport:
+    EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
+                         Action.ModuleForHeader);
+    break;
   case ImportAction::ModuleImport:
     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                          tok::annot_module_include, Action.ModuleForHeader);
@@ -2191,6 +2195,17 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   // known to have no effect beyond its effect on module visibility -- that is,
   // if it's got an include guard that is already defined, set to Import if it
   // is a modular header we've already built and should import.
+
+  // For C++20 Modules
+  // [cpp.include]/7 If the header identified by the header-name denotes an
+  // importable header, it is implementation-defined whether the #include
+  // preprocessing directive is instead replaced by an import directive.
+  // For this implementation, the translation is permitted when we are parsing
+  // the Global Module Fragment, and not otherwise (the cases where it would be
+  // valid to replace an include with an import are highly constrained once in
+  // named module purview; this choice avoids considerable complexity in
+  // determining valid cases).
+
   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
 
   if (PPOpts->SingleFileParseMode)
@@ -2203,13 +2218,34 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
       alreadyIncluded(*File))
     Action = IncludeLimitReached;
 
+  bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule &&
+                               !isForModuleBuilding(SuggestedModule.getModule(),
+                                                    getLangOpts().CurrentModule,
+                                                    getLangOpts().ModuleName);
+
+  // FIXME: We do not have a good way to disambiguate C++ clang modules from
+  // C++ standard modules (other than use/non-use of Header Units).
+  Module *SM = SuggestedModule.getModule();
+  // Maybe a usable Header Unit
+  bool UsableHeaderUnit = false;
+  if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
+    if (TrackGMFState.inGMF() || IsImportDecl)
+      UsableHeaderUnit = true;
+    else if (!IsImportDecl) {
+      // This is a Header Unit that we do not include-translate
+      SuggestedModule = ModuleMap::KnownHeader();
+      SM = nullptr;
+    }
+  }
+  // Maybe a usable clang header module.
+  bool UsableHeaderModule =
+      (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
+      !SM->isHeaderUnit();
+
   // Determine whether we should try to import the module for this #include, if
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
-  if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
-      !isForModuleBuilding(SuggestedModule.getModule(),
-                           getLangOpts().CurrentModule,
-                           getLangOpts().ModuleName)) {
+  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) {
     // If this include corresponds to a module but that module is
     // unavailable, diagnose the situation and bail out.
     // FIXME: Remove this; loadModule does the same check (but produces
@@ -2226,7 +2262,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // FIXME: Should we have a second loadModule() overload to avoid this
     // extra lookup step?
     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
-    for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
+    for (Module *Mod = SM; Mod; Mod = Mod->Parent)
       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                     FilenameTok.getLocation()));
     std::reverse(Path.begin(), Path.end());
@@ -2293,9 +2329,12 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
   if (Action == Enter && File &&
-      !HeaderInfo.ShouldEnterIncludeFile(
-          *this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules,
-          SuggestedModule.getModule(), IsFirstIncludeOfFile)) {
+      !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
+                                         EnterOnce, getLangOpts().Modules, SM,
+                                         IsFirstIncludeOfFile)) {
+    // C++ standard modules:
+    // If we are not in the GMF, then we textually include only
+    // clang modules:
     // Even if we've already preprocessed this header once and know that we
     // don't need to see its contents again, we still need to import it if it's
     // modular because we might not have imported it from this submodule before.
@@ -2303,7 +2342,10 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // FIXME: We don't do this when compiling a PCH because the AST
     // serialization layer can't cope with it. This means we get local
     // submodule visibility semantics wrong in that case.
-    Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
+    if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
+      Action = TrackGMFState.inGMF() ? Import : Skip;
+    else
+      Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
   }
 
   // Check for circular inclusion of the main file.
@@ -2440,8 +2482,8 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   switch (Action) {
   case Skip:
     // If we don't need to enter the file, stop now.
-    if (Module *M = SuggestedModule.getModule())
-      return {ImportAction::SkippedModuleImport, M};
+    if (SM)
+      return {ImportAction::SkippedModuleImport, SM};
     return {ImportAction::None};
 
   case IncludeLimitReached:
@@ -2451,16 +2493,15 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
 
   case Import: {
     // If this is a module import, make it visible if needed.
-    Module *M = SuggestedModule.getModule();
-    assert(M && "no module to import");
+    assert(SM && "no module to import");
 
-    makeModuleVisible(M, EndLoc);
+    makeModuleVisible(SM, EndLoc);
 
     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
         tok::pp___include_macros)
       return {ImportAction::None};
 
-    return {ImportAction::ModuleImport, M};
+    return {ImportAction::ModuleImport, SM};
   }
 
   case Enter:
@@ -2492,13 +2533,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     return {ImportAction::None};
 
   // Determine if we're switching to building a new submodule, and which one.
-  if (auto *M = SuggestedModule.getModule()) {
-    if (M->getTopLevelModule()->ShadowingModule) {
+  // This does not apply to C++20 header units.
+  if (SM && !SM->isHeaderUnit()) {
+    if (SM->getTopLevelModule()->ShadowingModule) {
       // We are building a submodule that belongs to a shadowed module. This
       // means we find header files in the shadowed module.
-      Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
-        << M->getFullModuleName();
-      Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
+      Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
+          << SM->getFullModuleName();
+      Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
            diag::note_previous_definition);
       return {ImportAction::None};
     }
@@ -2511,22 +2553,22 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // that PCH, which means we should enter the submodule. We need to teach
     // the AST serialization layer to deal with the resulting AST.
     if (getLangOpts().CompilingPCH &&
-        isForModuleBuilding(M, getLangOpts().CurrentModule,
+        isForModuleBuilding(SM, getLangOpts().CurrentModule,
                             getLangOpts().ModuleName))
       return {ImportAction::None};
 
     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
-    CurLexerSubmodule = M;
+    CurLexerSubmodule = SM;
 
     // Let the macro handling code know that any future macros are within
     // the new submodule.
-    EnterSubmodule(M, EndLoc, /*ForPragma*/false);
+    EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);
 
     // Let the parser know that any future declarations are within the new
     // submodule.
     // FIXME: There's no point doing this if we're handling a #__include_macros
     // directive.
-    return {ImportAction::ModuleBegin, M};
+    return {ImportAction::ModuleBegin, SM};
   }
 
   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index bf46e5422bc8..f3be2107f985 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1326,10 +1326,10 @@ already_lexed:
         // The last ')' has been reached; return the value if one found or
         // a diagnostic and a dummy value.
         if (Result) {
-          OS << Result.getValue();
+          OS << Result.value();
           // For strict conformance to __has_cpp_attribute rules, use 'L'
           // suffix for dated literals.
-          if (Result.getValue() > 1)
+          if (Result.value() > 1)
             OS << 'L';
         } else {
           OS << 0;
diff --git a/clang/lib/Lex/PreprocessingRecord.cpp b/clang/lib/Lex/PreprocessingRecord.cpp
index 673ef637e396..2146a7c04217 100644
--- a/clang/lib/Lex/PreprocessingRecord.cpp
+++ b/clang/lib/Lex/PreprocessingRecord.cpp
@@ -115,7 +115,7 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
     Optional<bool> IsInFile =
         ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
     if (IsInFile)
-      return IsInFile.getValue();
+      return IsInFile.value();
 
     // The external source did not provide a definite answer, go and deserialize
     // the entity to check it.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 281f01fb28a4..5310db3c882b 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -941,6 +941,9 @@ void Preprocessor::Lex(Token &Result) {
 
   // Update ImportSeqState to track our position within a C++20 import-seq
   // if this token is being produced as a result of phase 4 of translation.
+  // Update TrackGMFState to decide if we are currently in a Global Module
+  // Fragment. GMF state updates should precede ImportSeq ones, since GMF state
+  // depends on the prevailing ImportSeq state in two cases.
   if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
       !Result.getFlag(Token::IsReinjected)) {
     switch (Result.getKind()) {
@@ -953,7 +956,11 @@ void Preprocessor::Lex(Token &Result) {
     case tok::r_brace:
       ImportSeqState.handleCloseBrace();
       break;
+    // This token is injected to represent the translation of '#include "a.h"'
+    // into "import a.h;". Mimic the notional ';'.
+    case tok::annot_module_include:
     case tok::semi:
+      TrackGMFState.handleSemi();
       ImportSeqState.handleSemi();
       break;
     case tok::header_name:
@@ -961,10 +968,12 @@ void Preprocessor::Lex(Token &Result) {
       ImportSeqState.handleHeaderName();
       break;
     case tok::kw_export:
+      TrackGMFState.handleExport();
       ImportSeqState.handleExport();
       break;
     case tok::identifier:
       if (Result.getIdentifierInfo()->isModulesImport()) {
+        TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq());
         ImportSeqState.handleImport();
         if (ImportSeqState.afterImportSeq()) {
           ModuleImportLoc = Result.getLocation();
@@ -973,9 +982,13 @@ void Preprocessor::Lex(Token &Result) {
           CurLexerKind = CLK_LexAfterModuleImport;
         }
         break;
+      } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
+        TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq());
+        break;
       }
       LLVM_FALLTHROUGH;
     default:
+      TrackGMFState.handleMisc();
       ImportSeqState.handleMisc();
       break;
     }
@@ -1222,6 +1235,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
       LLVM_FALLTHROUGH;
 
     case ImportAction::ModuleImport:
+    case ImportAction::HeaderUnitImport:
     case ImportAction::SkippedModuleImport:
       // We chose to import (or textually enter) the file. Convert the
       // header-name token into a header unit annotation token.