25 files changed, 2428 insertions, 632 deletions
diff --git a/contrib/llvm-project/lld/COFF/CallGraphSort.cpp b/contrib/llvm-project/lld/COFF/CallGraphSort.cpp
new file mode 100644
index 000000000000..d3e5312ce7fd
--- /dev/null
+++ b/contrib/llvm-project/lld/COFF/CallGraphSort.cpp
@@ -0,0 +1,245 @@
+//===- CallGraphSort.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
+/// about the algorithm.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CallGraphSort.h"
+#include "InputFiles.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+
+#include <numeric>
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::coff;
+
+namespace {
+struct Edge {
+  int from;
+  uint64_t weight;
+};
+
+struct Cluster {
+  Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
+
+  double getDensity() const {
+    if (size == 0)
+      return 0;
+    return double(weight) / double(size);
+  }
+
+  int next;
+  int prev;
+  uint64_t size;
+  uint64_t weight = 0;
+  uint64_t initialWeight = 0;
+  Edge bestPred = {-1, 0};
+};
+
+class CallGraphSort {
+public:
+  CallGraphSort();
+
+  DenseMap<const SectionChunk *, int> run();
+
+private:
+  std::vector<Cluster> clusters;
+  std::vector<const SectionChunk *> sections;
+};
+
+// Maximum amount the combined cluster density can be worse than the original
+// cluster to consider merging.
+constexpr int MAX_DENSITY_DEGRADATION = 8;
+
+// Maximum cluster size in bytes.
+constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
+} // end anonymous namespace
+
+using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>;
+
+// Take the edge list in config->callGraphProfile, which is already keyed by
+// pairs of SectionChunks, and generate a graph between those sections (after
+// following each chunk's repl pointer) with the provided weights.
+CallGraphSort::CallGraphSort() {
+  MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
+  DenseMap<const SectionChunk *, int> secToCluster;
+
+  auto getOrCreateNode = [&](const SectionChunk *isec) -> int {
+    auto res = secToCluster.try_emplace(isec, clusters.size());
+    if (res.second) {
+      sections.push_back(isec);
+      clusters.emplace_back(clusters.size(), isec->getSize());
+    }
+    return res.first->second;
+  };
+
+  // Create the graph.
+  for (std::pair<SectionPair, uint64_t> &c : profile) {
+    const auto *fromSec = cast<SectionChunk>(c.first.first->repl);
+    const auto *toSec = cast<SectionChunk>(c.first.second->repl);
+    uint64_t weight = c.second;
+
+    // Ignore edges between input sections belonging to different output
+    // sections.  This is done because otherwise we would end up with clusters
+    // containing input sections that can't actually be placed adjacently in the
+    // output.  This messes with the cluster size and density calculations.  We
+    // would also end up moving input sections in other output sections without
+    // moving them closer to what calls them.
+    if (fromSec->getOutputSection() != toSec->getOutputSection())
+      continue;
+
+    int from = getOrCreateNode(fromSec);
+    int to = getOrCreateNode(toSec);
+
+    clusters[to].weight += weight;
+
+    if (from == to)
+      continue;
+
+    // Remember the heaviest incoming edge as the best predecessor.
+    Cluster &toC = clusters[to];
+    if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
+      toC.bestPred.from = from;
+      toC.bestPred.weight = weight;
+    }
+  }
+  for (Cluster &c : clusters)
+    c.initialWeight = c.weight;
+}
+
+// It's bad to merge clusters which would degrade the density too much.
+static bool isNewDensityBad(Cluster &a, Cluster &b) {
+  double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
+  return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
+}
+
+// Find the leader of the cluster V belongs to (clusters are represented as
+// equivalence classes). While walking up, apply union-find path halving: it is
+// simple to implement and reduces tree depth and hence lookup time.
+static int getLeader(std::vector<int> &leaders, int v) {
+  while (leaders[v] != v) {
+    leaders[v] = leaders[leaders[v]];
+    v = leaders[v];
+  }
+  return v;
+}
+
+static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
+                          Cluster &from, int fromIdx) {
+  int tail1 = into.prev, tail2 = from.prev;
+  into.prev = tail2;
+  cs[tail2].next = intoIdx;
+  from.prev = tail1;
+  cs[tail1].next = fromIdx;
+  into.size += from.size;
+  into.weight += from.weight;
+  from.size = 0;
+  from.weight = 0;
+}
+
+// Group SectionChunks into clusters using the Call-Chain Clustering (C3)
+// heuristic, then sort the clusters by density.
+DenseMap<const SectionChunk *, int> CallGraphSort::run() {
+  std::vector<int> sorted(clusters.size());
+  std::vector<int> leaders(clusters.size());
+
+  std::iota(leaders.begin(), leaders.end(), 0);
+  std::iota(sorted.begin(), sorted.end(), 0);
+  llvm::stable_sort(sorted, [&](int a, int b) {
+    return clusters[a].getDensity() > clusters[b].getDensity();
+  });
+
+  for (int l : sorted) {
+    // The cluster index is the same as the index of its leader here because
+    // clusters[l] has not been merged into another cluster yet.
+    Cluster &c = clusters[l];
+
+    // Skip if there is no best predecessor or its edge is unlikely (<= 10%).
+    if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
+      continue;
+
+    int predL = getLeader(leaders, c.bestPred.from);
+    if (l == predL)
+      continue;
+
+    Cluster *predC = &clusters[predL];
+    if (c.size + predC->size > MAX_CLUSTER_SIZE)
+      continue;
+
+    if (isNewDensityBad(*predC, c))
+      continue;
+
+    leaders[l] = predL;
+    mergeClusters(clusters, *predC, predL, c, l);
+  }
+
+  // Sort remaining non-empty clusters by density.
+  sorted.clear();
+  for (int i = 0, e = (int)clusters.size(); i != e; ++i)
+    if (clusters[i].size > 0)
+      sorted.push_back(i);
+  llvm::stable_sort(sorted, [&](int a, int b) {
+    return clusters[a].getDensity() > clusters[b].getDensity();
+  });
+
+  DenseMap<const SectionChunk *, int> orderMap;
+  // Sections will be sorted by increasing order. Absent sections will have
+  // priority 0 and be placed at the end of sections.
+  int curOrder = INT_MIN;
+  for (int leader : sorted) {
+    for (int i = leader;;) {
+      orderMap[sections[i]] = curOrder++;
+      i = clusters[i].next;
+      if (i == leader)
+        break;
+    }
+  }
+  if (!config->printSymbolOrder.empty()) {
+    std::error_code ec;
+    raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
+    if (ec) {
+      error("cannot open " + config->printSymbolOrder + ": " + ec.message());
+      return orderMap;
+    }
+    // Print the symbols ordered by C3, in the order of increasing curOrder.
+    // Instead of sorting the whole orderMap, just repeat the loops above.
+    for (int leader : sorted)
+      for (int i = leader;;) {
+        const SectionChunk *sc = sections[i];
+
+        // Search all the symbols in the file that owns this section and
+        // print each DefinedCOFF symbol that is defined within the
+        // section.
+        for (Symbol *sym : sc->file->getSymbols())
+          if (auto *d = dyn_cast_or_null<DefinedCOFF>(sym))
+            // Filter out non-COMDAT symbols and section symbols.
+            if (d->isCOMDAT && !d->getCOFFSymbol().isSection() &&
+                sc == d->getChunk())
+              os << sym->getName() << "\n";
+        i = clusters[i].next;
+        if (i == leader)
+          break;
+      }
+  }
+
+  return orderMap;
+}
+
+// Sort sections by the profile data provided by /call-graph-ordering-file.
+//
+// This first builds a call graph based on the profile data then merges sections
+// according to the C³ heuristic. All clusters are then sorted by a density
+// metric to further improve locality.
+DenseMap<const SectionChunk *, int> coff::computeCallGraphProfileOrder() {
+  return CallGraphSort().run();
+}
diff --git a/contrib/llvm-project/lld/COFF/CallGraphSort.h b/contrib/llvm-project/lld/COFF/CallGraphSort.h
new file mode 100644
index 000000000000..e4f372137448
--- /dev/null
+++ b/contrib/llvm-project/lld/COFF/CallGraphSort.h
@@ -0,0 +1,22 @@
+//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COFF_CALL_GRAPH_SORT_H
+#define LLD_COFF_CALL_GRAPH_SORT_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace lld {
+namespace coff {
+class SectionChunk;
+
+llvm::DenseMap<const SectionChunk *, int> computeCallGraphProfileOrder();
+} // namespace coff
+} // namespace lld
+
+#endif
diff --git a/contrib/llvm-project/lld/COFF/Chunks.cpp b/contrib/llvm-project/lld/COFF/Chunks.cpp
index e04ceed505c2..14d0a5ad716c 100644
--- a/contrib/llvm-project/lld/COFF/Chunks.cpp
+++ b/contrib/llvm-project/lld/COFF/Chunks.cpp
@@ -357,9 +357,7 @@ void SectionChunk::writeTo(uint8_t *buf) const {
 
   // Apply relocations.
   size_t inputSize = getSize();
-  for (size_t i = 0, e = relocsSize; i < e; i++) {
-    const coff_relocation &rel = relocsData[i];
-
+  for (const coff_relocation &rel : getRelocs()) {
     // Check for an invalid relocation offset. This check isn't perfect, because
     // we don't have the relocation size, which is only known after checking the
     // machine and relocation type. As a result, a relocation may overwrite the
@@ -369,47 +367,89 @@ void SectionChunk::writeTo(uint8_t *buf) const {
       continue;
     }
 
-    uint8_t *off = buf + rel.VirtualAddress;
+    applyRelocation(buf + rel.VirtualAddress, rel);
+  }
+}
 
-    auto *sym =
-        dyn_cast_or_null<Defined>(file->getSymbol(rel.SymbolTableIndex));
+void SectionChunk::applyRelocation(uint8_t *off,
+                                   const coff_relocation &rel) const {
+  auto *sym = dyn_cast_or_null<Defined>(file->getSymbol(rel.SymbolTableIndex));
 
-    // Get the output section of the symbol for this relocation.  The output
-    // section is needed to compute SECREL and SECTION relocations used in debug
-    // info.
-    Chunk *c = sym ? sym->getChunk() : nullptr;
-    OutputSection *os = c ? c->getOutputSection() : nullptr;
-
-    // Skip the relocation if it refers to a discarded section, and diagnose it
-    // as an error if appropriate. If a symbol was discarded early, it may be
-    // null. If it was discarded late, the output section will be null, unless
-    // it was an absolute or synthetic symbol.
-    if (!sym ||
-        (!os && !isa<DefinedAbsolute>(sym) && !isa<DefinedSynthetic>(sym))) {
-      maybeReportRelocationToDiscarded(this, sym, rel);
-      continue;
-    }
+  // Get the output section of the symbol for this relocation.  The output
+  // section is needed to compute SECREL and SECTION relocations used in debug
+  // info.
+  Chunk *c = sym ? sym->getChunk() : nullptr;
+  OutputSection *os = c ? c->getOutputSection() : nullptr;
 
-    uint64_t s = sym->getRVA();
+  // Skip the relocation if it refers to a discarded section, and diagnose it
+  // as an error if appropriate. If a symbol was discarded early, it may be
+  // null. If it was discarded late, the output section will be null, unless
+  // it was an absolute or synthetic symbol.
+  if (!sym ||
+      (!os && !isa<DefinedAbsolute>(sym) && !isa<DefinedSynthetic>(sym))) {
+    maybeReportRelocationToDiscarded(this, sym, rel);
+    return;
+  }
 
-    // Compute the RVA of the relocation for relative relocations.
-    uint64_t p = rva + rel.VirtualAddress;
-    switch (config->machine) {
-    case AMD64:
-      applyRelX64(off, rel.Type, os, s, p);
-      break;
-    case I386:
-      applyRelX86(off, rel.Type, os, s, p);
-      break;
-    case ARMNT:
-      applyRelARM(off, rel.Type, os, s, p);
-      break;
-    case ARM64:
-      applyRelARM64(off, rel.Type, os, s, p);
+  uint64_t s = sym->getRVA();
+
+  // Compute the RVA of the relocation for relative relocations.
+  uint64_t p = rva + rel.VirtualAddress;
+  switch (config->machine) {
+  case AMD64:
+    applyRelX64(off, rel.Type, os, s, p);
+    break;
+  case I386:
+    applyRelX86(off, rel.Type, os, s, p);
+    break;
+  case ARMNT:
+    applyRelARM(off, rel.Type, os, s, p);
+    break;
+  case ARM64:
+    applyRelARM64(off, rel.Type, os, s, p);
+    break;
+  default:
+    llvm_unreachable("unknown machine type");
+  }
+}
+
+// Defend against unsorted relocations. This may be overly conservative.
+void SectionChunk::sortRelocations() {
+  auto cmpByVa = [](const coff_relocation &l, const coff_relocation &r) {
+    return l.VirtualAddress < r.VirtualAddress;
+  };
+  if (llvm::is_sorted(getRelocs(), cmpByVa))
+    return;
+  warn("some relocations in " + file->getName() + " are not sorted");
+  MutableArrayRef<coff_relocation> newRelocs(
+      bAlloc.Allocate<coff_relocation>(relocsSize), relocsSize);
+  memcpy(newRelocs.data(), relocsData, relocsSize * sizeof(coff_relocation));
+  llvm::sort(newRelocs, cmpByVa);
+  setRelocs(newRelocs);
+}
+
+// Similar to writeTo, but suitable for relocating a subsection of the overall
+// section.
+void SectionChunk::writeAndRelocateSubsection(ArrayRef<uint8_t> sec,
+                                              ArrayRef<uint8_t> subsec,
+                                              uint32_t &nextRelocIndex,
+                                              uint8_t *buf) const {
+  assert(!subsec.empty() && !sec.empty());
+  assert(sec.begin() <= subsec.begin() && subsec.end() <= sec.end() &&
+         "subsection is not part of this section");
+  size_t vaBegin = std::distance(sec.begin(), subsec.begin());
+  size_t vaEnd = std::distance(sec.begin(), subsec.end());
+  memcpy(buf, subsec.data(), subsec.size());
+  for (; nextRelocIndex < relocsSize; ++nextRelocIndex) {
+    const coff_relocation &rel = relocsData[nextRelocIndex];
+    // Only apply relocations that apply to this subsection. These checks
+    // assume that all subsections completely contain their relocations.
+    // Relocations must not straddle the beginning or end of a subsection.
+    if (rel.VirtualAddress < vaBegin)
+      continue;
+    if (rel.VirtualAddress + 1 >= vaEnd)
       break;
-    default:
-      llvm_unreachable("unknown machine type");
-    }
+    applyRelocation(&buf[rel.VirtualAddress - vaBegin], rel);
   }
 }
 
@@ -451,8 +491,7 @@ static uint8_t getBaserelType(const coff_relocation &rel) {
 // fixed by the loader if load-time relocation is needed.
 // Only called when base relocation is enabled.
 void SectionChunk::getBaserels(std::vector<Baserel> *res) {
-  for (size_t i = 0, e = relocsSize; i < e; i++) {
-    const coff_relocation &rel = relocsData[i];
+  for (const coff_relocation &rel : getRelocs()) {
     uint8_t ty = getBaserelType(rel);
     if (ty == IMAGE_REL_BASED_ABSOLUTE)
       continue;
diff --git a/contrib/llvm-project/lld/COFF/Chunks.h b/contrib/llvm-project/lld/COFF/Chunks.h
index 0528143383c5..e076d8e71109 100644
--- a/contrib/llvm-project/lld/COFF/Chunks.h
+++ b/contrib/llvm-project/lld/COFF/Chunks.h
@@ -204,6 +204,15 @@ public:
   ArrayRef<uint8_t> getContents() const;
   void writeTo(uint8_t *buf) const;
 
+  // Defend against unsorted relocations. This may be overly conservative.
+  void sortRelocations();
+
+  // Write and relocate a portion of the section. This is intended to be called
+  // in a loop. Relocations must be sorted first.
+  void writeAndRelocateSubsection(ArrayRef<uint8_t> sec,
+                                  ArrayRef<uint8_t> subsec,
+                                  uint32_t &nextRelocIndex, uint8_t *buf) const;
+
   uint32_t getOutputCharacteristics() const {
     return header->Characteristics & (permMask | typeMask);
   }
@@ -212,6 +221,7 @@ public:
   }
   void getBaserels(std::vector<Baserel> *res);
   bool isCOMDAT() const;
+  void applyRelocation(uint8_t *off, const coff_relocation &rel) const;
   void applyRelX64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
                    uint64_t p) const;
   void applyRelX86(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
diff --git a/contrib/llvm-project/lld/COFF/Config.h b/contrib/llvm-project/lld/COFF/Config.h
index 7c439176f3a4..65ddc326ba78 100644
--- a/contrib/llvm-project/lld/COFF/Config.h
+++ b/contrib/llvm-project/lld/COFF/Config.h
@@ -9,6 +9,7 @@
 #ifndef LLD_COFF_CONFIG_H
 #define LLD_COFF_CONFIG_H
 
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/COFF.h"
@@ -29,6 +30,7 @@ class DefinedRelative;
 class StringChunk;
 class Symbol;
 class InputFile;
+class SectionChunk;
 
 // Short aliases.
 static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
@@ -155,6 +157,11 @@ struct Configuration {
   // Used for /opt:lldltocachepolicy=policy
   llvm::CachePruningPolicy ltoCachePolicy;
 
+  // Used for /opt:[no]ltonewpassmanager
+  bool ltoNewPassManager = false;
+  // Used for /opt:[no]ltodebugpassmanager
+  bool ltoDebugPassManager = false;
+
   // Used for /merge:from=to (e.g. /merge:.rdata=.text)
   std::map<StringRef, StringRef> merge;
 
@@ -201,6 +208,15 @@ struct Configuration {
   // Used for /lto-obj-path:
   llvm::StringRef ltoObjPath;
 
+  // Used for /call-graph-ordering-file:
+  llvm::MapVector<std::pair<const SectionChunk *, const SectionChunk *>,
+                  uint64_t>
+      callGraphProfile;
+  bool callGraphProfileSort = false;
+
+  // Used for /print-symbol-order:
+  StringRef printSymbolOrder;
+
   uint64_t align = 4096;
   uint64_t imageBase = -1;
   uint64_t fileAlign = 512;
@@ -210,8 +226,12 @@ struct Configuration {
   uint64_t heapCommit = 4096;
   uint32_t majorImageVersion = 0;
   uint32_t minorImageVersion = 0;
+  // If changing the default os/subsys version here, update the default in
+  // the MinGW driver accordingly.
   uint32_t majorOSVersion = 6;
   uint32_t minorOSVersion = 0;
+  uint32_t majorSubsystemVersion = 6;
+  uint32_t minorSubsystemVersion = 0;
   uint32_t timestamp = 0;
   uint32_t functionPadMin = 0;
   bool dynamicBase = true;
diff --git a/contrib/llvm-project/lld/COFF/DLL.cpp b/contrib/llvm-project/lld/COFF/DLL.cpp
index 50301ad91b1d..e88a6b1bffb0 100644
--- a/contrib/llvm-project/lld/COFF/DLL.cpp
+++ b/contrib/llvm-project/lld/COFF/DLL.cpp
@@ -19,6 +19,7 @@
 
 #include "DLL.h"
 #include "Chunks.h"
+#include "SymbolTable.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Path.h"
@@ -653,9 +654,18 @@ void DelayLoadContents::create(Defined *h) {
         auto *c = make<HintNameChunk>(extName, 0);
         names.push_back(make<LookupChunk>(c));
         hintNames.push_back(c);
+        // Add a synthetic symbol for this load thunk, using the "__imp_load_"
+        // prefix, in case this thunk needs to be added to the list of valid
+        // call targets for Control Flow Guard.
+        StringRef symName = saver.save("__imp_load_" + extName);
+        s->loadThunkSym =
+            cast<DefinedSynthetic>(symtab->addSynthetic(symName, t));
       }
     }
     thunks.push_back(tm);
+    StringRef tmName =
+        saver.save("__tailMerge_" + syms[0]->getDLLName().lower());
+    symtab->addSynthetic(tmName, tm);
     // Terminate with null values.
     addresses.push_back(make<NullChunk>(8));
     names.push_back(make<NullChunk>(8));
diff --git a/contrib/llvm-project/lld/COFF/DebugTypes.cpp b/contrib/llvm-project/lld/COFF/DebugTypes.cpp
index abe3bb9eef5b..fedcb054540f 100644
--- a/contrib/llvm-project/lld/COFF/DebugTypes.cpp
+++ b/contrib/llvm-project/lld/COFF/DebugTypes.cpp
@@ -10,9 +10,12 @@
 #include "Chunks.h"
 #include "Driver.h"
 #include "InputFiles.h"
+#include "PDB.h"
 #include "TypeMerger.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "lld/Common/Timer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
@@ -20,7 +23,10 @@
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 
 using namespace llvm;
@@ -29,6 +35,8 @@ using namespace lld;
 using namespace lld::coff;
 
 namespace {
+class TypeServerIpiSource;
+
 // The TypeServerSource class represents a PDB type server, a file referenced by
 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
 // files, therefore there must be only once instance per OBJ lot. The file path
@@ -49,29 +57,54 @@ public:
     auto it = mappings.emplace(expectedInfo->getGuid(), this);
     assert(it.second);
     (void)it;
-    tsIndexMap.isTypeServerMap = true;
   }
 
-  Expected<const CVIndexMap *> mergeDebugT(TypeMerger *m,
-                                           CVIndexMap *indexMap) override;
+  Error mergeDebugT(TypeMerger *m) override;
+
+  void loadGHashes() override;
+  void remapTpiWithGHashes(GHashState *g) override;
+
   bool isDependency() const override { return true; }
 
   PDBInputFile *pdbInputFile = nullptr;
 
-  CVIndexMap tsIndexMap;
+  // TpiSource for IPI stream.
+  TypeServerIpiSource *ipiSrc = nullptr;
 
   static std::map<codeview::GUID, TypeServerSource *> mappings;
 };
 
+// Companion to TypeServerSource. Stores the index map for the IPI stream in the
+// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
+// invariant of one type index space per source.
+class TypeServerIpiSource : public TpiSource {
+public:
+  explicit TypeServerIpiSource() : TpiSource(PDBIpi, nullptr) {}
+
+  friend class TypeServerSource;
+
+  // All of the TpiSource methods are no-ops. The parent TypeServerSource
+  // handles both TPI and IPI.
+  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
+  void loadGHashes() override {}
+  void remapTpiWithGHashes(GHashState *g) override {}
+  bool isDependency() const override { return true; }
+};
+
 // This class represents the debug type stream of an OBJ file that depends on a
 // PDB type server (see TypeServerSource).
 class UseTypeServerSource : public TpiSource {
+  Expected<TypeServerSource *> getTypeServerSource();
+
 public:
   UseTypeServerSource(ObjFile *f, TypeServer2Record ts)
       : TpiSource(UsingPDB, f), typeServerDependency(ts) {}
 
-  Expected<const CVIndexMap *> mergeDebugT(TypeMerger *m,
-                                           CVIndexMap *indexMap) override;
+  Error mergeDebugT(TypeMerger *m) override;
+
+  // No need to load ghashes from /Zi objects.
+  void loadGHashes() override {}
+  void remapTpiWithGHashes(GHashState *g) override;
 
   // Information about the PDB type server dependency, that needs to be loaded
   // in before merging this OBJ.
@@ -92,14 +125,11 @@ public:
     if (!it.second)
       fatal("a PCH object with the same signature has already been provided (" +
             toString(it.first->second->file) + " and " + toString(file) + ")");
-    precompIndexMap.isPrecompiledTypeMap = true;
   }
 
-  Expected<const CVIndexMap *> mergeDebugT(TypeMerger *m,
-                                           CVIndexMap *indexMap) override;
-  bool isDependency() const override { return true; }
+  void loadGHashes() override;
 
-  CVIndexMap precompIndexMap;
+  bool isDependency() const override { return true; }
 
   static std::map<uint32_t, PrecompSource *> mappings;
 };
@@ -111,30 +141,62 @@ public:
   UsePrecompSource(ObjFile *f, PrecompRecord precomp)
       : TpiSource(UsingPCH, f), precompDependency(precomp) {}
 
-  Expected<const CVIndexMap *> mergeDebugT(TypeMerger *m,
-                                           CVIndexMap *indexMap) override;
+  Error mergeDebugT(TypeMerger *m) override;
+
+  void loadGHashes() override;
+  void remapTpiWithGHashes(GHashState *g) override;
+
+private:
+  Error mergeInPrecompHeaderObj();
 
+public:
   // Information about the Precomp OBJ dependency, that needs to be loaded in
   // before merging this OBJ.
   PrecompRecord precompDependency;
 };
 } // namespace
 
-static std::vector<TpiSource *> gc;
+std::vector<TpiSource *> TpiSource::instances;
+ArrayRef<TpiSource *> TpiSource::dependencySources;
+ArrayRef<TpiSource *> TpiSource::objectSources;
 
-TpiSource::TpiSource(TpiKind k, ObjFile *f) : kind(k), file(f) {
-  gc.push_back(this);
+TpiSource::TpiSource(TpiKind k, ObjFile *f)
+    : kind(k), tpiSrcIdx(instances.size()), file(f) {
+  instances.push_back(this);
 }
 
 // Vtable key method.
-TpiSource::~TpiSource() = default;
+TpiSource::~TpiSource() {
+  // Silence any assertions about unchecked errors.
+  consumeError(std::move(typeMergingError));
+}
+
+void TpiSource::sortDependencies() {
+  // Order dependencies first, but preserve the existing order.
+  std::vector<TpiSource *> deps;
+  std::vector<TpiSource *> objs;
+  for (TpiSource *s : instances)
+    (s->isDependency() ? deps : objs).push_back(s);
+  uint32_t numDeps = deps.size();
+  uint32_t numObjs = objs.size();
+  instances = std::move(deps);
+  instances.insert(instances.end(), objs.begin(), objs.end());
+  for (uint32_t i = 0, e = instances.size(); i < e; ++i)
+    instances[i]->tpiSrcIdx = i;
+  dependencySources = makeArrayRef(instances.data(), numDeps);
+  objectSources = makeArrayRef(instances.data() + numDeps, numObjs);
+}
 
 TpiSource *lld::coff::makeTpiSource(ObjFile *file) {
   return make<TpiSource>(TpiSource::Regular, file);
 }
 
 TpiSource *lld::coff::makeTypeServerSource(PDBInputFile *pdbInputFile) {
-  return make<TypeServerSource>(pdbInputFile);
+  // Type server sources come in pairs: the TPI stream, and the IPI stream.
+  auto *tpiSource = make<TypeServerSource>(pdbInputFile);
+  if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
+    tpiSource->ipiSrc = make<TypeServerIpiSource>();
+  return tpiSource;
 }
 
 TpiSource *lld::coff::makeUseTypeServerSource(ObjFile *file,
@@ -151,14 +213,68 @@ TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file,
   return make<UsePrecompSource>(file, precomp);
 }
 
-void TpiSource::forEachSource(llvm::function_ref<void(TpiSource *)> fn) {
-  for_each(gc, fn);
-}
-
 std::map<codeview::GUID, TypeServerSource *> TypeServerSource::mappings;
 
 std::map<uint32_t, PrecompSource *> PrecompSource::mappings;
 
+bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
+  if (ti.isSimple())
+    return true;
+
+  // This can be an item index or a type index. Choose the appropriate map.
+  ArrayRef<TypeIndex> tpiOrIpiMap =
+      (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
+  if (ti.toArrayIndex() >= tpiOrIpiMap.size())
+    return false;
+  ti = tpiOrIpiMap[ti.toArrayIndex()];
+  return true;
+}
+
+void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
+                            ArrayRef<TiReference> typeRefs) {
+  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
+  for (const TiReference &ref : typeRefs) {
+    unsigned byteSize = ref.Count * sizeof(TypeIndex);
+    if (contents.size() < ref.Offset + byteSize)
+      fatal("symbol record too short");
+
+    MutableArrayRef<TypeIndex> indices(
+        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
+    for (TypeIndex &ti : indices) {
+      if (!remapTypeIndex(ti, ref.Kind)) {
+        if (config->verbose) {
+          uint16_t kind =
+              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
+          StringRef fname = file ? file->getName() : "<unknown PDB>";
+          log("failed to remap type index in record of kind 0x" +
+              utohexstr(kind) + " in " + fname + " with bad " +
+              (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
+              " index 0x" + utohexstr(ti.getIndex()));
+        }
+        ti = TypeIndex(SimpleTypeKind::NotTranslated);
+        continue;
+      }
+    }
+  }
+}
+
+void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
+  // TODO: Handle errors similar to symbols.
+  SmallVector<TiReference, 32> typeRefs;
+  discoverTypeIndices(CVType(rec), typeRefs);
+  remapRecord(rec, typeRefs);
+}
+
+bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
+  // Discover type index references in the record. Skip it if we don't
+  // know where they are.
+  SmallVector<TiReference, 32> typeRefs;
+  if (!discoverTypeIndicesInSymbol(rec, typeRefs))
+    return false;
+  remapRecord(rec, typeRefs);
+  return true;
+}
+
 // A COFF .debug$H section is currently a clang extension.  This function checks
 // if a .debug$H section is in a format that we expect / understand, so that we
 // can ignore any sections which are coincidentally also named .debug$H but do
@@ -189,46 +305,35 @@ static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
 static ArrayRef<GloballyHashedType>
 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
   assert(canUseDebugH(debugH));
-
   debugH = debugH.drop_front(sizeof(object::debug_h_header));
   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
 }
 
 // Merge .debug$T for a generic object file.
-Expected<const CVIndexMap *> TpiSource::mergeDebugT(TypeMerger *m,
-                                                    CVIndexMap *indexMap) {
+Error TpiSource::mergeDebugT(TypeMerger *m) {
+  assert(!config->debugGHashes &&
+         "use remapTpiWithGHashes when ghash is enabled");
+
   CVTypeArray types;
   BinaryStreamReader reader(file->debugTypes, support::little);
   cantFail(reader.readArray(types, reader.getLength()));
 
   // When dealing with PCH.OBJ, some indices were already merged.
-  unsigned nbHeadIndices = indexMap->tpiMap.size();
-
-  if (config->debugGHashes) {
-    ArrayRef<GloballyHashedType> hashes;
-    std::vector<GloballyHashedType> ownedHashes;
-    if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file))
-      hashes = getHashesFromDebugH(*debugH);
-    else {
-      ownedHashes = GloballyHashedType::hashTypes(types);
-      hashes = ownedHashes;
-    }
+  unsigned nbHeadIndices = indexMapStorage.size();
 
-    if (auto err = mergeTypeAndIdRecords(m->globalIDTable, m->globalTypeTable,
-                                         indexMap->tpiMap, types, hashes,
-                                         file->pchSignature))
-      fatal("codeview::mergeTypeAndIdRecords failed: " +
-            toString(std::move(err)));
-  } else {
-    if (auto err =
-            mergeTypeAndIdRecords(m->idTable, m->typeTable, indexMap->tpiMap,
-                                  types, file->pchSignature))
-      fatal("codeview::mergeTypeAndIdRecords failed: " +
-            toString(std::move(err)));
-  }
+  if (auto err = mergeTypeAndIdRecords(
+          m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
+    fatal("codeview::mergeTypeAndIdRecords failed: " +
+          toString(std::move(err)));
+
+  // In an object, there is only one mapping for both types and items.
+  tpiMap = indexMapStorage;
+  ipiMap = indexMapStorage;
 
   if (config->showSummary) {
+    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
+    nbTypeRecordsBytes = reader.getLength();
     // Count how many times we saw each type record in our input. This
     // calculation requires a second pass over the type records to classify each
     // record as a type or index. This is slow, but this code executes when
@@ -237,7 +342,7 @@ Expected<const CVIndexMap *> TpiSource::mergeDebugT(TypeMerger *m,
     m->ipiCounts.resize(m->getIDTable().size());
     uint32_t srcIdx = nbHeadIndices;
     for (CVType &ty : types) {
-      TypeIndex dstIdx = indexMap->tpiMap[srcIdx++];
+      TypeIndex dstIdx = tpiMap[srcIdx++];
       // Type merging may fail, so a complex source type may become the simple
       // NotTranslated type, which cannot be used as an array index.
       if (dstIdx.isSimple())
@@ -248,12 +353,14 @@ Expected<const CVIndexMap *> TpiSource::mergeDebugT(TypeMerger *m,
     }
   }
 
-  return indexMap;
+  return Error::success();
 }
 
 // Merge types from a type server PDB.
-Expected<const CVIndexMap *> TypeServerSource::mergeDebugT(TypeMerger *m,
-                                                           CVIndexMap *) {
+Error TypeServerSource::mergeDebugT(TypeMerger *m) {
+  assert(!config->debugGHashes &&
+         "use remapTpiWithGHashes when ghash is enabled");
+
   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
   if (auto e = expectedTpi.takeError())
@@ -266,62 +373,44 @@ Expected<const CVIndexMap *> TypeServerSource::mergeDebugT(TypeMerger *m,
     maybeIpi = &*expectedIpi;
   }
 
-  if (config->debugGHashes) {
-    // PDBs do not actually store global hashes, so when merging a type server
-    // PDB we have to synthesize global hashes.  To do this, we first synthesize
-    // global hashes for the TPI stream, since it is independent, then we
-    // synthesize hashes for the IPI stream, using the hashes for the TPI stream
-    // as inputs.
-    auto tpiHashes = GloballyHashedType::hashTypes(expectedTpi->typeArray());
-    Optional<uint32_t> endPrecomp;
-    // Merge TPI first, because the IPI stream will reference type indices.
-    if (auto err =
-            mergeTypeRecords(m->globalTypeTable, tsIndexMap.tpiMap,
-                             expectedTpi->typeArray(), tpiHashes, endPrecomp))
-      fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
-
-    // Merge IPI.
-    if (maybeIpi) {
-      auto ipiHashes =
-          GloballyHashedType::hashIds(maybeIpi->typeArray(), tpiHashes);
-      if (auto err = mergeIdRecords(m->globalIDTable, tsIndexMap.tpiMap,
-                                    tsIndexMap.ipiMap, maybeIpi->typeArray(),
-                                    ipiHashes))
-        fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
-    }
-  } else {
-    // Merge TPI first, because the IPI stream will reference type indices.
-    if (auto err = mergeTypeRecords(m->typeTable, tsIndexMap.tpiMap,
-                                    expectedTpi->typeArray()))
-      fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
-
-    // Merge IPI.
-    if (maybeIpi) {
-      if (auto err = mergeIdRecords(m->idTable, tsIndexMap.tpiMap,
-                                    tsIndexMap.ipiMap, maybeIpi->typeArray()))
-        fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
-    }
+  // Merge TPI first, because the IPI stream will reference type indices.
+  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
+                                  expectedTpi->typeArray()))
+    fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
+  tpiMap = indexMapStorage;
+
+  // Merge IPI.
+  if (maybeIpi) {
+    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
+                                  maybeIpi->typeArray()))
+      fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
+    ipiMap = ipiSrc->indexMapStorage;
   }
 
   if (config->showSummary) {
+    nbTypeRecords = tpiMap.size() + ipiMap.size();
+    nbTypeRecordsBytes =
+        expectedTpi->typeArray().getUnderlyingStream().getLength() +
+        (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
+                  : 0);
+
     // Count how many times we saw each type record in our input. If a
     // destination type index is present in the source to destination type index
     // map, that means we saw it once in the input. Add it to our histogram.
     m->tpiCounts.resize(m->getTypeTable().size());
     m->ipiCounts.resize(m->getIDTable().size());
-    for (TypeIndex ti : tsIndexMap.tpiMap)
+    for (TypeIndex ti : tpiMap)
       if (!ti.isSimple())
         ++m->tpiCounts[ti.toArrayIndex()];
-    for (TypeIndex ti : tsIndexMap.ipiMap)
+    for (TypeIndex ti : ipiMap)
       if (!ti.isSimple())
         ++m->ipiCounts[ti.toArrayIndex()];
   }
 
-  return &tsIndexMap;
+  return Error::success();
 }
 
-Expected<const CVIndexMap *>
-UseTypeServerSource::mergeDebugT(TypeMerger *m, CVIndexMap *indexMap) {
+Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
   const codeview::GUID &tsId = typeServerDependency.getGuid();
   StringRef tsPath = typeServerDependency.getName();
 
@@ -341,21 +430,31 @@ UseTypeServerSource::mergeDebugT(TypeMerger *m, CVIndexMap *indexMap) {
 
     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
   }
+  return tsSrc;
+}
+
+Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
+  Expected<TypeServerSource *> tsSrc = getTypeServerSource();
+  if (!tsSrc)
+    return tsSrc.takeError();
 
-  pdb::PDBFile &pdbSession = tsSrc->pdbInputFile->session->getPDBFile();
+  pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
   auto expectedInfo = pdbSession.getPDBInfoStream();
   if (!expectedInfo)
-    return &tsSrc->tsIndexMap;
+    return expectedInfo.takeError();
 
   // Just because a file with a matching name was found and it was an actual
   // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
   // must match the GUID specified in the TypeServer2 record.
   if (expectedInfo->getGuid() != typeServerDependency.getGuid())
     return createFileError(
-        tsPath,
+        typeServerDependency.getName(),
         make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
 
-  return &tsSrc->tsIndexMap;
+  // Reuse the type index map of the type server.
+  tpiMap = (*tsSrc)->tpiMap;
+  ipiMap = (*tsSrc)->ipiMap;
+  return Error::success();
 }
 
 static bool equalsPath(StringRef path1, StringRef path2) {
@@ -380,25 +479,28 @@ static PrecompSource *findObjByName(StringRef fileNameOnly) {
   return nullptr;
 }
 
-Expected<const CVIndexMap *> findPrecompMap(ObjFile *file, PrecompRecord &pr) {
+static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) {
   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
   // the paths embedded in the OBJs are in the Windows format.
   SmallString<128> prFileName =
       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
 
-  PrecompSource *precomp;
   auto it = PrecompSource::mappings.find(pr.getSignature());
   if (it != PrecompSource::mappings.end()) {
-    precomp = it->second;
-  } else {
-    // Lookup by name
-    precomp = findObjByName(prFileName);
+    return it->second;
   }
+  // Lookup by name
+  return findObjByName(prFileName);
+}
+
+static Expected<PrecompSource *> findPrecompMap(ObjFile *file,
+                                                PrecompRecord &pr) {
+  PrecompSource *precomp = findPrecompSource(file, pr);
 
   if (!precomp)
     return createFileError(
-        prFileName,
+        pr.getPrecompFilePath(),
         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
 
   if (pr.getSignature() != file->pchSignature)
@@ -411,63 +513,41 @@ Expected<const CVIndexMap *> findPrecompMap(ObjFile *file, PrecompRecord &pr) {
         toString(precomp->file),
         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
 
-  return &precomp->precompIndexMap;
+  return precomp;
 }
 
 /// Merges a precompiled headers TPI map into the current TPI map. The
 /// precompiled headers object will also be loaded and remapped in the
 /// process.
-static Expected<const CVIndexMap *>
-mergeInPrecompHeaderObj(ObjFile *file, CVIndexMap *indexMap,
-                        PrecompRecord &precomp) {
-  auto e = findPrecompMap(file, precomp);
+Error UsePrecompSource::mergeInPrecompHeaderObj() {
+  auto e = findPrecompMap(file, precompDependency);
   if (!e)
     return e.takeError();
 
-  const CVIndexMap *precompIndexMap = *e;
-  assert(precompIndexMap->isPrecompiledTypeMap);
+  PrecompSource *precompSrc = *e;
+  if (precompSrc->tpiMap.empty())
+    return Error::success();
 
-  if (precompIndexMap->tpiMap.empty())
-    return precompIndexMap;
-
-  assert(precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex);
-  assert(precomp.getTypesCount() <= precompIndexMap->tpiMap.size());
+  assert(precompDependency.getStartTypeIndex() ==
+         TypeIndex::FirstNonSimpleIndex);
+  assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
   // Use the previously remapped index map from the precompiled headers.
-  indexMap->tpiMap.append(precompIndexMap->tpiMap.begin(),
-                          precompIndexMap->tpiMap.begin() +
-                              precomp.getTypesCount());
-  return indexMap;
+  indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
+                         precompSrc->tpiMap.begin() +
+                             precompDependency.getTypesCount());
+
+  return Error::success();
 }
 
-Expected<const CVIndexMap *>
-UsePrecompSource::mergeDebugT(TypeMerger *m, CVIndexMap *indexMap) {
+Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
   // This object was compiled with /Yu, so process the corresponding
   // precompiled headers object (/Yc) first. Some type indices in the current
   // object are referencing data in the precompiled headers object, so we need
   // both to be loaded.
-  auto e = mergeInPrecompHeaderObj(file, indexMap, precompDependency);
-  if (!e)
-    return e.takeError();
+  if (Error e = mergeInPrecompHeaderObj())
+    return e;
 
-  // Drop LF_PRECOMP record from the input stream, as it has been replaced
-  // with the precompiled headers Type stream in the mergeInPrecompHeaderObj()
-  // call above. Note that we can't just call Types.drop_front(), as we
-  // explicitly want to rebase the stream.
-  CVTypeArray types;
-  BinaryStreamReader reader(file->debugTypes, support::little);
-  cantFail(reader.readArray(types, reader.getLength()));
-  auto firstType = types.begin();
-  file->debugTypes = file->debugTypes.drop_front(firstType->RecordData.size());
-
-  return TpiSource::mergeDebugT(m, indexMap);
-}
-
-Expected<const CVIndexMap *> PrecompSource::mergeDebugT(TypeMerger *m,
-                                                        CVIndexMap *) {
-  // Note that we're not using the provided CVIndexMap. Instead, we use our
-  // local one. Precompiled headers objects need to save the index map for
-  // further reference by other objects which use the precompiled headers.
-  return TpiSource::mergeDebugT(m, &precompIndexMap);
+  return TpiSource::mergeDebugT(m);
 }
 
 uint32_t TpiSource::countTypeServerPDBs() {
@@ -479,7 +559,633 @@ uint32_t TpiSource::countPrecompObjs() {
 }
 
 void TpiSource::clear() {
-  gc.clear();
+  // Clean up any owned ghash allocations.
+  clearGHashes();
+  TpiSource::instances.clear();
   TypeServerSource::mappings.clear();
   PrecompSource::mappings.clear();
 }
+
+//===----------------------------------------------------------------------===//
+// Parallel GHash type merging implementation.
+//===----------------------------------------------------------------------===//
+
+void TpiSource::loadGHashes() {
+  Optional<ArrayRef<uint8_t>> precomputed = getDebugH(file);
+  if (!precomputed) { // Nothing from getDebugH: hash file->debugTypes here.
+    CVTypeArray records;
+    BinaryStreamReader typeReader(file->debugTypes, support::little);
+    cantFail(typeReader.readArray(records, typeReader.getLength()));
+    assignGHashesFromVector(GloballyHashedType::hashTypes(records));
+  } else {
+    ghashes = getHashesFromDebugH(*precomputed);
+    ownedGHashes = false; // Not allocated by assignGHashesFromVector.
+  }
+  fillIsItemIndexFromDebugT();
+}
+
+// Copy the ghashes out of hashVec into an exactly sized heap array. They are
+// long lived, so one copy is worth the time to reduce memory usage.
+void TpiSource::assignGHashesFromVector(
+    std::vector<GloballyHashedType> &&hashVec) {
+  const size_t count = hashVec.size();
+  auto *owned = new GloballyHashedType[count];
+  memcpy(owned, hashVec.data(), count * sizeof(GloballyHashedType));
+  ghashes = makeArrayRef(owned, count);
+  ownedGHashes = true; // The array above was allocated by this source.
+}
+
+// Calls fn on each CodeView type record in a flat byte buffer. Faster than
+// iterating CVTypeArray: it avoids virtual readBytes calls in inner loops.
+static void forEachTypeChecked(ArrayRef<uint8_t> types,
+                               function_ref<void(const CVType &)> fn) {
+  auto visit = [fn](const CVType &rec) -> Error {
+    fn(rec);
+    return Error::success();
+  };
+  checkError(forEachCodeViewRecord<CVType>(types, visit));
+}
+
+// Walk file->debugTypes and set bit i of isItemIndex if record i is an id.
+// TODO: Store this information in .debug$H so that we don't have to recompute
+// it. This is the main bottleneck slowing down parallel ghashing with one
+// thread over single-threaded ghashing.
+void TpiSource::fillIsItemIndexFromDebugT() {
+  isItemIndex.resize(ghashes.size());
+  uint32_t recIdx = 0;
+  forEachTypeChecked(file->debugTypes, [&](const CVType &rec) {
+    if (isIdRecord(rec.kind()))
+      isItemIndex.set(recIdx);
+    ++recIdx;
+  });
+}
+
+// Appends record ty (source index curIndex) to mergedIpi or mergedTpi, padded
+// to a 4-byte multiple with its type indices remapped and its hash recorded.
+void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
+  MergedInfo &merged = isIdRecord(ty.kind()) ? mergedIpi : mergedTpi;
+
+  // Copy just the record's own bytes; the aligned tail is padded below.
+  assert(ty.length() <= codeview::MaxRecordLength);
+  size_t offset = merged.recs.size();
+  size_t newSize = alignTo(ty.length(), 4);
+  merged.recs.resize(offset + newSize);
+  auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
+  memcpy(newRec.data(), ty.data().data(), ty.length());
+
+  // Fix up the record prefix and padding bytes if it required resizing.
+  if (newSize != ty.length()) {
+    reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
+    for (size_t i = ty.length(); i < newSize; ++i)
+      newRec[i] = LF_PAD0 + (newSize - i);
+  }
+
+  // Remap the type indices in the new record.
+  remapTypesInTypeRecord(newRec);
+  uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
+  merged.recSizes.push_back(static_cast<uint16_t>(newSize));
+  merged.recHashes.push_back(pdbHash);
+
+  // Retain a mapping from PDB function id to PDB function type. This mapping is
+  // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
+  // symbols.
+  if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
+    // Bytes [8, 12) hold the function type; read them only if present.
+    TypeIndex funcId = curIndex;
+    bool success =
+        ty.length() >= 12 && remapTypeIndex(funcId, TiRefKind::IndexRef);
+    if (success) {
+      TypeIndex funcType =
+          *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
+      funcIdToType.push_back({funcId, funcType});
+    } else {
+      StringRef fname = file ? file->getName() : "<unknown PDB>";
+      warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
+           " in " + fname);
+    }
+  }
+}
+
+void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
+                                       TypeIndex beginIndex) {
+  // The walks below need uniqueTypes sorted by index; PDB lists must be.
+  if (kind == PDB)
+    assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
+  else
+    llvm::sort(uniqueTypes);
+
+  // Unique records are accumulated into the mergedTpi and mergedIpi buffers.
+  uint32_t ghashIndex = 0;
+  auto nextUniqueIndex = uniqueTypes.begin();
+  assert(mergedTpi.recs.empty());
+  assert(mergedIpi.recs.empty());
+
+  // First pass: sum the padded byte sizes (not record counts) to reserve once.
+  unsigned nbTpiRecs = 0;
+  unsigned nbIpiRecs = 0;
+  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
+    if (nextUniqueIndex != uniqueTypes.end() &&
+        *nextUniqueIndex == ghashIndex) {
+      assert(ty.length() <= codeview::MaxRecordLength);
+      size_t newSize = alignTo(ty.length(), 4);
+      (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
+      ++nextUniqueIndex;
+    }
+    ++ghashIndex;
+  });
+  mergedTpi.recs.reserve(nbTpiRecs);
+  mergedIpi.recs.reserve(nbIpiRecs);
+
+  // Second pass: do the actual type merge.
+  ghashIndex = 0;
+  nextUniqueIndex = uniqueTypes.begin();
+  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
+    if (nextUniqueIndex != uniqueTypes.end() &&
+        *nextUniqueIndex == ghashIndex) {
+      mergeTypeRecord(beginIndex + ghashIndex, ty);
+      ++nextUniqueIndex;
+    }
+    ++ghashIndex;
+  });
+  assert(nextUniqueIndex == uniqueTypes.end() &&
+         "failed to merge all desired records");
+  assert(uniqueTypes.size() ==
+             mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
+         "missing desired record");
+}
+
+void TpiSource::remapTpiWithGHashes(GHashState *g) {
+  assert(config->debugGHashes && "ghashes must be enabled");
+  fillMapFromGHashes(g);
+  // Type and item indices share the single map kept in indexMapStorage.
+  ipiMap = indexMapStorage;
+  tpiMap = indexMapStorage;
+  mergeUniqueTypeRecords(file->debugTypes);
+  // TODO: Free all unneeded ghash resources now that we have a full index map.
+  if (!config->showSummary)
+    return;
+  nbTypeRecords = ghashes.size();
+  nbTypeRecordsBytes = file->debugTypes.size();
+}
+
+// PDBs do not actually store global hashes, so when merging a type server
+// PDB they have to be synthesized. The TPI stream is independent, so it is
+// hashed first; the IPI stream is hashed afterwards with the TPI hashes as
+// inputs, and its hashes are stored on ipiSrc. Failing to get either stream
+// is fatal.
+void TypeServerSource::loadGHashes() {
+  // Hashes already present: nothing left to do.
+  if (!ghashes.empty())
+    return;
+  pdb::PDBFile &tsFile = pdbInputFile->session->getPDBFile();
+
+  // Hash TPI stream.
+  Expected<pdb::TpiStream &> tpiOrErr = tsFile.getPDBTpiStream();
+  if (Error e = tpiOrErr.takeError())
+    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
+  assignGHashesFromVector(
+      GloballyHashedType::hashTypes(tpiOrErr->typeArray()));
+  isItemIndex.resize(ghashes.size());
+
+  // The IPI stream is optional. Its hashes build on the TPI ghashes above.
+  if (tsFile.hasPDBIpiStream()) {
+    Expected<pdb::TpiStream &> ipiOrErr = tsFile.getPDBIpiStream();
+    if (Error e = ipiOrErr.takeError())
+      fatal("error retrieving IPI stream: " + toString(std::move(e)));
+    ipiSrc->assignGHashesFromVector(
+        GloballyHashedType::hashIds(ipiOrErr->typeArray(), ghashes));
+    // The IPI stream isItemIndex bitvector should be all ones.
+    const size_t numIds = ipiSrc->ghashes.size();
+    ipiSrc->isItemIndex.resize(numIds);
+    ipiSrc->isItemIndex.set(0, numIds);
+  }
+}
+
+// Flatten a discontiguous PDB type array into one byte range so that
+// forEachTypeChecked can be used instead of CVTypeArray iteration. Copying
+// all types from type servers is faster than iterating all object files
+// compiled with /Z7 with CVTypeArray, which has high overheads due to the
+// virtual interface of BinaryStream::readBytes.
+static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
+  ArrayRef<uint8_t> flat;
+  BinaryStreamRef src = types.getUnderlyingStream();
+  checkError(src.readBytes(0, src.getLength(), flat));
+  return flat;
+}
+
+// Remap and merge the unique types of a type server PDB using ghashes.
+void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
+  assert(config->debugGHashes && "ghashes must be enabled");
+
+  // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
+  // propagate errors, those should've been handled during ghash loading.
+  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
+  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
+  fillMapFromGHashes(g);
+  tpiMap = indexMapStorage;
+  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
+  if (pdbFile.hasPDBIpiStream()) {
+    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
+    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
+    ipiSrc->fillMapFromGHashes(g);
+    ipiMap = ipiSrc->indexMapStorage;
+    ipiSrc->tpiMap = tpiMap;
+    ipiSrc->ipiMap = ipiMap;
+    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
+    // The summary counters are first set from the IPI stream's totals.
+    if (config->showSummary) {
+      nbTypeRecords = ipiSrc->ghashes.size();
+      nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
+    }
+  }
+  // The TPI stream's totals are then added on top.
+  if (config->showSummary) {
+    nbTypeRecords += ghashes.size();
+    nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
+  }
+}
+
+void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
+  // A /Zi object has nothing of its own to remap; it reuses the index maps of
+  // its type server. Errors should have been reported earlier; one seen here
+  // is recorded in typeMergingError. Symbols from this object will be ignored.
+  Expected<TypeServerSource *> serverOrErr = getTypeServerSource();
+  if (serverOrErr) {
+    TypeServerSource *server = *serverOrErr;
+    tpiMap = server->tpiMap;
+    ipiMap = server->ipiMap;
+    return;
+  }
+  typeMergingError =
+      joinErrors(std::move(typeMergingError), serverOrErr.takeError());
+}
+
+// Hashes every record of this PCH object's debugTypes locally; a .debug$H
+// section, if present, is ignored with a warning.
+void PrecompSource::loadGHashes() {
+  if (getDebugH(file))
+    warn("ignoring .debug$H section; pch with ghash is not implemented");
+
+  std::vector<GloballyHashedType> hashes;
+  forEachTypeChecked(file->debugTypes, [&](const CVType &rec) {
+    // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
+    // the PDB. It still receives a ghash entry below so that the type indexes
+    // of the following records in the /Yc PCH object line up. The index of the
+    // current record equals the number of hashes collected so far.
+    if (rec.kind() == LF_ENDPRECOMP)
+      endPrecompGHashIdx = static_cast<uint32_t>(hashes.size());
+
+    hashes.push_back(GloballyHashedType::hashType(rec, hashes, hashes));
+    isItemIndex.push_back(isIdRecord(rec.kind()));
+  });
+  assignGHashesFromVector(std::move(hashes));
+}
+
+// Computes the ghashes of a /Yu object, seeded with the ghashes of its /Yc
+// PCH object. Does nothing when no matching PCH source is found.
+void UsePrecompSource::loadGHashes() {
+  PrecompSource *pch = findPrecompSource(file, precompDependency);
+  if (!pch)
+    return;
+
+  // Records in a /Yu object can refer to types from the PCH, so hashing has to
+  // start from the PCH's ghashes. Once hashing is done that prefix is dropped
+  // again so we don't unnecessarily try to deduplicate the PCH's own types.
+  const uint32_t pchCount = precompDependency.getTypesCount();
+  std::vector<GloballyHashedType> hashes = pch->ghashes.take_front(pchCount);
+  forEachTypeChecked(file->debugTypes, [&](const CVType &rec) {
+    hashes.push_back(GloballyHashedType::hashType(rec, hashes, hashes));
+    isItemIndex.push_back(isIdRecord(rec.kind()));
+  });
+  hashes.erase(hashes.begin(), hashes.begin() + pchCount);
+  assignGHashesFromVector(std::move(hashes));
+}
+
+void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
+  fillMapFromGHashes(g);
+  // This object was compiled with /Yu and some of its type indices refer to
+  // data in the precompiled headers object (/Yc), so the index map of that
+  // object is merged in first. A failure is recorded in typeMergingError and
+  // ends the remap early.
+  if (Error e = mergeInPrecompHeaderObj()) {
+    typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
+    return;
+  }
+
+  tpiMap = indexMapStorage;
+  ipiMap = indexMapStorage;
+  TypeIndex firstOwn(precompDependency.getStartTypeIndex() +
+                     precompDependency.getTypesCount());
+  mergeUniqueTypeRecords(file->debugTypes, firstOwn);
+  if (!config->showSummary)
+    return;
+  nbTypeRecords = ghashes.size();
+  nbTypeRecordsBytes = file->debugTypes.size();
+}
+
+namespace {
+/// A concurrent hash table for global type hashing. It is based on this paper:
+/// Concurrent Hash Tables: Fast and General(?)!
+/// https://dl.acm.org/doi/10.1145/3309206
+///
+/// This hash table is meant to be used in two phases: concurrent insertions
+/// first, then concurrent reads. It does not support lookup, deletion, or
+/// rehashing. It uses linear probing.
+///
+/// The paper describes storing a key-value pair in two machine words.
+/// Generally, the values stored in this map are type indices, and we can use
+/// those values to recover the ghash key from a side table. This allows us to
+/// shrink the table entries further at the cost of some loads, and sidesteps
+/// the need for a 128 bit atomic compare-and-swap operation.
+///
+/// During insertion, a priority function is used to decide which insertion
+/// should be preferred. This ensures that the output is deterministic. For
+/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
+class GHashCell;
+struct GHashTable {
+  GHashCell *table = nullptr;
+  uint32_t tableSize = 0;
+
+  GHashTable() = default;
+  GHashTable(const GHashTable &) = delete; // `table` has a single owner.
+  GHashTable &operator=(const GHashTable &) = delete;
+  ~GHashTable();
+
+  /// Initialize the table with the given size. Because the table cannot be
+  /// resized, the initial size of the table must be large enough to contain all
+  /// inputs, or insertion may not be able to find an empty cell.
+  void init(uint32_t newTableSize);
+
+  /// Insert the cell with the given ghash into the table. Return the insertion
+  /// position in the table. It is safe for the caller to store the insertion
+  /// position because the table cannot be resized.
+  uint32_t insert(GloballyHashedType ghash, GHashCell newCell);
+};
+
+/// One ghash table cell; it names a single type record of a single TpiSource.
+class GHashCell {
+  uint64_t data = 0;
+
+public:
+  GHashCell() = default;
+
+  // Bit layout, most to least significant, chosen so that sorting works well:
+  // - bit 63:      isItem
+  // - bits 32..62: tpiSrcIdx + 1
+  // - bits 0..31:  ghashIdx
+  // The tpiSrcIdx is biased by one so that the 0th record from the 0th source
+  // has a non-zero representation.
+  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
+      : data((static_cast<uint64_t>(isItem) << 63) |
+             (static_cast<uint64_t>(tpiSrcIdx + 1) << 32) | ghashIdx) {
+    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
+    assert(ghashIdx == getGHashIdx() && "round trip failure");
+  }
+
+  explicit GHashCell(uint64_t data) : data(data) {}
+
+  // A cell whose bits are all zero is the empty cell.
+  bool isEmpty() const { return !data; }
+
+  /// Recover the tpiSrcIdx, undoing the bias of one applied on construction.
+  uint32_t getTpiSrcIdx() const {
+    return (static_cast<uint32_t>(data >> 32) & 0x7FFFFFFF) - 1;
+  }
+
+  /// Recover the index into the ghash array of the TpiSource.
+  uint32_t getGHashIdx() const { return static_cast<uint32_t>(data); }
+
+  bool isItem() const { return (data >> 63) != 0; }
+
+  /// Look up the ghash key of this cell in the owning TpiSource.
+  GloballyHashedType getGHash() const {
+    return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()];
+  }
+
+  /// The priority function for the cell. Cells compare by their packed data,
+  /// so lower tpiSrcIdx and ghashIdx values are preferred, which means that
+  /// type records from earlier sources are more likely to prevail.
+  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
+    return r.data > l.data;
+  }
+};
+} // namespace
+
+namespace lld {
+namespace coff {
+/// GHashTable lives in an anonymous namespace above; this wrapper gives it a
+/// name with external linkage so that a header can refer to it.
+struct GHashState {
+  GHashTable table;
+};
+} // namespace coff
+} // namespace lld
+
+GHashTable::~GHashTable() { delete[] table; } // Pairs with new[] in init().
+
+void GHashTable::init(uint32_t newTableSize) {
+  table = new GHashCell[newTableSize];
+  tableSize = newTableSize;
+  memset(table, 0, sizeof(GHashCell) * tableSize); // All-zero cells are empty.
+}
+
+uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
+  assert(!newCell.isEmpty() && "cannot insert empty cell value");
+
+  // FIXME: The low bytes of SHA1 have low entropy for short records, which
+  // type records are. Swap the byte order for better entropy. A better ghash
+  // won't need this.
+  uint32_t startIdx =
+      ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
+
+  // Do a linear probe starting at startIdx; idx is the position returned.
+  uint32_t idx = startIdx;
+  while (true) {
+    // Run a compare and swap loop. There are four cases:
+    // - cell is empty: CAS into place and return
+    // - cell has matching key, earlier priority: do nothing, return
+    // - cell has matching key, later priority: CAS into place and return
+    // - cell has non-matching key: hash collision, probe next cell
+    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
+    GHashCell oldCell(cellPtr->load());
+    while (oldCell.isEmpty() || oldCell.getGHash() == ghash) {
+      // Check if there is an existing ghash entry with a higher priority
+      // (earlier ordering). If so, this is a duplicate, we are done.
+      if (!oldCell.isEmpty() && oldCell < newCell)
+        return idx;
+      // Either the cell is empty, or our value is higher priority. Try to
+      // compare and swap. If it succeeds, we are done.
+      if (cellPtr->compare_exchange_weak(oldCell, newCell))
+        return idx;
+      // If the CAS failed, oldCell was reloaded; check this cell again.
+    }
+
+    // Advance the probe. Wrap around to the beginning if we run off the end.
+    ++idx;
+    idx = idx == tableSize ? 0 : idx;
+    if (idx == startIdx) {
+      // If this becomes an issue, we could mark failure and rehash from the
+      // beginning with a bigger table. There is no difference between rehashing
+      // internally and starting over.
+      report_fatal_error("ghash table is full");
+    }
+  }
+  llvm_unreachable("left infloop");
+}
+
+TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc)
+    : typeTable(alloc), idTable(alloc) {} // Both tables receive alloc.
+
+TypeMerger::~TypeMerger() = default; // Defined here, out of line.
+
+void TypeMerger::mergeTypesWithGHash() {
+  // Load ghashes. Do type servers and PCH objects first.
+  {
+    ScopedTimer t1(loadGHashTimer);
+    parallelForEach(TpiSource::dependencySources,
+                    [&](TpiSource *source) { source->loadGHashes(); });
+    parallelForEach(TpiSource::objectSources,
+                    [&](TpiSource *source) { source->loadGHashes(); });
+  }
+
+  ScopedTimer t2(mergeGHashTimer);
+  GHashState ghashState;
+
+  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
+  // be larger than the number of unique types, or hash table insertion may not
+  // be able to find a vacant slot. Summing the input types guarantees this, but
+  // it is a gross overestimate. The table size could be reduced to save memory,
+  // but it would require implementing rehashing, and this table is generally
+  // small compared to total memory usage, at eight bytes per input type record,
+  // and most input type records are larger than eight bytes.
+  size_t tableSize = 0;
+  for (TpiSource *source : TpiSource::instances)
+    tableSize += source->ghashes.size();
+
+  // Cap the table size so that we can use 32-bit cell indices. Type indices are
+  // also 32-bit, so this is an inherent PDB file format limit anyway.
+  tableSize = std::min(size_t(INT32_MAX), tableSize);
+  ghashState.table.init(static_cast<uint32_t>(tableSize));
+
+  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
+  // the contents of the hash table cell, but we can remember the insertion
+  // position. Because the table does not rehash, the position will not change
+  // under insertion. After insertion is done, the value of the cell can be read
+  // to retrieve the final PDB type index.
+  parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) {
+    TpiSource *source = TpiSource::instances[tpiSrcIdx];
+    source->indexMapStorage.resize(source->ghashes.size());
+    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
+      if (source->shouldOmitFromPdb(i)) {
+        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
+        continue;
+      }
+      GloballyHashedType ghash = source->ghashes[i];
+      bool isItem = source->isItemIndex.test(i);
+      uint32_t cellIdx =
+          ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i));
+
+      // Store the ghash cell index as a type index in indexMapStorage. Later
+      // we will replace it with the PDB type index.
+      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
+    }
+  });
+
+  // Collect all non-empty cells and sort them. This will implicitly assign
+  // destination type indices, and partition the entries into type records and
+  // item records. It arranges types in this order:
+  // - type records
+  //   - source 0, type 0...
+  //   - source 1, type 1...
+  // - item records
+  //   - source 0, type 1...
+  //   - source 1, type 0...
+  std::vector<GHashCell> entries;
+  for (const GHashCell &cell :
+       makeArrayRef(ghashState.table.table, tableSize)) {
+    if (!cell.isEmpty())
+      entries.push_back(cell);
+  }
+  parallelSort(entries, std::less<GHashCell>());
+  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
+              tableSize ? double(entries.size()) / tableSize : 0,
+              entries.size(), tableSize));
+
+  // Find out how many type and item indices there are.
+  auto mid =
+      std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
+  assert((mid == entries.end() || mid->isItem()) &&
+         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
+         "midpoint is not midpoint");
+  uint32_t numTypes = std::distance(entries.begin(), mid);
+  uint32_t numItems = std::distance(mid, entries.end());
+  log("Tpi record count: " + Twine(numTypes));
+  log("Ipi record count: " + Twine(numItems));
+
+  // Make a list of the "unique" type records to merge for each tpi source. Type
+  // merging will skip indices not on this list. Store the destination PDB type
+  // index for these unique types in the tpiMap for each source. The entries for
+  // non-unique types will be filled in prior to type merging.
+  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
+    auto &cell = entries[i];
+    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
+    TpiSource *source = TpiSource::instances[tpiSrcIdx];
+    source->uniqueTypes.push_back(cell.getGHashIdx());
+
+    // Update the ghash table to store the destination PDB type index in the
+    // table.
+    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
+    uint32_t ghashCellIndex =
+        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
+    ghashState.table.table[ghashCellIndex] =
+        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
+  }
+
+  // In parallel, remap all types.
+  for_each(TpiSource::dependencySources, [&](TpiSource *source) {
+    source->remapTpiWithGHashes(&ghashState);
+  });
+  parallelForEach(TpiSource::objectSources, [&](TpiSource *source) {
+    source->remapTpiWithGHashes(&ghashState);
+  });
+
+  // Build a global map from function ID to function type.
+  for (TpiSource *source : TpiSource::instances) {
+    for (auto idToType : source->funcIdToType)
+      funcIdToType.insert(idToType);
+    source->funcIdToType.clear();
+  }
+
+  TpiSource::clearGHashes();
+}
+
+/// Return the output PDB type index stored in ghash table cell `ghashCellIdx`.
+/// Expects the cell's ghash-index field to already hold that PDB type index.
+static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
+                                          uint32_t ghashCellIdx) {
+  GHashCell cell = g->table.table[ghashCellIdx];
+  return TypeIndex::fromArrayIndex(cell.getGHashIdx());
+}
+
+// Fill in a TPI or IPI index map using ghashes. For each source type, use its
+// ghash cell to look up its final PDB type index, and store that in the map.
+void TpiSource::fillMapFromGHashes(GHashState *g) {
+  for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
+    TypeIndex fakeCellIndex = indexMapStorage[i]; // cell index or simple index
+    if (fakeCellIndex.isSimple())
+      indexMapStorage[i] = fakeCellIndex; // simple indices are kept unchanged
+    else
+      indexMapStorage[i] =
+          loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
+  }
+}
+
+void TpiSource::clearGHashes() { // releases ghash state of every TpiSource
+  for (TpiSource *src : TpiSource::instances) {
+    if (src->ownedGHashes) // only free hash arrays flagged as owned
+      delete[] src->ghashes.data();
+    src->ghashes = {}; // reset the view whether or not it was owned
+    src->isItemIndex.clear();
+    src->uniqueTypes.clear();
+  }
+}
diff --git a/contrib/llvm-project/lld/COFF/DebugTypes.h b/contrib/llvm-project/lld/COFF/DebugTypes.h
index 24d79d83e4c6..faad30b141e9 100644
--- a/contrib/llvm-project/lld/COFF/DebugTypes.h
+++ b/contrib/llvm-project/lld/COFF/DebugTypes.h
@@ -9,30 +9,38 @@
 #ifndef LLD_COFF_DEBUGTYPES_H
 #define LLD_COFF_DEBUGTYPES_H
 
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 namespace codeview {
-class PrecompRecord;
-class TypeServer2Record;
+struct GloballyHashedType;
 } // namespace codeview
 namespace pdb {
 class NativeSession;
+class TpiStream;
 }
 } // namespace llvm
 
 namespace lld {
 namespace coff {
 
+using llvm::codeview::GloballyHashedType;
+using llvm::codeview::TypeIndex;
+
 class ObjFile;
 class PDBInputFile;
-struct CVIndexMap;
 class TypeMerger;
+struct GHashState;
 
 class TpiSource {
 public:
-  enum TpiKind { Regular, PCH, UsingPCH, PDB, UsingPDB };
+  enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB };
 
   TpiSource(TpiKind k, ObjFile *f);
   virtual ~TpiSource();
@@ -48,22 +56,134 @@ public:
   /// If the object does not use a type server PDB (compiled with /Z7), we merge
   /// all the type and item records from the .debug$S stream and fill in the
   /// caller-provided ObjectIndexMap.
-  virtual llvm::Expected<const CVIndexMap *> mergeDebugT(TypeMerger *m,
-                                                         CVIndexMap *indexMap);
+  virtual Error mergeDebugT(TypeMerger *m);
+
+  /// Load global hashes, either by hashing types directly, or by loading them
+  /// from LLVM's .debug$H section.
+  virtual void loadGHashes();
+
+  /// Use global hashes to merge type information.
+  virtual void remapTpiWithGHashes(GHashState *g);
+
+  // Remap a type index in place.
+  bool remapTypeIndex(TypeIndex &ti, llvm::codeview::TiRefKind refKind) const;
+
+protected:
+  void remapRecord(MutableArrayRef<uint8_t> rec,
+                   ArrayRef<llvm::codeview::TiReference> typeRefs);
+
+  void mergeTypeRecord(TypeIndex curIndex, llvm::codeview::CVType ty);
+
+  // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex
+  // of the first record in this source, typically 0x1000. When PCHs are
+  // involved, it may start higher.
+  void mergeUniqueTypeRecords(
+      ArrayRef<uint8_t> debugTypes,
+      TypeIndex beginIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex));
+
+  // Use the ghash table to construct a map from source type index to
+  // destination PDB type index. Usable for either TPI or IPI.
+  void fillMapFromGHashes(GHashState *m);
+
+  // Copies ghashes from a vector into an array. These are long lived, so it's
+  // worth the time to copy these into an appropriately sized vector to reduce
+  // memory usage.
+  void assignGHashesFromVector(std::vector<GloballyHashedType> &&hashVec);
+
+  // Walk over file->debugTypes and fill in the isItemIndex bit vector.
+  void fillIsItemIndexFromDebugT();
+
+public:
+  bool remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec);
+
+  void remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec);
+
   /// Is this a dependent file that needs to be processed first, before other
   /// OBJs?
   virtual bool isDependency() const { return false; }
 
-  static void forEachSource(llvm::function_ref<void(TpiSource *)> fn);
+  /// Returns true if this type record should be omitted from the PDB, even if
+  /// it is unique. This prevents a record from being added to the input ghash
+  /// table.
+  bool shouldOmitFromPdb(uint32_t ghashIdx) {
+    return ghashIdx == endPrecompGHashIdx;
+  }
+
+  /// All sources of type information in the program.
+  static std::vector<TpiSource *> instances;
+
+  /// Dependency type sources, such as type servers or PCH object files. These
+  /// must be processed before objects that rely on them. Set by
+  /// TpiSource::sortDependencies.
+  static ArrayRef<TpiSource *> dependencySources;
+
+  /// Object file sources. These must be processed after dependencySources.
+  static ArrayRef<TpiSource *> objectSources;
+
+  /// Sorts the dependencies and reassigns TpiSource indices.
+  static void sortDependencies();
 
   static uint32_t countTypeServerPDBs();
   static uint32_t countPrecompObjs();
 
+  /// Free heap allocated ghashes.
+  static void clearGHashes();
+
   /// Clear global data structures for TpiSources.
   static void clear();
 
   const TpiKind kind;
+  bool ownedGHashes = true;
+  uint32_t tpiSrcIdx = 0;
+
+protected:
+  /// The ghash index (zero based, not 0x1000-based) of the LF_ENDPRECOMP record
+  /// in this object, if one exists. This is the all ones value otherwise. It is
+  /// recorded here so that it can be omitted from the final ghash table.
+  uint32_t endPrecompGHashIdx = ~0U;
+
+public:
   ObjFile *file;
+
+  /// An error encountered during type merging, if any.
+  Error typeMergingError = Error::success();
+
+  // Storage for tpiMap or ipiMap, depending on the kind of source.
+  llvm::SmallVector<TypeIndex, 0> indexMapStorage;
+
+  // Source type index to PDB type index mapping for type and item records.
+  // These mappings will be the same for /Z7 objects, and distinct for /Zi
+  // objects.
+  llvm::ArrayRef<TypeIndex> tpiMap;
+  llvm::ArrayRef<TypeIndex> ipiMap;
+
+  /// Array of global type hashes, indexed by TypeIndex. May be calculated on
+  /// demand, or present in input object files.
+  llvm::ArrayRef<llvm::codeview::GloballyHashedType> ghashes;
+
+  /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function
+  /// type index here. Both indices are PDB indices, not object type indexes.
+  std::vector<std::pair<TypeIndex, TypeIndex>> funcIdToType;
+
+  /// Indicates if a type record is an item index or a type index.
+  llvm::BitVector isItemIndex;
+
+  /// A list of all "unique" type indices which must be merged into the final
+  /// PDB. GHash type deduplication produces this list, and it should be
+  /// considerably smaller than the input.
+  std::vector<uint32_t> uniqueTypes;
+
+  struct MergedInfo {
+    std::vector<uint8_t> recs;
+    std::vector<uint16_t> recSizes;
+    std::vector<uint32_t> recHashes;
+  };
+
+  MergedInfo mergedTpi;
+  MergedInfo mergedIpi;
+
+  uint64_t nbTypeRecords = 0;
+  uint64_t nbTypeRecordsBytes = 0;
 };
 
 TpiSource *makeTpiSource(ObjFile *file);
diff --git a/contrib/llvm-project/lld/COFF/Driver.cpp b/contrib/llvm-project/lld/COFF/Driver.cpp
index 9ceccef86779..96ac7957f557 100644
--- a/contrib/llvm-project/lld/COFF/Driver.cpp
+++ b/contrib/llvm-project/lld/COFF/Driver.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Object/COFFImportFile.h"
@@ -34,6 +35,7 @@
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/LEB128.h"
@@ -67,6 +69,17 @@ bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
   lld::stdoutOS = &stdoutOS;
   lld::stderrOS = &stderrOS;
 
+  errorHandler().cleanupCallback = []() {
+    TpiSource::clear();
+    freeArena();
+    ObjFile::instances.clear();
+    PDBInputFile::instances.clear();
+    ImportFile::instances.clear();
+    BitcodeFile::instances.clear();
+    memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances));
+    OutputSection::clear();
+  };
+
   errorHandler().logName = args::getFilenameWithoutExe(args[0]);
   errorHandler().errorLimitExceededMsg =
       "too many errors emitted, stopping now"
@@ -78,20 +91,16 @@ bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
   symtab = make<SymbolTable>();
   driver = make<LinkerDriver>();
 
-  driver->link(args);
+  driver->linkerMain(args);
 
   // Call exit() if we can to avoid calling destructors.
   if (canExitEarly)
     exitLld(errorCount() ? 1 : 0);
 
-  freeArena();
-  ObjFile::instances.clear();
-  ImportFile::instances.clear();
-  BitcodeFile::instances.clear();
-  memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances));
-  TpiSource::clear();
-
-  return !errorCount();
+  bool ret = errorCount() == 0;
+  if (!canExitEarly)
+    errorHandler().reset();
+  return ret;
 }
 
 // Parse options of the form "old;new".
@@ -400,10 +409,17 @@ void LinkerDriver::parseDirectives(InputFile *file) {
     case OPT_section:
       parseSection(arg->getValue());
       break;
-    case OPT_subsystem:
+    case OPT_subsystem: {
+      bool gotVersion = false;
       parseSubsystem(arg->getValue(), &config->subsystem,
-                     &config->majorOSVersion, &config->minorOSVersion);
+                     &config->majorSubsystemVersion,
+                     &config->minorSubsystemVersion, &gotVersion);
+      if (gotVersion) {
+        config->majorOSVersion = config->majorSubsystemVersion;
+        config->minorOSVersion = config->minorSubsystemVersion;
+      }
       break;
+    }
     // Only add flags here that link.exe accepts in
     // `#pragma comment(linker, "/flag")`-generated sections.
     case OPT_editandcontinue:
@@ -924,6 +940,75 @@ static void parseOrderFile(StringRef arg) {
   }
 }
 
+static void parseCallGraphFile(StringRef path) { // fills callGraphProfile
+  std::unique_ptr<MemoryBuffer> mb = CHECK(
+      MemoryBuffer::getFile(path, -1, false, true), "could not open " + path);
+
+  // Build a map from symbol name to symbol, over every object file's symbols.
+  DenseMap<StringRef, Symbol *> map;
+  for (ObjFile *file : ObjFile::instances)
+    for (Symbol *sym : file->getSymbols())
+      if (sym)
+        map[sym->getName()] = sym;
+
+  auto findSection = [&](StringRef name) -> SectionChunk * {
+    Symbol *sym = map.lookup(name);
+    if (!sym) {
+      if (config->warnMissingOrderSymbol)
+        warn(path + ": no such symbol: " + name);
+      return nullptr;
+    }
+
+    if (DefinedCOFF *dr = dyn_cast_or_null<DefinedCOFF>(sym))
+      return dyn_cast_or_null<SectionChunk>(dr->getChunk());
+    return nullptr; // symbol is not a DefinedCOFF, so it has no chunk to use
+  };
+
+  for (StringRef line : args::getLines(*mb)) {
+    SmallVector<StringRef, 3> fields;
+    line.split(fields, ' '); // expected fields: <from> <to> <count>
+    uint64_t count;
+
+    if (fields.size() != 3 || !to_integer(fields[2], count)) {
+      error(path + ": parse error");
+      return;
+    }
+
+    if (SectionChunk *from = findSection(fields[0]))
+      if (SectionChunk *to = findSection(fields[1]))
+        config->callGraphProfile[{from, to}] += count; // sum repeated edges
+  }
+}
+
+static void readCallGraphsFromObjectFiles() { // merges per-object profiles
+  for (ObjFile *obj : ObjFile::instances) {
+    if (obj->callgraphSec) {
+      ArrayRef<uint8_t> contents;
+      cantFail(
+          obj->getCOFFObj()->getSectionContents(obj->callgraphSec, contents));
+      BinaryStreamReader reader(contents, support::little);
+      while (!reader.empty()) { // record: u32 from, u32 to, u64 count
+        uint32_t fromIndex, toIndex;
+        uint64_t count;
+        if (Error err = reader.readInteger(fromIndex))
+          fatal(toString(obj) + ": Expected 32-bit integer");
+        if (Error err = reader.readInteger(toIndex))
+          fatal(toString(obj) + ": Expected 32-bit integer");
+        if (Error err = reader.readInteger(count))
+          fatal(toString(obj) + ": Expected 64-bit integer");
+        auto *fromSym = dyn_cast_or_null<Defined>(obj->getSymbol(fromIndex));
+        auto *toSym = dyn_cast_or_null<Defined>(obj->getSymbol(toIndex));
+        if (!fromSym || !toSym)
+          continue; // skip edges whose endpoints are not Defined symbols
+        auto *from = dyn_cast_or_null<SectionChunk>(fromSym->getChunk());
+        auto *to = dyn_cast_or_null<SectionChunk>(toSym->getChunk());
+        if (from && to)
+          config->callGraphProfile[{from, to}] += count; // sum repeated edges
+      }
+    }
+  }
+}
+
 static void markAddrsig(Symbol *s) {
   if (auto *d = dyn_cast_or_null<Defined>(s))
     if (SectionChunk *c = dyn_cast_or_null<SectionChunk>(d->getChunk()))
@@ -1104,10 +1189,15 @@ Optional<std::string> getReproduceFile(const opt::InputArgList &args) {
     return std::string(path);
   }
 
+  // This is intentionally not guarded by OPT_lldignoreenv since writing
+  // a repro tar file doesn't affect the main output.
+  if (auto *path = getenv("LLD_REPRODUCE"))
+    return std::string(path);
+
   return None;
 }
 
-void LinkerDriver::link(ArrayRef<const char *> argsArr) {
+void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   ScopedTimer rootTimer(Timer::root());
 
   // Needed for LTO.
@@ -1134,6 +1224,7 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   v.push_back("lld-link (LLVM option parsing)");
   for (auto *arg : args.filtered(OPT_mllvm))
     v.push_back(arg->getValue());
+  cl::ResetAllOptionOccurrences();
   cl::ParseCommandLineOptions(v.size(), v.data());
 
   // Handle /errorlimit early, because error() depends on it.
@@ -1172,7 +1263,7 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   // because it doesn't start with "/", but we deliberately chose "--" to
   // avoid conflict with /version and for compatibility with clang-cl.
   if (args.hasArg(OPT_dash_dash_version)) {
-    lld::outs() << getLLDVersion() << "\n";
+    message(getLLDVersion());
     return;
   }
 
@@ -1381,8 +1472,18 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
 
   // Handle /subsystem
   if (auto *arg = args.getLastArg(OPT_subsystem))
-    parseSubsystem(arg->getValue(), &config->subsystem, &config->majorOSVersion,
-                   &config->minorOSVersion);
+    parseSubsystem(arg->getValue(), &config->subsystem,
+                   &config->majorSubsystemVersion,
+                   &config->minorSubsystemVersion);
+
+  // Handle /osversion
+  if (auto *arg = args.getLastArg(OPT_osversion)) {
+    parseVersion(arg->getValue(), &config->majorOSVersion,
+                 &config->minorOSVersion);
+  } else {
+    config->majorOSVersion = config->majorSubsystemVersion;
+    config->minorOSVersion = config->minorSubsystemVersion;
+  }
 
   // Handle /timestamp
   if (llvm::opt::Arg *arg = args.getLastArg(OPT_timestamp, OPT_repro)) {
@@ -1418,6 +1519,8 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   unsigned icfLevel =
       args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on
   unsigned tailMerge = 1;
+  bool ltoNewPM = LLVM_ENABLE_NEW_PASS_MANAGER;
+  bool ltoDebugPM = false;
   for (auto *arg : args.filtered(OPT_opt)) {
     std::string str = StringRef(arg->getValue()).lower();
     SmallVector<StringRef, 1> vec;
@@ -1435,6 +1538,14 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
         tailMerge = 2;
       } else if (s == "nolldtailmerge") {
         tailMerge = 0;
+      } else if (s == "ltonewpassmanager") {
+        ltoNewPM = true;
+      } else if (s == "noltonewpassmanager") {
+        ltoNewPM = false;
+      } else if (s == "ltodebugpassmanager") {
+        ltoDebugPM = true;
+      } else if (s == "noltodebugpassmanager") {
+        ltoDebugPM = false;
       } else if (s.startswith("lldlto=")) {
         StringRef optLevel = s.substr(7);
         if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3)
@@ -1464,6 +1575,8 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   config->doGC = doGC;
   config->doICF = icfLevel > 0;
   config->tailMerge = (tailMerge == 1 && config->doICF) || tailMerge == 2;
+  config->ltoNewPassManager = ltoNewPM;
+  config->ltoDebugPassManager = ltoDebugPM;
 
   // Handle /lldsavetemps
   if (args.hasArg(OPT_lldsavetemps))
@@ -1587,9 +1700,11 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
       args.hasFlag(OPT_auto_import, OPT_auto_import_no, config->mingw);
   config->pseudoRelocs = args.hasFlag(
       OPT_runtime_pseudo_reloc, OPT_runtime_pseudo_reloc_no, config->mingw);
+  config->callGraphProfileSort = args.hasFlag(
+      OPT_call_graph_profile_sort, OPT_call_graph_profile_sort_no, true);
 
-  // Don't warn about long section names, such as .debug_info, for mingw or when
-  // -debug:dwarf is requested.
+  // Don't warn about long section names, such as .debug_info, for mingw or
+  // when -debug:dwarf is requested.
   if (config->mingw || config->debugDwarf)
     config->warnLongSectionNames = false;
 
@@ -1911,6 +2026,12 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
     while (run());
   }
 
+  // Create wrapped symbols for -wrap option.
+  std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args);
+  // Load more object files that might be needed for wrapped symbols.
+  if (!wrapped.empty())
+    while (run());
+
   if (config->autoImport) {
     // MinGW specific.
     // Load any further object files that might be needed for doing automatic
@@ -1954,6 +2075,10 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   // references to the symbols we use from them.
   run();
 
+  // Apply symbol renames for -wrap.
+  if (!wrapped.empty())
+    wrapSymbols(wrapped);
+
   // Resolve remaining undefined symbols and warn about imported locals.
   symtab->resolveRemainingUndefines();
   if (errorCount())
@@ -2024,8 +2149,24 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   // Handle /order. We want to do this at this moment because we
   // need a complete list of comdat sections to warn on nonexistent
   // functions.
-  if (auto *arg = args.getLastArg(OPT_order))
+  if (auto *arg = args.getLastArg(OPT_order)) {
+    if (args.hasArg(OPT_call_graph_ordering_file))
+      error("/order and /call-graph-ordering-file may not be used together");
+    parseOrderFile(arg->getValue());
+    config->callGraphProfileSort = false; // /order disables call graph sorting
+  }
+
+  // Handle /call-graph-ordering-file and /call-graph-profile-sort (default on).
+  if (config->callGraphProfileSort) {
+    if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) {
+      parseCallGraphFile(arg->getValue());
+    }
+    readCallGraphsFromObjectFiles();
+  }
+
+  // Handle /print-symbol-order.
+  if (auto *arg = args.getLastArg(OPT_print_symbol_order))
+    config->printSymbolOrder = arg->getValue();
 
   // Identify unreferenced COMDAT sections.
   if (config->doGC)
diff --git a/contrib/llvm-project/lld/COFF/Driver.h b/contrib/llvm-project/lld/COFF/Driver.h
index 3fee9b1fe50e..6f71a37f729f 100644
--- a/contrib/llvm-project/lld/COFF/Driver.h
+++ b/contrib/llvm-project/lld/COFF/Driver.h
@@ -78,7 +78,7 @@ private:
 
 class LinkerDriver {
 public:
-  void link(llvm::ArrayRef<const char *> args);
+  void linkerMain(llvm::ArrayRef<const char *> args);
 
   // Used by the resolver to parse .drectve section contents.
   void parseDirectives(InputFile *file);
@@ -96,9 +96,6 @@ public:
 private:
   std::unique_ptr<llvm::TarWriter> tar; // for /linkrepro
 
-  // Opens a file. Path has to be resolved already.
-  MemoryBufferRef openFile(StringRef path);
-
   // Searches a file from search paths.
   Optional<StringRef> findFile(StringRef filename);
   Optional<StringRef> findLib(StringRef filename);
@@ -168,7 +165,7 @@ void parseVersion(StringRef arg, uint32_t *major, uint32_t *minor);
 
 // Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]".
 void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major,
-                    uint32_t *minor);
+                    uint32_t *minor, bool *gotVersion = nullptr);
 
 void parseAlternateName(StringRef);
 void parseMerge(StringRef);
@@ -206,8 +203,6 @@ void checkFailIfMismatch(StringRef arg, InputFile *source);
 MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> mbs,
                                  ArrayRef<ObjFile *> objs);
 
-void runMSVCLinker(std::string rsp, ArrayRef<StringRef> objects);
-
 // Create enum with OPT_xxx values for each option in Options.td
 enum {
   OPT_INVALID = 0,
diff --git a/contrib/llvm-project/lld/COFF/DriverUtils.cpp b/contrib/llvm-project/lld/COFF/DriverUtils.cpp
index 6cb761abea4e..19964428050b 100644
--- a/contrib/llvm-project/lld/COFF/DriverUtils.cpp
+++ b/contrib/llvm-project/lld/COFF/DriverUtils.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/WindowsManifest/WindowsManifestMerger.h"
+#include <limits>
 #include <memory>
 
 using namespace llvm::COFF;
@@ -87,10 +88,10 @@ void parseNumbers(StringRef arg, uint64_t *addr, uint64_t *size) {
 void parseVersion(StringRef arg, uint32_t *major, uint32_t *minor) {
   StringRef s1, s2;
   std::tie(s1, s2) = arg.split('.');
-  if (s1.getAsInteger(0, *major))
+  if (s1.getAsInteger(10, *major))
     fatal("invalid number: " + s1);
   *minor = 0;
-  if (!s2.empty() && s2.getAsInteger(0, *minor))
+  if (!s2.empty() && s2.getAsInteger(10, *minor))
     fatal("invalid number: " + s2);
 }
 
@@ -111,7 +112,7 @@ void parseGuard(StringRef fullArg) {
 
 // Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]".
 void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major,
-                    uint32_t *minor) {
+                    uint32_t *minor, bool *gotVersion) {
   StringRef sysStr, ver;
   std::tie(sysStr, ver) = arg.split(',');
   std::string sysStrLower = sysStr.lower();
@@ -131,6 +132,8 @@ void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major,
     fatal("unknown subsystem: " + sysStr);
   if (!ver.empty())
     parseVersion(ver, major, minor);
+  if (gotVersion)
+    *gotVersion = !ver.empty();
 }
 
 // Parse a string of the form of "<from>=<to>".
@@ -673,12 +676,15 @@ void fixupExports() {
 
 void assignExportOrdinals() {
   // Assign unique ordinals if default (= 0).
-  uint16_t max = 0;
+  uint32_t max = 0;
   for (Export &e : config->exports)
-    max = std::max(max, e.ordinal);
+    max = std::max(max, (uint32_t)e.ordinal);
   for (Export &e : config->exports)
     if (e.ordinal == 0)
       e.ordinal = ++max;
+  if (max > std::numeric_limits<uint16_t>::max())
+    fatal("too many exported symbols (max " +
+          Twine(std::numeric_limits<uint16_t>::max()) + ")");
 }
 
 // Parses a string in the form of "key=value" and check
@@ -846,7 +852,7 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> argv) {
 
   handleColorDiagnostics(args);
 
-  for (auto *arg : args.filtered(OPT_UNKNOWN)) {
+  for (opt::Arg *arg : args.filtered(OPT_UNKNOWN)) {
     std::string nearest;
     if (optTable.findNearest(arg->getAsString(args), nearest) > 1)
       warn("ignoring unknown argument '" + arg->getAsString(args) + "'");
@@ -877,8 +883,10 @@ ParsedDirectives ArgParser::parseDirectives(StringRef s) {
              tok.startswith_lower("-include:"))
       result.includes.push_back(tok.substr(strlen("/include:")));
     else {
-      // Save non-null-terminated strings to make proper C strings.
-      bool HasNul = tok.data()[tok.size()] == '\0';
+      // Copy substrings that are not valid C strings. The tokenizer may have
+      // already copied quoted arguments for us, so those do not need to be
+      // copied again.
+      bool HasNul = tok.end() != s.end() && tok.data()[tok.size()] == '\0';
       rest.push_back(HasNul ? tok.data() : saver.save(tok).data());
     }
   }
diff --git a/contrib/llvm-project/lld/COFF/ICF.cpp b/contrib/llvm-project/lld/COFF/ICF.cpp
index 1b33634b63d6..386f861fb27f 100644
--- a/contrib/llvm-project/lld/COFF/ICF.cpp
+++ b/contrib/llvm-project/lld/COFF/ICF.cpp
@@ -131,7 +131,7 @@ bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) {
   auto considerForICF = [](const SectionChunk &assoc) {
     StringRef Name = assoc.getSectionName();
     return !(Name.startswith(".debug") || Name == ".gfids$y" ||
-             Name == ".gljmp$y");
+             Name == ".giats$y" || Name == ".gljmp$y");
   };
   auto ra = make_filter_range(a->children(), considerForICF);
   auto rb = make_filter_range(b->children(), considerForICF);
diff --git a/contrib/llvm-project/lld/COFF/InputFiles.cpp b/contrib/llvm-project/lld/COFF/InputFiles.cpp
index 4346b3a2ffa7..37f66131620e 100644
--- a/contrib/llvm-project/lld/COFF/InputFiles.cpp
+++ b/contrib/llvm-project/lld/COFF/InputFiles.cpp
@@ -249,6 +249,11 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
     return nullptr;
   }
 
+  if (name == ".llvm.call-graph-profile") {
+    callgraphSec = sec;
+    return nullptr;
+  }
+
   // Object files may have DWARF debug info or MS CodeView debug info
   // (or both).
   //
@@ -275,6 +280,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
     debugChunks.push_back(c);
   else if (name == ".gfids$y")
     guardFidChunks.push_back(c);
+  else if (name == ".giats$y")
+    guardIATChunks.push_back(c);
   else if (name == ".gljmp$y")
     guardLJmpChunks.push_back(c);
   else if (name == ".sxdata")
@@ -467,8 +474,23 @@ Symbol *ObjFile::createUndefined(COFFSymbolRef sym) {
   return symtab->addUndefined(name, this, sym.isWeakExternal());
 }
 
-void ObjFile::handleComdatSelection(COFFSymbolRef sym, COMDATType &selection,
-                                    bool &prevailing, DefinedRegular *leader) {
+static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
+                                                         int32_t section) {
+  uint32_t numSymbols = obj->getNumberOfSymbols();
+  for (uint32_t i = 0; i < numSymbols; ++i) { // linear scan of the symbol table
+    COFFSymbolRef sym = check(obj->getSymbol(i));
+    if (sym.getSectionNumber() != section)
+      continue; // symbol belongs to a different section
+    if (const coff_aux_section_definition *def = sym.getSectionDefinition())
+      return def; // first symbol in the section with a definition wins
+  }
+  return nullptr; // no symbol in this section has a section definition
+}
+
+void ObjFile::handleComdatSelection(
+    COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
+    DefinedRegular *leader,
+    const llvm::object::coff_aux_section_definition *def) {
   if (prevailing)
     return;
   // There's already an existing comdat for this symbol: `Leader`.
@@ -535,8 +557,16 @@ void ObjFile::handleComdatSelection(COFFSymbolRef sym, COMDATType &selection,
     break;
 
   case IMAGE_COMDAT_SELECT_SAME_SIZE:
-    if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData)
-      symtab->reportDuplicate(leader, this);
+    if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
+      if (!config->mingw) {
+        symtab->reportDuplicate(leader, this);
+      } else {
+        const coff_aux_section_definition *leaderDef = findSectionDef(
+            leaderChunk->file->getCOFFObj(), leaderChunk->getSectionNumber());
+        if (!leaderDef || leaderDef->Length != def->Length)
+          symtab->reportDuplicate(leader, this);
+      }
+    }
     break;
 
   case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
@@ -652,7 +682,7 @@ Optional<Symbol *> ObjFile::createDefined(
     COMDATType selection = (COMDATType)def->Selection;
 
     if (leader->isCOMDAT)
-      handleComdatSelection(sym, selection, prevailing, leader);
+      handleComdatSelection(sym, selection, prevailing, leader, def);
 
     if (prevailing) {
       SectionChunk *c = readSection(sectionNumber, def, getName());
@@ -757,8 +787,14 @@ void ObjFile::initializeDependencies() {
   else
     data = getDebugSection(".debug$T");
 
-  if (data.empty())
+  // Don't make a TpiSource for objects with no debug info. If the object has
+  // symbols but no types, make a plain, empty TpiSource anyway, because it
+  // simplifies adding the symbols later.
+  if (data.empty()) {
+    if (!debugChunks.empty())
+      debugTypesObj = makeTpiSource(this);
     return;
+  }
 
   // Get the first type record. It will indicate if this object uses a type
   // server (/Zi) or a PCH file (/Yu).
@@ -793,6 +829,8 @@ void ObjFile::initializeDependencies() {
     PrecompRecord precomp = cantFail(
         TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
     debugTypesObj = makeUsePrecompSource(this, precomp);
+    // Drop the LF_PRECOMP record from the input stream.
+    debugTypes = debugTypes.drop_front(firstType->RecordData.size());
     return;
   }
 
diff --git a/contrib/llvm-project/lld/COFF/InputFiles.h b/contrib/llvm-project/lld/COFF/InputFiles.h
index 50323f596e2c..3fa6819157a9 100644
--- a/contrib/llvm-project/lld/COFF/InputFiles.h
+++ b/contrib/llvm-project/lld/COFF/InputFiles.h
@@ -144,9 +144,12 @@ public:
   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
+  ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
   ArrayRef<Symbol *> getSymbols() { return symbols; }
 
+  MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
+
   ArrayRef<uint8_t> getDebugSection(StringRef secName);
 
   // Returns a Symbol object for the symbolIndex'th symbol in the
@@ -191,6 +194,8 @@ public:
 
   const coff_section *addrsigSec = nullptr;
 
+  const coff_section *callgraphSec = nullptr;
+
   // When using Microsoft precompiled headers, this is the PCH's key.
   // The same key is used by both the precompiled object, and objects using the
   // precompiled object. Any difference indicates out-of-date objects.
@@ -253,9 +258,10 @@ private:
   // match the existing symbol and its selection. If either old or new
   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
   // the existing leader. In that case, Prevailing is set to true.
-  void handleComdatSelection(COFFSymbolRef sym,
-                             llvm::COFF::COMDATType &selection,
-                             bool &prevailing, DefinedRegular *leader);
+  void
+  handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
+                        bool &prevailing, DefinedRegular *leader,
+                        const llvm::object::coff_aux_section_definition *def);
 
   llvm::Optional<Symbol *>
   createDefined(COFFSymbolRef sym,
@@ -280,9 +286,11 @@ private:
   // 32-bit x86.
   std::vector<SectionChunk *> sxDataChunks;
 
-  // Chunks containing symbol table indices of address taken symbols and longjmp
-  // targets.  These are not linked into the final binary when /guard:cf is set.
+  // Chunks containing symbol table indices of address taken symbols, address
+  // taken IAT entries, and longjmp targets. These are not linked into the
+  // final binary when /guard:cf is set.
   std::vector<SectionChunk *> guardFidChunks;
+  std::vector<SectionChunk *> guardIATChunks;
   std::vector<SectionChunk *> guardLJmpChunks;
 
   // This vector contains a list of all symbols defined or referenced by this
@@ -350,7 +358,7 @@ public:
   const coff_import_header *hdr;
   Chunk *location = nullptr;
 
-  // We want to eliminate dllimported symbols if no one actually refers them.
+  // We want to eliminate dllimported symbols if no one actually refers to them.
   // These "Live" bits are used to keep track of which import library members
   // are actually in use.
   //
diff --git a/contrib/llvm-project/lld/COFF/LTO.cpp b/contrib/llvm-project/lld/COFF/LTO.cpp
index bb44819e60f8..2fa3536db873 100644
--- a/contrib/llvm-project/lld/COFF/LTO.cpp
+++ b/contrib/llvm-project/lld/COFF/LTO.cpp
@@ -82,6 +82,8 @@ static lto::Config createConfig() {
   c.MAttrs = getMAttrs();
   c.CGOptLevel = args::getCGOptLevel(config->ltoo);
   c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty();
+  c.UseNewPM = config->ltoNewPassManager;
+  c.DebugPassManager = config->ltoDebugPassManager;
 
   if (config->saveTemps)
     checkError(c.addSaveTemps(std::string(config->outputFile) + ".",
@@ -139,6 +141,11 @@ void BitcodeCompiler::add(BitcodeFile &f) {
     r.VisibleToRegularObj = sym->isUsedInRegularObj;
     if (r.Prevailing)
       undefine(sym);
+
+    // We tell LTO to not apply interprocedural optimization for wrapped
+    // (with -wrap) symbols because otherwise LTO would inline them while
+    // their values are still not final.
+    r.LinkerRedefined = !sym->canInline;
   }
   checkError(ltoObj->add(std::move(f.obj), resols));
 }
diff --git a/contrib/llvm-project/lld/COFF/MinGW.cpp b/contrib/llvm-project/lld/COFF/MinGW.cpp
index e24cdca6ee34..5bb7467afe5e 100644
--- a/contrib/llvm-project/lld/COFF/MinGW.cpp
+++ b/contrib/llvm-project/lld/COFF/MinGW.cpp
@@ -7,9 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "MinGW.h"
+#include "Driver.h"
+#include "InputFiles.h"
 #include "SymbolTable.h"
 #include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/Object/COFF.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -173,3 +178,86 @@ void lld::coff::writeDefFile(StringRef name) {
     os << "\n";
   }
 }
+
+static StringRef mangle(Twine sym) {
+  assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN);
+  if (config->machine == I386)
+    return saver.save("_" + sym);
+  return saver.save(sym);
+}
+
+// Handles -wrap option.
+//
+// This function instantiates wrapper symbols. At this point, they seem
+// like they are not being used at all, so we explicitly set some flags so
+// that LTO won't eliminate them.
+std::vector<WrappedSymbol>
+lld::coff::addWrappedSymbols(opt::InputArgList &args) {
+  std::vector<WrappedSymbol> v;
+  DenseSet<StringRef> seen;
+
+  for (auto *arg : args.filtered(OPT_wrap)) {
+    StringRef name = arg->getValue();
+    if (!seen.insert(name).second)
+      continue;
+
+    Symbol *sym = symtab->findUnderscore(name);
+    if (!sym)
+      continue;
+
+    Symbol *real = symtab->addUndefined(mangle("__real_" + name));
+    Symbol *wrap = symtab->addUndefined(mangle("__wrap_" + name));
+    v.push_back({sym, real, wrap});
+
+    // These symbols may seem undefined initially. Don't bail out at
+    // symtab->reportUnresolvable() because of them; instead, let
+    // wrapSymbols below sort things out before the final check with
+    // symtab->resolveRemainingUndefines().
+    sym->deferUndefined = true;
+    real->deferUndefined = true;
+    // We want to tell LTO not to inline symbols to be overwritten
+    // because LTO doesn't know the final symbol contents after renaming.
+    real->canInline = false;
+    sym->canInline = false;
+
+    // Tell LTO not to eliminate these symbols.
+    sym->isUsedInRegularObj = true;
+    if (!isa<Undefined>(wrap))
+      wrap->isUsedInRegularObj = true;
+  }
+  return v;
+}
+
+// Do renaming for -wrap by updating pointers to symbols.
+//
+// When this function is executed, only InputFiles and symbol table
+// contain pointers to symbol objects. We visit them to replace pointers,
+// so that wrapped symbols are swapped as instructed by the command line.
+void lld::coff::wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
+  DenseMap<Symbol *, Symbol *> map;
+  for (const WrappedSymbol &w : wrapped) {
+    map[w.sym] = w.wrap;
+    map[w.real] = w.sym;
+    if (Defined *d = dyn_cast<Defined>(w.wrap)) {
+      Symbol *imp = symtab->find(("__imp_" + w.sym->getName()).str());
+      // Create a new defined local import for the wrap symbol. If
+      // no imp prefixed symbol existed, there's no need for it.
+      // (We can't easily distinguish whether any object file actually
+      // referenced it or not, though.)
+      if (imp) {
+        DefinedLocalImport *wrapimp = make<DefinedLocalImport>(
+            saver.save("__imp_" + w.wrap->getName()), d);
+        symtab->localImportChunks.push_back(wrapimp->getChunk());
+        map[imp] = wrapimp;
+      }
+    }
+  }
+
+  // Update pointers in input files.
+  parallelForEach(ObjFile::instances, [&](ObjFile *file) {
+    MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
+    for (size_t i = 0, e = syms.size(); i != e; ++i)
+      if (Symbol *s = map.lookup(syms[i]))
+        syms[i] = s;
+  });
+}
diff --git a/contrib/llvm-project/lld/COFF/MinGW.h b/contrib/llvm-project/lld/COFF/MinGW.h
index 3d7a186aa199..2f2bd119c33d 100644
--- a/contrib/llvm-project/lld/COFF/MinGW.h
+++ b/contrib/llvm-project/lld/COFF/MinGW.h
@@ -12,7 +12,10 @@
 #include "Config.h"
 #include "Symbols.h"
 #include "lld/Common/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Option/ArgList.h"
+#include <vector>
 
 namespace lld {
 namespace coff {
@@ -36,6 +39,24 @@ public:
 
 void writeDefFile(StringRef name);
 
+// The -wrap option is a feature to rename symbols so that you can write
+// wrappers for existing functions. If you pass `-wrap:foo`, all
+// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are
+// expected to write `__wrap_foo` function as a wrapper). The original
+// symbol becomes accessible as `__real_foo`, so you can call that from your
+// wrapper.
+//
+// This data structure is instantiated for each -wrap option.
+struct WrappedSymbol {
+  Symbol *sym;
+  Symbol *real;
+  Symbol *wrap;
+};
+
+std::vector<WrappedSymbol> addWrappedSymbols(llvm::opt::InputArgList &args);
+
+void wrapSymbols(ArrayRef<WrappedSymbol> wrapped);
+
 } // namespace coff
 } // namespace lld
 
diff --git a/contrib/llvm-project/lld/COFF/Options.td b/contrib/llvm-project/lld/COFF/Options.td
index 087d53b5d2dd..73c3380df17c 100644
--- a/contrib/llvm-project/lld/COFF/Options.td
+++ b/contrib/llvm-project/lld/COFF/Options.td
@@ -9,6 +9,11 @@ class F<string name> : Flag<["/", "-", "/?", "-?"], name>;
 class P<string name, string help> :
       Joined<["/", "-", "/?", "-?"], name#":">, HelpText<help>;
 
+// Same as P<> above, but without help texts, for private undocumented
+// options.
+class P_priv<string name> :
+      Joined<["/", "-", "/?", "-?"], name#":">;
+
 // Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the
 // flag on and using it suffixed by ":no" turns it off.
 multiclass B<string name, string help_on, string help_off> {
@@ -28,9 +33,12 @@ def aligncomm : P<"aligncomm", "Set common symbol alignment">;
 def alternatename : P<"alternatename", "Define weak alias">;
 def base    : P<"base", "Base address of the program">;
 def color_diagnostics: Flag<["--"], "color-diagnostics">,
-    HelpText<"Use colors in diagnostics">;
+    HelpText<"Alias for --color-diagnostics=always">;
+def no_color_diagnostics: Flag<["--"], "no-color-diagnostics">,
+    HelpText<"Alias for --color-diagnostics=never">;
 def color_diagnostics_eq: Joined<["--"], "color-diagnostics=">,
-    HelpText<"Use colors in diagnostics; one of 'always', 'never', 'auto'">;
+    HelpText<"Use colors in diagnostics (default: auto)">,
+    MetaVarName<"[auto,always,never]">;
 def defaultlib : P<"defaultlib", "Add the library to the list of input files">;
 def delayload : P<"delayload", "Delay loaded DLL name">;
 def entry   : P<"entry", "Name of entry point symbol">;
@@ -50,8 +58,9 @@ def implib  : P<"implib", "Import library name">;
 def lib : F<"lib">,
     HelpText<"Act like lib.exe; must be first argument if present">;
 def libpath : P<"libpath", "Additional library search path">;
-def linkrepro : P<"linkrepro",
-    "Dump linker invocation and input files for debugging">;
+def linkrepro : Joined<["/", "-", "/?", "-?"], "linkrepro:">,
+    MetaVarName<"directory">,
+    HelpText<"Write repro.tar containing inputs and command to reproduce link">;
 def lldignoreenv : F<"lldignoreenv">,
     HelpText<"Ignore environment variables like %LIB%">;
 def lldltocache : P<"lldltocache",
@@ -59,7 +68,7 @@ def lldltocache : P<"lldltocache",
 def lldltocachepolicy : P<"lldltocachepolicy",
     "Pruning policy for the ThinLTO cache">;
 def lldsavetemps : F<"lldsavetemps">,
-    HelpText<"Save temporary files instead of deleting them">;
+    HelpText<"Save intermediate LTO compilation results">;
 def machine : P<"machine", "Specify target platform">;
 def merge   : P<"merge", "Combine sections">;
 def mllvm   : P<"mllvm", "Options to pass to LLVM">;
@@ -68,8 +77,6 @@ def opt     : P<"opt", "Control optimizations">;
 def order   : P<"order", "Put functions in order">;
 def out     : P<"out", "Path to file to write output">;
 def natvis : P<"natvis", "Path to natvis file to embed in the PDB">;
-def no_color_diagnostics: F<"no-color-diagnostics">,
-    HelpText<"Do not use colors in diagnostics">;
 def pdb : P<"pdb", "PDB file path">;
 def pdbstripped : P<"pdbstripped", "Stripped PDB file path">;
 def pdbaltpath : P<"pdbaltpath", "PDB file path to embed in the image">;
@@ -129,8 +136,9 @@ def noentry : F<"noentry">,
 def profile : F<"profile">;
 def repro : F<"Brepro">,
     HelpText<"Use a hash of the executable as the PE header timestamp">;
-def reproduce : P<"reproduce",
-    "Dump linker invocation and input files for debugging">;
+def reproduce : Joined<["/", "-", "/?", "-?"], "reproduce:">,
+    MetaVarName<"filename">,
+    HelpText<"Write tar file containing inputs and command to reproduce link">;
 def swaprun : P<"swaprun",
   "Comma-separated list of 'cd' or 'net'">;
 def swaprun_cd : F<"swaprun:cd">, Alias<swaprun>, AliasArgs<["cd"]>,
@@ -194,7 +202,7 @@ def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>;
 defm auto_import : B_priv<"auto-import">;
 defm runtime_pseudo_reloc : B_priv<"runtime-pseudo-reloc">;
 def end_lib : F<"end-lib">,
-  HelpText<"Ends group of objects treated as if they were in a library">;
+  HelpText<"End group of objects treated as if they were in a library">;
 def exclude_all_symbols : F<"exclude-all-symbols">;
 def export_all_symbols : F<"export-all-symbols">;
 defm demangle : B<"demangle",
@@ -205,13 +213,14 @@ def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">,
 def kill_at : F<"kill-at">;
 def lldmingw : F<"lldmingw">;
 def noseh : F<"noseh">;
+def osversion : P_priv<"osversion">;
 def output_def : Joined<["/", "-", "/?", "-?"], "output-def:">;
 def pdb_source_path : P<"pdbsourcepath",
     "Base path used to make relative source file path absolute in PDB">;
 def rsp_quoting : Joined<["--"], "rsp-quoting=">,
   HelpText<"Quoting style for response files, 'windows' (default) or 'posix'">;
 def start_lib : F<"start-lib">,
-  HelpText<"Starts group of objects treated as if they were in a library">;
+  HelpText<"Start group of objects treated as if they were in a library">;
 def thinlto_emit_imports_files :
     F<"thinlto-emit-imports-files">,
     HelpText<"Emit .imports files with -thinlto-index-only">;
@@ -231,10 +240,22 @@ def lto_obj_path : P<
     "lto-obj-path",
     "output native object for merged LTO unit to this path">;
 def dash_dash_version : Flag<["--"], "version">,
-  HelpText<"Print version information">;
+  HelpText<"Display the version number and exit">;
 def threads
     : P<"threads", "Number of threads. '1' disables multi-threading. By "
                    "default all available hardware threads are used">;
+def call_graph_ordering_file: P<
+    "call-graph-ordering-file", 
+    "Layout sections to optimize the given callgraph">;
+defm call_graph_profile_sort: B<
+    "call-graph-profile-sort",
+    "Reorder sections with call graph profile (default)",
+    "Do not reorder sections with call graph profile">;
+def print_symbol_order: P<
+    "print-symbol-order",
+    "Print a symbol order specified by /call-graph-ordering-file and "
+    "/call-graph-profile-sort into the specified file">;
+def wrap : P_priv<"wrap">;
 
 // Flags for debugging
 def lldmap : F<"lldmap">;
diff --git a/contrib/llvm-project/lld/COFF/PDB.cpp b/contrib/llvm-project/lld/COFF/PDB.cpp
index 49d04add5be0..fe362ccaf0dc 100644
--- a/contrib/llvm-project/lld/COFF/PDB.cpp
+++ b/contrib/llvm-project/lld/COFF/PDB.cpp
@@ -62,12 +62,14 @@ using namespace lld;
 using namespace lld::coff;
 
 using llvm::object::coff_section;
+using llvm::pdb::StringTableFixup;
 
 static ExitOnError exitOnErr;
 
 static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root());
-
 static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer);
+Timer lld::coff::loadGHashTimer("Global Type Hashing", addObjectsTimer);
+Timer lld::coff::mergeGHashTimer("GHash Type Merging", addObjectsTimer);
 static Timer typeMergingTimer("Type Merging", addObjectsTimer);
 static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer);
 static Timer publicsLayoutTimer("Publics Stream Layout", totalPdbLinkTimer);
@@ -107,18 +109,39 @@ public:
   /// Link info for each import file in the symbol table into the PDB.
   void addImportFilesToPDB(ArrayRef<OutputSection *> outputSections);
 
+  void createModuleDBI(ObjFile *file);
+
   /// Link CodeView from a single object file into the target (output) PDB.
   /// When a precompiled headers object is linked, its TPI map might be provided
   /// externally.
   void addDebug(TpiSource *source);
 
-  const CVIndexMap *mergeTypeRecords(TpiSource *source, CVIndexMap *localMap);
-
-  void addDebugSymbols(ObjFile *file, const CVIndexMap *indexMap);
-
-  void mergeSymbolRecords(ObjFile *file, const CVIndexMap &indexMap,
-                          std::vector<ulittle32_t *> &stringTableRefs,
-                          BinaryStreamRef symData);
+  void addDebugSymbols(TpiSource *source);
+
+  // Analyze the symbol records to separate module symbols from global symbols,
+  // find string references, and calculate how large the symbol stream will be
+  // in the PDB.
+  void analyzeSymbolSubsection(SectionChunk *debugChunk,
+                               uint32_t &moduleSymOffset,
+                               uint32_t &nextRelocIndex,
+                               std::vector<StringTableFixup> &stringTableFixups,
+                               BinaryStreamRef symData);
+
+  // Write all module symbols from all live debug symbol subsections of the
+  // given object file into the given stream writer.
+  Error writeAllModuleSymbolRecords(ObjFile *file, BinaryStreamWriter &writer);
+
+  // Callback to copy and relocate debug symbols during PDB file writing.
+  static Error commitSymbolsForObject(void *ctx, void *obj,
+                                      BinaryStreamWriter &writer);
+
+  // Copy the symbol record, relocate it, and fix the alignment if necessary.
+  // Rewrite type indices in the record. Replace unrecognized symbol records
+  // with S_SKIP records.
+  void writeSymbolRecord(SectionChunk *debugChunk,
+                         ArrayRef<uint8_t> sectionContents, CVSymbol sym,
+                         size_t alignedSize, uint32_t &nextRelocIndex,
+                         std::vector<uint8_t> &storage);
 
   /// Add the section map and section contributions to the PDB.
   void addSections(ArrayRef<OutputSection *> outputSections,
@@ -147,8 +170,20 @@ private:
   uint64_t globalSymbols = 0;
   uint64_t moduleSymbols = 0;
   uint64_t publicSymbols = 0;
+  uint64_t nbTypeRecords = 0;
+  uint64_t nbTypeRecordsBytes = 0;
+};
+
+/// Represents an unrelocated DEBUG_S_FRAMEDATA subsection.
+struct UnrelocatedFpoData {
+  SectionChunk *debugChunk = nullptr;
+  ArrayRef<uint8_t> subsecData;
+  uint32_t relocIndex = 0;
 };
 
+/// The size of the magic bytes at the beginning of a symbol section or stream.
+enum : uint32_t { kSymbolStreamMagicSize = 4 };
+
 class DebugSHandler {
   PDBLinker &linker;
 
@@ -156,7 +191,7 @@ class DebugSHandler {
   ObjFile &file;
 
   /// The result of merging type indices.
-  const CVIndexMap *indexMap;
+  TpiSource *source;
 
   /// The DEBUG_S_STRINGTABLE subsection.  These strings are referred to by
   /// index from other records in the .debug$S section.  All of these strings
@@ -175,23 +210,36 @@ class DebugSHandler {
   /// contain string table references which need to be re-written, so we
   /// collect them all here and re-write them after all subsections have been
   /// discovered and processed.
-  std::vector<DebugFrameDataSubsectionRef> newFpoFrames;
+  std::vector<UnrelocatedFpoData> frameDataSubsecs;
+
+  /// List of string table references in symbol records. Later they will be
+  /// applied to the symbols during PDB writing.
+  std::vector<StringTableFixup> stringTableFixups;
+
+  /// Sum of the size of all module symbol records across all .debug$S sections.
+  /// Includes record realignment and the size of the symbol stream magic
+  /// prefix.
+  uint32_t moduleStreamSize = kSymbolStreamMagicSize;
+
+  /// Next relocation index in the current .debug$S section. Resets every
+  /// handleDebugS call.
+  uint32_t nextRelocIndex = 0;
 
-  /// Pointers to raw memory that we determine have string table references
-  /// that need to be re-written.  We first process all .debug$S subsections
-  /// to ensure that we can handle subsections written in any order, building
-  /// up this list as we go.  At the end, we use the string table (which must
-  /// have been discovered by now else it is an error) to re-write these
-  /// references.
-  std::vector<ulittle32_t *> stringTableReferences;
+  void advanceRelocIndex(SectionChunk *debugChunk, ArrayRef<uint8_t> subsec);
 
-  void mergeInlineeLines(const DebugSubsectionRecord &inlineeLines);
+  void addUnrelocatedSubsection(SectionChunk *debugChunk,
+                                const DebugSubsectionRecord &ss);
+
+  void addFrameDataSubsection(SectionChunk *debugChunk,
+                              const DebugSubsectionRecord &ss);
+
+  void recordStringTableReferences(CVSymbol sym, uint32_t symOffset);
 
 public:
-  DebugSHandler(PDBLinker &linker, ObjFile &file, const CVIndexMap *indexMap)
-      : linker(linker), file(file), indexMap(indexMap) {}
+  DebugSHandler(PDBLinker &linker, ObjFile &file, TpiSource *source)
+      : linker(linker), file(file), source(source) {}
 
-  void handleDebugS(ArrayRef<uint8_t> relocatedDebugContents);
+  void handleDebugS(SectionChunk *debugChunk);
 
   void finish();
 };
@@ -250,68 +298,34 @@ static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder,
   });
 }
 
-static bool remapTypeIndex(TypeIndex &ti, ArrayRef<TypeIndex> typeIndexMap) {
-  if (ti.isSimple())
-    return true;
-  if (ti.toArrayIndex() >= typeIndexMap.size())
-    return false;
-  ti = typeIndexMap[ti.toArrayIndex()];
-  return true;
-}
-
-static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind,
-                                     MutableArrayRef<uint8_t> recordBytes,
-                                     const CVIndexMap &indexMap,
-                                     ArrayRef<TiReference> typeRefs) {
-  MutableArrayRef<uint8_t> contents =
-      recordBytes.drop_front(sizeof(RecordPrefix));
-  for (const TiReference &ref : typeRefs) {
-    unsigned byteSize = ref.Count * sizeof(TypeIndex);
-    if (contents.size() < ref.Offset + byteSize)
-      fatal("symbol record too short");
-
-    // This can be an item index or a type index. Choose the appropriate map.
-    ArrayRef<TypeIndex> typeOrItemMap = indexMap.tpiMap;
-    bool isItemIndex = ref.Kind == TiRefKind::IndexRef;
-    if (isItemIndex && indexMap.isTypeServerMap)
-      typeOrItemMap = indexMap.ipiMap;
-
-    MutableArrayRef<TypeIndex> tIs(
-        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
-    for (TypeIndex &ti : tIs) {
-      if (!remapTypeIndex(ti, typeOrItemMap)) {
-        log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " +
-            file->getName() + " with bad " + (isItemIndex ? "item" : "type") +
-            " index 0x" + utohexstr(ti.getIndex()));
-        ti = TypeIndex(SimpleTypeKind::NotTranslated);
-        continue;
-      }
-    }
-  }
-}
-
-static void
-recordStringTableReferenceAtOffset(MutableArrayRef<uint8_t> contents,
-                                   uint32_t offset,
-                                   std::vector<ulittle32_t *> &strTableRefs) {
-  contents =
-      contents.drop_front(offset).take_front(sizeof(support::ulittle32_t));
-  ulittle32_t *index = reinterpret_cast<ulittle32_t *>(contents.data());
-  strTableRefs.push_back(index);
+static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) {
+  // Start the TPI and IPI stream headers.
+  builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80);
+  builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80);
+  for_each(TpiSource::instances, [&](TpiSource *source) {
+    builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs,
+                                           source->mergedTpi.recSizes,
+                                           source->mergedTpi.recHashes);
+    builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs,
+                                           source->mergedIpi.recSizes,
+                                           source->mergedIpi.recHashes);
+  });
 }
 
 static void
-recordStringTableReferences(SymbolKind kind, MutableArrayRef<uint8_t> contents,
-                            std::vector<ulittle32_t *> &strTableRefs) {
+recordStringTableReferences(CVSymbol sym, uint32_t symOffset,
+                            std::vector<StringTableFixup> &stringTableFixups) {
   // For now we only handle S_FILESTATIC, but we may need the same logic for
   // S_DEFRANGE and S_DEFRANGE_SUBFIELD.  However, I cannot seem to generate any
   // PDBs that contain these types of records, so because of the uncertainty
   // they are omitted here until we can prove that it's necessary.
-  switch (kind) {
-  case SymbolKind::S_FILESTATIC:
+  switch (sym.kind()) {
+  case SymbolKind::S_FILESTATIC: {
     // FileStaticSym::ModFileOffset
-    recordStringTableReferenceAtOffset(contents, 8, strTableRefs);
+    uint32_t ref = *reinterpret_cast<const ulittle32_t *>(&sym.data()[8]);
+    stringTableFixups.push_back({ref, symOffset + 8});
     break;
+  }
   case SymbolKind::S_DEFRANGE:
   case SymbolKind::S_DEFRANGE_SUBFIELD:
     log("Not fixing up string table reference in S_DEFRANGE / "
@@ -330,7 +344,7 @@ static SymbolKind symbolKind(ArrayRef<uint8_t> recordData) {
 
 /// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32
 static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
-                               TypeCollection &idTable) {
+                               TypeMerger &tMerger, TpiSource *source) {
   RecordPrefix *prefix = reinterpret_cast<RecordPrefix *>(recordData.data());
 
   SymbolKind kind = symbolKind(recordData);
@@ -357,13 +371,25 @@ static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
         reinterpret_cast<TypeIndex *>(content.data() + refs[0].Offset);
     // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in
     // the IPI stream, whose `FunctionType` member refers to the TPI stream.
-    // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and
+    // Note that LF_FUNC_ID and LF_MFUNC_ID have the same record layout, and
     // in both cases we just need the second type index.
     if (!ti->isSimple() && !ti->isNoneType()) {
-      CVType funcIdData = idTable.getType(*ti);
-      ArrayRef<uint8_t> tiBuf = funcIdData.data().slice(8, 4);
-      assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record");
-      *ti = *reinterpret_cast<const TypeIndex *>(tiBuf.data());
+      if (config->debugGHashes) {
+        auto idToType = tMerger.funcIdToType.find(*ti);
+        if (idToType == tMerger.funcIdToType.end()) {
+          warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item "
+                       "index {1:X} which is not a LF_[M]FUNC_ID record",
+                       source->file->getName(), ti->getIndex()));
+          *ti = TypeIndex(SimpleTypeKind::NotTranslated);
+        } else {
+          *ti = idToType->second;
+        }
+      } else {
+        CVType funcIdData = tMerger.getIDTable().getType(*ti);
+        ArrayRef<uint8_t> tiBuf = funcIdData.data().slice(8, 4);
+        assert(tiBuf.size() == 4 && "corrupt LF_[M]FUNC_ID record");
+        *ti = *reinterpret_cast<const TypeIndex *>(tiBuf.data());
+      }
     }
 
     kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32
@@ -372,60 +398,48 @@ static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
   }
 }
 
-/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned.
-/// The object file may not be aligned.
-static MutableArrayRef<uint8_t>
-copyAndAlignSymbol(const CVSymbol &sym, MutableArrayRef<uint8_t> &alignedMem) {
-  size_t size = alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
-  assert(size >= 4 && "record too short");
-  assert(size <= MaxRecordLength && "record too long");
-  assert(alignedMem.size() >= size && "didn't preallocate enough");
-
-  // Copy the symbol record and zero out any padding bytes.
-  MutableArrayRef<uint8_t> newData = alignedMem.take_front(size);
-  alignedMem = alignedMem.drop_front(size);
-  memcpy(newData.data(), sym.data().data(), sym.length());
-  memset(newData.data() + sym.length(), 0, size - sym.length());
-
-  // Update the record prefix length. It should point to the beginning of the
-  // next record.
-  auto *prefix = reinterpret_cast<RecordPrefix *>(newData.data());
-  prefix->RecordLen = size - 2;
-  return newData;
-}
-
+namespace {
 struct ScopeRecord {
   ulittle32_t ptrParent;
   ulittle32_t ptrEnd;
 };
+} // namespace
 
-struct SymbolScope {
-  ScopeRecord *openingRecord;
-  uint32_t scopeOffset;
-};
+/// Given a pointer to a symbol record that opens a scope, return a pointer to
+/// the scope fields.
+static ScopeRecord *getSymbolScopeFields(void *sym) {
+  return reinterpret_cast<ScopeRecord *>(reinterpret_cast<char *>(sym) +
+                                         sizeof(RecordPrefix));
+}
 
-static void scopeStackOpen(SmallVectorImpl<SymbolScope> &stack,
-                           uint32_t curOffset, CVSymbol &sym) {
-  assert(symbolOpensScope(sym.kind()));
-  SymbolScope s;
-  s.scopeOffset = curOffset;
-  s.openingRecord = const_cast<ScopeRecord *>(
-      reinterpret_cast<const ScopeRecord *>(sym.content().data()));
-  s.openingRecord->ptrParent = stack.empty() ? 0 : stack.back().scopeOffset;
-  stack.push_back(s);
+// To open a scope, push the offset of the current symbol record onto the
+// stack.
+static void scopeStackOpen(SmallVectorImpl<uint32_t> &stack,
+                           std::vector<uint8_t> &storage) {
+  stack.push_back(storage.size());
 }
 
-static void scopeStackClose(SmallVectorImpl<SymbolScope> &stack,
-                            uint32_t curOffset, InputFile *file) {
+// To close a scope, update the record that opened the scope.
+static void scopeStackClose(SmallVectorImpl<uint32_t> &stack,
+                            std::vector<uint8_t> &storage,
+                            uint32_t storageBaseOffset, ObjFile *file) {
   if (stack.empty()) {
     warn("symbol scopes are not balanced in " + file->getName());
     return;
   }
-  SymbolScope s = stack.pop_back_val();
-  s.openingRecord->ptrEnd = curOffset;
+
+  // Update ptrEnd of the record that opened the scope to point to the
+  // current record, if we are writing into the module symbol stream.
+  uint32_t offOpen = stack.pop_back_val();
+  uint32_t offEnd = storageBaseOffset + storage.size();
+  uint32_t offParent = stack.empty() ? 0 : (stack.back() + storageBaseOffset);
+  ScopeRecord *scopeRec = getSymbolScopeFields(&(storage)[offOpen]);
+  scopeRec->ptrParent = offParent;
+  scopeRec->ptrEnd = offEnd;
 }
 
-static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
+static bool symbolGoesInModuleStream(const CVSymbol &sym,
+                                     unsigned symbolScopeDepth) {
   switch (sym.kind()) {
   case SymbolKind::S_GDATA32:
   case SymbolKind::S_CONSTANT:
@@ -439,7 +453,7 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
     return false;
   // S_UDT records go in the module stream if it is not a global S_UDT.
   case SymbolKind::S_UDT:
-    return !isGlobalScope;
+    return symbolScopeDepth > 0;
   // S_GDATA32 does not go in the module stream, but S_LDATA32 does.
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_LTHREAD32:
@@ -449,13 +463,15 @@ static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
 }
 
 static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
-                                      bool isFunctionScope) {
+                                      unsigned symbolScopeDepth) {
   switch (sym.kind()) {
   case SymbolKind::S_CONSTANT:
   case SymbolKind::S_GDATA32:
   case SymbolKind::S_GTHREAD32:
   case SymbolKind::S_GPROC32:
   case SymbolKind::S_LPROC32:
+  case SymbolKind::S_GPROC32_ID:
+  case SymbolKind::S_LPROC32_ID:
   // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
   // since they are synthesized by the linker in response to S_GPROC32 and
   // S_LPROC32, but if we do see them, copy them straight through.
@@ -466,14 +482,16 @@ static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
   case SymbolKind::S_UDT:
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_LTHREAD32:
-    return !isFunctionScope;
+    return symbolScopeDepth == 0;
   default:
     return false;
   }
 }
 
 static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
-                            unsigned symOffset, const CVSymbol &sym) {
+                            unsigned symOffset,
+                            std::vector<uint8_t> &symStorage) {
+  CVSymbol sym(makeArrayRef(symStorage));
   switch (sym.kind()) {
   case SymbolKind::S_CONSTANT:
   case SymbolKind::S_UDT:
@@ -482,9 +500,14 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
   case SymbolKind::S_LTHREAD32:
   case SymbolKind::S_LDATA32:
   case SymbolKind::S_PROCREF:
-  case SymbolKind::S_LPROCREF:
-    builder.addGlobalSymbol(sym);
+  case SymbolKind::S_LPROCREF: {
+    // sym is a temporary object, so we have to copy and reallocate the record
+    // to stabilize it.
+    uint8_t *mem = bAlloc.Allocate<uint8_t>(sym.length());
+    memcpy(mem, sym.data().data(), sym.length());
+    builder.addGlobalSymbol(CVSymbol(makeArrayRef(mem, sym.length())));
     break;
+  }
   case SymbolKind::S_GPROC32:
   case SymbolKind::S_LPROC32: {
     SymbolRecordKind k = SymbolRecordKind::ProcRefSym;
@@ -505,119 +528,189 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
   }
 }
 
-void PDBLinker::mergeSymbolRecords(ObjFile *file, const CVIndexMap &indexMap,
-                                   std::vector<ulittle32_t *> &stringTableRefs,
-                                   BinaryStreamRef symData) {
-  ArrayRef<uint8_t> symsBuffer;
-  cantFail(symData.readBytes(0, symData.getLength(), symsBuffer));
-  SmallVector<SymbolScope, 4> scopes;
-
-  // Iterate every symbol to check if any need to be realigned, and if so, how
-  // much space we need to allocate for them.
-  bool needsRealignment = false;
-  unsigned totalRealignedSize = 0;
-  auto ec = forEachCodeViewRecord<CVSymbol>(
-      symsBuffer, [&](CVSymbol sym) -> llvm::Error {
-        unsigned realignedSize =
-            alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
-        needsRealignment |= realignedSize != sym.length();
-        totalRealignedSize += realignedSize;
-        return Error::success();
-      });
-
-  // If any of the symbol record lengths was corrupt, ignore them all, warn
-  // about it, and move on.
-  if (ec) {
-    warn("corrupt symbol records in " + file->getName());
-    consumeError(std::move(ec));
+// Check if the given symbol record was padded for alignment. If so, zero out
+// the padding bytes and update the record prefix with the new size.
+static void fixRecordAlignment(MutableArrayRef<uint8_t> recordBytes,
+                               size_t oldSize) {
+  size_t alignedSize = recordBytes.size();
+  if (oldSize == alignedSize)
     return;
-  }
+  reinterpret_cast<RecordPrefix *>(recordBytes.data())->RecordLen =
+      alignedSize - 2;
+  memset(recordBytes.data() + oldSize, 0, alignedSize - oldSize);
+}
 
-  // If any symbol needed realignment, allocate enough contiguous memory for
-  // them all. Typically symbol subsections are small enough that this will not
-  // cause fragmentation.
-  MutableArrayRef<uint8_t> alignedSymbolMem;
-  if (needsRealignment) {
-    void *alignedData =
-        bAlloc.Allocate(totalRealignedSize, alignOf(CodeViewContainer::Pdb));
-    alignedSymbolMem = makeMutableArrayRef(
-        reinterpret_cast<uint8_t *>(alignedData), totalRealignedSize);
+// Overwrite a record in place with an S_SKIP record of identical size. Used
+// when space was already reserved for a record whose type remapping failed.
+static void replaceWithSkipRecord(MutableArrayRef<uint8_t> recordBytes) {
+  auto *header = reinterpret_cast<RecordPrefix *>(recordBytes.data());
+  memset(header, 0, recordBytes.size());
+  header->RecordLen = recordBytes.size() - 2;
+  header->RecordKind = SymbolKind::S_SKIP;
+}
+
+// Copy the symbol record, relocate it, and fix the alignment if necessary.
+// Rewrite type indices in the record. Replace unrecognized symbol records with
+// S_SKIP records.
+void PDBLinker::writeSymbolRecord(SectionChunk *debugChunk,
+                                  ArrayRef<uint8_t> sectionContents,
+                                  CVSymbol sym, size_t alignedSize,
+                                  uint32_t &nextRelocIndex,
+                                  std::vector<uint8_t> &storage) {
+  // Allocate space for the new record at the end of the storage.
+  storage.resize(storage.size() + alignedSize);
+  auto recordBytes = MutableArrayRef<uint8_t>(storage).take_back(alignedSize);
+
+  // Copy the symbol record and relocate it.
+  debugChunk->writeAndRelocateSubsection(sectionContents, sym.data(),
+                                         nextRelocIndex, recordBytes.data());
+  fixRecordAlignment(recordBytes, sym.length());
+
+  // Re-map all the type index references.
+  TpiSource *source = debugChunk->file->debugTypesObj;
+  if (!source->remapTypesInSymbolRecord(recordBytes)) {
+    log("ignoring unknown symbol record with kind 0x" + utohexstr(sym.kind()));
+    replaceWithSkipRecord(recordBytes);
   }
 
-  // Iterate again, this time doing the real work.
-  unsigned curSymOffset = file->moduleDBI->getNextSymbolOffset();
-  ArrayRef<uint8_t> bulkSymbols;
-  cantFail(forEachCodeViewRecord<CVSymbol>(
-      symsBuffer, [&](CVSymbol sym) -> llvm::Error {
-        // Align the record if required.
-        MutableArrayRef<uint8_t> recordBytes;
-        if (needsRealignment) {
-          recordBytes = copyAndAlignSymbol(sym, alignedSymbolMem);
-          sym = CVSymbol(recordBytes);
-        } else {
-          // Otherwise, we can actually mutate the symbol directly, since we
-          // copied it to apply relocations.
-          recordBytes = makeMutableArrayRef(
-              const_cast<uint8_t *>(sym.data().data()), sym.length());
-        }
+  // An object file may have S_xxx_ID symbols, but these get converted to
+  // "real" symbols in a PDB.
+  translateIdSymbols(recordBytes, tMerger, source);
+}
 
-        // Discover type index references in the record. Skip it if we don't
-        // know where they are.
-        SmallVector<TiReference, 32> typeRefs;
-        if (!discoverTypeIndicesInSymbol(sym, typeRefs)) {
-          log("ignoring unknown symbol record with kind 0x" +
-              utohexstr(sym.kind()));
-          return Error::success();
-        }
+void PDBLinker::analyzeSymbolSubsection(
+    SectionChunk *debugChunk, uint32_t &moduleSymOffset,
+    uint32_t &nextRelocIndex, std::vector<StringTableFixup> &stringTableFixups,
+    BinaryStreamRef symData) {
+  ObjFile *file = debugChunk->file;
+  uint32_t moduleSymStart = moduleSymOffset;
 
-        // Re-map all the type index references.
-        remapTypesInSymbolRecord(file, sym.kind(), recordBytes, indexMap,
-                                 typeRefs);
+  uint32_t scopeLevel = 0;
+  std::vector<uint8_t> storage;
+  ArrayRef<uint8_t> sectionContents = debugChunk->getContents();
 
-        // An object file may have S_xxx_ID symbols, but these get converted to
-        // "real" symbols in a PDB.
-        translateIdSymbols(recordBytes, tMerger.getIDTable());
-        sym = CVSymbol(recordBytes);
+  ArrayRef<uint8_t> symsBuffer;
+  cantFail(symData.readBytes(0, symData.getLength(), symsBuffer));
 
-        // If this record refers to an offset in the object file's string table,
-        // add that item to the global PDB string table and re-write the index.
-        recordStringTableReferences(sym.kind(), recordBytes, stringTableRefs);
+  if (symsBuffer.empty())
+    warn("empty symbols subsection in " + file->getName());
 
-        // Fill in "Parent" and "End" fields by maintaining a stack of scopes.
+  Error ec = forEachCodeViewRecord<CVSymbol>(
+      symsBuffer, [&](CVSymbol sym) -> llvm::Error {
+        // Track the scope depth, saturating at 0 on unbalanced scope ends.
         if (symbolOpensScope(sym.kind()))
-          scopeStackOpen(scopes, curSymOffset, sym);
+          ++scopeLevel;
         else if (symbolEndsScope(sym.kind()))
-          scopeStackClose(scopes, curSymOffset, file);
+          scopeLevel -= scopeLevel > 0 ? 1 : 0;
+
+        uint32_t alignedSize =
+            alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
 
-        // Add the symbol to the globals stream if necessary.  Do this before
-        // adding the symbol to the module since we may need to get the next
-        // symbol offset, and writing to the module's symbol stream will update
-        // that offset.
-        if (symbolGoesInGlobalsStream(sym, !scopes.empty())) {
+        // Copy global records. Some global records (mainly procedures)
+        // reference the current offset into the module stream.
+        if (symbolGoesInGlobalsStream(sym, scopeLevel)) {
+          storage.clear();
+          writeSymbolRecord(debugChunk, sectionContents, sym, alignedSize,
+                            nextRelocIndex, storage);
           addGlobalSymbol(builder.getGsiBuilder(),
-                          file->moduleDBI->getModuleIndex(), curSymOffset, sym);
+                          file->moduleDBI->getModuleIndex(), moduleSymOffset,
+                          storage);
           ++globalSymbols;
         }
 
-        if (symbolGoesInModuleStream(sym, scopes.empty())) {
-          // Add symbols to the module in bulk. If this symbol is contiguous
-          // with the previous run of symbols to add, combine the ranges. If
-          // not, close the previous range of symbols and start a new one.
-          if (sym.data().data() == bulkSymbols.end()) {
-            bulkSymbols = makeArrayRef(bulkSymbols.data(),
-                                       bulkSymbols.size() + sym.length());
-          } else {
-            file->moduleDBI->addSymbolsInBulk(bulkSymbols);
-            bulkSymbols = recordBytes;
-          }
-          curSymOffset += sym.length();
+        // Update the module stream offset and record any string table index
+        // references. There are very few of these and they will be rewritten
+        // later during PDB writing.
+        if (symbolGoesInModuleStream(sym, scopeLevel)) {
+          recordStringTableReferences(sym, moduleSymOffset, stringTableFixups);
+          moduleSymOffset += alignedSize;
           ++moduleSymbols;
         }
+
         return Error::success();
-      }));
+      });
+
+  // If we encountered corrupt records, ignore the whole subsection. If we wrote
+  // any partial records, undo that. For globals, we just keep what we have and
+  // continue.
+  if (ec) {
+    warn("corrupt symbol records in " + file->getName());
+    moduleSymOffset = moduleSymStart;
+    consumeError(std::move(ec));
+  }
+}
+
+Error PDBLinker::writeAllModuleSymbolRecords(ObjFile *file,
+                                             BinaryStreamWriter &writer) {
+  std::vector<uint8_t> storage;
+  SmallVector<uint32_t, 4> scopes;
+
+  // Visit all live .debug$S sections a second time, and write them to the PDB.
+  for (SectionChunk *debugChunk : file->getDebugChunks()) {
+    if (!debugChunk->live || debugChunk->getSize() == 0 ||
+        debugChunk->getSectionName() != ".debug$S")
+      continue;
+
+    ArrayRef<uint8_t> sectionContents = debugChunk->getContents();
+    auto contents =
+        SectionChunk::consumeDebugMagic(sectionContents, ".debug$S");
+    DebugSubsectionArray subsections;
+    BinaryStreamReader reader(contents, support::little);
+    exitOnErr(reader.readArray(subsections, contents.size()));
 
-  // Add any remaining symbols we've accumulated.
-  file->moduleDBI->addSymbolsInBulk(bulkSymbols);
+    uint32_t nextRelocIndex = 0;
+    for (const DebugSubsectionRecord &ss : subsections) {
+      if (ss.kind() != DebugSubsectionKind::Symbols)
+        continue;
+
+      uint32_t moduleSymStart = writer.getOffset();
+      scopes.clear();
+      storage.clear();
+      ArrayRef<uint8_t> symsBuffer;
+      BinaryStreamRef sr = ss.getRecordData();
+      cantFail(sr.readBytes(0, sr.getLength(), symsBuffer));
+      auto ec = forEachCodeViewRecord<CVSymbol>(
+          symsBuffer, [&](CVSymbol sym) -> llvm::Error {
+            // Track the current scope. Only update records in the postmerge
+            // pass.
+            if (symbolOpensScope(sym.kind()))
+              scopeStackOpen(scopes, storage);
+            else if (symbolEndsScope(sym.kind()))
+              scopeStackClose(scopes, storage, moduleSymStart, file);
+
+            // Copy, relocate, and rewrite each module symbol.
+            if (symbolGoesInModuleStream(sym, scopes.size())) {
+              uint32_t alignedSize =
+                  alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
+              writeSymbolRecord(debugChunk, sectionContents, sym, alignedSize,
+                                nextRelocIndex, storage);
+            }
+            return Error::success();
+          });
+
+      // If we encounter corrupt records in the second pass, ignore them. We
+      // already warned about them in the first analysis pass.
+      if (ec) {
+        consumeError(std::move(ec));
+        storage.clear();
+      }
+
+      // Writing bytes has a very high overhead, so write the entire subsection
+      // at once.
+      // TODO: Consider buffering symbols for the entire object file to reduce
+      // overhead even further.
+      if (Error e = writer.writeBytes(storage))
+        return e;
+    }
+  }
+
+  return Error::success();
+}
+
+Error PDBLinker::commitSymbolsForObject(void *ctx, void *obj,
+                                        BinaryStreamWriter &writer) {
+  return static_cast<PDBLinker *>(ctx)->writeAllModuleSymbolRecords(
+      static_cast<ObjFile *>(obj), writer);
 }
 
 static pdb::SectionContrib createSectionContrib(const Chunk *c, uint32_t modi) {
@@ -657,18 +750,18 @@ translateStringTableIndex(uint32_t objIndex,
   return pdbStrTable.insert(*expectedString);
 }
 
-void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
-  relocatedDebugContents =
-      SectionChunk::consumeDebugMagic(relocatedDebugContents, ".debug$S");
-
+void DebugSHandler::handleDebugS(SectionChunk *debugChunk) {
+  // Note that we are processing the *unrelocated* section contents. They will
+  // be relocated later during PDB writing.
+  ArrayRef<uint8_t> contents = debugChunk->getContents();
+  contents = SectionChunk::consumeDebugMagic(contents, ".debug$S");
   DebugSubsectionArray subsections;
-  BinaryStreamReader reader(relocatedDebugContents, support::little);
-  exitOnErr(reader.readArray(subsections, relocatedDebugContents.size()));
+  BinaryStreamReader reader(contents, support::little);
+  exitOnErr(reader.readArray(subsections, contents.size()));
+  debugChunk->sortRelocations();
 
-  // If there is no index map, use an empty one.
-  CVIndexMap tempIndexMap;
-  if (!indexMap)
-    indexMap = &tempIndexMap;
+  // Reset the relocation index, since this is a new section.
+  nextRelocIndex = 0;
 
   for (const DebugSubsectionRecord &ss : subsections) {
     // Ignore subsections with the 'ignore' bit. Some versions of the Visual C++
@@ -689,30 +782,17 @@ void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
       exitOnErr(checksums.initialize(ss.getRecordData()));
       break;
     case DebugSubsectionKind::Lines:
-      // We can add the relocated line table directly to the PDB without
-      // modification because the file checksum offsets will stay the same.
-      file.moduleDBI->addDebugSubsection(ss);
-      break;
     case DebugSubsectionKind::InlineeLines:
-      // The inlinee lines subsection also has file checksum table references
-      // that can be used directly, but it contains function id references that
-      // must be remapped.
-      mergeInlineeLines(ss);
+      addUnrelocatedSubsection(debugChunk, ss);
       break;
-    case DebugSubsectionKind::FrameData: {
-      // We need to re-write string table indices here, so save off all
-      // frame data subsections until we've processed the entire list of
-      // subsections so that we can be sure we have the string table.
-      DebugFrameDataSubsectionRef fds;
-      exitOnErr(fds.initialize(ss.getRecordData()));
-      newFpoFrames.push_back(std::move(fds));
+    case DebugSubsectionKind::FrameData:
+      addFrameDataSubsection(debugChunk, ss);
       break;
-    }
-    case DebugSubsectionKind::Symbols: {
-      linker.mergeSymbolRecords(&file, *indexMap, stringTableReferences,
-                                ss.getRecordData());
+    case DebugSubsectionKind::Symbols:
+      linker.analyzeSymbolSubsection(debugChunk, moduleStreamSize,
+                                     nextRelocIndex, stringTableFixups,
+                                     ss.getRecordData());
       break;
-    }
 
     case DebugSubsectionKind::CrossScopeImports:
     case DebugSubsectionKind::CrossScopeExports:
@@ -739,6 +819,85 @@ void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
   }
 }
 
+void DebugSHandler::advanceRelocIndex(SectionChunk *sc,
+                                      ArrayRef<uint8_t> subsec) {
+  // Skip relocations that apply before the start of this subsection.
+  ptrdiff_t subsecStart = subsec.data() - sc->getContents().data();
+  assert(subsecStart > 0);
+  auto sectionRelocs = sc->getRelocs();
+  while (nextRelocIndex < sectionRelocs.size() &&
+         sectionRelocs[nextRelocIndex].VirtualAddress < subsecStart)
+    ++nextRelocIndex;
+}
+
+namespace {
+/// Wrapper class for unrelocated line and inlinee line subsections, which
+/// require only relocation and type index remapping to add to the PDB.
+class UnrelocatedDebugSubsection : public DebugSubsection {
+public:
+  UnrelocatedDebugSubsection(DebugSubsectionKind k, SectionChunk *debugChunk,
+                             ArrayRef<uint8_t> subsec, uint32_t relocIndex)
+      : DebugSubsection(k), debugChunk(debugChunk), subsec(subsec),
+        relocIndex(relocIndex) {}
+
+  Error commit(BinaryStreamWriter &writer) const override;
+  uint32_t calculateSerializedSize() const override { return subsec.size(); }
+
+  SectionChunk *debugChunk;
+  ArrayRef<uint8_t> subsec;
+  uint32_t relocIndex;
+};
+} // namespace
+
+Error UnrelocatedDebugSubsection::commit(BinaryStreamWriter &writer) const {
+  std::vector<uint8_t> relocatedBytes(subsec.size());
+  uint32_t tmpRelocIndex = relocIndex;
+  debugChunk->writeAndRelocateSubsection(debugChunk->getContents(), subsec,
+                                         tmpRelocIndex, relocatedBytes.data());
+
+  // Remap type indices in inlinee line records in place. Skip the remapping if
+  // there is no type source info.
+  if (kind() == DebugSubsectionKind::InlineeLines &&
+      debugChunk->file->debugTypesObj) {
+    TpiSource *source = debugChunk->file->debugTypesObj;
+    DebugInlineeLinesSubsectionRef inlineeLines;
+    BinaryStreamReader storageReader(relocatedBytes, support::little);
+    exitOnErr(inlineeLines.initialize(storageReader));
+    for (const InlineeSourceLine &line : inlineeLines) {
+      TypeIndex &inlinee = *const_cast<TypeIndex *>(&line.Header->Inlinee);
+      if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) {
+        log("bad inlinee line record in " + debugChunk->file->getName() +
+            " with bad inlinee index 0x" + utohexstr(inlinee.getIndex()));
+      }
+    }
+  }
+
+  return writer.writeBytes(relocatedBytes);
+}
+
+void DebugSHandler::addUnrelocatedSubsection(SectionChunk *debugChunk,
+                                             const DebugSubsectionRecord &ss) {
+  BinaryStreamRef recordData = ss.getRecordData();
+  ArrayRef<uint8_t> rawBytes;
+  cantFail(recordData.readBytes(0, recordData.getLength(), rawBytes));
+  advanceRelocIndex(debugChunk, rawBytes);
+  auto deferred = std::make_shared<UnrelocatedDebugSubsection>(
+      ss.kind(), debugChunk, rawBytes, nextRelocIndex);
+  file.moduleDBI->addDebugSubsection(std::move(deferred));
+}
+
+void DebugSHandler::addFrameDataSubsection(SectionChunk *debugChunk,
+                                           const DebugSubsectionRecord &ss) {
+  // Frame data refers to the object's string table, which may not have been
+  // seen yet. Queue the raw bytes together with their first relocation index
+  // so the records can be rewritten once every subsection has been visited.
+  BinaryStreamRef recordData = ss.getRecordData();
+  ArrayRef<uint8_t> rawBytes;
+  cantFail(recordData.readBytes(0, recordData.getLength(), rawBytes));
+  advanceRelocIndex(debugChunk, rawBytes);
+  frameDataSubsecs.push_back({debugChunk, rawBytes, nextRelocIndex});
+}
+
 static Expected<StringRef>
 getFileName(const DebugStringTableSubsectionRef &strings,
             const DebugChecksumsSubsectionRef &checksums, uint32_t fileID) {
@@ -749,29 +908,14 @@ getFileName(const DebugStringTableSubsectionRef &strings,
   return strings.getString(offset);
 }
 
-void DebugSHandler::mergeInlineeLines(
-    const DebugSubsectionRecord &inlineeSubsection) {
-  DebugInlineeLinesSubsectionRef inlineeLines;
-  exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData()));
-
-  // Remap type indices in inlinee line records in place.
-  for (const InlineeSourceLine &line : inlineeLines) {
-    TypeIndex &inlinee = *const_cast<TypeIndex *>(&line.Header->Inlinee);
-    ArrayRef<TypeIndex> typeOrItemMap =
-        indexMap->isTypeServerMap ? indexMap->ipiMap : indexMap->tpiMap;
-    if (!remapTypeIndex(inlinee, typeOrItemMap)) {
-      log("bad inlinee line record in " + file.getName() +
-          " with bad inlinee index 0x" + utohexstr(inlinee.getIndex()));
-    }
-  }
-
-  // Add the modified inlinee line subsection directly.
-  file.moduleDBI->addDebugSubsection(inlineeSubsection);
-}
-
 void DebugSHandler::finish() {
   pdb::DbiStreamBuilder &dbiBuilder = linker.builder.getDbiBuilder();
 
+  // If we found any symbol records for the module symbol stream, defer them.
+  if (moduleStreamSize > kSymbolStreamMagicSize)
+    file.moduleDBI->addUnmergedSymbols(&file, moduleStreamSize -
+                                                  kSymbolStreamMagicSize);
+
   // We should have seen all debug subsections across the entire object file now
   // which means that if a StringTable subsection and Checksums subsection were
   // present, now is the time to handle them.
@@ -780,26 +924,50 @@ void DebugSHandler::finish() {
       fatal(".debug$S sections with a checksums subsection must also contain a "
             "string table subsection");
 
-    if (!stringTableReferences.empty())
+    if (!stringTableFixups.empty())
       warn("No StringTable subsection was encountered, but there are string "
            "table references");
     return;
   }
 
-  // Rewrite string table indices in the Fpo Data and symbol records to refer to
-  // the global PDB string table instead of the object file string table.
-  for (DebugFrameDataSubsectionRef &fds : newFpoFrames) {
-    const ulittle32_t *reloc = fds.getRelocPtr();
+  // Handle FPO data. Each subsection begins with a single image base
+  // relocation, which is then added to the RvaStart of each frame data record
+  // when it is added to the PDB. The string table indices for the FPO program
+  // must also be rewritten to use the PDB string table.
+  for (const UnrelocatedFpoData &subsec : frameDataSubsecs) {
+    // Relocate the first four bytes of the subsection and reinterpret them as a
+    // 32 bit integer.
+    SectionChunk *debugChunk = subsec.debugChunk;
+    ArrayRef<uint8_t> subsecData = subsec.subsecData;
+    uint32_t relocIndex = subsec.relocIndex;
+    auto unrelocatedRvaStart = subsecData.take_front(sizeof(uint32_t));
+    uint8_t relocatedRvaStart[sizeof(uint32_t)];
+    debugChunk->writeAndRelocateSubsection(debugChunk->getContents(),
+                                           unrelocatedRvaStart, relocIndex,
+                                           &relocatedRvaStart[0]);
+    uint32_t rvaStart;
+    memcpy(&rvaStart, &relocatedRvaStart[0], sizeof(uint32_t));
+
+    // Copy each frame data record, add in rvaStart, translate string table
+    // indices, and add the record to the PDB.
+    DebugFrameDataSubsectionRef fds;
+    BinaryStreamReader reader(subsecData, support::little);
+    exitOnErr(fds.initialize(reader));
     for (codeview::FrameData fd : fds) {
-      fd.RvaStart += *reloc;
+      fd.RvaStart += rvaStart;
       fd.FrameFunc =
           translateStringTableIndex(fd.FrameFunc, cvStrTab, linker.pdbStrTab);
       dbiBuilder.addNewFpoData(fd);
     }
   }
 
-  for (ulittle32_t *ref : stringTableReferences)
-    *ref = translateStringTableIndex(*ref, cvStrTab, linker.pdbStrTab);
+  // Translate the fixups and pass them off to the module builder so they will
+  // be applied during writing.
+  for (StringTableFixup &ref : stringTableFixups) {
+    ref.StrTabOffset =
+        translateStringTableIndex(ref.StrTabOffset, cvStrTab, linker.pdbStrTab);
+  }
+  file.moduleDBI->setStringTableFixups(std::move(stringTableFixups));
 
   // Make a new file checksum table that refers to offsets in the PDB-wide
   // string table. Generally the string table subsection appears after the
@@ -834,23 +1002,6 @@ static void warnUnusable(InputFile *f, Error e) {
     warn(msg);
 }
 
-const CVIndexMap *PDBLinker::mergeTypeRecords(TpiSource *source,
-                                              CVIndexMap *localMap) {
-  ScopedTimer t(typeMergingTimer);
-  // Before we can process symbol substreams from .debug$S, we need to process
-  // type information, file checksums, and the string table.  Add type info to
-  // the PDB first, so that we can get the map from object file type and item
-  // indices to PDB type and item indices.
-  Expected<const CVIndexMap *> r = source->mergeDebugT(&tMerger, localMap);
-
-  // If the .debug$T sections fail to merge, assume there is no debug info.
-  if (!r) {
-    warnUnusable(source->file, r.takeError());
-    return nullptr;
-  }
-  return *r;
-}
-
 // Allocate memory for a .debug$S / .debug$F section and relocate it.
 static ArrayRef<uint8_t> relocateDebugChunk(SectionChunk &debugChunk) {
   uint8_t *buffer = bAlloc.Allocate<uint8_t>(debugChunk.getSize());
@@ -860,12 +1011,17 @@ static ArrayRef<uint8_t> relocateDebugChunk(SectionChunk &debugChunk) {
   return makeArrayRef(buffer, debugChunk.getSize());
 }
 
-void PDBLinker::addDebugSymbols(ObjFile *file, const CVIndexMap *indexMap) {
+void PDBLinker::addDebugSymbols(TpiSource *source) {
+  // If this TpiSource doesn't have an object file, it must be from a type
+  // server PDB. Type server PDBs do not contain symbols, so stop here.
+  if (!source->file)
+    return;
+
   ScopedTimer t(symbolMergingTimer);
   pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
-  DebugSHandler dsh(*this, *file, indexMap);
+  DebugSHandler dsh(*this, *source->file, source);
   // Now do all live .debug$S and .debug$F sections.
-  for (SectionChunk *debugChunk : file->getDebugChunks()) {
+  for (SectionChunk *debugChunk : source->file->getDebugChunks()) {
     if (!debugChunk->live || debugChunk->getSize() == 0)
       continue;
 
@@ -874,11 +1030,12 @@ void PDBLinker::addDebugSymbols(ObjFile *file, const CVIndexMap *indexMap) {
     if (!isDebugS && !isDebugF)
       continue;
 
-    ArrayRef<uint8_t> relocatedDebugContents = relocateDebugChunk(*debugChunk);
-
     if (isDebugS) {
-      dsh.handleDebugS(relocatedDebugContents);
+      dsh.handleDebugS(debugChunk);
     } else if (isDebugF) {
+      // Handle old FPO data .debug$F sections. These are relatively rare.
+      ArrayRef<uint8_t> relocatedDebugContents =
+          relocateDebugChunk(*debugChunk);
       FixedStreamArray<object::FpoData> fpoRecords;
       BinaryStreamReader reader(relocatedDebugContents, support::little);
       uint32_t count = relocatedDebugContents.size() / sizeof(object::FpoData);
@@ -899,7 +1056,7 @@ void PDBLinker::addDebugSymbols(ObjFile *file, const CVIndexMap *indexMap) {
 // path to the object into the PDB. If this is a plain object, we make its
 // path absolute. If it's an object in an archive, we make the archive path
 // absolute.
-static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) {
+void PDBLinker::createModuleDBI(ObjFile *file) {
   pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
   SmallString<128> objName;
 
@@ -910,6 +1067,7 @@ static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) {
 
   file->moduleDBI = &exitOnErr(dbiBuilder.addModuleInfo(modName));
   file->moduleDBI->setObjFileName(objName);
+  file->moduleDBI->setMergeSymbolsCallback(this, &commitSymbolsForObject);
 
   ArrayRef<Chunk *> chunks = file->getChunks();
   uint32_t modi = file->moduleDBI->getModuleIndex();
@@ -925,13 +1083,28 @@ static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) {
 }
 
 void PDBLinker::addDebug(TpiSource *source) {
-  CVIndexMap localMap;
-  const CVIndexMap *indexMap = mergeTypeRecords(source, &localMap);
+  // Before we can process symbol substreams from .debug$S, we need to process
+  // type information, file checksums, and the string table. Add type info to
+  // the PDB first, so that we can get the map from object file type and item
+  // indices to PDB type and item indices.  If we are using ghashes, types have
+  // already been merged.
+  if (!config->debugGHashes) {
+    ScopedTimer t(typeMergingTimer);
+    if (Error e = source->mergeDebugT(&tMerger)) {
+      // If type merging failed, ignore the symbols.
+      warnUnusable(source->file, std::move(e));
+      return;
+    }
+  }
 
-  if (source->kind == TpiSource::PDB)
-    return; // No symbols in TypeServer PDBs
+  // If type merging failed, ignore the symbols.
+  Error typeError = std::move(source->typeMergingError);
+  if (typeError) {
+    warnUnusable(source->file, std::move(typeError));
+    return;
+  }
 
-  addDebugSymbols(source->file, indexMap);
+  addDebugSymbols(source);
 }
 
 static pdb::BulkPublic createPublic(Defined *def) {
@@ -961,38 +1134,41 @@ void PDBLinker::addObjectsToPDB() {
   ScopedTimer t1(addObjectsTimer);
 
   // Create module descriptors
-  for_each(ObjFile::instances,
-           [&](ObjFile *obj) { createModuleDBI(builder, obj); });
+  for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(obj); });
 
-  // Merge OBJs that do not have debug types
-  for_each(ObjFile::instances, [&](ObjFile *obj) {
-    if (obj->debugTypesObj)
-      return;
-    // Even if there're no types, still merge non-symbol .Debug$S and .Debug$F
-    // sections
-    addDebugSymbols(obj, nullptr);
-  });
+  // Reorder dependency type sources to come first.
+  TpiSource::sortDependencies();
 
-  // Merge dependencies
-  TpiSource::forEachSource([&](TpiSource *source) {
-    if (source->isDependency())
-      addDebug(source);
-  });
+  // Merge type information from input files using global type hashing.
+  if (config->debugGHashes)
+    tMerger.mergeTypesWithGHash();
 
-  // Merge regular and dependent OBJs
-  TpiSource::forEachSource([&](TpiSource *source) {
-    if (!source->isDependency())
-      addDebug(source);
-  });
+  // Merge dependencies and then regular objects.
+  for_each(TpiSource::dependencySources,
+           [&](TpiSource *source) { addDebug(source); });
+  for_each(TpiSource::objectSources,
+           [&](TpiSource *source) { addDebug(source); });
 
   builder.getStringTableBuilder().setStrings(pdbStrTab);
   t1.stop();
 
   // Construct TPI and IPI stream contents.
   ScopedTimer t2(tpiStreamLayoutTimer);
-  addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
-  addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
+  // Collect all the merged types.
+  if (config->debugGHashes) {
+    addGHashTypeInfo(builder);
+  } else {
+    addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
+    addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
+  }
   t2.stop();
+
+  if (config->showSummary) {
+    for_each(TpiSource::instances, [&](TpiSource *source) {
+      nbTypeRecords += source->nbTypeRecords;
+      nbTypeRecordsBytes += source->nbTypeRecordsBytes;
+    });
+  }
 }
 
 void PDBLinker::addPublicsToPDB() {
@@ -1032,8 +1208,10 @@ void PDBLinker::printStats() {
         "Input OBJ files (expanded from all cmd-line inputs)");
   print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies");
   print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies");
-  print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(),
-        "Merged TPI records");
+  print(nbTypeRecords, "Input type records");
+  print(nbTypeRecordsBytes, "Input type records bytes");
+  print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records");
+  print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records");
   print(pdbStrTab.size(), "Output PDB strings");
   print(globalSymbols, "Global symbol records");
   print(moduleSymbols, "Module symbol records");
@@ -1085,8 +1263,11 @@ void PDBLinker::printStats() {
     }
   };
 
-  printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable());
-  printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable());
+  if (!config->debugGHashes) {
+    // FIXME: Reimplement for ghash.
+    printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable());
+    printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable());
+  }
 
   message(buffer);
 }
@@ -1336,16 +1517,18 @@ void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
     mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
         cs, bAlloc, CodeViewContainer::Pdb));
 
-    SmallVector<SymbolScope, 4> scopes;
     CVSymbol newSym = codeview::SymbolSerializer::writeOneSymbol(
         ts, bAlloc, CodeViewContainer::Pdb);
-    scopeStackOpen(scopes, mod->getNextSymbolOffset(), newSym);
+
+    // Write ptrEnd for the S_THUNK32.
+    ScopeRecord *thunkSymScope =
+        getSymbolScopeFields(const_cast<uint8_t *>(newSym.data().data()));
 
     mod->addSymbol(newSym);
 
     newSym = codeview::SymbolSerializer::writeOneSymbol(es, bAlloc,
                                                         CodeViewContainer::Pdb);
-    scopeStackClose(scopes, mod->getNextSymbolOffset(), file);
+    thunkSymScope->ptrEnd = mod->getNextSymbolOffset();
 
     mod->addSymbol(newSym);
 
diff --git a/contrib/llvm-project/lld/COFF/PDB.h b/contrib/llvm-project/lld/COFF/PDB.h
index 273609ea788c..53506d40baef 100644
--- a/contrib/llvm-project/lld/COFF/PDB.h
+++ b/contrib/llvm-project/lld/COFF/PDB.h
@@ -20,6 +20,8 @@ union DebugInfo;
 }
 
 namespace lld {
+class Timer;
+
 namespace coff {
 class OutputSection;
 class SectionChunk;
@@ -32,6 +34,10 @@ void createPDB(SymbolTable *symtab,
 
 llvm::Optional<std::pair<llvm::StringRef, uint32_t>>
 getFileLineCodeView(const SectionChunk *c, uint32_t addr);
+
+extern Timer loadGHashTimer;
+extern Timer mergeGHashTimer;
+
 } // namespace coff
 } // namespace lld
 
diff --git a/contrib/llvm-project/lld/COFF/SymbolTable.cpp b/contrib/llvm-project/lld/COFF/SymbolTable.cpp
index 173e32f628ef..024a408ca454 100644
--- a/contrib/llvm-project/lld/COFF/SymbolTable.cpp
+++ b/contrib/llvm-project/lld/COFF/SymbolTable.cpp
@@ -390,7 +390,7 @@ void SymbolTable::reportUnresolvable() {
   for (auto &i : symMap) {
     Symbol *sym = i.second;
     auto *undef = dyn_cast<Undefined>(sym);
-    if (!undef)
+    if (!undef || sym->deferUndefined)
       continue;
     if (undef->getWeakAlias())
       continue;
@@ -402,7 +402,7 @@ void SymbolTable::reportUnresolvable() {
     }
     if (name.contains("_PchSym_"))
       continue;
-    if (config->mingw && impSymbol(name))
+    if (config->autoImport && impSymbol(name))
       continue;
     undefs.insert(sym);
   }
@@ -482,6 +482,7 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
     sym->isUsedInRegularObj = false;
     sym->pendingArchiveLoad = false;
+    sym->canInline = true;
     inserted = true;
   }
   return {sym, inserted};
diff --git a/contrib/llvm-project/lld/COFF/Symbols.h b/contrib/llvm-project/lld/COFF/Symbols.h
index 1da4df366966..13e7488d6b87 100644
--- a/contrib/llvm-project/lld/COFF/Symbols.h
+++ b/contrib/llvm-project/lld/COFF/Symbols.h
@@ -103,8 +103,8 @@ protected:
   explicit Symbol(Kind k, StringRef n = "")
       : symbolKind(k), isExternal(true), isCOMDAT(false),
         writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false),
-        isRuntimePseudoReloc(false), nameSize(n.size()),
-        nameData(n.empty() ? nullptr : n.data()) {}
+        isRuntimePseudoReloc(false), deferUndefined(false), canInline(true),
+        nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {}
 
   const unsigned symbolKind : 8;
   unsigned isExternal : 1;
@@ -130,6 +130,16 @@ public:
 
   unsigned isRuntimePseudoReloc : 1;
 
+  // True if we want to allow this symbol to be undefined in the early
+  // undefined check pass in SymbolTable::reportUnresolvable(), as it
+  // might be fixed up later.
+  unsigned deferUndefined : 1;
+
+  // False if LTO shouldn't inline whatever this symbol points to. If a symbol
+  // is overwritten after LTO, LTO shouldn't inline the symbol because it
+  // doesn't know the final contents of the symbol.
+  unsigned canInline : 1;
+
 protected:
   // Symbol name length. Assume symbol lengths fit in a 32-bit integer.
   uint32_t nameSize;
@@ -343,6 +353,13 @@ public:
   uint16_t getOrdinal() { return file->hdr->OrdinalHint; }
 
   ImportFile *file;
+
+  // This is a pointer to the synthetic symbol associated with the load thunk
+  // for this symbol that will be called if the DLL is delay-loaded. This is
+  // needed for Control Flow Guard because if this DefinedImportData symbol is a
+  // valid call target, the corresponding load thunk must also be marked as a
+  // valid call target.
+  DefinedSynthetic *loadThunkSym = nullptr;
 };
 
 // This class represents a symbol for a jump table entry which jumps
@@ -461,7 +478,9 @@ void replaceSymbol(Symbol *s, ArgT &&... arg) {
                 "SymbolUnion not aligned enough");
   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
          "Not a Symbol");
+  bool canInline = s->canInline;
   new (s) T(std::forward<ArgT>(arg)...);
+  s->canInline = canInline;
 }
 } // namespace coff
 
diff --git a/contrib/llvm-project/lld/COFF/TypeMerger.h b/contrib/llvm-project/lld/COFF/TypeMerger.h
index 858f55b6856d..72fd5fc72b01 100644
--- a/contrib/llvm-project/lld/COFF/TypeMerger.h
+++ b/contrib/llvm-project/lld/COFF/TypeMerger.h
@@ -10,60 +10,57 @@
 #define LLD_COFF_TYPEMERGER_H
 
 #include "Config.h"
-#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeHashing.h"
 #include "llvm/Support/Allocator.h"
+#include <atomic>
 
 namespace lld {
 namespace coff {
 
+using llvm::codeview::GloballyHashedType;
+using llvm::codeview::TypeIndex;
+
+struct GHashState;
+
 class TypeMerger {
 public:
-  TypeMerger(llvm::BumpPtrAllocator &alloc)
-      : typeTable(alloc), idTable(alloc), globalTypeTable(alloc),
-        globalIDTable(alloc) {}
+  TypeMerger(llvm::BumpPtrAllocator &alloc);
+
+  ~TypeMerger();
 
   /// Get the type table or the global type table if /DEBUG:GHASH is enabled.
   inline llvm::codeview::TypeCollection &getTypeTable() {
-    if (config->debugGHashes)
-      return globalTypeTable;
+    assert(!config->debugGHashes);
     return typeTable;
   }
 
   /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled.
   inline llvm::codeview::TypeCollection &getIDTable() {
-    if (config->debugGHashes)
-      return globalIDTable;
+    assert(!config->debugGHashes);
     return idTable;
   }
 
+  /// Use global hashes to eliminate duplicate types and identify unique type
+  /// indices in each TpiSource.
+  void mergeTypesWithGHash();
+
+  /// Map from PDB function id type indexes to PDB function type indexes.
+  /// Populated after mergeTypesWithGHash.
+  llvm::DenseMap<TypeIndex, TypeIndex> funcIdToType;
+
   /// Type records that will go into the PDB TPI stream.
   llvm::codeview::MergingTypeTableBuilder typeTable;
 
   /// Item records that will go into the PDB IPI stream.
   llvm::codeview::MergingTypeTableBuilder idTable;
 
-  /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH)
-  llvm::codeview::GlobalTypeTableBuilder globalTypeTable;
-
-  /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH)
-  llvm::codeview::GlobalTypeTableBuilder globalIDTable;
-
   // When showSummary is enabled, these are histograms of TPI and IPI records
   // keyed by type index.
   SmallVector<uint32_t, 0> tpiCounts;
   SmallVector<uint32_t, 0> ipiCounts;
 };
 
-/// Map from type index and item index in a type server PDB to the
-/// corresponding index in the destination PDB.
-struct CVIndexMap {
-  llvm::SmallVector<llvm::codeview::TypeIndex, 0> tpiMap;
-  llvm::SmallVector<llvm::codeview::TypeIndex, 0> ipiMap;
-  bool isTypeServerMap = false;
-  bool isPrecompiledTypeMap = false;
-};
-
 } // namespace coff
 } // namespace lld
 
diff --git a/contrib/llvm-project/lld/COFF/Writer.cpp b/contrib/llvm-project/lld/COFF/Writer.cpp
index 0188f0971a75..5e8b8a624c3b 100644
--- a/contrib/llvm-project/lld/COFF/Writer.cpp
+++ b/contrib/llvm-project/lld/COFF/Writer.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Writer.h"
+#include "CallGraphSort.h"
 #include "Config.h"
 #include "DLL.h"
 #include "InputFiles.h"
@@ -87,6 +88,8 @@ OutputSection *Chunk::getOutputSection() const {
   return osidx == 0 ? nullptr : outputSections[osidx - 1];
 }
 
+void OutputSection::clear() { outputSections.clear(); }
+
 namespace {
 
 class DebugDirectoryChunk : public NonSectionChunk {
@@ -224,16 +227,21 @@ private:
   void markSymbolsForRVATable(ObjFile *file,
                               ArrayRef<SectionChunk *> symIdxChunks,
                               SymbolRVASet &tableSymbols);
+  void getSymbolsFromSections(ObjFile *file,
+                              ArrayRef<SectionChunk *> symIdxChunks,
+                              std::vector<Symbol *> &symbols);
   void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
                         StringRef countSym);
   void setSectionPermissions();
   void writeSections();
   void writeBuildId();
+  void sortSections();
   void sortExceptionTable();
   void sortCRTSectionChunks(std::vector<Chunk *> &chunks);
   void addSyntheticIdata();
   void fixPartialSectionChars(StringRef name, uint32_t chars);
   bool fixGnuImportChunks();
+  void fixTlsAlignment();
   PartialSection *createPartialSection(StringRef name, uint32_t outChars);
   PartialSection *findPartialSection(StringRef name, uint32_t outChars);
 
@@ -260,6 +268,7 @@ private:
   DelayLoadContents delayIdata;
   EdataContents edata;
   bool setNoSEHCharacteristic = false;
+  uint32_t tlsAlignment = 0;
 
   DebugDirectoryChunk *debugDirectory = nullptr;
   std::vector<std::pair<COFF::DebugType, Chunk *>> debugRecords;
@@ -604,8 +613,9 @@ void Writer::run() {
 
   createImportTables();
   createSections();
-  createMiscChunks();
   appendImportThunks();
+  // Import thunks must be added before the Control Flow Guard tables are added.
+  createMiscChunks();
   createExportTable();
   mergeSections();
   removeUnusedSections();
@@ -628,6 +638,11 @@ void Writer::run() {
   writeSections();
   sortExceptionTable();
 
+  // Fix up the alignment in the TLS Directory's Characteristics field,
+  // if a specific alignment value is needed.
+  if (tlsAlignment)
+    fixTlsAlignment();
+
   t1.stop();
 
   if (!config->pdbPath.empty() && config->debug) {
@@ -801,6 +816,19 @@ static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) {
          name.startswith(".xdata$") || name.startswith(".eh_frame$");
 }
 
+void Writer::sortSections() {
+  if (!config->callGraphProfile.empty()) {
+    DenseMap<const SectionChunk *, int> order = computeCallGraphProfileOrder();
+    for (auto it : order) {
+      if (DefinedRegular *sym = it.first->sym)
+        config->order[sym->getName()] = it.second;
+    }
+  }
+  if (!config->order.empty())
+    for (auto it : partialSections)
+      sortBySectionOrder(it.second->chunks);
+}
+
 // Create output section objects and add them to OutputSections.
 void Writer::createSections() {
   // First, create the builtin sections.
@@ -848,6 +876,10 @@ void Writer::createSections() {
     StringRef name = c->getSectionName();
     if (shouldStripSectionSuffix(sc, name))
       name = name.split('$').first;
+
+    if (name.startswith(".tls"))
+      tlsAlignment = std::max(tlsAlignment, c->getAlignment());
+
     PartialSection *pSec = createPartialSection(name,
                                                 c->getOutputCharacteristics());
     pSec->chunks.push_back(c);
@@ -864,10 +896,7 @@ void Writer::createSections() {
   if (hasIdata)
     addSyntheticIdata();
 
-  // Process an /order option.
-  if (!config->order.empty())
-    for (auto it : partialSections)
-      sortBySectionOrder(it.second->chunks);
+  sortSections();
 
   if (hasIdata)
     locateImportTables();
@@ -952,16 +981,15 @@ void Writer::createMiscChunks() {
   }
 
   if (config->cetCompat) {
-    ExtendedDllCharacteristicsChunk *extendedDllChars =
-        make<ExtendedDllCharacteristicsChunk>(
-            IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT);
-    debugRecords.push_back(
-        {COFF::IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS, extendedDllChars});
+    debugRecords.push_back({COFF::IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS,
+                            make<ExtendedDllCharacteristicsChunk>(
+                                IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT)});
   }
 
-  if (debugRecords.size() > 0) {
-    for (std::pair<COFF::DebugType, Chunk *> r : debugRecords)
-      debugInfoSec->addChunk(r.second);
+  // Align and add each chunk referenced by the debug data directory.
+  for (std::pair<COFF::DebugType, Chunk *> r : debugRecords) {
+    r.second->setAlignment(4);
+    debugInfoSec->addChunk(r.second);
   }
 
   // Create SEH table. x86-only.
@@ -1362,8 +1390,8 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
   pe->MinorImageVersion = config->minorImageVersion;
   pe->MajorOperatingSystemVersion = config->majorOSVersion;
   pe->MinorOperatingSystemVersion = config->minorOSVersion;
-  pe->MajorSubsystemVersion = config->majorOSVersion;
-  pe->MinorSubsystemVersion = config->minorOSVersion;
+  pe->MajorSubsystemVersion = config->majorSubsystemVersion;
+  pe->MinorSubsystemVersion = config->minorSubsystemVersion;
   pe->Subsystem = config->subsystem;
   pe->SizeOfImage = sizeOfImage;
   pe->SizeOfHeaders = sizeOfHeaders;
@@ -1607,6 +1635,8 @@ static void markSymbolsWithRelocations(ObjFile *file,
 // table.
 void Writer::createGuardCFTables() {
   SymbolRVASet addressTakenSyms;
+  SymbolRVASet giatsRVASet;
+  std::vector<Symbol *> giatsSymbols;
   SymbolRVASet longJmpTargets;
   for (ObjFile *file : ObjFile::instances) {
     // If the object was compiled with /guard:cf, the address taken symbols
@@ -1616,6 +1646,8 @@ void Writer::createGuardCFTables() {
     // possibly address-taken.
     if (file->hasGuardCF()) {
       markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms);
+      markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet);
+      getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols);
       markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets);
     } else {
       markSymbolsWithRelocations(file, addressTakenSyms);
@@ -1630,6 +1662,16 @@ void Writer::createGuardCFTables() {
   for (Export &e : config->exports)
     maybeAddAddressTakenFunction(addressTakenSyms, e.sym);
 
+  // For each entry in the .giats table, check if it has a corresponding load
+  // thunk (e.g. because the DLL that defines it will be delay-loaded) and, if
+  // so, add the load thunk to the address taken (.gfids) table.
+  for (Symbol *s : giatsSymbols) {
+    if (auto *di = dyn_cast<DefinedImportData>(s)) {
+      if (di->loadThunkSym)
+        addSymbolToRVASet(addressTakenSyms, di->loadThunkSym);
+    }
+  }
+
   // Ensure sections referenced in the gfid table are 16-byte aligned.
   for (const ChunkAndOffset &c : addressTakenSyms)
     if (c.inputChunk->getAlignment() < 16)
@@ -1638,6 +1680,10 @@ void Writer::createGuardCFTables() {
   maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table",
                    "__guard_fids_count");
 
+  // Add the Guard Address Taken IAT Entry Table (.giats).
+  maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table",
+                   "__guard_iat_count");
+
   // Add the longjmp target table unless the user told us not to.
   if (config->guardCF == GuardCFLevel::Full)
     maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table",
@@ -1654,11 +1700,11 @@ void Writer::createGuardCFTables() {
 }
 
 // Take a list of input sections containing symbol table indices and add those
-// symbols to an RVA table. The challenge is that symbol RVAs are not known and
+// symbols to a vector. The challenge is that symbol RVAs are not known and
 // depend on the table size, so we can't directly build a set of integers.
-void Writer::markSymbolsForRVATable(ObjFile *file,
+void Writer::getSymbolsFromSections(ObjFile *file,
                                     ArrayRef<SectionChunk *> symIdxChunks,
-                                    SymbolRVASet &tableSymbols) {
+                                    std::vector<Symbol *> &symbols) {
   for (SectionChunk *c : symIdxChunks) {
     // Skip sections discarded by linker GC. This comes up when a .gfids section
     // is associated with something like a vtable and the vtable is discarded.
@@ -1676,7 +1722,7 @@ void Writer::markSymbolsForRVATable(ObjFile *file,
     }
 
     // Read each symbol table index and check if that symbol was included in the
-    // final link. If so, add it to the table symbol set.
+    // final link. If so, add it to the vector of symbols.
     ArrayRef<ulittle32_t> symIndices(
         reinterpret_cast<const ulittle32_t *>(data.data()), data.size() / 4);
     ArrayRef<Symbol *> objSymbols = file->getSymbols();
@@ -1688,12 +1734,24 @@ void Writer::markSymbolsForRVATable(ObjFile *file,
       }
       if (Symbol *s = objSymbols[symIndex]) {
         if (s->isLive())
-          addSymbolToRVASet(tableSymbols, cast<Defined>(s));
+          symbols.push_back(cast<Symbol>(s));
       }
     }
   }
 }
 
+// Take a list of input sections containing symbol table indices and add those
+// symbols to an RVA table.
+void Writer::markSymbolsForRVATable(ObjFile *file,
+                                    ArrayRef<SectionChunk *> symIdxChunks,
+                                    SymbolRVASet &tableSymbols) {
+  std::vector<Symbol *> syms;
+  getSymbolsFromSections(file, symIdxChunks, syms);
+
+  for (Symbol *s : syms)
+    addSymbolToRVASet(tableSymbols, cast<Defined>(s));
+}
+
 // Replace the absolute table symbol with a synthetic symbol pointing to
 // tableChunk so that we can emit base relocations for it and resolve section
 // relative relocations.
@@ -1993,3 +2051,33 @@ PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) {
     return it->second;
   return nullptr;
 }
+
+void Writer::fixTlsAlignment() {
+  Defined *tlsSym =
+      dyn_cast_or_null<Defined>(symtab->findUnderscore("_tls_used"));
+  if (!tlsSym)
+    return;
+
+  OutputSection *sec = tlsSym->getChunk()->getOutputSection();
+  assert(sec && tlsSym->getRVA() >= sec->getRVA() &&
+         "no output section for _tls_used");
+
+  uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff();
+  uint64_t tlsOffset = tlsSym->getRVA() - sec->getRVA();
+  uint64_t directorySize = config->is64()
+                               ? sizeof(object::coff_tls_directory64)
+                               : sizeof(object::coff_tls_directory32);
+
+  if (tlsOffset + directorySize > sec->getRawSize())
+    fatal("_tls_used sym is malformed");
+
+  if (config->is64()) {
+    object::coff_tls_directory64 *tlsDir =
+        reinterpret_cast<object::coff_tls_directory64 *>(&secBuf[tlsOffset]);
+    tlsDir->setAlignment(tlsAlignment);
+  } else {
+    object::coff_tls_directory32 *tlsDir =
+        reinterpret_cast<object::coff_tls_directory32 *>(&secBuf[tlsOffset]);
+    tlsDir->setAlignment(tlsAlignment);
+  }
+}
diff --git a/contrib/llvm-project/lld/COFF/Writer.h b/contrib/llvm-project/lld/COFF/Writer.h
index 96389df2ac0a..2bb26da7d428 100644
--- a/contrib/llvm-project/lld/COFF/Writer.h
+++ b/contrib/llvm-project/lld/COFF/Writer.h
@@ -50,6 +50,9 @@ public:
   void writeHeaderTo(uint8_t *buf);
   void addContributingPartialSection(PartialSection *sec);
 
+  // Clear the output sections static container.
+  static void clear();
+
   // Returns the size of this section in an executable memory image.
   // This may be smaller than the raw size (the raw size is multiple
   // of disk sector size, so there may be padding at end), or may be