1 files changed, 849 insertions, 249 deletions
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3d4d98b51606..a4fb9035193c 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -53,6 +53,7 @@
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
+#include "SyntheticSections.h"
 #include "Target.h"
 
 #include "lld/Common/DWARF.h"
@@ -66,6 +67,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
+#include "llvm/TextAPI/Architecture.h"
+#include "llvm/TextAPI/InterfaceFile.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
@@ -78,22 +81,102 @@ using namespace lld::macho;
 std::string lld::toString(const InputFile *f) {
   if (!f)
     return "<internal>";
+
+  // Multiple dylibs can be defined in one .tbd file.
+  if (auto dylibFile = dyn_cast<DylibFile>(f))
+    if (f->getName().endswith(".tbd"))
+      return (f->getName() + "(" + dylibFile->installName + ")").str();
+
   if (f->archiveName.empty())
     return std::string(f->getName());
-  return (path::filename(f->archiveName) + "(" + path::filename(f->getName()) +
-          ")")
-      .str();
+  return (f->archiveName + "(" + path::filename(f->getName()) + ")").str();
 }
 
 SetVector<InputFile *> macho::inputFiles;
 std::unique_ptr<TarWriter> macho::tar;
 int InputFile::idCount = 0;
 
+// Unpacks a version word: major in bits 31-16, minor 15-8, subminor 7-0.
+static VersionTuple decodeVersion(uint32_t version) {
+  const unsigned subMinorPart = version & 0xffu;
+  const unsigned minorPart = (version >> 8) & 0xffu;
+  return VersionTuple(/*Major=*/version >> 16, minorPart, subMinorPart);
+}
+
+// Gathers every platform/minimum-version pair declared by an object or dylib
+// file through its LC_BUILD_VERSION and LC_VERSION_MIN_* load commands.
+static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
+  // Only ObjFile and DylibFile inputs are inspected.
+  if (!(isa<ObjFile>(input) || isa<DylibFile>(input)))
+    return {};
+
+  std::vector<PlatformInfo> found;
+  const char *header = input->mb.getBufferStart();
+  // LC_BUILD_VERSION names its platform explicitly.
+  for (auto *buildCmd :
+       findCommands<build_version_command>(header, LC_BUILD_VERSION)) {
+    PlatformInfo entry;
+    entry.target.Platform = static_cast<PlatformKind>(buildCmd->platform);
+    entry.minimum = decodeVersion(buildCmd->minos);
+    found.emplace_back(std::move(entry));
+  }
+
+  // An LC_VERSION_MIN_* command implies its platform through its command type.
+  for (auto *minCmd : findCommands<version_min_command>(
+           header, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
+           LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS)) {
+    PlatformInfo entry;
+    if (minCmd->cmd == LC_VERSION_MIN_MACOSX)
+      entry.target.Platform = PlatformKind::macOS;
+    else if (minCmd->cmd == LC_VERSION_MIN_IPHONEOS)
+      entry.target.Platform = PlatformKind::iOS;
+    else if (minCmd->cmd == LC_VERSION_MIN_TVOS)
+      entry.target.Platform = PlatformKind::tvOS;
+    else if (minCmd->cmd == LC_VERSION_MIN_WATCHOS)
+      entry.target.Platform = PlatformKind::watchOS;
+    entry.minimum = decodeVersion(minCmd->version);
+    found.emplace_back(std::move(entry));
+  }
+
+  return found;
+}
+
+// Reports whether `input` may be linked for the configured target platform.
+// A platform mismatch is an error; a newer minimum version is only a warning.
+static bool checkCompatibility(const InputFile *input) {
+  const std::vector<PlatformInfo> declared = getPlatformInfos(input);
+  if (declared.empty())
+    return true;
+
+  auto targetsSamePlatform = [&](const PlatformInfo &candidate) {
+    return removeSimulator(candidate.target.Platform) ==
+           removeSimulator(config->platform());
+  };
+  auto match = find_if(declared, targetsSamePlatform);
+  if (match != declared.end()) {
+    if (match->minimum > config->platformInfo.minimum)
+      warn(toString(input) + " has version " + match->minimum.getAsString() +
+           ", which is newer than target minimum of " +
+           config->platformInfo.minimum.getAsString());
+    return true;
+  }
+  // No declared platform matches: list all of them, '/'-separated.
+  std::string joinedNames;
+  raw_string_ostream os(joinedNames);
+  interleave(
+      declared, os,
+      [&](const PlatformInfo &p) { os << getPlatformName(p.target.Platform); },
+      "/");
+  error(toString(input) + " has platform " + joinedNames +
+        Twine(", which is different from target platform ") +
+        getPlatformName(config->platform()));
+  return false;
+}
+
 // Open a given file path and return it as a memory-mapped file.
 Optional<MemoryBufferRef> macho::readFile(StringRef path) {
-  // Open a file.
-  auto mbOrErr = MemoryBuffer::getFile(path);
-  if (auto ec = mbOrErr.getError()) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
+  if (std::error_code ec = mbOrErr.getError()) {
     error("cannot open " + path + ": " + ec.message());
     return None;
   }
@@ -104,18 +187,18 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
 
   // If this is a regular non-fat file, return it.
   const char *buf = mbref.getBufferStart();
-  auto *hdr = reinterpret_cast<const MachO::fat_header *>(buf);
-  if (read32be(&hdr->magic) != MachO::FAT_MAGIC) {
+  const auto *hdr = reinterpret_cast<const fat_header *>(buf);
+  if (mbref.getBufferSize() < sizeof(uint32_t) ||
+      read32be(&hdr->magic) != FAT_MAGIC) {
     if (tar)
       tar->append(relativeToRoot(path), mbref.getBuffer());
     return mbref;
   }
 
-  // Object files and archive files may be fat files, which contains
-  // multiple real files for different CPU ISAs. Here, we search for a
-  // file that matches with the current link target and returns it as
-  // a MemoryBufferRef.
-  auto *arch = reinterpret_cast<const MachO::fat_arch *>(buf + sizeof(*hdr));
+  // Object files and archive files may be fat files, which contain multiple
+  // real files for different CPU ISAs. Here, we search for a file that matches
+  // the current link target and return it as a MemoryBufferRef.
+  const auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr));
 
   for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
     if (reinterpret_cast<const char *>(arch + i + 1) >
@@ -124,7 +207,7 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
       return None;
     }
 
-    if (read32be(&arch[i].cputype) != target->cpuType ||
+    if (read32be(&arch[i].cputype) != static_cast<uint32_t>(target->cpuType) ||
         read32be(&arch[i].cpusubtype) != target->cpuSubtype)
       continue;
 
@@ -141,50 +224,75 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
   return None;
 }
 
-const load_command *macho::findCommand(const mach_header_64 *hdr,
-                                       uint32_t type) {
-  const uint8_t *p =
-      reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
-
-  for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
-    auto *cmd = reinterpret_cast<const load_command *>(p);
-    if (cmd->cmd == type)
-      return cmd;
-    p += cmd->cmdsize;
-  }
-  return nullptr;
-}
+InputFile::InputFile(Kind kind, const InterfaceFile &interface)
+    : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
 
-void ObjFile::parseSections(ArrayRef<section_64> sections) {
+template <class Section>
+void ObjFile::parseSections(ArrayRef<Section> sections) {
   subsections.reserve(sections.size());
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
 
-  for (const section_64 &sec : sections) {
-    InputSection *isec = make<InputSection>();
-    isec->file = this;
-    isec->name =
+  for (const Section &sec : sections) {
+    StringRef name =
         StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
-    isec->segname =
+    StringRef segname =
         StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
-    isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset,
-                  static_cast<size_t>(sec.size)};
-    if (sec.align >= 32)
-      error("alignment " + std::to_string(sec.align) + " of section " +
-            isec->name + " is too large");
-    else
-      isec->align = 1 << sec.align;
-    isec->flags = sec.flags;
-
-    if (!(isDebugSection(isec->flags) &&
-          isec->segname == segment_names::dwarf)) {
+    ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
+                                                    : buf + sec.offset,
+                              static_cast<size_t>(sec.size)};
+    if (sec.align >= 32) {
+      error("alignment " + std::to_string(sec.align) + " of section " + name +
+            " is too large");
+      subsections.push_back({});
+      continue;
+    }
+    uint32_t align = 1 << sec.align;
+    uint32_t flags = sec.flags;
+
+    if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
+        (config->dedupLiterals && isWordLiteralSection(sec.flags))) {
+      if (sec.nreloc && config->dedupLiterals)
+        fatal(toString(this) + " contains relocations in " + sec.segname + "," +
+              sec.sectname +
+              ", so LLD cannot deduplicate literals. Try re-running without "
+              "--deduplicate-literals.");
+
+      InputSection *isec;
+      if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
+        isec =
+            make<CStringInputSection>(segname, name, this, data, align, flags);
+        // FIXME: parallelize this?
+        cast<CStringInputSection>(isec)->splitIntoPieces();
+      } else {
+        isec = make<WordLiteralInputSection>(segname, name, this, data, align,
+                                             flags);
+      }
       subsections.push_back({{0, isec}});
-    } else {
-      // Instead of emitting DWARF sections, we emit STABS symbols to the
-      // object files that contain them. We filter them out early to avoid
-      // parsing their relocations unnecessarily. But we must still push an
-      // empty map to ensure the indices line up for the remaining sections.
+    } else if (config->icfLevel != ICFLevel::none &&
+               (name == section_names::cfString &&
+                segname == segment_names::data)) {
+      uint64_t literalSize = target->wordSize == 8 ? 32 : 16;
       subsections.push_back({});
-      debugSections.push_back(isec);
+      SubsectionMap &subsecMap = subsections.back();
+      for (uint64_t off = 0; off < data.size(); off += literalSize)
+        subsecMap.push_back(
+            {off, make<ConcatInputSection>(segname, name, this,
+                                           data.slice(off, literalSize), align,
+                                           flags)});
+    } else {
+      auto *isec =
+          make<ConcatInputSection>(segname, name, this, data, align, flags);
+      if (!(isDebugSection(isec->getFlags()) &&
+            isec->getSegName() == segment_names::dwarf)) {
+        subsections.push_back({{0, isec}});
+      } else {
+        // Instead of emitting DWARF sections, we emit STABS symbols to the
+        // object files that contain them. We filter them out early to avoid
+        // parsing their relocations unnecessarily. But we must still push an
+        // empty map to ensure the indices line up for the remaining sections.
+        subsections.push_back({});
+        debugSections.push_back(isec);
+      }
     }
   }
 }
@@ -197,18 +305,55 @@ void ObjFile::parseSections(ArrayRef<section_64> sections) {
 // same location as an offset relative to the start of the containing
 // subsection.
 static InputSection *findContainingSubsection(SubsectionMap &map,
-                                              uint32_t *offset) {
-  auto it = std::prev(map.upper_bound(*offset));
-  *offset -= it->first;
-  return it->second;
+                                              uint64_t *offset) {
+  auto it = std::prev(llvm::upper_bound(
+      map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) {
+        return value < subsecEntry.offset;
+      }));
+  *offset -= it->offset;
+  return it->isec;
+}
+
+// Checks one relocation record against the target's attributes for its type,
+// calling error() for every violated constraint. Returns true if none fired.
+template <class Section>
+static bool validateRelocationInfo(InputFile *file, const Section &sec,
+                                   relocation_info rel) {
+  const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
+  bool valid = true;
+  auto fail = [&](const Twine &diagnostic) {
+    valid = false;
+    error((relocAttrs.name + " relocation " + diagnostic + " at offset " +
+           std::to_string(rel.r_address) + " of " + sec.segname + "," +
+           sec.sectname + " in " + toString(file))
+              .str());
+  };
+  const bool isExternOnly = !relocAttrs.hasAttr(RelocAttrBits::LOCAL);
+  if (isExternOnly && !rel.r_extern)
+    fail("must be extern");
+  if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
+    fail(Twine("must ") + (rel.r_pcrel ? "not " : "") + "be PC-relative");
+  if (isThreadLocalVariables(sec.flags) &&
+      !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED))
+    fail("not allowed in thread-local section, must be UNSIGNED");
+  if (rel.r_length < 2 || rel.r_length > 3 ||
+      !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
+    static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"};
+    fail("has width " + std::to_string(1 << rel.r_length) +
+         " bytes, but must be " +
+         widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] + " bytes");
+  }
+  return valid;
 }
 
-void ObjFile::parseRelocations(const section_64 &sec,
-                               SubsectionMap &subsecMap) {
+template <class Section>
+void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
+                               const Section &sec, SubsectionMap &subsecMap) {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   ArrayRef<relocation_info> relInfos(
       reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
 
+  auto subsecIt = subsecMap.rbegin();
   for (size_t i = 0; i < relInfos.size(); i++) {
     // Paired relocations serve as Mach-O's method for attaching a
     // supplemental datum to a primary relocation record. ELF does not
@@ -217,8 +362,8 @@ void ObjFile::parseRelocations(const section_64 &sec,
     //
     // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
     // and the paired *_RELOC_UNSIGNED record holds the minuend. The
-    // datum for each is a symbolic address. The result is the runtime
-    // offset between two addresses.
+    // datum for each is a symbolic address. The result is the offset
+    // between two addresses.
     //
     // The ARM64_RELOC_ADDEND record holds the addend, and the paired
     // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
@@ -235,92 +380,199 @@ void ObjFile::parseRelocations(const section_64 &sec,
     // and insert them. Storing addends in the instruction stream is
     // possible, but inconvenient and more costly at link time.
 
-    relocation_info pairedInfo = relInfos[i];
-    relocation_info relInfo =
-        target->isPairedReloc(pairedInfo) ? relInfos[++i] : pairedInfo;
+    int64_t pairedAddend = 0;
+    relocation_info relInfo = relInfos[i];
+    if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
+      pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
+      relInfo = relInfos[++i];
+    }
     assert(i < relInfos.size());
+    if (!validateRelocationInfo(this, sec, relInfo))
+      continue;
     if (relInfo.r_address & R_SCATTERED)
       fatal("TODO: Scattered relocations not supported");
 
+    bool isSubtrahend =
+        target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
+    int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
+    assert(!(embeddedAddend && pairedAddend));
+    int64_t totalAddend = pairedAddend + embeddedAddend;
     Reloc r;
     r.type = relInfo.r_type;
     r.pcrel = relInfo.r_pcrel;
     r.length = relInfo.r_length;
     r.offset = relInfo.r_address;
-    // For unpaired relocs, pairdInfo (just a copy of relInfo) is ignored
-    uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo);
     if (relInfo.r_extern) {
       r.referent = symbols[relInfo.r_symbolnum];
-      r.addend = rawAddend;
+      r.addend = isSubtrahend ? 0 : totalAddend;
     } else {
-      SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
-      const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
-      uint32_t referentOffset;
+      assert(!isSubtrahend);
+      const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
+      uint64_t referentOffset;
       if (relInfo.r_pcrel) {
         // The implicit addend for pcrel section relocations is the pcrel offset
         // in terms of the addresses in the input file. Here we adjust it so
         // that it describes the offset from the start of the referent section.
-        // TODO: The offset of 4 is probably not right for ARM64, nor for
-        //       relocations with r_length != 2.
+        // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
+        // have pcrel section relocations. We may want to factor this out into
+        // the arch-specific .cpp file.
+        assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
         referentOffset =
-            sec.addr + relInfo.r_address + 4 + rawAddend - referentSec.addr;
+            sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr;
       } else {
         // The addend for a non-pcrel relocation is its absolute address.
-        referentOffset = rawAddend - referentSec.addr;
+        referentOffset = totalAddend - referentSec.addr;
       }
+      SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
       r.referent = findContainingSubsection(referentSubsecMap, &referentOffset);
       r.addend = referentOffset;
     }
 
-    InputSection *subsec = findContainingSubsection(subsecMap, &r.offset);
+    // Find the subsection that this relocation belongs to.
+    // Though not required by the Mach-O format, clang and gcc seem to emit
+    // relocations in order, so let's take advantage of it. However, ld64 emits
+    // unsorted relocations (in `-r` mode), so we have a fallback for that
+    // uncommon case.
+    InputSection *subsec;
+    while (subsecIt != subsecMap.rend() && subsecIt->offset > r.offset)
+      ++subsecIt;
+    if (subsecIt == subsecMap.rend() ||
+        subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
+      subsec = findContainingSubsection(subsecMap, &r.offset);
+      // Now that we know the relocs are unsorted, avoid trying the 'fast path'
+      // for the other relocations.
+      subsecIt = subsecMap.rend();
+    } else {
+      subsec = subsecIt->isec;
+      r.offset -= subsecIt->offset;
+    }
     subsec->relocs.push_back(r);
+
+    if (isSubtrahend) {
+      relocation_info minuendInfo = relInfos[++i];
+      // SUBTRACTOR relocations should always be followed by an UNSIGNED one
+      // attached to the same address.
+      assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
+             relInfo.r_address == minuendInfo.r_address);
+      Reloc p;
+      p.type = minuendInfo.r_type;
+      if (minuendInfo.r_extern) {
+        p.referent = symbols[minuendInfo.r_symbolnum];
+        p.addend = totalAddend;
+      } else {
+        uint64_t referentOffset =
+            totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
+        SubsectionMap &referentSubsecMap =
+            subsections[minuendInfo.r_symbolnum - 1];
+        p.referent =
+            findContainingSubsection(referentSubsecMap, &referentOffset);
+        p.addend = referentOffset;
+      }
+      subsec->relocs.push_back(p);
+    }
   }
 }
 
-static macho::Symbol *createDefined(const structs::nlist_64 &sym,
-                                    StringRef name, InputSection *isec,
-                                    uint32_t value) {
+template <class NList>
+static macho::Symbol *createDefined(const NList &sym, StringRef name,
+                                    InputSection *isec, uint64_t value,
+                                    uint64_t size) {
   // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
-  // N_EXT: Global symbols
-  // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped
-  // N_PEXT: Does not occur in input files in practice,
-  //         a private extern must be external.
-  // 0: Translation-unit scoped. These are not in the symbol table.
-
-  if (sym.n_type & (N_EXT | N_PEXT)) {
-    assert((sym.n_type & N_EXT) && "invalid input");
-    return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF,
-                              sym.n_type & N_PEXT);
+  // N_EXT: Global symbols. These go in the symbol table during the link,
+  //        and also in the export table of the output so that the dynamic
+  //        linker sees them.
+  // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the
+  //                 symbol table during the link so that duplicates are
+  //                 either reported (for non-weak symbols) or merged
+  //                 (for weak symbols), but they do not go in the export
+  //                 table of the output.
+  // N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
+  //         object files) may produce them. LLD does not yet support -r.
+  //         These are translation-unit scoped, identical to the `0` case.
+  // 0: Translation-unit scoped. These are not in the symbol table during
+  //    link, and not in the export table of the output either.
+  bool isWeakDefCanBeHidden =
+      (sym.n_desc & (N_WEAK_DEF | N_WEAK_REF)) == (N_WEAK_DEF | N_WEAK_REF);
+
+  if (sym.n_type & N_EXT) {
+    bool isPrivateExtern = sym.n_type & N_PEXT;
+    // lld's behavior for merging symbols is slightly different from ld64:
+    // ld64 picks the winning symbol based on several criteria (see
+    // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
+    // just merges metadata and keeps the contents of the first symbol
+    // with that name (see SymbolTable::addDefined). For:
+    // * inline function F in a TU built with -fvisibility-inlines-hidden
+    // * and inline function F in another TU built without that flag
+    // ld64 will pick the one from the file built without
+    // -fvisibility-inlines-hidden.
+    // lld will instead pick the one listed first on the link command line and
+    // give it visibility as if the function was built without
+    // -fvisibility-inlines-hidden.
+    // If both functions have the same contents, this will have the same
+    // behavior. If not, it won't, but the input had an ODR violation in
+    // that case.
+    //
+    // Similarly, merging a symbol
+    // that's isPrivateExtern and not isWeakDefCanBeHidden with one
+    // that's not isPrivateExtern but isWeakDefCanBeHidden technically
+    // should produce one
+    // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
+    // with ld64's semantics, because it means the non-private-extern
+    // definition will continue to take priority if more private extern
+    // definitions are encountered. With lld's semantics there's no observable
+    // difference between a symbol that's isWeakDefCanBeHidden or one that's
+    // privateExtern -- neither makes it into the dynamic symbol table. So just
+    // promote isWeakDefCanBeHidden to isPrivateExtern here.
+    if (isWeakDefCanBeHidden)
+      isPrivateExtern = true;
+
+    return symtab->addDefined(
+        name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
+        isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
+        sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
   }
-  return make<Defined>(name, isec, value, sym.n_desc & N_WEAK_DEF,
-                       /*isExternal=*/false, /*isPrivateExtern=*/false);
+
+  assert(!isWeakDefCanBeHidden &&
+         "weak_def_can_be_hidden on already-hidden symbol?");
+  return make<Defined>(
+      name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
+      /*isExternal=*/false, /*isPrivateExtern=*/false,
+      sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY,
+      sym.n_desc & N_NO_DEAD_STRIP);
 }
 
 // Absolute symbols are defined symbols that do not have an associated
 // InputSection. They cannot be weak.
-static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
+template <class NList>
+static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
                                      StringRef name) {
-  if (sym.n_type & (N_EXT | N_PEXT)) {
-    assert((sym.n_type & N_EXT) && "invalid input");
-    return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false,
-                              sym.n_type & N_PEXT);
+  if (sym.n_type & N_EXT) {
+    return symtab->addDefined(
+        name, file, nullptr, sym.n_value, /*size=*/0,
+        /*isWeakDef=*/false, sym.n_type & N_PEXT, sym.n_desc & N_ARM_THUMB_DEF,
+        /*isReferencedDynamically=*/false, sym.n_desc & N_NO_DEAD_STRIP);
   }
-  return make<Defined>(name, nullptr, sym.n_value, /*isWeakDef=*/false,
-                       /*isExternal=*/false, /*isPrivateExtern=*/false);
+  return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
+                       /*isWeakDef=*/false,
+                       /*isExternal=*/false, /*isPrivateExtern=*/false,
+                       sym.n_desc & N_ARM_THUMB_DEF,
+                       /*isReferencedDynamically=*/false,
+                       sym.n_desc & N_NO_DEAD_STRIP);
 }
 
-macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
+template <class NList>
+macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
                                               StringRef name) {
   uint8_t type = sym.n_type & N_TYPE;
   switch (type) {
   case N_UNDF:
     return sym.n_value == 0
-               ? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF)
+               ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
                : symtab->addCommon(name, this, sym.n_value,
                                    1 << GET_COMM_ALIGN(sym.n_desc),
                                    sym.n_type & N_PEXT);
   case N_ABS:
-    return createAbsolute(sym, name);
+    return createAbsolute(sym, this, name);
   case N_PBUD:
   case N_INDR:
     error("TODO: support symbols of type " + std::to_string(type));
@@ -333,132 +585,220 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
   }
 }
 
-void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
+template <class NList>
+static bool isUndef(const NList &sym) {
+  return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
+}
+
+template <class LP>
+void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
+                           ArrayRef<typename LP::nlist> nList,
                            const char *strtab, bool subsectionsViaSymbols) {
-  // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
-  // out-of-sequence.
+  using NList = typename LP::nlist;
+
+  // Groups indices of the symbols by the sections that contain them.
+  std::vector<std::vector<uint32_t>> symbolsBySection(subsections.size());
   symbols.resize(nList.size());
-  std::vector<size_t> altEntrySymIdxs;
+  SmallVector<unsigned, 32> undefineds;
+  for (uint32_t i = 0; i < nList.size(); ++i) {
+    const NList &sym = nList[i];
 
-  for (size_t i = 0, n = nList.size(); i < n; ++i) {
-    const structs::nlist_64 &sym = nList[i];
-    StringRef name = strtab + sym.n_strx;
+    // Ignore debug symbols for now.
+    // FIXME: may need special handling.
+    if (sym.n_type & N_STAB)
+      continue;
 
-    if ((sym.n_type & N_TYPE) != N_SECT) {
+    StringRef name = strtab + sym.n_strx;
+    if ((sym.n_type & N_TYPE) == N_SECT) {
+      SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
+      // parseSections() may have chosen not to parse this section.
+      if (subsecMap.empty())
+        continue;
+      symbolsBySection[sym.n_sect - 1].push_back(i);
+    } else if (isUndef(sym)) {
+      undefineds.push_back(i);
+    } else {
       symbols[i] = parseNonSectionSymbol(sym, name);
-      continue;
     }
+  }
 
-    const section_64 &sec = sectionHeaders[sym.n_sect - 1];
-    SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
-    assert(!subsecMap.empty());
-    uint64_t offset = sym.n_value - sec.addr;
-
-    // If the input file does not use subsections-via-symbols, all symbols can
-    // use the same subsection. Otherwise, we must split the sections along
-    // symbol boundaries.
-    if (!subsectionsViaSymbols) {
-      symbols[i] = createDefined(sym, name, subsecMap[0], offset);
+  for (size_t i = 0; i < subsections.size(); ++i) {
+    SubsectionMap &subsecMap = subsections[i];
+    if (subsecMap.empty())
       continue;
-    }
 
-    // nList entries aren't necessarily arranged in address order. Therefore,
-    // we can't create alt-entry symbols at this point because a later symbol
-    // may split its section, which may affect which subsection the alt-entry
-    // symbol is assigned to. So we need to handle them in a second pass below.
-    if (sym.n_desc & N_ALT_ENTRY) {
-      altEntrySymIdxs.push_back(i);
+    std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
+    uint64_t sectionAddr = sectionHeaders[i].addr;
+    uint32_t sectionAlign = 1u << sectionHeaders[i].align;
+
+    InputSection *isec = subsecMap.back().isec;
+    // __cfstring has already been split into subsections during
+    // parseSections(), so we simply need to match Symbols to the corresponding
+    // subsection here.
+    if (config->icfLevel != ICFLevel::none && isCfStringSection(isec)) {
+      for (size_t j = 0; j < symbolIndices.size(); ++j) {
+        uint32_t symIndex = symbolIndices[j];
+        const NList &sym = nList[symIndex];
+        StringRef name = strtab + sym.n_strx;
+        uint64_t symbolOffset = sym.n_value - sectionAddr;
+        InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset);
+        if (symbolOffset != 0) {
+          error(toString(this) + ": __cfstring contains symbol " + name +
+                " at misaligned offset");
+          continue;
+        }
+        symbols[symIndex] = createDefined(sym, name, isec, 0, isec->getSize());
+      }
       continue;
     }
 
-    // Find the subsection corresponding to the greatest section offset that is
-    // <= that of the current symbol. The subsection that we find either needs
-    // to be used directly or split in two.
-    uint32_t firstSize = offset;
-    InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);
+    // Calculate symbol sizes and create subsections by splitting the sections
+    // along symbol boundaries.
+    // We populate subsecMap by repeatedly splitting the last (highest address)
+    // subsection.
+    llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
+      return nList[lhs].n_value < nList[rhs].n_value;
+    });
+    SubsectionEntry subsecEntry = subsecMap.back();
+    for (size_t j = 0; j < symbolIndices.size(); ++j) {
+      uint32_t symIndex = symbolIndices[j];
+      const NList &sym = nList[symIndex];
+      StringRef name = strtab + sym.n_strx;
+      InputSection *isec = subsecEntry.isec;
+
+      uint64_t subsecAddr = sectionAddr + subsecEntry.offset;
+      size_t symbolOffset = sym.n_value - subsecAddr;
+      uint64_t symbolSize =
+          j + 1 < symbolIndices.size()
+              ? nList[symbolIndices[j + 1]].n_value - sym.n_value
+              : isec->data.size() - symbolOffset;
+      // There are 4 cases where we do not need to create a new subsection:
+      //   1. If the input file does not use subsections-via-symbols.
+      //   2. Multiple symbols at the same address only induce one subsection.
+      //      (The symbolOffset == 0 check covers both this case and the
+      //      first loop iteration.)
+      //   3. Alternative entry points do not induce new subsections.
+      //   4. If we have a literal section (e.g. __cstring and __literal4).
+      if (!subsectionsViaSymbols || symbolOffset == 0 ||
+          sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
+        symbols[symIndex] =
+            createDefined(sym, name, isec, symbolOffset, symbolSize);
+        continue;
+      }
+      auto *concatIsec = cast<ConcatInputSection>(isec);
+
+      auto *nextIsec = make<ConcatInputSection>(*concatIsec);
+      nextIsec->numRefs = 0;
+      nextIsec->wasCoalesced = false;
+      if (isZeroFill(isec->getFlags())) {
+        // Zero-fill sections have NULL data.data() but non-zero data.size()
+        nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
+        isec->data = {nullptr, symbolOffset};
+      } else {
+        nextIsec->data = isec->data.slice(symbolOffset);
+        isec->data = isec->data.slice(0, symbolOffset);
+      }
 
-    if (firstSize == 0) {
-      // Alias of an existing symbol, or the first symbol in the section. These
-      // are handled by reusing the existing section.
-      symbols[i] = createDefined(sym, name, firstIsec, 0);
-      continue;
+      // By construction, the symbol will be at offset zero in the new
+      // subsection.
+      symbols[symIndex] =
+          createDefined(sym, name, nextIsec, /*value=*/0, symbolSize);
+      // TODO: ld64 appears to preserve the original alignment as well as each
+      // subsection's offset from the last aligned address. We should consider
+      // emulating that behavior.
+      nextIsec->align = MinAlign(sectionAlign, sym.n_value);
+      subsecMap.push_back({sym.n_value - sectionAddr, nextIsec});
+      subsecEntry = subsecMap.back();
     }
-
-    // We saw a symbol definition at a new offset. Split the section into two
-    // subsections. The new symbol uses the second subsection.
-    auto *secondIsec = make<InputSection>(*firstIsec);
-    secondIsec->data = firstIsec->data.slice(firstSize);
-    firstIsec->data = firstIsec->data.slice(0, firstSize);
-    // TODO: ld64 appears to preserve the original alignment as well as each
-    // subsection's offset from the last aligned address. We should consider
-    // emulating that behavior.
-    secondIsec->align = MinAlign(firstIsec->align, offset);
-
-    subsecMap[offset] = secondIsec;
-    // By construction, the symbol will be at offset zero in the new section.
-    symbols[i] = createDefined(sym, name, secondIsec, 0);
   }
 
-  for (size_t idx : altEntrySymIdxs) {
-    const structs::nlist_64 &sym = nList[idx];
+  // Undefined symbols can trigger recursive fetch from Archives due to
+  // LazySymbols. Process defined symbols first so that the relative order
+  // between a defined symbol and an undefined symbol does not change the
+  // symbol resolution behavior. In addition, a set of interconnected symbols
+  // will all be resolved to the same file, instead of being resolved to
+  // different files.
+  for (unsigned i : undefineds) {
+    const NList &sym = nList[i];
     StringRef name = strtab + sym.n_strx;
-    SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
-    uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
-    InputSection *subsec = findContainingSubsection(subsecMap, &off);
-    symbols[idx] = createDefined(sym, name, subsec, off);
+    symbols[i] = parseNonSectionSymbol(sym, name);
   }
 }
 
 OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
                        StringRef sectName)
     : InputFile(OpaqueKind, mb) {
-  InputSection *isec = make<InputSection>();
-  isec->file = this;
-  isec->name = sectName.take_front(16);
-  isec->segname = segName.take_front(16);
   const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
-  isec->data = {buf, mb.getBufferSize()};
+  ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
+  ConcatInputSection *isec =
+      make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
+                               /*file=*/this, data);
+  isec->live = true;
   subsections.push_back({{0, isec}});
 }
 
 ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
     : InputFile(ObjKind, mb), modTime(modTime) {
   this->archiveName = std::string(archiveName);
+  if (target->wordSize == 8)
+    parse<LP64>();
+  else
+    parse<ILP32>();
+}
+
+template <class LP> void ObjFile::parse() {
+  using Header = typename LP::mach_header;
+  using SegmentCommand = typename LP::segment_command;
+  using Section = typename LP::section;
+  using NList = typename LP::nlist;
 
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
-  auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
+  auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
 
-  if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) {
-    auto *c = reinterpret_cast<const linker_option_command *>(cmd);
-    StringRef data{reinterpret_cast<const char *>(c + 1),
-                   c->cmdsize - sizeof(linker_option_command)};
-    parseLCLinkerOption(this, c->count, data);
+  Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
+  if (arch != config->arch()) {
+    error(toString(this) + " has architecture " + getArchitectureName(arch) +
+          " which is incompatible with target architecture " +
+          getArchitectureName(config->arch()));
+    return;
+  }
+
+  if (!checkCompatibility(this))
+    return;
+
+  for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
+    StringRef data{reinterpret_cast<const char *>(cmd + 1),
+                   cmd->cmdsize - sizeof(linker_option_command)};
+    parseLCLinkerOption(this, cmd->count, data);
   }
 
-  if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
-    auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
-    sectionHeaders = ArrayRef<section_64>{
-        reinterpret_cast<const section_64 *>(c + 1), c->nsects};
+  ArrayRef<Section> sectionHeaders;
+  if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
+    auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
+    sectionHeaders =
+        ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
     parseSections(sectionHeaders);
   }
 
   // TODO: Error on missing LC_SYMTAB?
   if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
     auto *c = reinterpret_cast<const symtab_command *>(cmd);
-    ArrayRef<structs::nlist_64> nList(
-        reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
+    ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
+                          c->nsyms);
     const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
     bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
-    parseSymbols(nList, strtab, subsectionsViaSymbols);
+    parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
   }
 
   // The relocations may refer to the symbols, so we parse them after we have
   // parsed all the symbols.
   for (size_t i = 0, n = subsections.size(); i < n; ++i)
     if (!subsections[i].empty())
-      parseRelocations(sectionHeaders[i], subsections[i]);
+      parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
 
   parseDebugInfo();
+  if (config->emitDataInCodeInfo)
+    parseDataInCode();
 }
 
 void ObjFile::parseDebugInfo() {
@@ -478,59 +818,122 @@ void ObjFile::parseDebugInfo() {
   // TODO: Since object files can contain a lot of DWARF info, we should verify
   // that we are parsing just the info we need
   const DWARFContext::compile_unit_range &units = ctx->compile_units();
+  // FIXME: There can be more than one compile unit per object file. See
+  // PR48637.
   auto it = units.begin();
   compileUnit = it->get();
-  assert(std::next(it) == units.end());
+}
+
+void ObjFile::parseDataInCode() {
+  const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
+  const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
+  if (!cmd)
+    return;
+  const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
+  dataInCodeEntries = {
+      reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
+      c->datasize / sizeof(data_in_code_entry)};
+  assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs,
+                                         const data_in_code_entry &rhs) {
+    return lhs.offset < rhs.offset;
+  }));
 }
 
 // The path can point to either a dylib or a .tbd file.
-static Optional<DylibFile *> loadDylib(StringRef path, DylibFile *umbrella) {
+static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
   Optional<MemoryBufferRef> mbref = readFile(path);
   if (!mbref) {
     error("could not read dylib file at " + path);
-    return {};
+    return nullptr;
   }
   return loadDylib(*mbref, umbrella);
 }
 
 // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
 // the first document storing child pointers to the rest of them. When we are
-// processing a given TBD file, we store that top-level document here. When
-// processing re-exports, we search its children for potentially matching
-// documents in the same TBD file. Note that the children themselves don't
-// point to further documents, i.e. this is a two-level tree.
+// processing a given TBD file, we pass that top-level document along as
+// currentTopLevelTapi. When processing re-exports, we search its children for
+// potentially matching documents in the same TBD file. Note that the children
+// themselves don't point to further documents, i.e. this is a two-level tree.
 //
-// ld64 allows a TAPI re-export to reference documents nested within other TBD
-// files, but that seems like a strange design, so this is an intentional
-// deviation.
-const InterfaceFile *currentTopLevelTapi = nullptr;
-
 // Re-exports can either refer to on-disk files, or to documents within .tbd
 // files.
-static Optional<DylibFile *> loadReexportHelper(StringRef path,
-                                                DylibFile *umbrella) {
+static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
+                            const InterfaceFile *currentTopLevelTapi) {
+  // Search order:
+  // 1. Install name basename in -F / -L directories.
+  {
+    StringRef stem = path::stem(path);
+    SmallString<128> frameworkName;
+    path::append(frameworkName, path::Style::posix, stem + ".framework", stem);
+    bool isFramework = path.endswith(frameworkName);
+    if (isFramework) {
+      for (StringRef dir : config->frameworkSearchPaths) {
+        SmallString<128> candidate = dir;
+        path::append(candidate, frameworkName);
+        if (Optional<std::string> dylibPath = resolveDylibPath(candidate))
+          return loadDylib(*dylibPath, umbrella);
+      }
+    } else if (Optional<StringRef> dylibPath = findPathCombination(
+                   stem, config->librarySearchPaths, {".tbd", ".dylib"}))
+      return loadDylib(*dylibPath, umbrella);
+  }
+
+  // 2. As absolute path.
   if (path::is_absolute(path, path::Style::posix))
     for (StringRef root : config->systemLibraryRoots)
       if (Optional<std::string> dylibPath =
               resolveDylibPath((root + path).str()))
         return loadDylib(*dylibPath, umbrella);
 
-  // TODO: Expand @loader_path, @executable_path etc
+  // 3. As relative path.
+
+  // TODO: Handle -dylib_file
+
+  // Replace @executable_path, @loader_path, @rpath prefixes in install name.
+  SmallString<128> newPath;
+  if (config->outputType == MH_EXECUTE &&
+      path.consume_front("@executable_path/")) {
+    // ld64 allows overriding this with the undocumented flag -executable_path.
+    // lld doesn't currently implement that flag.
+    // FIXME: Consider using finalOutput instead of outputFile.
+    path::append(newPath, path::parent_path(config->outputFile), path);
+    path = newPath;
+  } else if (path.consume_front("@loader_path/")) {
+    fs::real_path(umbrella->getName(), newPath);
+    path::remove_filename(newPath);
+    path::append(newPath, path);
+    path = newPath;
+  } else if (path.startswith("@rpath/")) {
+    for (StringRef rpath : umbrella->rpaths) {
+      newPath.clear();
+      if (rpath.consume_front("@loader_path/")) {
+        fs::real_path(umbrella->getName(), newPath);
+        path::remove_filename(newPath);
+      }
+      path::append(newPath, rpath, path.drop_front(strlen("@rpath/")));
+      if (Optional<std::string> dylibPath = resolveDylibPath(newPath))
+        return loadDylib(*dylibPath, umbrella);
+    }
+  }
 
+  // FIXME: Should this be further up?
   if (currentTopLevelTapi) {
     for (InterfaceFile &child :
          make_pointee_range(currentTopLevelTapi->documents())) {
-      if (path == child.getInstallName())
-        return make<DylibFile>(child, umbrella);
       assert(child.documents().empty());
+      if (path == child.getInstallName()) {
+        auto file = make<DylibFile>(child, umbrella);
+        file->parseReexports(child);
+        return file;
+      }
     }
   }
 
   if (Optional<std::string> dylibPath = resolveDylibPath(path))
     return loadDylib(*dylibPath, umbrella);
 
-  error("unable to locate re-export with install name " + path);
-  return {};
+  return nullptr;
 }
 
 // If a re-exported dylib is public (lives in /usr/lib or
@@ -553,74 +956,142 @@ static bool isImplicitlyLinked(StringRef path) {
   return false;
 }
 
-void loadReexport(StringRef path, DylibFile *umbrella) {
-  Optional<DylibFile *> reexport = loadReexportHelper(path, umbrella);
-  if (reexport && isImplicitlyLinked(path))
-    inputFiles.insert(*reexport);
+static void loadReexport(StringRef path, DylibFile *umbrella,
+                         const InterfaceFile *currentTopLevelTapi) {
+  DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
+  if (!reexport)
+    error("unable to locate re-export with install name " + path);
 }
 
-DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella)
-    : InputFile(DylibKind, mb), refState(RefState::Unreferenced) {
+DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
+                     bool isBundleLoader)
+    : InputFile(DylibKind, mb), refState(RefState::Unreferenced),
+      isBundleLoader(isBundleLoader) {
+  assert(!isBundleLoader || !umbrella);
   if (umbrella == nullptr)
     umbrella = this;
+  this->umbrella = umbrella;
 
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
-  auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
+  auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
 
-  // Initialize dylibName.
+  // Initialize installName.
   if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
     auto *c = reinterpret_cast<const dylib_command *>(cmd);
     currentVersion = read32le(&c->dylib.current_version);
     compatibilityVersion = read32le(&c->dylib.compatibility_version);
-    dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
-  } else {
+    installName =
+        reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
+  } else if (!isBundleLoader) {
+    // macho_executable and macho_bundle don't have LC_ID_DYLIB, so its
+    // absence is only an error when this file is not a bundle loader.
     error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
     return;
   }
 
+  if (config->printEachFile)
+    message(toString(this));
+  inputFiles.insert(this);
+
+  deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
+
+  if (!checkCompatibility(this))
+    return;
+
+  checkAppExtensionSafety(hdr->flags & MH_APP_EXTENSION_SAFE);
+
+  for (auto *cmd : findCommands<rpath_command>(hdr, LC_RPATH)) {
+    StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
+    rpaths.push_back(rpath);
+  }
+
   // Initialize symbols.
-  DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella;
+  exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella;
   if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) {
     auto *c = reinterpret_cast<const dyld_info_command *>(cmd);
     parseTrie(buf + c->export_off, c->export_size,
               [&](const Twine &name, uint64_t flags) {
+                StringRef savedName = saver.save(name);
+                if (handleLDSymbol(savedName))
+                  return;
                 bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
                 bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
-                symbols.push_back(symtab->addDylib(
-                    saver.save(name), exportingFile, isWeakDef, isTlv));
+                symbols.push_back(symtab->addDylib(savedName, exportingFile,
+                                                   isWeakDef, isTlv));
               });
   } else {
     error("LC_DYLD_INFO_ONLY not found in " + toString(this));
     return;
   }
+}
 
-  if (hdr->flags & MH_NO_REEXPORTED_DYLIBS)
-    return;
-
-  const uint8_t *p =
-      reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
+void DylibFile::parseLoadCommands(MemoryBufferRef mb) {
+  auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
+  const uint8_t *p = reinterpret_cast<const uint8_t *>(mb.getBufferStart()) +
+                     target->headerSize;
   for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
     auto *cmd = reinterpret_cast<const load_command *>(p);
     p += cmd->cmdsize;
-    if (cmd->cmd != LC_REEXPORT_DYLIB)
-      continue;
 
-    auto *c = reinterpret_cast<const dylib_command *>(cmd);
-    StringRef reexportPath =
-        reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
-    loadReexport(reexportPath, umbrella);
+    if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) &&
+        cmd->cmd == LC_REEXPORT_DYLIB) {
+      const auto *c = reinterpret_cast<const dylib_command *>(cmd);
+      StringRef reexportPath =
+          reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
+      loadReexport(reexportPath, exportingFile, nullptr);
+    }
+
+    // FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB,
+    // LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with
+    // MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)?
+    if (config->namespaceKind == NamespaceKind::flat &&
+        cmd->cmd == LC_LOAD_DYLIB) {
+      const auto *c = reinterpret_cast<const dylib_command *>(cmd);
+      StringRef dylibPath =
+          reinterpret_cast<const char *>(c) + read32le(&c->dylib.name);
+      DylibFile *dylib = findDylib(dylibPath, umbrella, nullptr);
+      if (!dylib)
+        error(Twine("unable to locate library '") + dylibPath +
+              "' loaded from '" + toString(this) + "' for -flat_namespace");
+    }
   }
 }
 
-DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
-    : InputFile(DylibKind, interface), refState(RefState::Unreferenced) {
+// Some versions of Xcode ship with .tbd files that don't have the right
+// platform settings.
+static constexpr std::array<StringRef, 3> skipPlatformChecks{
+    "/usr/lib/system/libsystem_kernel.dylib",
+    "/usr/lib/system/libsystem_platform.dylib",
+    "/usr/lib/system/libsystem_pthread.dylib"};
+
+DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
+                     bool isBundleLoader)
+    : InputFile(DylibKind, interface), refState(RefState::Unreferenced),
+      isBundleLoader(isBundleLoader) {
+  // FIXME: Add test for the missing TBD code path.
+
   if (umbrella == nullptr)
     umbrella = this;
+  this->umbrella = umbrella;
 
-  dylibName = saver.save(interface.getInstallName());
+  installName = saver.save(interface.getInstallName());
   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
   currentVersion = interface.getCurrentVersion().rawValue();
-  DylibFile *exportingFile = isImplicitlyLinked(dylibName) ? this : umbrella;
+
+  if (config->printEachFile)
+    message(toString(this));
+  inputFiles.insert(this);
+
+  if (!is_contained(skipPlatformChecks, installName) &&
+      !is_contained(interface.targets(), config->platformInfo.target)) {
+    error(toString(this) + " is incompatible with " +
+          std::string(config->platformInfo.target));
+    return;
+  }
+
+  checkAppExtensionSafety(interface.isApplicationExtensionSafe());
+
+  exportingFile = isImplicitlyLinked(installName) ? this : umbrella;
   auto addSymbol = [&](const Twine &name) -> void {
     symbols.push_back(symtab->addDylib(saver.save(name), exportingFile,
                                        /*isWeakDef=*/false,
@@ -628,8 +1099,11 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
   };
   // TODO(compnerd) filter out symbols based on the target platform
   // TODO: handle weak defs, thread locals
-  for (const auto symbol : interface.symbols()) {
-    if (!symbol->getArchitectures().has(config->arch))
+  for (const auto *symbol : interface.symbols()) {
+    if (!symbol->getArchitectures().has(config->arch()))
+      continue;
+
+    if (handleLDSymbol(symbol->getName()))
       continue;
 
     switch (symbol->getKind()) {
@@ -650,18 +1124,104 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella)
       break;
     }
   }
+}
 
-  bool isTopLevelTapi = false;
-  if (currentTopLevelTapi == nullptr) {
-    currentTopLevelTapi = &interface;
-    isTopLevelTapi = true;
+void DylibFile::parseReexports(const InterfaceFile &interface) {
+  const InterfaceFile *topLevel =
+      interface.getParent() == nullptr ? &interface : interface.getParent();
+  for (InterfaceFileRef intfRef : interface.reexportedLibraries()) {
+    InterfaceFile::const_target_range targets = intfRef.targets();
+    if (is_contained(skipPlatformChecks, intfRef.getInstallName()) ||
+        is_contained(targets, config->platformInfo.target))
+      loadReexport(intfRef.getInstallName(), exportingFile, topLevel);
   }
+}
 
-  for (InterfaceFileRef intfRef : interface.reexportedLibraries())
-    loadReexport(intfRef.getInstallName(), umbrella);
+// $ld$ symbols modify the properties/behavior of the library (e.g. its install
+// name, compatibility version or hide/add symbols) for specific target
+// versions.
+bool DylibFile::handleLDSymbol(StringRef originalName) {
+  if (!originalName.startswith("$ld$"))
+    return false;
 
-  if (isTopLevelTapi)
-    currentTopLevelTapi = nullptr;
+  StringRef action;
+  StringRef name;
+  std::tie(action, name) = originalName.drop_front(strlen("$ld$")).split('$');
+  if (action == "previous")
+    handleLDPreviousSymbol(name, originalName);
+  else if (action == "install_name")
+    handleLDInstallNameSymbol(name, originalName);
+  return true;
+}
+
+void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
+  // originalName: $ld$ previous $ <installname> $ <compatversion> $
+  // <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $
+  StringRef installName;
+  StringRef compatVersion;
+  StringRef platformStr;
+  StringRef startVersion;
+  StringRef endVersion;
+  StringRef symbolName;
+  StringRef rest;
+
+  std::tie(installName, name) = name.split('$');
+  std::tie(compatVersion, name) = name.split('$');
+  std::tie(platformStr, name) = name.split('$');
+  std::tie(startVersion, name) = name.split('$');
+  std::tie(endVersion, name) = name.split('$');
+  std::tie(symbolName, rest) = name.split('$');
+  // TODO: ld64 contains some logic for non-empty symbolName as well.
+  if (!symbolName.empty())
+    return;
+  unsigned platform;
+  if (platformStr.getAsInteger(10, platform) ||
+      platform != static_cast<unsigned>(config->platform()))
+    return;
+
+  VersionTuple start;
+  if (start.tryParse(startVersion)) {
+    warn("failed to parse start version, symbol '" + originalName +
+         "' ignored");
+    return;
+  }
+  VersionTuple end;
+  if (end.tryParse(endVersion)) {
+    warn("failed to parse end version, symbol '" + originalName + "' ignored");
+    return;
+  }
+  if (config->platformInfo.minimum < start ||
+      config->platformInfo.minimum >= end)
+    return;
+
+  this->installName = saver.save(installName);
+
+  if (!compatVersion.empty()) {
+    VersionTuple cVersion;
+    if (cVersion.tryParse(compatVersion)) {
+      warn("failed to parse compatibility version, symbol '" + originalName +
+           "' ignored");
+      return;
+    }
+    compatibilityVersion = encodeVersion(cVersion);
+  }
+}
+
+void DylibFile::handleLDInstallNameSymbol(StringRef name,
+                                          StringRef originalName) {
+  // originalName: $ld$ install_name $ os<version> $ install_name
+  StringRef condition, installName;
+  std::tie(condition, installName) = name.split('$');
+  VersionTuple version;
+  if (!condition.consume_front("os") || version.tryParse(condition))
+    warn("failed to parse os version, symbol '" + originalName + "' ignored");
+  else if (version == config->platformInfo.minimum)
+    this->installName = saver.save(installName);
+}
+
+void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
+  if (config->applicationExtension && !dylibIsAppExtensionSafe)
+    warn("using '-application_extension' with unsafe dylib: " + toString(this));
 }
 
 ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
@@ -694,34 +1254,74 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
                                      "for the member defining symbol " +
                                      toMachOString(sym)));
 
-  // `sym` is owned by a LazySym, which will be replace<>() by make<ObjFile>
+  // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
   // and become invalid after that call. Copy it to the stack so we can refer
   // to it later.
-  const object::Archive::Symbol sym_copy = sym;
+  const object::Archive::Symbol symCopy = sym;
+
+  if (Optional<InputFile *> file = loadArchiveMember(
+          mb, modTime, getName(), /*objCOnly=*/false, c.getChildOffset())) {
+    inputFiles.insert(*file);
+    // ld64 doesn't demangle sym here even with -demangle.
+    // Match that: intentionally don't call toMachOString().
+    printArchiveMemberLoad(symCopy.getName(), *file);
+  }
+}
 
-  InputFile *file;
-  switch (identify_magic(mb.getBuffer())) {
-  case file_magic::macho_object:
-    file = make<ObjFile>(mb, modTime, getName());
+static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
+                                          BitcodeFile &file) {
+  StringRef name = saver.save(objSym.getName());
+
+  // TODO: support weak references; isWeakRef is hard-coded to false below.
+  if (objSym.isUndefined())
+    return symtab->addUndefined(name, &file, /*isWeakRef=*/false);
+
+  assert(!objSym.isCommon() && "TODO: support common symbols in LTO");
+
+  // TODO: Write a test demonstrating why computing isPrivateExtern before
+  // LTO compilation is important.
+  bool isPrivateExtern = false; // set only for hidden visibility
+  switch (objSym.getVisibility()) {
+  case GlobalValue::HiddenVisibility:
+    isPrivateExtern = true;
     break;
-  case file_magic::bitcode:
-    file = make<BitcodeFile>(mb);
+  case GlobalValue::ProtectedVisibility: // diagnosed, but still defined below
+    error(name + " has protected visibility, which is not supported by Mach-O");
+    break;
+  case GlobalValue::DefaultVisibility:
     break;
-  default:
-    StringRef bufname =
-        CHECK(c.getName(), toString(this) + ": could not get buffer name");
-    error(toString(this) + ": archive member " + bufname +
-          " has unhandled file type");
-    return;
   }
-  inputFiles.insert(file);
 
-  // ld64 doesn't demangle sym here even with -demangle. Match that, so
-  // intentionally no call to toMachOString() here.
-  printArchiveMemberLoad(sym_copy.getName(), file);
+  return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
+                            /*size=*/0, objSym.isWeak(), isPrivateExtern,
+                            /*isThumb=*/false,
+                            /*isReferencedDynamically=*/false,
+                            /*noDeadStrip=*/false);
 }
 
-BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
-    : InputFile(BitcodeKind, mbref) {
+BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
+                         uint64_t offsetInArchive)
+    : InputFile(BitcodeKind, mb) {
+  std::string path = mb.getBufferIdentifier().str();
+  // ThinLTO assumes that every MemoryBufferRef given to it has a unique
+  // name; two buffers with the same name collide and ThinLTO can't proceed.
+  // For an archive member we therefore build the identifier as
+  //   <archive name><member filename><offset within the archive>
+  // so the archive name separates same-named members of different archives
+  // and the offset separates same-named members of a single archive.
+  MemoryBufferRef mbref(
+      mb.getBuffer(),
+      saver.save(archiveName.empty() ? path
+                                     : archiveName + sys::path::filename(path) +
+                                           utostr(offsetInArchive)));
+
   obj = check(lto::InputFile::create(mbref));
+
+  // Convert LTO Symbols to LLD Symbols in order to perform resolution. The
+  // "winning" symbol will then be marked as Prevailing at LTO compilation
+  // time.
+  for (const lto::InputFile::Symbol &objSym : obj->symbols())
+    symbols.push_back(createBitcodeSymbol(objSym, *this));
 }
+
+template void ObjFile::parse<LP64>(); // explicit instantiation for LP64