diff options
Diffstat (limited to 'lld')
34 files changed, 625 insertions, 318 deletions
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 6cabb22d98cf..3cdbd6c0337b 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -13,6 +13,7 @@ #include "Symbols.h" #include "Writer.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Debug.h" @@ -815,7 +816,7 @@ void RVATableChunk::writeTo(uint8_t *buf) const { size_t cnt = 0; for (const ChunkAndOffset &co : syms) begin[cnt++] = co.inputChunk->getRVA() + co.offset; - std::sort(begin, begin + cnt); + llvm::sort(begin, begin + cnt); assert(std::unique(begin, begin + cnt) == begin + cnt && "RVA tables should be de-duplicated"); } diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index bfa2a6910e2b..42a5a41f87ae 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -21,6 +21,7 @@ #include "COFFLinkerContext.h" #include "Chunks.h" #include "SymbolTable.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" @@ -150,10 +151,9 @@ binImports(const std::vector<DefinedImportData *> &imports) { for (auto &kv : m) { // Sort symbols by name for each group. std::vector<DefinedImportData *> &syms = kv.second; - std::sort(syms.begin(), syms.end(), - [](DefinedImportData *a, DefinedImportData *b) { - return a->getName() < b->getName(); - }); + llvm::sort(syms, [](DefinedImportData *a, DefinedImportData *b) { + return a->getName() < b->getName(); + }); v.push_back(std::move(syms)); } return v; diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 5878386aeb93..800b40f343aa 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -1126,9 +1126,8 @@ void TypeMerger::mergeTypesWithGHash() { } // In parallel, remap all types. - for_each(dependencySources, [&](TpiSource *source) { + for (TpiSource *source : dependencySources) source->remapTpiWithGHashes(&ghashState); - }); parallelForEach(objectSources, [&](TpiSource *source) { source->remapTpiWithGHashes(&ghashState); }); diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index ac0f1f972c79..29a2d0165839 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -18,6 +18,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" @@ -694,10 +695,9 @@ void fixupExports() { config->exports = std::move(v); // Sort by name. - std::sort(config->exports.begin(), config->exports.end(), - [](const Export &a, const Export &b) { - return a.exportName < b.exportName; - }); + llvm::sort(config->exports, [](const Export &a, const Export &b) { + return a.exportName < b.exportName; + }); } void assignExportOrdinals() { @@ -709,7 +709,7 @@ void assignExportOrdinals() { if (e.ordinal == 0) e.ordinal = ++max; if (max > std::numeric_limits<uint16_t>::max()) - fatal("too many exported symbols (max " + + fatal("too many exported symbols (got " + Twine(max) + ", max " + Twine(std::numeric_limits<uint16_t>::max()) + ")"); } diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 2ceb4fb98031..87b6bb55d610 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -296,14 +296,14 @@ static void addGHashTypeInfo(COFFLinkerContext &ctx, // Start the TPI or IPI stream header. builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80); builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80); - for_each(ctx.tpiSourceList, [&](TpiSource *source) { + for (TpiSource *source : ctx.tpiSourceList) { builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs, source->mergedTpi.recSizes, source->mergedTpi.recHashes); builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs, source->mergedIpi.recSizes, source->mergedIpi.recHashes); - }); + } } static void @@ -1134,7 +1134,8 @@ void PDBLinker::addObjectsToPDB() { ScopedTimer t1(ctx.addObjectsTimer); // Create module descriptors - for_each(ctx.objFileInstances, [&](ObjFile *obj) { createModuleDBI(obj); }); + for (ObjFile *obj : ctx.objFileInstances) + createModuleDBI(obj); // Reorder dependency type sources to come first. tMerger.sortDependencies(); @@ -1144,9 +1145,10 @@ void PDBLinker::addObjectsToPDB() { tMerger.mergeTypesWithGHash(); // Merge dependencies and then regular objects. - for_each(tMerger.dependencySources, - [&](TpiSource *source) { addDebug(source); }); - for_each(tMerger.objectSources, [&](TpiSource *source) { addDebug(source); }); + for (TpiSource *source : tMerger.dependencySources) + addDebug(source); + for (TpiSource *source : tMerger.objectSources) + addDebug(source); builder.getStringTableBuilder().setStrings(pdbStrTab); t1.stop(); @@ -1163,10 +1165,10 @@ void PDBLinker::addObjectsToPDB() { t2.stop(); if (config->showSummary) { - for_each(ctx.tpiSourceList, [&](TpiSource *source) { + for (TpiSource *source : ctx.tpiSourceList) { nbTypeRecords += source->nbTypeRecords; nbTypeRecordsBytes += source->nbTypeRecordsBytes; - }); + } } } diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index c09bb2e60786..8fca1a686a79 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -57,6 +57,7 @@ enum Op { enum Reg { X_RA = 1, + X_TP = 4, X_T0 = 5, X_T1 = 6, X_T2 = 7, @@ -76,6 +77,19 @@ static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) { return op | (rd << 7) | (imm << 12); } +// Extract bits v[begin:end], where range is inclusive, and begin must be < 63. +static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { + return (v & ((1ULL << (begin + 1)) - 1)) >> end; +} + +static uint32_t setLO12_I(uint32_t insn, uint32_t imm) { + return (insn & 0xfffff) | (imm << 20); +} +static uint32_t setLO12_S(uint32_t insn, uint32_t imm) { + return (insn & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) | + (extractBits(imm, 4, 0) << 7); +} + RISCV::RISCV() { copyRel = R_RISCV_COPY; pltRel = R_RISCV_JUMP_SLOT; @@ -270,10 +284,9 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_LO12_I: case R_RISCV_TPREL_LO12_S: return R_TPREL; - case R_RISCV_TPREL_ADD: - return R_NONE; case R_RISCV_ALIGN: return R_RELAX_HINT; + case R_RISCV_TPREL_ADD: case R_RISCV_RELAX: return config->relax ? R_RELAX_HINT : R_NONE; default: @@ -283,11 +296,6 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, } } -// Extract bits V[Begin:End], where range is inclusive, and Begin must be < 63. -static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { - return (v & ((1ULL << (begin + 1)) - 1)) >> end; -} - void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { const unsigned bits = config->wordsize * 8; @@ -404,7 +412,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_LO12_I: { uint64_t hi = (val + 0x800) >> 12; uint64_t lo = val - (hi << 12); - write32le(loc, (read32le(loc) & 0xFFFFF) | ((lo & 0xFFF) << 20)); + write32le(loc, setLO12_I(read32le(loc), lo & 0xfff)); return; } @@ -413,9 +421,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_LO12_S: { uint64_t hi = (val + 0x800) >> 12; uint64_t lo = val - (hi << 12); - uint32_t imm11_5 = extractBits(lo, 11, 5) << 25; - uint32_t imm4_0 = extractBits(lo, 4, 0) << 7; - write32le(loc, (read32le(loc) & 0x1FFF07F) | imm11_5 | imm4_0); + write32le(loc, setLO12_S(read32le(loc), lo)); return; } @@ -567,6 +573,35 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc, } } +// Relax local-exec TLS when hi20 is zero. +static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc, + Relocation &r, uint32_t &remove) { + uint64_t val = r.sym->getVA(r.addend); + if (hi20(val) != 0) + return; + uint32_t insn = read32le(sec.rawData.data() + r.offset); + switch (r.type) { + case R_RISCV_TPREL_HI20: + case R_RISCV_TPREL_ADD: + // Remove lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x). + sec.relaxAux->relocTypes[i] = R_RISCV_RELAX; + remove = 4; + break; + case R_RISCV_TPREL_LO12_I: + // addi rd, rd, %tprel_lo(x) => addi rd, tp, st_value(x) + sec.relaxAux->relocTypes[i] = R_RISCV_32; + insn = (insn & ~(31 << 15)) | (X_TP << 15); + sec.relaxAux->writes.push_back(setLO12_I(insn, val)); + break; + case R_RISCV_TPREL_LO12_S: + // sw rs, %tprel_lo(x)(rd) => sw rs, st_value(x)(rd) + sec.relaxAux->relocTypes[i] = R_RISCV_32; + insn = (insn & ~(31 << 15)) | (X_TP << 15); + sec.relaxAux->writes.push_back(setLO12_S(insn, val)); + break; + } +} + static bool relax(InputSection &sec) { const uint64_t secAddr = sec.getVA(); auto &aux = *sec.relaxAux; @@ -612,6 +647,14 @@ static bool relax(InputSection &sec) { sec.relocations[i + 1].type == R_RISCV_RELAX) relaxCall(sec, i, loc, r, remove); break; + case R_RISCV_TPREL_HI20: + case R_RISCV_TPREL_ADD: + case R_RISCV_TPREL_LO12_I: + case R_RISCV_TPREL_LO12_S: + if (i + 1 != sec.relocations.size() && + sec.relocations[i + 1].type == R_RISCV_RELAX) + relaxTlsLe(sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -697,7 +740,7 @@ void elf::riscvFinalizeRelax(int passes) { for (size_t i = 0, e = rels.size(); i != e; ++i) { uint32_t remove = aux.relocDeltas[i] - delta; delta = aux.relocDeltas[i]; - if (remove == 0) + if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE) continue; // Copy from last location to the current relocated location. @@ -723,15 +766,24 @@ void elf::riscvFinalizeRelax(int passes) { } } } else if (RelType newType = aux.relocTypes[i]) { - const uint32_t insn = aux.writes[writesIdx++]; switch (newType) { + case R_RISCV_RELAX: + // Used by relaxTlsLe to indicate the relocation is ignored. + break; case R_RISCV_RVC_JUMP: skip = 2; - write16le(p, insn); + write16le(p, aux.writes[writesIdx++]); break; case R_RISCV_JAL: skip = 4; - write32le(p, insn); + write32le(p, aux.writes[writesIdx++]); + break; + case R_RISCV_32: + // Used by relaxTlsLe to write a uint32_t then suppress the handling + // in relocateAlloc. + skip = 4; + write32le(p, aux.writes[writesIdx++]); + aux.relocTypes[i] = R_RISCV_NONE; break; default: llvm_unreachable("unsupported type"); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 4c26cba1cb4f..dd17adc4dbea 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -197,6 +197,10 @@ std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers( return v; } +static bool isBitcode(MemoryBufferRef mb) { + return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; +} + // Opens a file and create a file object. Path has to be resolved already. void LinkerDriver::addFile(StringRef path, bool withLOption) { using namespace sys::fs; @@ -217,8 +221,12 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { return; case file_magic::archive: { if (inWholeArchive) { - for (const auto &p : getArchiveMembers(mbref)) - files.push_back(createObjectFile(p.first, path, p.second)); + for (const auto &p : getArchiveMembers(mbref)) { + if (isBitcode(p.first)) + files.push_back(make<BitcodeFile>(p.first, path, p.second, false)); + else + files.push_back(createObjFile(p.first, path)); + } return; } @@ -241,8 +249,10 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { InputFile::isInGroup = true; for (const std::pair<MemoryBufferRef, uint64_t> &p : members) { auto magic = identify_magic(p.first.getBuffer()); - if (magic == file_magic::bitcode || magic == file_magic::elf_relocatable) - files.push_back(createLazyFile(p.first, path, p.second)); + if (magic == file_magic::elf_relocatable) + files.push_back(createObjFile(p.first, path, true)); + else if (magic == file_magic::bitcode) + files.push_back(make<BitcodeFile>(p.first, path, p.second, true)); else warn(path + ": archive member '" + p.first.getBufferIdentifier() + "' is neither ET_REL nor LLVM bitcode"); @@ -267,11 +277,10 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { make<SharedFile>(mbref, withLOption ? path::filename(path) : path)); return; case file_magic::bitcode: + files.push_back(make<BitcodeFile>(mbref, "", 0, inLib)); + break; case file_magic::elf_relocatable: - if (inLib) - files.push_back(createLazyFile(mbref, "", 0)); - else - files.push_back(createObjectFile(mbref)); + files.push_back(createObjFile(mbref, "", inLib)); break; default: error(path + ": unknown file type"); @@ -1612,7 +1621,7 @@ void LinkerDriver::createFiles(opt::InputArgList &args) { break; case OPT_just_symbols: if (Optional<MemoryBufferRef> mb = readFile(arg->getValue())) { - files.push_back(createObjectFile(*mb)); + files.push_back(createObjFile(*mb)); files.back()->justSymbols = true; } break; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 0ea2a2c74b63..c0076a3722fe 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -739,7 +739,7 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (!attr) return; - auto arch = attr.getValue(); + auto arch = attr.value(); switch (arch) { case ARMBuildAttrs::Pre_v4: case ARMBuildAttrs::v4: @@ -1710,34 +1710,27 @@ void BinaryFile::parse() { data.size(), 0, nullptr}); } -InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) { - if (isBitcode(mb)) - return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false); - +ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, + bool lazy) { + ELFFileBase *f; switch (getELFKind(mb, archiveName)) { case ELF32LEKind: - return make<ObjFile<ELF32LE>>(mb, archiveName); + f = make<ObjFile<ELF32LE>>(mb, archiveName); + break; case ELF32BEKind: - return make<ObjFile<ELF32BE>>(mb, archiveName); + f = make<ObjFile<ELF32BE>>(mb, archiveName); + break; case ELF64LEKind: - return make<ObjFile<ELF64LE>>(mb, archiveName); + f = make<ObjFile<ELF64LE>>(mb, archiveName); + break; case ELF64BEKind: - return make<ObjFile<ELF64BE>>(mb, archiveName); + f = make<ObjFile<ELF64BE>>(mb, archiveName); + break; default: llvm_unreachable("getELFKind"); } -} - -InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) { - if (isBitcode(mb)) - return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/true); - - auto *file = - cast<ELFFileBase>(createObjectFile(mb, archiveName, offsetInArchive)); - file->lazy = true; - return file; + f->lazy = lazy; + return f; } template <class ELFT> void ObjFile<ELFT>::parseLazy() { @@ -1763,7 +1756,7 @@ template <class ELFT> void ObjFile<ELFT>::parseLazy() { } bool InputFile::shouldExtractForCommon(StringRef name) { - if (isBitcode(mb)) + if (isa<BitcodeFile>(this)) return isBitcodeNonCommonDef(mb, name, archiveName); return isNonCommonDef(mb, name, archiveName); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index f89246eb645e..a24e664a7e16 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -371,14 +371,8 @@ public: void parse(); }; -InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", - uint64_t offsetInArchive = 0); -InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive); - -inline bool isBitcode(MemoryBufferRef mb) { - return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; -} +ELFFileBase *createObjFile(MemoryBufferRef mb, StringRef archiveName = "", + bool lazy = false); std::string replaceThinLTOSuffix(StringRef path); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 8c5001af3a91..b8019bd7d240 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -383,10 +383,10 @@ std::vector<InputFile *> BitcodeCompiler::compile() { std::vector<InputFile *> ret; for (unsigned i = 0; i != maxTasks; ++i) if (!buf[i].empty()) - ret.push_back(createObjectFile(MemoryBufferRef(buf[i], "lto.tmp"))); + ret.push_back(createObjFile(MemoryBufferRef(buf[i], "lto.tmp"))); for (std::unique_ptr<MemoryBuffer> &file : files) if (file) - ret.push_back(createObjectFile(*file)); + ret.push_back(createObjFile(*file)); return ret; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index b8a2ebeefce9..a0c5e6d04748 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -29,6 +29,7 @@ #include "lld/Common/DWARF.h" #include "lld/Common/Strings.h" #include "lld/Common/Version.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -1703,7 +1704,7 @@ void RelocationBaseSection::computeRels() { parallelSort(relocs.begin(), nonRelative, [&](auto &a, auto &b) { return a.r_offset < b.r_offset; }); // Non-relative relocations are few, so don't bother with parallelSort. - std::sort(nonRelative, relocs.end(), [&](auto &a, auto &b) { + llvm::sort(nonRelative, relocs.end(), [&](auto &a, auto &b) { return std::tie(a.r_sym, a.r_offset) < std::tie(b.r_sym, b.r_offset); }); } @@ -2039,7 +2040,7 @@ template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() { std::unique_ptr<uint64_t[]> offsets(new uint64_t[relocs.size()]); for (auto it : llvm::enumerate(relocs)) offsets[it.index()] = it.value().getOffset(); - std::sort(offsets.get(), offsets.get() + relocs.size()); + llvm::sort(offsets.get(), offsets.get() + relocs.size()); // For each leading relocation, find following ones that can be folded // as a bitmap and fold them. @@ -3855,7 +3856,8 @@ void InStruct::reset() { constexpr char kMemtagAndroidNoteName[] = "Android"; void MemtagAndroidNote::writeTo(uint8_t *buf) { - assert(sizeof(kMemtagAndroidNoteName) == 8); // ABI check for Android 11 & 12. + static_assert(sizeof(kMemtagAndroidNoteName) == 8, + "ABI check for Android 11 & 12."); assert((config->androidMemtagStack || config->androidMemtagHeap) && "Should only be synthesizing a note if heap || stack is enabled."); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 705cc7bf9766..2994e79cd1de 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1973,8 +1973,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { continue; for (Symbol *sym : file->requiredSymbols) if (sym->isUndefined() && !sym->isWeak()) - diagnose(toString(file) + ": undefined reference to " + - toString(*sym) + " [--no-allow-shlib-undefined]"); + diagnose("undefined reference due to --no-allow-shlib-undefined: " + + toString(*sym) + "\n>>> referenced by " + toString(file)); } } diff --git a/lld/MachO/Arch/ARM.cpp b/lld/MachO/Arch/ARM.cpp index fd215ed99b59..424df414229f 100644 --- a/lld/MachO/Arch/ARM.cpp +++ b/lld/MachO/Arch/ARM.cpp @@ -38,35 +38,27 @@ struct ARM : TargetInfo { uint64_t entryAddr) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } void handleDtraceReloc(const Symbol *sym, const Reloc &r, uint8_t *loc) const override; }; - } // namespace -const RelocAttrs &ARM::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 10> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"VANILLA", /* FIXME populate this */ B(_0)}, - {"PAIR", /* FIXME populate this */ B(_0)}, - {"SECTDIFF", /* FIXME populate this */ B(_0)}, - {"LOCAL_SECTDIFF", /* FIXME populate this */ B(_0)}, - {"PB_LA_PTR", /* FIXME populate this */ B(_0)}, - {"BR24", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"BR22", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"32BIT_BRANCH", /* FIXME populate this */ B(_0)}, - {"HALF", /* FIXME populate this */ B(_0)}, - {"HALF_SECTDIFF", /* FIXME populate this */ B(_0)}, + {"VANILLA", /* FIXME populate this */ B(_0)}, + {"PAIR", /* FIXME populate this */ B(_0)}, + {"SECTDIFF", /* FIXME populate this */ B(_0)}, + {"LOCAL_SECTDIFF", /* FIXME populate this */ B(_0)}, + {"PB_LA_PTR", /* FIXME populate this */ B(_0)}, + {"BR24", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"BR22", B(PCREL) | B(LOCAL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"32BIT_BRANCH", /* FIXME populate this */ B(_0)}, + {"HALF", /* FIXME populate this */ B(_0)}, + {"HALF_SECTDIFF", /* FIXME populate this */ B(_0)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; int64_t ARM::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset, relocation_info rel) const { @@ -167,6 +159,8 @@ ARM::ARM(uint32_t cpuSubtype) : TargetInfo(ILP32()) { stubSize = 0 /* FIXME */; stubHelperHeaderSize = 0 /* FIXME */; stubHelperEntrySize = 0 /* FIXME */; + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } TargetInfo *macho::createARMTargetInfo(uint32_t cpuSubtype) { diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp index 5901a9e09b35..46e935aa1fd1 100644 --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -34,7 +34,6 @@ struct ARM64 : ARM64Common { void writeStubHelperHeader(uint8_t *buf) const override; void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; void populateThunk(InputSection *thunk, Symbol *funcSym) override; void applyOptimizationHints(uint8_t *, const ConcatInputSection *, ArrayRef<uint64_t>) const override; @@ -48,31 +47,24 @@ struct ARM64 : ARM64Common { // absolute version of this relocation. The semantics of the absolute relocation // are weird -- it results in the value of the GOT slot being written, instead // of the address. Let's not support it unless we find a real-world use case. - -const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 11> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"UNSIGNED", - B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, - {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, - {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, - {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, - {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, - {"GOT_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, - {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, - {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, - {"TLVP_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, - {"ADDEND", B(ADDEND)}, + {"UNSIGNED", + B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, + {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, + {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, + {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, + {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, + {"GOT_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, + {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, + {"TLVP_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, + {"ADDEND", B(ADDEND)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; static constexpr uint32_t stubCode[] = { 0x90000010, // 00: adrp x16, __la_symbol_ptr@page @@ -150,6 +142,8 @@ ARM64::ARM64() : ARM64Common(LP64()) { stubHelperHeaderSize = sizeof(stubHelperHeaderCode); stubHelperEntrySize = sizeof(stubHelperEntryCode); + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } namespace { diff --git a/lld/MachO/Arch/ARM64_32.cpp b/lld/MachO/Arch/ARM64_32.cpp index 5be411e40342..4830c2629761 100644 --- a/lld/MachO/Arch/ARM64_32.cpp +++ b/lld/MachO/Arch/ARM64_32.cpp @@ -33,36 +33,29 @@ struct ARM64_32 : ARM64Common { void writeStubHelperHeader(uint8_t *buf) const override; void writeStubHelperEntry(uint8_t *buf, const Symbol &, uint64_t entryAddr) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; }; } // namespace // These are very similar to ARM64's relocation attributes, except that we don't // have the BYTE8 flag set. -const RelocAttrs &ARM64_32::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 11> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4)}, - {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, - {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, - {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, - {"GOT_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, - {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, - {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, - {"TLVP_LOAD_PAGEOFF12", - B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, - {"ADDEND", B(ADDEND)}, + {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4)}, + {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, + {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, + {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, + {"GOT_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, + {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, + {"TLVP_LOAD_PAGEOFF12", + B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, + {"ADDEND", B(ADDEND)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; // The stub code is fairly similar to ARM64's, except that we load pointers into // 32-bit 'w' registers, instead of the 64-bit 'x' ones. @@ -112,6 +105,8 @@ ARM64_32::ARM64_32() : ARM64Common(ILP32()) { stubSize = sizeof(stubCode); stubHelperHeaderSize = sizeof(stubHelperHeaderCode); stubHelperEntrySize = sizeof(stubHelperEntryCode); + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } TargetInfo *macho::createARM64_32TargetInfo() { diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index d2efa5bb3451..b1c46cbab9d4 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -37,36 +37,28 @@ struct X86_64 : TargetInfo { uint64_t entryAddr) const override; void relaxGotLoad(uint8_t *loc, uint8_t type) const override; - const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } void handleDtraceReloc(const Symbol *sym, const Reloc &r, uint8_t *loc) const override; }; - } // namespace -const RelocAttrs &X86_64::getRelocAttrs(uint8_t type) const { - static const std::array<RelocAttrs, 10> relocAttrsArray{{ +static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{ #define B(x) RelocAttrBits::x - {"UNSIGNED", - B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, - {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, - {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, - {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, - {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, - {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, + {"UNSIGNED", + B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, + {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, + {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, + {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, + {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, + {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, + {"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, #undef B - }}; - assert(type < relocAttrsArray.size() && "invalid relocation type"); - if (type >= relocAttrsArray.size()) - return invalidRelocAttrs; - return relocAttrsArray[type]; -} +}}; static int pcrelOffset(uint8_t type) { switch (type) { @@ -196,6 +188,8 @@ X86_64::X86_64() : TargetInfo(LP64()) { stubSize = sizeof(stub); stubHelperHeaderSize = sizeof(stubHelperHeader); stubHelperEntrySize = sizeof(stubHelperEntry); + + relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; } TargetInfo *macho::createX86_64TargetInfo() { diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index ccf71b6535ea..c7e4b4f96782 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -109,7 +109,7 @@ struct Configuration { bool archMultiple = false; bool exportDynamic = false; bool forceLoadObjC = false; - bool forceLoadSwift = false; + bool forceLoadSwift = false; // Only applies to LC_LINKER_OPTIONs. bool staticLink = false; bool implicitDylibs = false; bool isPic = false; @@ -188,6 +188,8 @@ struct Configuration { SymbolPatterns unexportedSymbols; SymbolPatterns whyLive; + std::vector<std::pair<llvm::StringRef, llvm::StringRef>> aliasedSymbols; + SymtabPresence localSymbolsPresence = SymtabPresence::All; SymbolPatterns localSymbolPatterns; @@ -202,13 +204,6 @@ struct Configuration { } }; -// Whether to force-load an archive. -enum class ForceLoad { - Default, // Apply -all_load or -ObjC behaviors if those flags are enabled - Yes, // Always load the archive, regardless of other flags - No, // Never load the archive, regardless of other flags -}; - extern std::unique_ptr<Configuration> config; } // namespace macho diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index abfe381f41e0..454708fad4ef 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -247,9 +247,24 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) { return CHECK(parseCachePruningPolicy(ltoPolicy), "invalid LTO cache policy"); } -static DenseMap<StringRef, ArchiveFile *> loadedArchives; +// What caused a given library to be loaded. Only relevant for archives. +// Note that this does not tell us *how* we should load the library, i.e. +// whether we should do it lazily or eagerly (AKA force loading). The "how" is +// decided within addFile(). +enum class LoadType { + CommandLine, // Library was passed as a regular CLI argument + CommandLineForce, // Library was passed via `-force_load` + LCLinkerOption, // Library was passed via LC_LINKER_OPTIONS +}; + +struct ArchiveFileInfo { + ArchiveFile *file; + bool isCommandLineLoad; +}; + +static DenseMap<StringRef, ArchiveFileInfo> loadedArchives; -static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, +static InputFile *addFile(StringRef path, LoadType loadType, bool isLazy = false, bool isExplicit = true, bool isBundleLoader = false) { Optional<MemoryBufferRef> buffer = readFile(path); @@ -261,6 +276,7 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, file_magic magic = identify_magic(mbref.getBuffer()); switch (magic) { case file_magic::archive: { + bool isCommandLineLoad = loadType != LoadType::LCLinkerOption; // Avoid loading archives twice. If the archives are being force-loaded, // loading them twice would create duplicate symbol errors. In the // non-force-loading case, this is just a minor performance optimization. @@ -268,23 +284,45 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, // loadArchiveMember() call below may recursively call addFile() and // invalidate this reference. auto entry = loadedArchives.find(path); - if (entry != loadedArchives.end()) - return entry->second; - std::unique_ptr<object::Archive> archive = CHECK( - object::Archive::create(mbref), path + ": failed to parse archive"); + ArchiveFile *file; + if (entry == loadedArchives.end()) { + // No cached archive, we need to create a new one + std::unique_ptr<object::Archive> archive = CHECK( + object::Archive::create(mbref), path + ": failed to parse archive"); - if (!archive->isEmpty() && !archive->hasSymbolTable()) - error(path + ": archive has no index; run ranlib to add one"); + if (!archive->isEmpty() && !archive->hasSymbolTable()) + error(path + ": archive has no index; run ranlib to add one"); + file = make<ArchiveFile>(std::move(archive)); + } else { + file = entry->second.file; + // Command-line loads take precedence. If file is previously loaded via + // command line, or is loaded via LC_LINKER_OPTION and being loaded via + // LC_LINKER_OPTION again, using the cached archive is enough. + if (entry->second.isCommandLineLoad || !isCommandLineLoad) + return file; + } - auto *file = make<ArchiveFile>(std::move(archive)); - if ((forceLoadArchive == ForceLoad::Default && config->allLoad) || - forceLoadArchive == ForceLoad::Yes) { + bool isLCLinkerForceLoad = loadType == LoadType::LCLinkerOption && + config->forceLoadSwift && + path::filename(path).startswith("libswift"); + if ((isCommandLineLoad && config->allLoad) || + loadType == LoadType::CommandLineForce || isLCLinkerForceLoad) { if (Optional<MemoryBufferRef> buffer = readFile(path)) { Error e = Error::success(); for (const object::Archive::Child &c : file->getArchive().children(e)) { - StringRef reason = - forceLoadArchive == ForceLoad::Yes ? "-force_load" : "-all_load"; + StringRef reason; + switch (loadType) { + case LoadType::LCLinkerOption: + reason = "LC_LINKER_OPTION"; + break; + case LoadType::CommandLineForce: + reason = "-force_load"; + break; + case LoadType::CommandLine: + reason = "-all_load"; + break; + } if (Error e = file->fetch(c, reason)) error(toString(file) + ": " + reason + " failed to load archive member: " + toString(std::move(e))); @@ -293,8 +331,7 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, error(toString(file) + ": Archive::children failed: " + toString(std::move(e))); } - } else if (forceLoadArchive == ForceLoad::Default && - config->forceLoadObjC) { + } else if (isCommandLineLoad && config->forceLoadObjC) { for (const object::Archive::Symbol &sym : file->getArchive().symbols()) if (sym.getName().startswith(objc::klass)) file->fetch(sym); @@ -318,7 +355,8 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, } file->addLazySymbols(); - newFile = loadedArchives[path] = file; + loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad}; + newFile = file; break; } case file_magic::macho_object: @@ -368,11 +406,10 @@ static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive, } static void addLibrary(StringRef name, bool isNeeded, bool isWeak, - bool isReexport, bool isExplicit, - ForceLoad forceLoadArchive) { + bool isReexport, bool isExplicit, LoadType loadType) { if (Optional<StringRef> path = findLibrary(name)) { if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(*path, forceLoadArchive, /*isLazy=*/false, isExplicit))) { + addFile(*path, loadType, /*isLazy=*/false, isExplicit))) { if (isNeeded) dylibFile->forceNeeded = true; if (isWeak) @@ -389,14 +426,13 @@ static void addLibrary(StringRef name, bool isNeeded, bool isWeak, static DenseSet<StringRef> loadedObjectFrameworks; static void addFramework(StringRef name, bool isNeeded, bool isWeak, - bool isReexport, bool isExplicit, - ForceLoad forceLoadArchive) { + bool isReexport, bool isExplicit, LoadType loadType) { if (Optional<StringRef> path = findFramework(name)) { if (loadedObjectFrameworks.contains(*path)) return; InputFile *file = - addFile(*path, forceLoadArchive, /*isLazy=*/false, isExplicit); + addFile(*path, loadType, /*isLazy=*/false, isExplicit, false); if (auto *dylibFile = dyn_cast_or_null<DylibFile>(file)) { if (isNeeded) dylibFile->forceNeeded = true; @@ -436,15 +472,14 @@ void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) { unsigned i = 0; StringRef arg = argv[i]; if (arg.consume_front("-l")) { - ForceLoad forceLoadArchive = - config->forceLoadSwift && arg.startswith("swift") ? ForceLoad::Yes - : ForceLoad::No; addLibrary(arg, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, forceLoadArchive); + /*isReexport=*/false, /*isExplicit=*/false, + LoadType::LCLinkerOption); } else if (arg == "-framework") { StringRef name = argv[++i]; addFramework(name, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, ForceLoad::No); + /*isReexport=*/false, /*isExplicit=*/false, + LoadType::LCLinkerOption); } else { error(arg + " is not allowed in LC_LINKER_OPTION"); } @@ -456,7 +491,7 @@ static void addFileList(StringRef path, bool isLazy) { return; MemoryBufferRef mbref = *buffer; for (StringRef path : args::getLines(mbref)) - addFile(rerootPath(path), ForceLoad::Default, isLazy); + addFile(rerootPath(path), LoadType::CommandLine, isLazy); } // We expect sub-library names of the form "libfoo", which will match a dylib @@ -468,8 +503,7 @@ static bool markReexport(StringRef searchName, ArrayRef<StringRef> extensions) { if (auto *dylibFile = dyn_cast<DylibFile>(file)) { StringRef filename = path::filename(dylibFile->getName()); if (filename.consume_front(searchName) && - (filename.empty() || - find(extensions, filename) != extensions.end())) { + (filename.empty() || llvm::is_contained(extensions, filename))) { dylibFile->reexport = true; return true; } @@ -552,7 +586,7 @@ static void initializeSectionRenameMap() { section_names::objcCatList, section_names::objcNonLazyCatList, section_names::objcProtoList, - section_names::objcImageInfo}; + section_names::objCImageInfo}; for (StringRef s : v) config->sectionRenameMap[{segment_names::data, s}] = { segment_names::dataConst, s}; @@ -976,30 +1010,30 @@ static void createFiles(const InputArgList &args) { switch (opt.getID()) { case OPT_INPUT: - addFile(rerootPath(arg->getValue()), ForceLoad::Default, isLazy); + addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy); break; case OPT_needed_library: if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(rerootPath(arg->getValue()), ForceLoad::Default))) + addFile(rerootPath(arg->getValue()), LoadType::CommandLine))) dylibFile->forceNeeded = true; break; case OPT_reexport_library: if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(rerootPath(arg->getValue()), ForceLoad::Default))) { + addFile(rerootPath(arg->getValue()), LoadType::CommandLine))) { config->hasReexports = true; dylibFile->reexport = true; } break; case OPT_weak_library: if (auto *dylibFile = dyn_cast_or_null<DylibFile>( - addFile(rerootPath(arg->getValue()), ForceLoad::Default))) + addFile(rerootPath(arg->getValue()), LoadType::CommandLine))) dylibFile->forceWeakImport = true; break; case OPT_filelist: addFileList(arg->getValue(), isLazy); break; case OPT_force_load: - addFile(rerootPath(arg->getValue()), ForceLoad::Yes); + addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce); break; case OPT_l: case OPT_needed_l: @@ -1007,7 +1041,7 @@ static void createFiles(const InputArgList &args) { case OPT_weak_l: addLibrary(arg->getValue(), opt.getID() == OPT_needed_l, opt.getID() == OPT_weak_l, opt.getID() == OPT_reexport_l, - /*isExplicit=*/true, ForceLoad::Default); + /*isExplicit=*/true, LoadType::CommandLine); break; case OPT_framework: case OPT_needed_framework: @@ -1016,7 +1050,7 @@ static void createFiles(const InputArgList &args) { addFramework(arg->getValue(), opt.getID() == OPT_needed_framework, opt.getID() == OPT_weak_framework, opt.getID() == OPT_reexport_framework, /*isExplicit=*/true, - ForceLoad::Default); + LoadType::CommandLine); break; case OPT_start_lib: if (isLazy) @@ -1068,6 +1102,8 @@ static void gatherInputSections() { } } } + if (!file->objCImageInfo.empty()) + in.objCImageInfo->addFile(file); } assert(inputOrder <= UnspecifiedInputOrder); } @@ -1264,9 +1300,8 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, if (const Arg *arg = args.getLastArg(OPT_bundle_loader)) { if (config->outputType != MH_BUNDLE) error("-bundle_loader can only be used with MachO bundle output"); - addFile(arg->getValue(), ForceLoad::Default, /*isLazy=*/false, - /*isExplicit=*/false, - /*isBundleLoader=*/true); + addFile(arg->getValue(), LoadType::CommandLine, /*isLazy=*/false, + /*isExplicit=*/false, /*isBundleLoader=*/true); } if (const Arg *arg = args.getLastArg(OPT_umbrella)) { if (config->outputType != MH_DYLIB) @@ -1306,6 +1341,11 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); + for (const Arg *arg : args.filtered(OPT_alias)) { + config->aliasedSymbols.push_back( + std::make_pair(arg->getValue(0), arg->getValue(1))); + } + // FIXME: Add a commandline flag for this too. config->zeroModTime = getenv("ZERO_AR_DATE"); @@ -1558,6 +1598,18 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, createSyntheticSections(); createSyntheticSymbols(); + for (const auto &pair : config->aliasedSymbols) { + if (const auto &sym = symtab->find(pair.first)) { + if (const auto &defined = dyn_cast<Defined>(sym)) { + symtab->aliasDefined(defined, pair.second); + continue; + } + } + + warn("undefined base symbol '" + pair.first + "' for alias '" + + pair.second + "'\n"); + } + if (config->hasExplicitExports) { parallelForEach(symtab->getSymbols(), [](Symbol *sym) { if (auto *defined = dyn_cast<Defined>(sym)) { @@ -1616,7 +1668,9 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, if (config->icfLevel != ICFLevel::none) { if (config->icfLevel == ICFLevel::safe) markAddrSigSymbols(); - foldIdenticalSections(); + foldIdenticalSections(/*onlyCfStrings=*/false); + } else if (config->dedupLiterals) { + foldIdenticalSections(/*onlyCfStrings=*/true); } // Write to an output file. diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index ad029142681f..d06fbc6db840 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -384,23 +384,18 @@ void macho::markAddrSigSymbols() { continue; assert(addrSigSection->subsections.size() == 1); - Subsection *subSection = &addrSigSection->subsections[0]; - ArrayRef<unsigned char> &contents = subSection->isec->data; - - const uint8_t *pData = contents.begin(); - while (pData != contents.end()) { - unsigned size; - const char *err; - uint32_t symIndex = decodeULEB128(pData, &size, contents.end(), &err); - if (err) - fatal(toString(file) + ": could not decode addrsig section: " + err); - markSymAsAddrSig(obj->symbols[symIndex]); - pData += size; + const InputSection *isec = addrSigSection->subsections[0].isec; + + for (const Reloc &r : isec->relocs) { + if (auto *sym = r.referent.dyn_cast<Symbol *>()) + markSymAsAddrSig(sym); + else + error(toString(isec) + ": unexpected section relocation"); } } } -void macho::foldIdenticalSections() { +void macho::foldIdenticalSections(bool onlyCfStrings) { TimeTraceScope timeScope("Fold Identical Code Sections"); // The ICF equivalence-class segregation algorithm relies on pre-computed // hashes of InputSection::data for the ConcatOutputSection::inputs and all @@ -420,10 +415,12 @@ void macho::foldIdenticalSections() { uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { // FIXME: consider non-code __text sections as hashable? - bool isHashable = (isCodeSection(isec) || isCfStringSection(isec) || - isClassRefsSection(isec)) && - !isec->keepUnique && !isec->shouldOmitFromOutput() && - sectionType(isec->getFlags()) == MachO::S_REGULAR; + bool isHashable = + (!onlyCfStrings || isCfStringSection(isec)) && + (isCodeSection(isec) || isCfStringSection(isec) || + isClassRefsSection(isec) || isGccExceptTabSection(isec)) && + !isec->keepUnique && !isec->shouldOmitFromOutput() && + sectionType(isec->getFlags()) == MachO::S_REGULAR; if (isHashable) { hashable.push_back(isec); for (Defined *d : isec->symbols) diff --git a/lld/MachO/ICF.h b/lld/MachO/ICF.h index a287692d7ffa..b7e695d81d34 100644 --- a/lld/MachO/ICF.h +++ b/lld/MachO/ICF.h @@ -19,7 +19,7 @@ class Symbol; void markAddrSigSymbols(); void markSymAsAddrSig(Symbol *s); -void foldIdenticalSections(); +void foldIdenticalSections(bool onlyCfStrings); } // namespace macho } // namespace lld diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index fda6900edabe..e3bf553e5334 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -263,11 +263,15 @@ static Optional<size_t> getRecordSize(StringRef segname, StringRef name) { if (segname == segment_names::ld) return target->wordSize == 8 ? 32 : 20; } - if (config->icfLevel == ICFLevel::none) + if (!config->dedupLiterals) return {}; if (name == section_names::cfString && segname == segment_names::data) return target->wordSize == 8 ? 32 : 16; + + if (config->icfLevel == ICFLevel::none) + return {}; + if (name == section_names::objcClassRefs && segname == segment_names::data) return target->wordSize; return {}; @@ -359,6 +363,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) { // have the same name without causing duplicate symbol errors. To avoid // spurious duplicate symbol errors, we do not parse these sections. // TODO: Evaluate whether the bitcode metadata is needed. + } else if (name == section_names::objCImageInfo && + segname == segment_names::data) { + objCImageInfo = data; } else { if (name == section_names::addrSig) addrSigSection = sections.back(); @@ -556,7 +563,7 @@ void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) { if (section == sections.end()) return; ++subsection; - if (subsection == (*section)->subsections.end()) { + while (subsection == (*section)->subsections.end()) { ++section; if (section == sections.end()) return; @@ -578,6 +585,7 @@ void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) { if (section == sections.end()) break; updateAddr(); + assert(hintStart->offset0 >= subsectionBase); } } @@ -899,7 +907,6 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders, if (sym.n_type & N_STAB) continue; - StringRef name = strtab + sym.n_strx; if ((sym.n_type & N_TYPE) == N_SECT) { Subsections &subsections = sections[sym.n_sect - 1]->subsections; // parseSections() may have chosen not to parse this section. @@ -909,7 +916,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders, } else if (isUndef(sym)) { undefineds.push_back(i); } else { - symbols[i] = parseNonSectionSymbol(sym, name); + symbols[i] = parseNonSectionSymbol(sym, StringRef(strtab + sym.n_strx)); } } @@ -1186,14 +1193,27 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const { void ObjFile::registerCompactUnwind(Section &compactUnwindSection) { for (const Subsection &subsection : compactUnwindSection.subsections) { ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec); - // Hack!! Since each CUE contains a different function address, if ICF - // operated naively and compared the entire contents of each CUE, entries - // with identical unwind info but belonging to different functions would - // never be considered equivalent. To work around this problem, we slice - // away the function address here. (Note that we do not adjust the offsets - // of the corresponding relocations.) We rely on `relocateCompactUnwind()` - // to correctly handle these truncated input sections. - isec->data = isec->data.slice(target->wordSize); + // Hack!! Each compact unwind entry (CUE) has its UNSIGNED relocations embed + // their addends in its data. Thus if ICF operated naively and compared the + // entire contents of each CUE, entries with identical unwind info but e.g. + // belonging to different functions would never be considered equivalent. To + // work around this problem, we remove some parts of the data containing the + // embedded addends. In particular, we remove the function address and LSDA + // pointers. Since these locations are at the start and end of the entry, + // we can do this using a simple, efficient slice rather than performing a + // copy. We are not losing any information here because the embedded + // addends have already been parsed in the corresponding Reloc structs. + // + // Removing these pointers would not be safe if they were pointers to + // absolute symbols. In that case, there would be no corresponding + // relocation. However, (AFAIK) MC cannot emit references to absolute + // symbols for either the function address or the LSDA. However, it *can* do + // so for the personality pointer, so we are not slicing that field away. + // + // Note that we do not adjust the offsets of the corresponding relocations; + // instead, we rely on `relocateCompactUnwind()` to correctly handle these + // truncated input sections. + isec->data = isec->data.slice(target->wordSize, 8 + target->wordSize); uint32_t encoding = read32le(isec->data.data() + sizeof(uint32_t)); // llvm-mc omits CU entries for functions that need DWARF encoding, but // `ld -r` doesn't. We can ignore them because we will re-synthesize these @@ -1240,11 +1260,23 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) { continue; } d->unwindEntry = isec; - // Since we've sliced away the functionAddress, we should remove the - // corresponding relocation too. Given that clang emits relocations in - // reverse order of address, this relocation should be at the end of the - // vector for most of our input object files, so this is typically an O(1) - // operation. + // Now that the symbol points to the unwind entry, we can remove the reloc + // that points from the unwind entry back to the symbol. + // + // First, the symbol keeps the unwind entry alive (and not vice versa), so + // this keeps dead-stripping simple. + // + // Moreover, it reduces the work that ICF needs to do to figure out if + // functions with unwind info are foldable. + // + // However, this does make it possible for ICF to fold CUEs that point to + // distinct functions (if the CUEs are otherwise identical). + // UnwindInfoSection takes care of this by re-duplicating the CUEs so that + // each one can hold a distinct functionAddress value. + // + // Given that clang emits relocations in reverse order of address, this + // relocation should be at the end of the vector for most of our input + // object files, so this erase() is typically an O(1) operation. it = isec->relocs.erase(it); } } @@ -1500,6 +1532,14 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) { Defined *funcSym; if (funcAddrRelocIt != isec->relocs.end()) { funcSym = targetSymFromCanonicalSubtractor(isec, funcAddrRelocIt); + // Canonicalize the symbol. If there are multiple symbols at the same + // address, we want both `registerEhFrame` and `registerCompactUnwind` + // to register the unwind entry under same symbol. + // This is not particularly efficient, but we should run into this case + // infrequently (only when handling the output of `ld -r`). + if (funcSym->isec) + funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec), + funcSym->value); } else { funcSym = findSymbolAtAddress(sections, funcAddr); ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index efddc1c46782..5deb05272a6b 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -120,6 +120,7 @@ public: std::vector<Symbol *> symbols; std::vector<Section *> sections; + ArrayRef<uint8_t> objCImageInfo; // If not empty, this stores the name of the archive containing this file. // We use this string for creating error messages. diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index df312525df61..76b11d9da4f8 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -345,6 +345,11 @@ bool macho::isEhFrameSection(const InputSection *isec) { isec->getSegName() == segment_names::text; } +bool macho::isGccExceptTabSection(const InputSection *isec) { + return isec->getName() == section_names::gccExceptTab && + isec->getSegName() == segment_names::text; +} + std::string lld::toString(const InputSection *isec) { return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index e8710c25f534..afe76c56b536 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -281,6 +281,7 @@ bool isCodeSection(const InputSection *); bool isCfStringSection(const InputSection *); bool isClassRefsSection(const InputSection *); bool isEhFrameSection(const InputSection *); +bool isGccExceptTabSection(const InputSection *); extern std::vector<ConcatInputSection *> inputSections; @@ -320,7 +321,7 @@ constexpr const char objcCatList[] = "__objc_catlist"; constexpr const char objcClassList[] = "__objc_classlist"; constexpr const char objcClassRefs[] = "__objc_classrefs"; constexpr const char objcConst[] = "__objc_const"; -constexpr const char objcImageInfo[] = "__objc_imageinfo"; +constexpr const char objCImageInfo[] = "__objc_imageinfo"; constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; constexpr const char objcProtoList[] = "__objc_protolist"; diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index e87c4c413106..0b76216d24b5 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -138,8 +138,22 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o"); } - if (!config->ltoObjPath.empty()) - fs::create_directories(config->ltoObjPath); + // In ThinLTO mode, Clang passes a temporary directory in -object_path_lto, + // while the argument is a single file in FullLTO mode. + bool objPathIsDir = true; + if (!config->ltoObjPath.empty()) { + if (std::error_code ec = fs::create_directories(config->ltoObjPath)) + fatal("cannot create LTO object path " + config->ltoObjPath + ": " + + ec.message()); + + if (!fs::is_directory(config->ltoObjPath)) { + objPathIsDir = false; + unsigned objCount = + count_if(buf, [](const SmallString<0> &b) { return !b.empty(); }); + if (objCount > 1) + fatal("-object_path_lto must specify a directory when using ThinLTO"); + } + } std::vector<ObjFile *> ret; for (unsigned i = 0; i != maxTasks; ++i) { @@ -149,9 +163,10 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { uint32_t modTime = 0; if (!config->ltoObjPath.empty()) { filePath = config->ltoObjPath; - path::append(filePath, Twine(i) + "." + - getArchitectureName(config->arch()) + - ".lto.o"); + if (objPathIsDir) + path::append(filePath, Twine(i) + "." + + getArchitectureName(config->arch()) + + ".lto.o"); saveBuffer(buf[i], filePath); modTime = getModTime(filePath); } diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 9b57f8a0bd49..b3d74a83f582 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -505,7 +505,6 @@ def reexported_symbols_list : Separate<["-"], "reexported_symbols_list">, def alias : MultiArg<["-"], "alias", 2>, MetaVarName<"<symbol_name> <alternate_name>">, HelpText<"Create a symbol alias with default global visibility">, - Flags<[HelpHidden]>, Group<grp_resolve>; def alias_list : Separate<["-"], "alias_list">, MetaVarName<"<file>">, diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp index ac4878343ac0..fbc7796c3f8a 100644 --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -370,7 +370,7 @@ macho::PriorityBuilder::buildInputSectionPriorities() { if (!symbolPriority) return; size_t &priority = sectionPriorities[sym->isec]; - priority = std::max(priority, symbolPriority.getValue()); + priority = std::max(priority, symbolPriority.value()); }; // TODO: Make sure this handles weak symbols correctly. diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 7bda1d13069f..3667a7137291 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -117,6 +117,13 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, return defined; } +Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) { + return addDefined(target, src->getFile(), src->isec, src->value, src->size, + src->isWeakDef(), src->privateExtern, src->thumb, + src->referencedDynamically, src->noDeadStrip, + src->weakDefCanBeHidden); +} + Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, bool isWeakRef) { Symbol *s; diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h index 0ecfa6dcd093..1b090105e0ca 100644 --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -43,6 +43,8 @@ public: bool isReferencedDynamically, bool noDeadStrip, bool isWeakDefCanBeHidden); + Defined *aliasDefined(Defined *src, StringRef target); + Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef); Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align, diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 0a57de319994..8e7ca520336c 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -57,7 +57,7 @@ static void sha256(const uint8_t *data, size_t len, uint8_t *output) { #else ArrayRef<uint8_t> block(data, len); std::array<uint8_t, 32> hash = SHA256::hash(block); - assert(hash.size() == CodeSignatureSection::hashSize); + static_assert(hash.size() == CodeSignatureSection::hashSize, ""); memcpy(output, hash.data(), hash.size()); #endif } @@ -164,62 +164,108 @@ RebaseSection::RebaseSection() : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} namespace { -struct Rebase { - OutputSegment *segment = nullptr; - uint64_t offset = 0; - uint64_t consecutiveCount = 0; +struct RebaseState { + uint64_t sequenceLength; + uint64_t skipLength; }; } // namespace -// Rebase opcodes allow us to describe a contiguous sequence of rebase location -// using a single DO_REBASE opcode. To take advantage of it, we delay emitting -// `DO_REBASE` until we have reached the end of a contiguous sequence. -static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { - assert(rebase.consecutiveCount != 0); - if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { - os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | - rebase.consecutiveCount); +static void emitIncrement(uint64_t incr, raw_svector_ostream &os) { + assert(incr != 0); + + if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK && + (incr % target->wordSize) == 0) { + os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED | + (incr >> target->p2WordSize)); } else { - os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); - encodeULEB128(rebase.consecutiveCount, os); + os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); + encodeULEB128(incr, os); } - rebase.consecutiveCount = 0; } -static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, - Rebase &lastRebase, raw_svector_ostream &os) { - OutputSegment *seg = osec->parent; - uint64_t offset = osec->getSegmentOffset() + outSecOff; - if (lastRebase.segment != seg || lastRebase.offset != offset) { - if (lastRebase.consecutiveCount != 0) - encodeDoRebase(lastRebase, os); - - if (lastRebase.segment != seg) { - os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | - seg->index); - encodeULEB128(offset, os); - lastRebase.segment = seg; - lastRebase.offset = offset; +static void flushRebase(const RebaseState &state, raw_svector_ostream &os) { + assert(state.sequenceLength > 0); + + if (state.skipLength == target->wordSize) { + if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) { + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | + state.sequenceLength); } else { - assert(lastRebase.offset != offset); - uint64_t delta = offset - lastRebase.offset; - // For unknown reasons, ld64 checks if the scaled offset is strictly less - // than REBASE_IMMEDIATE_MASK instead of allowing equality. We match this - // behavior as a precaution. - if ((delta % target->wordSize == 0) && - (delta / target->wordSize < REBASE_IMMEDIATE_MASK)) { - os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED | - (delta / target->wordSize)); - } else { - os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); - encodeULEB128(delta, os); - } - lastRebase.offset = offset; + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + encodeULEB128(state.sequenceLength, os); + } + } else if (state.sequenceLength == 1) { + os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB); + encodeULEB128(state.skipLength - target->wordSize, os); + } else { + os << static_cast<uint8_t>( + REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB); + encodeULEB128(state.sequenceLength, os); + encodeULEB128(state.skipLength - target->wordSize, os); + } +} + +// Rebases are communicated to dyld using a bytecode, whose opcodes cause the +// memory location at a specific address to be rebased and/or the address to be +// incremented. +// +// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic +// one, encoding a series of evenly spaced addresses. This algorithm works by +// splitting up the sorted list of addresses into such chunks. If the locations +// are consecutive or the sequence consists of a single location, flushRebase +// will use a smaller, more specialized encoding. +static void encodeRebases(const OutputSegment *seg, + MutableArrayRef<Location> locations, + raw_svector_ostream &os) { + // dyld operates on segments. Translate section offsets into segment offsets. + for (Location &loc : locations) + loc.offset = + loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset); + // The algorithm assumes that locations are unique. + Location *end = + llvm::unique(locations, [](const Location &a, const Location &b) { + return a.offset == b.offset; + }); + size_t count = end - locations.begin(); + + os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | + seg->index); + assert(!locations.empty()); + uint64_t offset = locations[0].offset; + encodeULEB128(offset, os); + + RebaseState state{1, target->wordSize}; + + for (size_t i = 1; i < count; ++i) { + offset = locations[i].offset; + + uint64_t skip = offset - locations[i - 1].offset; + assert(skip != 0 && "duplicate locations should have been weeded out"); + + if (skip == state.skipLength) { + ++state.sequenceLength; + } else if (state.sequenceLength == 1) { + ++state.sequenceLength; + state.skipLength = skip; + } else if (skip < state.skipLength) { + // The address is lower than what the rebase pointer would be if the last + // location would be part of a sequence. We start a new sequence from the + // previous location. + --state.sequenceLength; + flushRebase(state, os); + + state.sequenceLength = 2; + state.skipLength = skip; + } else { + // The address is at some positive offset from the rebase pointer. We + // start a new sequence which begins with the current location. + flushRebase(state, os); + emitIncrement(skip - state.skipLength, os); + state.sequenceLength = 1; + state.skipLength = target->wordSize; } } - ++lastRebase.consecutiveCount; - // DO_REBASE causes dyld to both perform the binding and increment the offset - lastRebase.offset += target->wordSize; + flushRebase(state, os); } void RebaseSection::finalizeContents() { @@ -227,19 +273,20 @@ void RebaseSection::finalizeContents() { return; raw_svector_ostream os{contents}; - Rebase lastRebase; - os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); llvm::sort(locations, [](const Location &a, const Location &b) { return a.isec->getVA(a.offset) < b.isec->getVA(b.offset); }); - for (const Location &loc : locations) - encodeRebase(loc.isec->parent, loc.isec->getOffset(loc.offset), lastRebase, - os); - if (lastRebase.consecutiveCount != 0) - encodeDoRebase(lastRebase, os); + for (size_t i = 0, count = locations.size(); i < count;) { + const OutputSegment *seg = locations[i].isec->parent->parent; + size_t j = i + 1; + while (j < count && locations[j].isec->parent->parent == seg) + ++j; + encodeRebases(seg, {locations.data() + i, locations.data() + j}, os); + i = j; + } os << static_cast<uint8_t>(REBASE_OPCODE_DONE); } @@ -1574,6 +1621,86 @@ void WordLiteralSection::writeTo(uint8_t *buf) const { memcpy(buf + p.second * 4, &p.first, 4); } +ObjCImageInfoSection::ObjCImageInfoSection() + : SyntheticSection(segment_names::data, section_names::objCImageInfo) {} + +ObjCImageInfoSection::ImageInfo +ObjCImageInfoSection::parseImageInfo(const InputFile *file) { + ImageInfo info; + ArrayRef<uint8_t> data = file->objCImageInfo; + // The image info struct has the following layout: + // struct { + // uint32_t version; + // uint32_t flags; + // }; + if (data.size() < 8) { + warn(toString(file) + ": invalid __objc_imageinfo size"); + return info; + } + + auto *buf = reinterpret_cast<const uint32_t *>(data.data()); + if (read32le(buf) != 0) { + warn(toString(file) + ": invalid __objc_imageinfo version"); + return info; + } + + uint32_t flags = read32le(buf + 1); + info.swiftVersion = (flags >> 8) & 0xff; + info.hasCategoryClassProperties = flags & 0x40; + return info; +} + +static std::string swiftVersionString(uint8_t version) { + switch (version) { + case 1: + return "1.0"; + case 2: + return "1.1"; + case 3: + return "2.0"; + case 4: + return "3.0"; + case 5: + return "4.0"; + default: + return ("0x" + Twine::utohexstr(version)).str(); + } +} + +// Validate each object file's __objc_imageinfo and use them to generate the +// image info for the output binary. Only two pieces of info are relevant: +// 1. The Swift version (should be identical across inputs) +// 2. `bool hasCategoryClassProperties` (true only if true for all inputs) +void ObjCImageInfoSection::finalizeContents() { + assert(files.size() != 0); // should have already been checked via isNeeded() + + info.hasCategoryClassProperties = true; + const InputFile *firstFile; + for (auto file : files) { + ImageInfo inputInfo = parseImageInfo(file); + info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties; + + // swiftVersion 0 means no Swift is present, so no version checking required + if (inputInfo.swiftVersion == 0) + continue; + + if (info.swiftVersion != 0 && info.swiftVersion != inputInfo.swiftVersion) { + error("Swift version mismatch: " + toString(firstFile) + " has version " + + swiftVersionString(info.swiftVersion) + " but " + toString(file) + + " has version " + swiftVersionString(inputInfo.swiftVersion)); + } else { + info.swiftVersion = inputInfo.swiftVersion; + firstFile = file; + } + } +} + +void ObjCImageInfoSection::writeTo(uint8_t *buf) const { + uint32_t flags = info.hasCategoryClassProperties ? 0x40 : 0x0; + flags |= info.swiftVersion << 8; + write32le(buf + 4, flags); +} + void macho::createSyntheticSymbols() { auto addHeaderSymbol = [](const char *name) { symtab->addSynthetic(name, in.header->isec, /*value=*/0, diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 4f7d5288c9dc..afdd46d8a7de 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/MathExtras.h" @@ -600,6 +601,27 @@ private: std::unordered_map<uint32_t, uint64_t> literal4Map; }; +class ObjCImageInfoSection final : public SyntheticSection { +public: + ObjCImageInfoSection(); + bool isNeeded() const override { return !files.empty(); } + uint64_t getSize() const override { return 8; } + void addFile(const InputFile *file) { + assert(!file->objCImageInfo.empty()); + files.push_back(file); + } + void finalizeContents(); + void writeTo(uint8_t *buf) const override; + +private: + struct ImageInfo { + uint8_t swiftVersion = 0; + bool hasCategoryClassProperties = false; + } info; + static ImageInfo parseImageInfo(const InputFile *); + std::vector<const InputFile *> files; // files with image info +}; + struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; @@ -616,6 +638,7 @@ struct InStruct { StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; UnwindInfoSection *unwindInfo = nullptr; + ObjCImageInfoSection *objCImageInfo = nullptr; ConcatInputSection *imageLoaderCache = nullptr; }; diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h index 597502275dee..ef54dbb80408 100644 --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -65,14 +65,19 @@ public: // on a level of address indirection. virtual void relaxGotLoad(uint8_t *loc, uint8_t type) const = 0; - virtual const RelocAttrs &getRelocAttrs(uint8_t type) const = 0; - virtual uint64_t getPageSize() const = 0; virtual void populateThunk(InputSection *thunk, Symbol *funcSym) { llvm_unreachable("target does not use thunks"); } + const RelocAttrs &getRelocAttrs(uint8_t type) const { + assert(type < relocAttrs.size() && "invalid relocation type"); + if (type >= relocAttrs.size()) + return invalidRelocAttrs; + return relocAttrs[type]; + } + bool hasAttr(uint8_t type, RelocAttrBits bit) const { return getRelocAttrs(type).hasAttr(bit); } @@ -111,6 +116,8 @@ public: uint8_t subtractorRelocType; uint8_t unsignedRelocType; + llvm::ArrayRef<RelocAttrs> relocAttrs; + // We contrive this value as sufficiently far from any valid address that it // will always be out-of-range for any architecture. UINT64_MAX is not a // good choice because it is (a) only 1 away from wrapping to 0, and (b) the diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 8c3425a17459..322057947a3d 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -211,7 +211,7 @@ void UnwindInfoSection::addSymbol(const Defined *d) { // we use that as the key here. auto p = symbols.insert({{d->isec, d->value}, d}); // If we have multiple symbols at the same address, only one of them can have - // an associated CUE. + // an associated unwind entry. if (!p.second && d->unwindEntry) { assert(!p.first->second->unwindEntry); p.first->second = d; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 9395e1a068a3..7fad9f5564ce 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1164,6 +1164,10 @@ template <class LP> void Writer::run() { if (in.stubHelper->isNeeded()) in.stubHelper->setup(); + + if (in.objCImageInfo->isNeeded()) + in.objCImageInfo->finalizeContents(); + // At this point, we should know exactly which output sections are needed, // courtesy of scanSymbols() and scanRelocations(). createOutputSections<LP>(); @@ -1210,6 +1214,7 @@ void macho::createSyntheticSections() { in.stubs = make<StubsSection>(); in.stubHelper = make<StubHelperSection>(); in.unwindInfo = makeUnwindInfoSection(); + in.objCImageInfo = make<ObjCImageInfoSection>(); // This section contains space for just a single word, and will be used by // dyld to cache an address to the image loader it uses. |