aboutsummaryrefslogtreecommitdiff
path: root/lld/MachO/SyntheticSections.h
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2021-07-29 20:15:26 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2021-07-29 20:15:26 +0000
commit 344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /lld/MachO/SyntheticSections.h
parent b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
download src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz
src-344a3780b2e33f6ca763666c380202b18aab72a3.zip
the upstream release/13.x branch was created.
Diffstat (limited to 'lld/MachO/SyntheticSections.h')
-rw-r--r-- lld/MachO/SyntheticSections.h 363
1 files changed, 240 insertions, 123 deletions
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 7bca28de1386..bbb7adc37cb3 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -15,11 +15,17 @@
#include "OutputSection.h"
#include "OutputSegment.h"
#include "Target.h"
+#include "Writer.h"
-#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <unordered_map>
+
namespace llvm {
class DWARFUnit;
} // namespace llvm
@@ -27,32 +33,11 @@ class DWARFUnit;
namespace lld {
namespace macho {
-namespace section_names {
-
-constexpr const char pageZero[] = "__pagezero";
-constexpr const char common[] = "__common";
-constexpr const char header[] = "__mach_header";
-constexpr const char rebase[] = "__rebase";
-constexpr const char binding[] = "__binding";
-constexpr const char weakBinding[] = "__weak_binding";
-constexpr const char lazyBinding[] = "__lazy_binding";
-constexpr const char export_[] = "__export";
-constexpr const char symbolTable[] = "__symbol_table";
-constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
-constexpr const char stringTable[] = "__string_table";
-constexpr const char got[] = "__got";
-constexpr const char threadPtrs[] = "__thread_ptrs";
-constexpr const char unwindInfo[] = "__unwind_info";
-// these are not synthetic, but in service of synthetic __unwind_info
-constexpr const char compactUnwind[] = "__compact_unwind";
-constexpr const char ehFrame[] = "__eh_frame";
-
-} // namespace section_names
-
class Defined;
class DylibSymbol;
class LoadCommand;
class ObjFile;
+class UnwindInfoSection;
class SyntheticSection : public OutputSection {
public:
@@ -63,7 +48,10 @@ public:
return sec->kind() == SyntheticKind;
}
- const StringRef segname;
+ StringRef segname;
+ // This fake InputSection makes it easier for us to write code that applies
+ // generically to both user inputs and synthetics.
+ InputSection *isec;
};
// All sections in __LINKEDIT should inherit from this.
@@ -71,9 +59,11 @@ class LinkEditSection : public SyntheticSection {
public:
LinkEditSection(const char *segname, const char *name)
: SyntheticSection(segname, name) {
- align = WordSize;
+ align = target->wordSize;
}
+ virtual void finalizeContents() {}
+
// Sections in __LINKEDIT are special: their offsets are recorded in the
// load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
// headers.
@@ -88,31 +78,32 @@ public:
// NOTE: This assumes that the extra bytes required for alignment can be
// zero-valued bytes.
uint64_t getSize() const override final {
- return llvm::alignTo(getRawSize(), WordSize);
+ return llvm::alignTo(getRawSize(), align);
}
};
// The header of the Mach-O file, which must have a file offset of zero.
-class MachHeaderSection : public SyntheticSection {
+class MachHeaderSection final : public SyntheticSection {
public:
MachHeaderSection();
- void addLoadCommand(LoadCommand *);
bool isHidden() const override { return true; }
uint64_t getSize() const override;
void writeTo(uint8_t *buf) const override;
-private:
+ void addLoadCommand(LoadCommand *);
+
+protected:
std::vector<LoadCommand *> loadCommands;
uint32_t sizeOfCmds = 0;
};
// A hidden section that exists solely for the purpose of creating the
// __PAGEZERO segment, which is used to catch null pointer dereferences.
-class PageZeroSection : public SyntheticSection {
+class PageZeroSection final : public SyntheticSection {
public:
PageZeroSection();
bool isHidden() const override { return true; }
- uint64_t getSize() const override { return PageZeroSize; }
+ uint64_t getSize() const override { return target->pageZeroSize; }
uint64_t getFileSize() const override { return 0; }
void writeTo(uint8_t *buf) const override {}
};
@@ -124,64 +115,54 @@ public:
class NonLazyPointerSectionBase : public SyntheticSection {
public:
NonLazyPointerSectionBase(const char *segname, const char *name);
-
const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
-
bool isNeeded() const override { return !entries.empty(); }
-
- uint64_t getSize() const override { return entries.size() * WordSize; }
-
+ uint64_t getSize() const override {
+ return entries.size() * target->wordSize;
+ }
void writeTo(uint8_t *buf) const override;
-
void addEntry(Symbol *sym);
+ uint64_t getVA(uint32_t gotIndex) const {
+ return addr + gotIndex * target->wordSize;
+ }
private:
llvm::SetVector<const Symbol *> entries;
};
-class GotSection : public NonLazyPointerSectionBase {
+class GotSection final : public NonLazyPointerSectionBase {
public:
- GotSection()
- : NonLazyPointerSectionBase(segment_names::dataConst,
- section_names::got) {
- // TODO: section_64::reserved1 should be an index into the indirect symbol
- // table, which we do not currently emit
- }
+ GotSection();
};
-class TlvPointerSection : public NonLazyPointerSectionBase {
+class TlvPointerSection final : public NonLazyPointerSectionBase {
public:
- TlvPointerSection()
- : NonLazyPointerSectionBase(segment_names::data,
- section_names::threadPtrs) {}
+ TlvPointerSection();
};
-using SectionPointerUnion =
- llvm::PointerUnion<const InputSection *, const OutputSection *>;
-
struct Location {
- SectionPointerUnion section = nullptr;
- uint64_t offset = 0;
+ const InputSection *isec;
+ uint64_t offset;
- Location(SectionPointerUnion section, uint64_t offset)
- : section(section), offset(offset) {}
- uint64_t getVA() const;
+ Location(const InputSection *isec, uint64_t offset)
+ : isec(isec), offset(offset) {}
+ uint64_t getVA() const { return isec->getVA(offset); }
};
// Stores rebase opcodes, which tell dyld where absolute addresses have been
// encoded in the binary. If the binary is not loaded at its preferred address,
// dyld has to rebase these addresses by adding an offset to them.
-class RebaseSection : public LinkEditSection {
+class RebaseSection final : public LinkEditSection {
public:
RebaseSection();
- void finalizeContents();
+ void finalizeContents() override;
uint64_t getRawSize() const override { return contents.size(); }
bool isNeeded() const override { return !locations.empty(); }
void writeTo(uint8_t *buf) const override;
- void addEntry(SectionPointerUnion section, uint64_t offset) {
+ void addEntry(const InputSection *isec, uint64_t offset) {
if (config->isPic)
- locations.push_back({section, offset});
+ locations.push_back({isec, offset});
}
private:
@@ -190,40 +171,34 @@ private:
};
struct BindingEntry {
- const DylibSymbol *dysym;
int64_t addend;
Location target;
- BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target)
- : dysym(dysym), addend(addend), target(std::move(target)) {}
+ BindingEntry(int64_t addend, Location target)
+ : addend(addend), target(std::move(target)) {}
};
+template <class Sym>
+using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
+
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
-class BindingSection : public LinkEditSection {
+class BindingSection final : public LinkEditSection {
public:
BindingSection();
- void finalizeContents();
+ void finalizeContents() override;
uint64_t getRawSize() const override { return contents.size(); }
- bool isNeeded() const override { return !bindings.empty(); }
+ bool isNeeded() const override { return !bindingsMap.empty(); }
void writeTo(uint8_t *buf) const override;
- void addEntry(const DylibSymbol *dysym, SectionPointerUnion section,
+ void addEntry(const DylibSymbol *dysym, const InputSection *isec,
uint64_t offset, int64_t addend = 0) {
- bindings.emplace_back(dysym, addend, Location(section, offset));
+ bindingsMap[dysym].emplace_back(addend, Location(isec, offset));
}
private:
- std::vector<BindingEntry> bindings;
+ BindingsMap<const DylibSymbol *> bindingsMap;
SmallVector<char, 128> contents;
};
-struct WeakBindingEntry {
- const Symbol *symbol;
- int64_t addend;
- Location target;
- WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target)
- : symbol(symbol), addend(addend), target(std::move(target)) {}
-};
-
// Stores bind opcodes for telling dyld which weak symbols need coalescing.
// There are two types of entries in this section:
//
@@ -231,26 +206,26 @@ struct WeakBindingEntry {
// other dylibs should coalesce to.
//
// 2) Weak bindings: These tell dyld that a given symbol reference should
-// coalesce to a non-weak definition if one is found. Note that unlike in the
+// coalesce to a non-weak definition if one is found. Note that unlike the
// entries in the BindingSection, the bindings here only refer to these
// symbols by name, but do not specify which dylib to load them from.
-class WeakBindingSection : public LinkEditSection {
+class WeakBindingSection final : public LinkEditSection {
public:
WeakBindingSection();
- void finalizeContents();
+ void finalizeContents() override;
uint64_t getRawSize() const override { return contents.size(); }
bool isNeeded() const override {
- return !bindings.empty() || !definitions.empty();
+ return !bindingsMap.empty() || !definitions.empty();
}
void writeTo(uint8_t *buf) const override;
- void addEntry(const Symbol *symbol, SectionPointerUnion section,
- uint64_t offset, int64_t addend = 0) {
- bindings.emplace_back(symbol, addend, Location(section, offset));
+ void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
+ int64_t addend = 0) {
+ bindingsMap[symbol].emplace_back(addend, Location(isec, offset));
}
- bool hasEntry() const { return !bindings.empty(); }
+ bool hasEntry() const { return !bindingsMap.empty(); }
void addNonWeakDefinition(const Defined *defined) {
definitions.emplace_back(defined);
@@ -259,18 +234,11 @@ public:
bool hasNonWeakDefinition() const { return !definitions.empty(); }
private:
- std::vector<WeakBindingEntry> bindings;
+ BindingsMap<const Symbol *> bindingsMap;
std::vector<const Defined *> definitions;
SmallVector<char, 128> contents;
};
-// Whether a given symbol's address can only be resolved at runtime.
-bool needsBinding(const Symbol *);
-
-// Add bindings for symbols that need weak or non-lazy bindings.
-void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion,
- uint64_t offset, int64_t addend = 0);
-
// The following sections implement lazy symbol binding -- very similar to the
// PLT mechanism in ELF.
//
@@ -302,22 +270,33 @@ void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion,
// appropriate symbol is found at runtime. However, the bound addresses will
// still be written (non-lazily) into the LazyPointerSection.
-class StubsSection : public SyntheticSection {
+class StubsSection final : public SyntheticSection {
public:
StubsSection();
uint64_t getSize() const override;
bool isNeeded() const override { return !entries.empty(); }
+ void finalize() override;
void writeTo(uint8_t *buf) const override;
const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
// Returns whether the symbol was added. Note that every stubs entry will
// have a corresponding entry in the LazyPointerSection.
bool addEntry(Symbol *);
+ uint64_t getVA(uint32_t stubsIndex) const {
+ assert(isFinal || target->usesThunks());
+ // ConcatOutputSection::finalize() can seek the address of a
+ // stub before its address is assigned. Before __stubs is
+ // finalized, return a contrived out-of-range address.
+ return isFinal ? addr + stubsIndex * target->stubSize
+ : TargetInfo::outOfRangeVA;
+ }
+
+ bool isFinal = false; // is address assigned?
private:
llvm::SetVector<Symbol *> entries;
};
-class StubHelperSection : public SyntheticSection {
+class StubHelperSection final : public SyntheticSection {
public:
StubHelperSection();
uint64_t getSize() const override;
@@ -330,19 +309,9 @@ public:
Defined *dyldPrivate = nullptr;
};
-// This section contains space for just a single word, and will be used by dyld
-// to cache an address to the image loader it uses. Note that unlike the other
-// synthetic sections, which are OutputSections, the ImageLoaderCacheSection is
-// an InputSection that gets merged into the __data OutputSection.
-class ImageLoaderCacheSection : public InputSection {
-public:
- ImageLoaderCacheSection();
- uint64_t getSize() const override { return WordSize; }
-};
-
// Note that this section may also be targeted by non-lazy bindings. In
// particular, this happens when branch relocations target weak symbols.
-class LazyPointerSection : public SyntheticSection {
+class LazyPointerSection final : public SyntheticSection {
public:
LazyPointerSection();
uint64_t getSize() const override;
@@ -350,10 +319,10 @@ public:
void writeTo(uint8_t *buf) const override;
};
-class LazyBindingSection : public LinkEditSection {
+class LazyBindingSection final : public LinkEditSection {
public:
LazyBindingSection();
- void finalizeContents();
+ void finalizeContents() override;
uint64_t getRawSize() const override { return contents.size(); }
bool isNeeded() const override { return !entries.empty(); }
void writeTo(uint8_t *buf) const override;
@@ -370,15 +339,11 @@ private:
llvm::raw_svector_ostream os{contents};
};
-// Adds stubs and bindings where necessary (e.g. if the symbol is a
-// DylibSymbol.)
-void prepareBranchTarget(Symbol *);
-
// Stores a trie that describes the set of exported symbols.
-class ExportSection : public LinkEditSection {
+class ExportSection final : public LinkEditSection {
public:
ExportSection();
- void finalizeContents();
+ void finalizeContents() override;
uint64_t getRawSize() const override { return size; }
void writeTo(uint8_t *buf) const override;
@@ -389,8 +354,35 @@ private:
size_t size = 0;
};
+// Holds the table of 'data in code' entries: one record for each data
+// region that is located inside a code section.
+class DataInCodeSection final : public LinkEditSection {
+public:
+ DataInCodeSection();
+ void finalizeContents() override;
+ uint64_t getRawSize() const override { // entry count times the record size
+ return entries.size() * sizeof(llvm::MachO::data_in_code_entry);
+ }
+ void writeTo(uint8_t *buf) const override;
+
+private:
+ std::vector<llvm::MachO::data_in_code_entry> entries;
+};
+
+// Stores the addresses of functions, delta-encoded as ULEB128 values.
+class FunctionStartsSection final : public LinkEditSection {
+public:
+ FunctionStartsSection();
+ void finalizeContents() override;
+ uint64_t getRawSize() const override { return contents.size(); } // number of bytes held in `contents`
+ void writeTo(uint8_t *buf) const override;
+
+private:
+ SmallVector<char, 128> contents; // section bytes (presumably filled by finalizeContents() -- confirm)
+};
+
// Stores the strings referenced by the symbol table.
-class StringTableSection : public LinkEditSection {
+class StringTableSection final : public LinkEditSection {
public:
StringTableSection();
// Returns the start offset of the added string.
@@ -398,9 +390,12 @@ public:
uint64_t getRawSize() const override { return size; }
void writeTo(uint8_t *buf) const override;
+ static constexpr size_t emptyStringIndex = 1;
+
private:
// ld64 emits string tables which start with a space and a zero byte. We
// match its behavior here since some tools depend on it.
+ // Consequently, the empty string will be at index 1, not zero.
std::vector<StringRef> strings{" "};
size_t size = 2;
};
@@ -412,7 +407,7 @@ struct SymtabEntry {
struct StabsEntry {
uint8_t type = 0;
- uint32_t strx = 0;
+ uint32_t strx = StringTableSection::emptyStringIndex;
uint8_t sect = 0;
uint16_t desc = 0;
uint64_t value = 0;
@@ -427,16 +422,13 @@ struct StabsEntry {
// range (start index and total number) of those symbols in the symbol table.
class SymtabSection : public LinkEditSection {
public:
- SymtabSection(StringTableSection &);
- void finalizeContents();
+ void finalizeContents() override;
uint32_t getNumSymbols() const;
uint32_t getNumLocalSymbols() const {
return stabs.size() + localSymbols.size();
}
uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
- uint64_t getRawSize() const override;
- void writeTo(uint8_t *buf) const override;
private:
void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
@@ -445,6 +437,9 @@ private:
void emitEndFunStab(Defined *);
void emitStabs();
+protected:
+ SymtabSection(StringTableSection &);
+
StringTableSection &stringTableSection;
// STABS symbols are always local symbols, but we represent them with special
// entries because they may use fields like n_sect and n_desc differently.
@@ -454,6 +449,8 @@ private:
std::vector<SymtabEntry> undefinedSymbols;
};
+template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
+
// The indirect symbol table is a list of 32-bit integers that serve as indices
// into the (actual) symbol table. The indirect symbol table is a
// concatenation of several sub-arrays of indices, each sub-array belonging to
@@ -464,10 +461,10 @@ private:
// contiguous sequences of symbol references. These references can be pointers
// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
// function stubs).
-class IndirectSymtabSection : public LinkEditSection {
+class IndirectSymtabSection final : public LinkEditSection {
public:
IndirectSymtabSection();
- void finalizeContents();
+ void finalizeContents() override;
uint32_t getNumSymbols() const;
uint64_t getRawSize() const override {
return getNumSymbols() * sizeof(uint32_t);
@@ -476,8 +473,125 @@ public:
void writeTo(uint8_t *buf) const override;
};
+// The code signature comes at the very end of the linked output file.
+class CodeSignatureSection final : public LinkEditSection {
+public:
+ static constexpr uint8_t blockSizeShift = 12; // log2 of blockSize
+ static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
+ static constexpr size_t hashSize = 256 / 8; // 256 bits expressed in bytes (32)
+ static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
+ sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex)); // rounded up to a multiple of 8
+ static constexpr uint32_t fixedHeadersSize =
+ blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory); // blob headers plus the code directory struct
+
+ uint32_t fileNamePad = 0;
+ uint32_t allHeadersSize = 0;
+ StringRef fileName;
+
+ CodeSignatureSection();
+ uint64_t getRawSize() const override;
+ bool isNeeded() const override { return true; } // unconditionally reports itself as needed
+ void writeTo(uint8_t *buf) const override;
+ uint32_t getBlockCount() const;
+ void writeHashes(uint8_t *buf) const;
+};
+
+class BitcodeBundleSection final : public SyntheticSection { // synthetic section whose size is tracked in xarSize
+public:
+ BitcodeBundleSection();
+ uint64_t getSize() const override { return xarSize; } // 0 until xarSize has been assigned
+ void finalize() override;
+ void writeTo(uint8_t *buf) const override;
+
+private:
+ llvm::SmallString<261> xarPath;
+ uint64_t xarSize = 0; // zero-initialized so getSize() never reads an indeterminate value
+};
+
+class CStringSection : public SyntheticSection { // synthetic section built from CStringInputSection inputs
+public:
+ CStringSection();
+ void addInput(CStringInputSection *);
+ uint64_t getSize() const override { return size; } // 0 until `size` has been assigned
+ virtual void finalizeContents(); // virtual: DeduplicatedCStringSection overrides it
+ bool isNeeded() const override { return !inputs.empty(); } // needed only when `inputs` is non-empty
+ void writeTo(uint8_t *buf) const override;
+
+ std::vector<CStringInputSection *> inputs;
+
+private:
+ uint64_t size = 0; // zero-initialized so getSize() never reads an indeterminate value
+};
+
+class DeduplicatedCStringSection final : public CStringSection { // size and bytes come from a StringTableBuilder
+public:
+ DeduplicatedCStringSection();
+ uint64_t getSize() const override { return builder.getSize(); } // size as reported by the builder
+ void finalizeContents() override;
+ void writeTo(uint8_t *buf) const override { builder.write(buf); } // the builder emits the bytes into buf
+
+private:
+ llvm::StringTableBuilder builder;
+};
+
+/*
+ * Holds deduplicated 16-, 8- and 4-byte literal values. The 16-byte values
+ * are laid out first, followed by the 8-byte and then the 4-byte ones.
+ */
+class WordLiteralSection final : public SyntheticSection {
+public:
+ using UInt128 = std::pair<uint64_t, uint64_t>;
+ // The standard does not seem to guarantee the size of a pair, so assert that
+ // it is exactly 16 bytes -- that way we can construct it via `mmap`.
+ static_assert(sizeof(UInt128) == 16, "");
+
+ WordLiteralSection();
+ void addInput(WordLiteralInputSection *);
+ void finalizeContents();
+ void writeTo(uint8_t *buf) const override;
+
+ uint64_t getSize() const override { // one slot per distinct literal in each map
+ return literal16Map.size() * 16 + literal8Map.size() * 8 +
+ literal4Map.size() * 4;
+ }
+
+ bool isNeeded() const override { // needed as soon as any of the three maps has an entry
+ return !literal16Map.empty() || !literal4Map.empty() ||
+ !literal8Map.empty();
+ }
+
+ uint64_t getLiteral16Offset(const uint8_t *buf) const { // NOTE(review): the casts assume buf is suitably aligned -- confirm
+ return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16; // at() requires the literal to be present
+ }
+
+ uint64_t getLiteral8Offset(const uint8_t *buf) const { // 8-byte literals start after all 16-byte ones
+ return literal16Map.size() * 16 +
+ literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8;
+ }
+
+ uint64_t getLiteral4Offset(const uint8_t *buf) const { // 4-byte literals start after the 16- and 8-byte ones
+ return literal16Map.size() * 16 + literal8Map.size() * 8 +
+ literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4;
+ }
+
+private:
+ std::vector<WordLiteralInputSection *> inputs;
+
+ template <class T> struct Hasher {
+ llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
+ };
+ // std::unordered_map is used instead of DenseMap because every possible
+ // integer value must be usable as a key -- DenseMap would need to reserve
+ // some values as tombstones, and none are suitable.
+ std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map; // literal -> index within its size class
+ std::unordered_map<uint64_t, uint64_t> literal8Map;
+ std::unordered_map<uint32_t, uint64_t> literal4Map;
+};
+
struct InStruct {
MachHeaderSection *header = nullptr;
+ CStringSection *cStringSection = nullptr;
+ WordLiteralSection *wordLiteralSection = nullptr;
RebaseSection *rebase = nullptr;
BindingSection *binding = nullptr;
WeakBindingSection *weakBinding = nullptr;
@@ -488,12 +602,15 @@ struct InStruct {
LazyPointerSection *lazyPointers = nullptr;
StubsSection *stubs = nullptr;
StubHelperSection *stubHelper = nullptr;
- ImageLoaderCacheSection *imageLoaderCache = nullptr;
+ UnwindInfoSection *unwindInfo = nullptr;
+ ConcatInputSection *imageLoaderCache = nullptr;
};
extern InStruct in;
extern std::vector<SyntheticSection *> syntheticSections;
+void createSyntheticSymbols();
+
} // namespace macho
} // namespace lld