diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /lld/MachO/InputSection.h | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
download | src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz src-344a3780b2e33f6ca763666c380202b18aab72a3.zip |
Vendor import of llvm-project main 88e66fa60ae5, the last commit before
the upstream release/13.x branch was created.
Tags: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5, vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e, vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'lld/MachO/InputSection.h')
-rw-r--r-- | lld/MachO/InputSection.h | 310 |
1 files changed, 280 insertions, 30 deletions
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 00b523fb8d46..a10457087043 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -9,55 +9,242 @@ #ifndef LLD_MACHO_INPUT_SECTION_H #define LLD_MACHO_INPUT_SECTION_H +#include "Config.h" +#include "Relocations.h" + #include "lld/Common/LLVM.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/BinaryFormat/MachO.h" namespace lld { namespace macho { class InputFile; -class InputSection; class OutputSection; -class Symbol; - -struct Reloc { - uint8_t type; - bool pcrel; - uint8_t length; - // The offset from the start of the subsection that this relocation belongs - // to. - uint32_t offset; - // Adding this offset to the address of the referent symbol or subsection - // gives the destination that this relocation refers to. - uint64_t addend; - llvm::PointerUnion<Symbol *, InputSection *> referent; -}; +class Defined; class InputSection { public: + enum Kind { + ConcatKind, + CStringLiteralKind, + WordLiteralKind, + }; + + Kind kind() const { return shared->sectionKind; } virtual ~InputSection() = default; virtual uint64_t getSize() const { return data.size(); } - virtual uint64_t getFileSize() const; - uint64_t getFileOffset() const; - uint64_t getVA() const; - - virtual void writeTo(uint8_t *buf); - - InputFile *file = nullptr; - StringRef name; - StringRef segname; + InputFile *getFile() const { return shared->file; } + StringRef getName() const { return shared->name; } + StringRef getSegName() const { return shared->segname; } + uint32_t getFlags() const { return shared->flags; } + uint64_t getFileSize() const; + // Translates \p off -- an offset relative to this InputSection -- into an + // offset from the beginning of its parent OutputSection. 
+ virtual uint64_t getOffset(uint64_t off) const = 0; + // The offset from the beginning of the file. + uint64_t getVA(uint64_t off) const; + // Whether the data at \p off in this InputSection is live. + virtual bool isLive(uint64_t off) const = 0; + virtual void markLive(uint64_t off) = 0; + virtual InputSection *canonical() { return this; } OutputSection *parent = nullptr; - uint64_t outSecOff = 0; - uint64_t outSecFileOff = 0; uint32_t align = 1; - uint32_t flags = 0; + uint32_t callSiteCount : 31; + // is address assigned? + uint32_t isFinal : 1; ArrayRef<uint8_t> data; std::vector<Reloc> relocs; + +protected: + // The fields in this struct are immutable. Since we create a lot of + // InputSections with identical values for them (due to + // .subsections_via_symbols), factoring them out into a shared struct reduces + // memory consumption and makes copying cheaper. + struct Shared { + InputFile *file; + StringRef name; + StringRef segname; + uint32_t flags; + Kind sectionKind; + Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags, + Kind kind) + : file(file), name(name), segname(segname), flags(flags), + sectionKind(kind) {} + }; + + InputSection(Kind kind, StringRef segname, StringRef name) + : callSiteCount(0), isFinal(false), + shared(make<Shared>(nullptr, name, segname, 0, kind)) {} + + InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file, + ArrayRef<uint8_t> data, uint32_t align, uint32_t flags) + : align(align), callSiteCount(0), isFinal(false), data(data), + shared(make<Shared>(file, name, segname, flags, kind)) {} + + const Shared *const shared; +}; + +// ConcatInputSections are combined into (Concat)OutputSections through simple +// concatenation, in contrast with literal sections which may have their +// contents merged before output. 
+class ConcatInputSection final : public InputSection { +public: + ConcatInputSection(StringRef segname, StringRef name) + : InputSection(ConcatKind, segname, name) {} + + ConcatInputSection(StringRef segname, StringRef name, InputFile *file, + ArrayRef<uint8_t> data, uint32_t align = 1, + uint32_t flags = 0) + : InputSection(ConcatKind, segname, name, file, data, align, flags) {} + + uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } + uint64_t getVA() const { return InputSection::getVA(0); } + // ConcatInputSections are entirely live or dead, so the offset is irrelevant. + bool isLive(uint64_t off) const override { return live; } + void markLive(uint64_t off) override { live = true; } + bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; } + bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } + bool isHashableForICF() const; + void hashForICF(); + void writeTo(uint8_t *buf); + + void foldIdentical(ConcatInputSection *redundant); + InputSection *canonical() override { + return replacement ? replacement : this; + } + + static bool classof(const InputSection *isec) { + return isec->kind() == ConcatKind; + } + + // Points to the surviving section after this one is folded by ICF + InputSection *replacement = nullptr; + // Equivalence-class ID for ICF + uint64_t icfEqClass[2] = {0, 0}; + + // With subsections_via_symbols, most symbols have their own InputSection, + // and for weak symbols (e.g. from inline functions), only the + // InputSection from one translation unit will make it to the output, + // while all copies in other translation units are coalesced into the + // first and not copied to the output. + bool wasCoalesced = false; + bool live = !config->deadStrip; + // How many symbols refer to this InputSection. + uint32_t numRefs = 0; + // This variable has two usages. Initially, it represents the input order. 
+ // After assignAddresses is called, it represents the offset from the + // beginning of the output section this section was assigned to. + uint64_t outSecOff = 0; +}; + +// Verify ConcatInputSection's size on 64-bit builds. +static_assert(sizeof(int) != 8 || sizeof(ConcatInputSection) == 112, + "Try to minimize ConcatInputSection's size, we create many " + "instances of it"); + +// Helper functions to make it easy to sprinkle asserts. + +inline bool shouldOmitFromOutput(InputSection *isec) { + return isa<ConcatInputSection>(isec) && + cast<ConcatInputSection>(isec)->shouldOmitFromOutput(); +} + +inline bool isCoalescedWeak(InputSection *isec) { + return isa<ConcatInputSection>(isec) && + cast<ConcatInputSection>(isec)->isCoalescedWeak(); +} + +// We allocate a lot of these and binary search on them, so they should be as +// compact as possible. Hence the use of 31 rather than 64 bits for the hash. +struct StringPiece { + // Offset from the start of the containing input section. + uint32_t inSecOff; + uint32_t live : 1; + // Only set if deduplicating literals + uint32_t hash : 31; + // Offset from the start of the containing output section. + uint64_t outSecOff = 0; + + StringPiece(uint64_t off, uint32_t hash) + : inSecOff(off), live(!config->deadStrip), hash(hash) {} +}; + +static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!"); + +// CStringInputSections are composed of multiple null-terminated string +// literals, which we represent using StringPieces. These literals can be +// deduplicated and tail-merged, so translating offsets between the input and +// outputs sections is more complicated. +// +// NOTE: One significant difference between LLD and ld64 is that we merge all +// cstring literals, even those referenced directly by non-private symbols. +// ld64 is more conservative and does not do that. 
This was mostly done for +// implementation simplicity; if we find programs that need the more +// conservative behavior we can certainly implement that. +class CStringInputSection final : public InputSection { +public: + CStringInputSection(StringRef segname, StringRef name, InputFile *file, + ArrayRef<uint8_t> data, uint32_t align, uint32_t flags) + : InputSection(CStringLiteralKind, segname, name, file, data, align, + flags) {} + uint64_t getOffset(uint64_t off) const override; + bool isLive(uint64_t off) const override { return getStringPiece(off).live; } + void markLive(uint64_t off) override { getStringPiece(off).live = true; } + // Find the StringPiece that contains this offset. + StringPiece &getStringPiece(uint64_t off); + const StringPiece &getStringPiece(uint64_t off) const; + // Split at each null byte. + void splitIntoPieces(); + + LLVM_ATTRIBUTE_ALWAYS_INLINE + StringRef getStringRef(size_t i) const { + size_t begin = pieces[i].inSecOff; + size_t end = + (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff; + return toStringRef(data.slice(begin, end - begin)); + } + + // Returns i'th piece as a CachedHashStringRef. This function is very hot when + // string merging is enabled, so we want to inline. 
+ LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { + assert(config->dedupLiterals); + return {getStringRef(i), pieces[i].hash}; + } + + static bool classof(const InputSection *isec) { + return isec->kind() == CStringLiteralKind; + } + + std::vector<StringPiece> pieces; +}; + +class WordLiteralInputSection final : public InputSection { +public: + WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file, + ArrayRef<uint8_t> data, uint32_t align, + uint32_t flags); + uint64_t getOffset(uint64_t off) const override; + bool isLive(uint64_t off) const override { + return live[off >> power2LiteralSize]; + } + void markLive(uint64_t off) override { live[off >> power2LiteralSize] = 1; } + + static bool classof(const InputSection *isec) { + return isec->kind() == WordLiteralKind; + } + +private: + unsigned power2LiteralSize; + // The liveness of data[off] is tracked by live[off >> power2LiteralSize]. + llvm::BitVector live; }; inline uint8_t sectionType(uint32_t flags) { @@ -83,9 +270,72 @@ inline bool isDebugSection(uint32_t flags) { llvm::MachO::S_ATTR_DEBUG; } -bool isCodeSection(InputSection *); +inline bool isWordLiteralSection(uint32_t flags) { + return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || + sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS || + sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; +} + +bool isCodeSection(const InputSection *); + +bool isCfStringSection(const InputSection *); + +extern std::vector<ConcatInputSection *> inputSections; + +namespace section_names { + +constexpr const char authGot[] = "__auth_got"; +constexpr const char authPtr[] = "__auth_ptr"; +constexpr const char binding[] = "__binding"; +constexpr const char bitcodeBundle[] = "__bundle"; +constexpr const char cString[] = "__cstring"; +constexpr const char cfString[] = "__cfstring"; +constexpr const char codeSignature[] = "__code_signature"; +constexpr const char common[] = "__common"; +constexpr 
const char compactUnwind[] = "__compact_unwind"; +constexpr const char data[] = "__data"; +constexpr const char debugAbbrev[] = "__debug_abbrev"; +constexpr const char debugInfo[] = "__debug_info"; +constexpr const char debugStr[] = "__debug_str"; +constexpr const char ehFrame[] = "__eh_frame"; +constexpr const char export_[] = "__export"; +constexpr const char dataInCode[] = "__data_in_code"; +constexpr const char functionStarts[] = "__func_starts"; +constexpr const char got[] = "__got"; +constexpr const char header[] = "__mach_header"; +constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; +constexpr const char const_[] = "__const"; +constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; +constexpr const char lazyBinding[] = "__lazy_binding"; +constexpr const char literals[] = "__literals"; +constexpr const char moduleInitFunc[] = "__mod_init_func"; +constexpr const char moduleTermFunc[] = "__mod_term_func"; +constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; +constexpr const char objcCatList[] = "__objc_catlist"; +constexpr const char objcClassList[] = "__objc_classlist"; +constexpr const char objcConst[] = "__objc_const"; +constexpr const char objcImageInfo[] = "__objc_imageinfo"; +constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; +constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; +constexpr const char objcProtoList[] = "__objc_protolist"; +constexpr const char pageZero[] = "__pagezero"; +constexpr const char pointers[] = "__pointers"; +constexpr const char rebase[] = "__rebase"; +constexpr const char staticInit[] = "__StaticInit"; +constexpr const char stringTable[] = "__string_table"; +constexpr const char stubHelper[] = "__stub_helper"; +constexpr const char stubs[] = "__stubs"; +constexpr const char swift[] = "__swift"; +constexpr const char symbolTable[] = "__symbol_table"; +constexpr const char textCoalNt[] = "__textcoal_nt"; +constexpr const char text[] = "__text"; +constexpr const char threadPtrs[] = 
"__thread_ptrs"; +constexpr const char threadVars[] = "__thread_vars"; +constexpr const char unwindInfo[] = "__unwind_info"; +constexpr const char weakBinding[] = "__weak_binding"; +constexpr const char zeroFill[] = "__zerofill"; -extern std::vector<InputSection *> inputSections; +} // namespace section_names } // namespace macho |