src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-07-29 20:15:26 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-07-29 20:15:26 +0000
commit	344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree	f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /lld/MachO/InputSection.h
parent	b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
download	src-344a3780b2e33f6ca763666c380202b18aab72a3.tar.gz src-344a3780b2e33f6ca763666c380202b18aab72a3.zip

Vendor import of llvm-project main 88e66fa60ae5, the last commit before
the upstream release/13.x branch was created.

Tags: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5 vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f

Diffstat (limited to 'lld/MachO/InputSection.h')

-rw-r--r--

lld/MachO/InputSection.h

310

1 files changed, 280 insertions, 30 deletions

diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 00b523fb8d46..a10457087043 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h

@@ -9,55 +9,242 @@

#ifndef LLD_MACHO_INPUT_SECTION_H

#define LLD_MACHO_INPUT_SECTION_H

+#include "Config.h"

+#include "Relocations.h"

#include "lld/Common/LLVM.h"

+#include "lld/Common/Memory.h"

#include "llvm/ADT/ArrayRef.h"

-#include "llvm/ADT/PointerUnion.h"

+#include "llvm/ADT/BitVector.h"

+#include "llvm/ADT/CachedHashString.h"

#include "llvm/BinaryFormat/MachO.h"

namespace lld {

namespace macho {

class InputFile;

-class InputSection;

class OutputSection;

-class Symbol;

-struct Reloc {

- uint8_t type;

- bool pcrel;

- uint8_t length;

- // The offset from the start of the subsection that this relocation belongs

- // to.

- uint32_t offset;

- // Adding this offset to the address of the referent symbol or subsection

- // gives the destination that this relocation refers to.

- uint64_t addend;

- llvm::PointerUnion<Symbol *, InputSection *> referent;

-};

+class Defined;

class InputSection {

public:

+ enum Kind {

+ ConcatKind,

+ CStringLiteralKind,

+ WordLiteralKind,

+ };

+ Kind kind() const { return shared->sectionKind; }

virtual ~InputSection() = default;

virtual uint64_t getSize() const { return data.size(); }

- virtual uint64_t getFileSize() const;

- uint64_t getFileOffset() const;

- uint64_t getVA() const;

- virtual void writeTo(uint8_t *buf);

- InputFile *file = nullptr;

- StringRef name;

- StringRef segname;

+ InputFile *getFile() const { return shared->file; }

+ StringRef getName() const { return shared->name; }

+ StringRef getSegName() const { return shared->segname; }

+ uint32_t getFlags() const { return shared->flags; }

+ uint64_t getFileSize() const;

+ // Translates \p off -- an offset relative to this InputSection -- into an

+ // offset from the beginning of its parent OutputSection.

+ virtual uint64_t getOffset(uint64_t off) const = 0;

+ // The offset from the beginning of the file.

+ uint64_t getVA(uint64_t off) const;

+ // Whether the data at \p off in this InputSection is live.

+ virtual bool isLive(uint64_t off) const = 0;

+ virtual void markLive(uint64_t off) = 0;

+ virtual InputSection *canonical() { return this; }

OutputSection *parent = nullptr;

- uint64_t outSecOff = 0;

- uint64_t outSecFileOff = 0;

uint32_t align = 1;

- uint32_t flags = 0;

+ uint32_t callSiteCount : 31;

+ // is address assigned?

+ uint32_t isFinal : 1;

ArrayRef<uint8_t> data;

std::vector<Reloc> relocs;

+protected:

+ // The fields in this struct are immutable. Since we create a lot of

+ // InputSections with identical values for them (due to

+ // .subsections_via_symbols), factoring them out into a shared struct reduces

+ // memory consumption and makes copying cheaper.

+ struct Shared {

+ InputFile *file;

+ StringRef name;

+ StringRef segname;

+ uint32_t flags;

+ Kind sectionKind;

+ Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags,

+ Kind kind)

+ : file(file), name(name), segname(segname), flags(flags),

+ sectionKind(kind) {}

+ };

+ InputSection(Kind kind, StringRef segname, StringRef name)

+ : callSiteCount(0), isFinal(false),

+ shared(make<Shared>(nullptr, name, segname, 0, kind)) {}

+ InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file,

+ ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)

+ : align(align), callSiteCount(0), isFinal(false), data(data),

+ shared(make<Shared>(file, name, segname, flags, kind)) {}

+ const Shared *const shared;

+};

+// ConcatInputSections are combined into (Concat)OutputSections through simple

+// concatenation, in contrast with literal sections which may have their

+// contents merged before output.

+class ConcatInputSection final : public InputSection {

+public:

+ ConcatInputSection(StringRef segname, StringRef name)

+ : InputSection(ConcatKind, segname, name) {}

+ ConcatInputSection(StringRef segname, StringRef name, InputFile *file,

+ ArrayRef<uint8_t> data, uint32_t align = 1,

+ uint32_t flags = 0)

+ : InputSection(ConcatKind, segname, name, file, data, align, flags) {}

+ uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }

+ uint64_t getVA() const { return InputSection::getVA(0); }

+ // ConcatInputSections are entirely live or dead, so the offset is irrelevant.

+ bool isLive(uint64_t off) const override { return live; }

+ void markLive(uint64_t off) override { live = true; }

+ bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }

+ bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }

+ bool isHashableForICF() const;

+ void hashForICF();

+ void writeTo(uint8_t *buf);

+ void foldIdentical(ConcatInputSection *redundant);

+ InputSection *canonical() override {

+ return replacement ? replacement : this;

+ }

+ static bool classof(const InputSection *isec) {

+ return isec->kind() == ConcatKind;

+ }

+ // Points to the surviving section after this one is folded by ICF

+ InputSection *replacement = nullptr;

+ // Equivalence-class ID for ICF

+ uint64_t icfEqClass[2] = {0, 0};

+ // With subsections_via_symbols, most symbols have their own InputSection,

+ // and for weak symbols (e.g. from inline functions), only the

+ // InputSection from one translation unit will make it to the output,

+ // while all copies in other translation units are coalesced into the

+ // first and not copied to the output.

+ bool wasCoalesced = false;

+ bool live = !config->deadStrip;

+ // How many symbols refer to this InputSection.

+ uint32_t numRefs = 0;

+ // This variable has two usages. Initially, it represents the input order.

+ // After assignAddresses is called, it represents the offset from the

+ // beginning of the output section this section was assigned to.

+ uint64_t outSecOff = 0;

+};

+// Verify ConcatInputSection's size on 64-bit builds.

+static_assert(sizeof(int) != 8 || sizeof(ConcatInputSection) == 112,

+ "Try to minimize ConcatInputSection's size, we create many "

+ "instances of it");

+// Helper functions to make it easy to sprinkle asserts.

+inline bool shouldOmitFromOutput(InputSection *isec) {

+ return isa<ConcatInputSection>(isec) &&

+ cast<ConcatInputSection>(isec)->shouldOmitFromOutput();

+}

+inline bool isCoalescedWeak(InputSection *isec) {

+ return isa<ConcatInputSection>(isec) &&

+ cast<ConcatInputSection>(isec)->isCoalescedWeak();

+}

+// We allocate a lot of these and binary search on them, so they should be as

+// compact as possible. Hence the use of 31 rather than 64 bits for the hash.

+struct StringPiece {

+ // Offset from the start of the containing input section.

+ uint32_t inSecOff;

+ uint32_t live : 1;

+ // Only set if deduplicating literals

+ uint32_t hash : 31;

+ // Offset from the start of the containing output section.

+ uint64_t outSecOff = 0;

+ StringPiece(uint64_t off, uint32_t hash)

+ : inSecOff(off), live(!config->deadStrip), hash(hash) {}

+};

+static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");

+// CStringInputSections are composed of multiple null-terminated string

+// literals, which we represent using StringPieces. These literals can be

+// deduplicated and tail-merged, so translating offsets between the input and

+// outputs sections is more complicated.

+//

+// NOTE: One significant difference between LLD and ld64 is that we merge all

+// cstring literals, even those referenced directly by non-private symbols.

+// ld64 is more conservative and does not do that. This was mostly done for

+// implementation simplicity; if we find programs that need the more

+// conservative behavior we can certainly implement that.

+class CStringInputSection final : public InputSection {

+public:

+ CStringInputSection(StringRef segname, StringRef name, InputFile *file,

+ ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)

+ : InputSection(CStringLiteralKind, segname, name, file, data, align,

+ flags) {}

+ uint64_t getOffset(uint64_t off) const override;

+ bool isLive(uint64_t off) const override { return getStringPiece(off).live; }

+ void markLive(uint64_t off) override { getStringPiece(off).live = true; }

+ // Find the StringPiece that contains this offset.

+ StringPiece &getStringPiece(uint64_t off);

+ const StringPiece &getStringPiece(uint64_t off) const;

+ // Split at each null byte.

+ void splitIntoPieces();

+ LLVM_ATTRIBUTE_ALWAYS_INLINE

+ StringRef getStringRef(size_t i) const {

+ size_t begin = pieces[i].inSecOff;

+ size_t end =

+ (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff;

+ return toStringRef(data.slice(begin, end - begin));

+ }

+ // Returns i'th piece as a CachedHashStringRef. This function is very hot when

+ // string merging is enabled, so we want to inline.

+ LLVM_ATTRIBUTE_ALWAYS_INLINE

+ llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {

+ assert(config->dedupLiterals);

+ return {getStringRef(i), pieces[i].hash};

+ }

+ static bool classof(const InputSection *isec) {

+ return isec->kind() == CStringLiteralKind;

+ }

+ std::vector<StringPiece> pieces;

+};

+class WordLiteralInputSection final : public InputSection {

+public:

+ WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file,

+ ArrayRef<uint8_t> data, uint32_t align,

+ uint32_t flags);

+ uint64_t getOffset(uint64_t off) const override;

+ bool isLive(uint64_t off) const override {

+ return live[off >> power2LiteralSize];

+ }

+ void markLive(uint64_t off) override { live[off >> power2LiteralSize] = 1; }

+ static bool classof(const InputSection *isec) {

+ return isec->kind() == WordLiteralKind;

+ }

+private:

+ unsigned power2LiteralSize;

+ // The liveness of data[off] is tracked by live[off >> power2LiteralSize].

+ llvm::BitVector live;

};

inline uint8_t sectionType(uint32_t flags) {

@@ -83,9 +270,72 @@ inline bool isDebugSection(uint32_t flags) {

llvm::MachO::S_ATTR_DEBUG;

}

-bool isCodeSection(InputSection *);

+inline bool isWordLiteralSection(uint32_t flags) {

+ return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||

+ sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||

+ sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;

+}

+bool isCodeSection(const InputSection *);

+bool isCfStringSection(const InputSection *);

+extern std::vector<ConcatInputSection *> inputSections;

+namespace section_names {

+constexpr const char authGot[] = "__auth_got";

+constexpr const char authPtr[] = "__auth_ptr";

+constexpr const char binding[] = "__binding";

+constexpr const char bitcodeBundle[] = "__bundle";

+constexpr const char cString[] = "__cstring";

+constexpr const char cfString[] = "__cfstring";

+constexpr const char codeSignature[] = "__code_signature";

+constexpr const char common[] = "__common";

+constexpr const char compactUnwind[] = "__compact_unwind";

+constexpr const char data[] = "__data";

+constexpr const char debugAbbrev[] = "__debug_abbrev";

+constexpr const char debugInfo[] = "__debug_info";

+constexpr const char debugStr[] = "__debug_str";

+constexpr const char ehFrame[] = "__eh_frame";

+constexpr const char export_[] = "__export";

+constexpr const char dataInCode[] = "__data_in_code";

+constexpr const char functionStarts[] = "__func_starts";

+constexpr const char got[] = "__got";

+constexpr const char header[] = "__mach_header";

+constexpr const char indirectSymbolTable[] = "__ind_sym_tab";

+constexpr const char const_[] = "__const";

+constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";

+constexpr const char lazyBinding[] = "__lazy_binding";

+constexpr const char literals[] = "__literals";

+constexpr const char moduleInitFunc[] = "__mod_init_func";

+constexpr const char moduleTermFunc[] = "__mod_term_func";

+constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";

+constexpr const char objcCatList[] = "__objc_catlist";

+constexpr const char objcClassList[] = "__objc_classlist";

+constexpr const char objcConst[] = "__objc_const";

+constexpr const char objcImageInfo[] = "__objc_imageinfo";

+constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";

+constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";

+constexpr const char objcProtoList[] = "__objc_protolist";

+constexpr const char pageZero[] = "__pagezero";

+constexpr const char pointers[] = "__pointers";

+constexpr const char rebase[] = "__rebase";

+constexpr const char staticInit[] = "__StaticInit";

+constexpr const char stringTable[] = "__string_table";

+constexpr const char stubHelper[] = "__stub_helper";

+constexpr const char stubs[] = "__stubs";

+constexpr const char swift[] = "__swift";

+constexpr const char symbolTable[] = "__symbol_table";

+constexpr const char textCoalNt[] = "__textcoal_nt";

+constexpr const char text[] = "__text";

+constexpr const char threadPtrs[] = "__thread_ptrs";

+constexpr const char threadVars[] = "__thread_vars";

+constexpr const char unwindInfo[] = "__unwind_info";

+constexpr const char weakBinding[] = "__weak_binding";

+constexpr const char zeroFill[] = "__zerofill";

-extern std::vector<InputSection *> inputSections;

+} // namespace section_names

} // namespace macho