1 files changed, 280 insertions, 30 deletions
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 00b523fb8d46..a10457087043 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -9,55 +9,242 @@
 #ifndef LLD_MACHO_INPUT_SECTION_H
 #define LLD_MACHO_INPUT_SECTION_H
 
+#include "Config.h"
+#include "Relocations.h"
+
 #include "lld/Common/LLVM.h"
+#include "lld/Common/Memory.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/CachedHashString.h"
 #include "llvm/BinaryFormat/MachO.h"
 
 namespace lld {
 namespace macho {
 
 class InputFile;
-class InputSection;
 class OutputSection;
-class Symbol;
-
-struct Reloc {
-  uint8_t type;
-  bool pcrel;
-  uint8_t length;
-  // The offset from the start of the subsection that this relocation belongs
-  // to.
-  uint32_t offset;
-  // Adding this offset to the address of the referent symbol or subsection
-  // gives the destination that this relocation refers to.
-  uint64_t addend;
-  llvm::PointerUnion<Symbol *, InputSection *> referent;
-};
+class Defined;
 
 class InputSection {
 public:
+  enum Kind {
+    ConcatKind,         // ConcatInputSection
+    CStringLiteralKind, // CStringInputSection
+    WordLiteralKind,    // WordLiteralInputSection
+  };
+
+  Kind kind() const { return shared->sectionKind; }
   virtual ~InputSection() = default;
   virtual uint64_t getSize() const { return data.size(); }
-  virtual uint64_t getFileSize() const;
-  uint64_t getFileOffset() const;
-  uint64_t getVA() const;
-
-  virtual void writeTo(uint8_t *buf);
-
-  InputFile *file = nullptr;
-  StringRef name;
-  StringRef segname;
+  InputFile *getFile() const { return shared->file; }
+  StringRef getName() const { return shared->name; }
+  StringRef getSegName() const { return shared->segname; }
+  uint32_t getFlags() const { return shared->flags; }
+  uint64_t getFileSize() const;
+  // Translates \p off -- an offset relative to this InputSection -- into an
+  // offset from the beginning of its parent OutputSection.
+  virtual uint64_t getOffset(uint64_t off) const = 0;
+  // NOTE(review): presumably the virtual address of \p off, not a file offset.
+  uint64_t getVA(uint64_t off) const;
+  // Whether the data at \p off in this InputSection is live.
+  virtual bool isLive(uint64_t off) const = 0;
+  virtual void markLive(uint64_t off) = 0;
+  virtual InputSection *canonical() { return this; } // Defaults to itself.
 
   OutputSection *parent = nullptr;
-  uint64_t outSecOff = 0;
-  uint64_t outSecFileOff = 0;
 
   uint32_t align = 1;
-  uint32_t flags = 0;
+  uint32_t callSiteCount : 31;
+  // Whether an address has been assigned to this section.
+  uint32_t isFinal : 1;
 
   ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
+
+protected:
+  // The fields in this struct are immutable. Since we create a lot of
+  // InputSections with identical values for them (due to
+  // .subsections_via_symbols), factoring them out into a shared struct reduces
+  // memory consumption and makes copying cheaper.
+  struct Shared {
+    InputFile *file;
+    StringRef name;
+    StringRef segname;
+    uint32_t flags;
+    Kind sectionKind;
+    Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags,
+           Kind kind)
+        : file(file), name(name), segname(segname), flags(flags),
+          sectionKind(kind) {}
+  };
+
+  InputSection(Kind kind, StringRef segname, StringRef name)
+      : callSiteCount(0), isFinal(false),
+        shared(make<Shared>(nullptr, name, segname, 0, kind)) {}
+
+  InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file,
+               ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
+      : align(align), callSiteCount(0), isFinal(false), data(data),
+        shared(make<Shared>(file, name, segname, flags, kind)) {}
+
+  const Shared *const shared; // Set once by the constructors above.
+};
+
+// ConcatInputSections are combined into (Concat)OutputSections through simple
+// concatenation, in contrast with literal sections which may have their
+// contents merged before output.
+class ConcatInputSection final : public InputSection {
+public:
+  ConcatInputSection(StringRef segname, StringRef name)
+      : InputSection(ConcatKind, segname, name) {}
+
+  ConcatInputSection(StringRef segname, StringRef name, InputFile *file,
+                     ArrayRef<uint8_t> data, uint32_t align = 1,
+                     uint32_t flags = 0)
+      : InputSection(ConcatKind, segname, name, file, data, align, flags) {}
+
+  uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
+  uint64_t getVA() const { return InputSection::getVA(0); }
+  // ConcatInputSections are entirely live or dead, so \p off is ignored.
+  bool isLive(uint64_t off) const override { return live; }
+  void markLive(uint64_t off) override { live = true; }
+  bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
+  bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
+  bool isHashableForICF() const;
+  void hashForICF();
+  void writeTo(uint8_t *buf);
+
+  void foldIdentical(ConcatInputSection *redundant);
+  InputSection *canonical() override {
+    return replacement ? replacement : this;
+  }
+
+  static bool classof(const InputSection *isec) {
+    return isec->kind() == ConcatKind;
+  }
+
+  // The surviving section if this one was folded by ICF; null otherwise.
+  InputSection *replacement = nullptr;
+  // Equivalence-class IDs for ICF (two slots).
+  uint64_t icfEqClass[2] = {0, 0};
+
+  // With subsections_via_symbols, most symbols have their own InputSection,
+  // and for weak symbols (e.g. from inline functions), only the
+  // InputSection from one translation unit will make it to the output,
+  // while all copies in other translation units are coalesced into the
+  // first and not copied to the output.
+  bool wasCoalesced = false;
+  bool live = !config->deadStrip; // Initially live unless dead-stripping.
+  // How many symbols refer to this InputSection.
+  uint32_t numRefs = 0;
+  // This variable has two usages. Initially, it represents the input order.
+  // After assignAddresses is called, it represents the offset from the
+  // beginning of the output section this section was assigned to.
+  uint64_t outSecOff = 0;
+};
+
+// Verify the size on 64-bit builds; the vector term tracks STL debug modes.
+static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) ==
+              sizeof(std::vector<Reloc>) + 88, "Try to minimize "
+              "ConcatInputSection's size, we create many instances of it");
+
+// Helpers for sprinkling asserts; both yield false for non-Concat sections.
+
+inline bool shouldOmitFromOutput(InputSection *isec) {
+  return isa<ConcatInputSection>(isec) &&
+         cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
+}
+
+inline bool isCoalescedWeak(InputSection *isec) {
+  return isa<ConcatInputSection>(isec) &&
+         cast<ConcatInputSection>(isec)->isCoalescedWeak();
+}
+
+// We allocate a lot of these and binary search on them, so they should be as
+// compact as possible. Hence the use of 31 rather than 64 bits for the hash.
+struct StringPiece {
+  // Offset from the start of the containing input section.
+  uint32_t inSecOff;
+  uint32_t live : 1; // Starts set unless config->deadStrip.
+  // Only set if deduplicating literals; only the low 31 bits are kept.
+  uint32_t hash : 31;
+  // Offset from the start of the containing output section.
+  uint64_t outSecOff = 0;
+
+  StringPiece(uint64_t off, uint32_t hash)
+      : inSecOff(off), live(!config->deadStrip), hash(hash) {}
+};
+
+static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
+
+// CStringInputSections are composed of multiple null-terminated string
+// literals, which we represent using StringPieces. These literals can be
+// deduplicated and tail-merged, so translating offsets between the input and
+// output sections is more complicated.
+//
+// NOTE: One significant difference between LLD and ld64 is that we merge all
+// cstring literals, even those referenced directly by non-private symbols.
+// ld64 is more conservative and does not do that. This was mostly done for
+// implementation simplicity; if we find programs that need the more
+// conservative behavior we can certainly implement that.
+class CStringInputSection final : public InputSection {
+public:
+  CStringInputSection(StringRef segname, StringRef name, InputFile *file,
+                      ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
+      : InputSection(CStringLiteralKind, segname, name, file, data, align,
+                     flags) {}
+  uint64_t getOffset(uint64_t off) const override;
+  bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
+  void markLive(uint64_t off) override { getStringPiece(off).live = true; }
+  // Find the StringPiece that contains this offset.
+  StringPiece &getStringPiece(uint64_t off);
+  const StringPiece &getStringPiece(uint64_t off) const;
+  // Split at each null byte.
+  void splitIntoPieces();
+
+  LLVM_ATTRIBUTE_ALWAYS_INLINE
+  StringRef getStringRef(size_t i) const { // Piece i, up to the next piece.
+    size_t begin = pieces[i].inSecOff;
+    size_t end =
+        (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff;
+    return toStringRef(data.slice(begin, end - begin));
+  }
+
+  // Returns i'th piece as a CachedHashStringRef. This function is very hot when
+  // string merging is enabled, so we want to inline.
+  LLVM_ATTRIBUTE_ALWAYS_INLINE
+  llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
+    assert(config->dedupLiterals);
+    return {getStringRef(i), pieces[i].hash};
+  }
+
+  static bool classof(const InputSection *isec) {
+    return isec->kind() == CStringLiteralKind;
+  }
+
+  std::vector<StringPiece> pieces;
+};
+
+class WordLiteralInputSection final : public InputSection {
+public:
+  WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file,
+                          ArrayRef<uint8_t> data, uint32_t align,
+                          uint32_t flags);
+  uint64_t getOffset(uint64_t off) const override;
+  bool isLive(uint64_t off) const override {
+    return live[off >> power2LiteralSize];
+  }
+  void markLive(uint64_t off) override { live[off >> power2LiteralSize] = 1; }
+
+  static bool classof(const InputSection *isec) {
+    return isec->kind() == WordLiteralKind;
+  }
+
+private:
+  unsigned power2LiteralSize; // Shift turning a byte offset into a bit index.
+  // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
+  llvm::BitVector live;
 };
 
 inline uint8_t sectionType(uint32_t flags) {
@@ -83,9 +270,72 @@ inline bool isDebugSection(uint32_t flags) {
          llvm::MachO::S_ATTR_DEBUG;
 }
 
-bool isCodeSection(InputSection *);
+inline bool isWordLiteralSection(uint32_t flags) { // 4/8/16-byte literals.
+  return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
+         sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
+         sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
+}
+
+bool isCodeSection(const InputSection *);
+
+bool isCfStringSection(const InputSection *);
+
+extern std::vector<ConcatInputSection *> inputSections;
+
+namespace section_names { // Section-name string constants.
+
+constexpr const char authGot[] = "__auth_got";
+constexpr const char authPtr[] = "__auth_ptr";
+constexpr const char binding[] = "__binding";
+constexpr const char bitcodeBundle[] = "__bundle";
+constexpr const char cString[] = "__cstring";
+constexpr const char cfString[] = "__cfstring";
+constexpr const char codeSignature[] = "__code_signature";
+constexpr const char common[] = "__common";
+constexpr const char compactUnwind[] = "__compact_unwind";
+constexpr const char data[] = "__data";
+constexpr const char debugAbbrev[] = "__debug_abbrev";
+constexpr const char debugInfo[] = "__debug_info";
+constexpr const char debugStr[] = "__debug_str";
+constexpr const char ehFrame[] = "__eh_frame";
+constexpr const char export_[] = "__export"; // '_': `export` is a keyword.
+constexpr const char dataInCode[] = "__data_in_code";
+constexpr const char functionStarts[] = "__func_starts";
+constexpr const char got[] = "__got";
+constexpr const char header[] = "__mach_header";
+constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
+constexpr const char const_[] = "__const"; // '_': `const` is a keyword.
+constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
+constexpr const char lazyBinding[] = "__lazy_binding";
+constexpr const char literals[] = "__literals";
+constexpr const char moduleInitFunc[] = "__mod_init_func";
+constexpr const char moduleTermFunc[] = "__mod_term_func";
+constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
+constexpr const char objcCatList[] = "__objc_catlist";
+constexpr const char objcClassList[] = "__objc_classlist";
+constexpr const char objcConst[] = "__objc_const";
+constexpr const char objcImageInfo[] = "__objc_imageinfo";
+constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
+constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
+constexpr const char objcProtoList[] = "__objc_protolist";
+constexpr const char pageZero[] = "__pagezero";
+constexpr const char pointers[] = "__pointers";
+constexpr const char rebase[] = "__rebase";
+constexpr const char staticInit[] = "__StaticInit";
+constexpr const char stringTable[] = "__string_table";
+constexpr const char stubHelper[] = "__stub_helper";
+constexpr const char stubs[] = "__stubs";
+constexpr const char swift[] = "__swift";
+constexpr const char symbolTable[] = "__symbol_table";
+constexpr const char textCoalNt[] = "__textcoal_nt";
+constexpr const char text[] = "__text";
+constexpr const char threadPtrs[] = "__thread_ptrs";
+constexpr const char threadVars[] = "__thread_vars";
+constexpr const char unwindInfo[] = "__unwind_info";
+constexpr const char weakBinding[] = "__weak_binding";
+constexpr const char zeroFill[] = "__zerofill";
 
-extern std::vector<InputSection *> inputSections;
+} // namespace section_names
 
 } // namespace macho